SVN: toys/fun: . rsget.pl
sparky
sparky at pld-linux.org
Sun Dec 28 01:11:14 CET 2008
Author: sparky
Date: Sun Dec 28 01:11:14 2008
New Revision: 10041
Added:
toys/fun/
toys/fun/rsget.pl (contents, props changed)
Log:
- not related to PLD
Added: toys/fun/rsget.pl
==============================================================================
--- (empty file)
+++ toys/fun/rsget.pl Sun Dec 28 01:11:14 2008
@@ -0,0 +1,205 @@
#!/usr/bin/perl
#
# 2008 (c) Przemysław Iskra <sparky at pld-linux.org>
# Use/modify/distribute under GPL v2 or newer.
#
use strict;
use warnings;
use WWW::Curl::Easy;

# Unbuffer STDOUT so the "\r"-overwritten progress lines show up immediately.
$| = 1;

# Request headers sent with every transfer; they mimic a desktop browser
# (Iceape on Linux/PPC) so the downloads look like ordinary browsing.
my @browser_headers = (
	'User-Agent: Mozilla/5.0 (X11; U; Linux ppc; ca-AD; rv:1.8.1.17) Gecko/20080926 PLD/3.0 (Th) Iceape/1.1.12',
	'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
	'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept-Language: ca,en-us;q=0.7,en;q=0.3',
);
my $curl_headers = \@browser_headers;
+
# libcurl write callback used when a download is saved to disk.
# Writes each received chunk to $self->{file} and prints a one-line
# progress report (bytes, optional percentage, average KB/s) to STDOUT.
# Must return the chunk length, otherwise libcurl aborts the transfer.
sub body_file {
	my ($chunk, $self) = @_;

	# First chunk: ask curl for the expected size and note the start time.
	unless ( exists $self->{total} ) {
		my $info = $self->{curl}->getinfo(
			CURLINFO_CONTENT_LENGTH_DOWNLOAD
		);
		# libcurl reports 0 or -1 when the length is unknown;
		# normalize both to '?' (the original let -1 through and
		# '?' later triggered "isn't numeric" warnings).
		$self->{total} = ($info && $info > 0) ? $info : '?';
		$self->{start} = time;
	}
	my $len = length $chunk;
	$self->{got} += $len;

	my $file = $self->{file};
	print $file $chunk;

	my $size = "$self->{got} / $self->{total}";
	# A percentage only makes sense when the total size is known.
	if ( $self->{total} ne '?' ) {
		$size .= sprintf " [%.2f%%]", $self->{got} * 100 / $self->{total};
	}
	my $speed = "???";
	my $time = time;
	if ( $time > $self->{start} ) {
		$speed = sprintf "%.2f", $self->{got} / ($time - $self->{start}) / 1024;
	}

	print "\r$self->{fn}: $size; ${speed}KB/s    ";

	return $len;
}
+
# libcurl write callback used when a download is kept in memory:
# appends the incoming chunk to the scalar referenced by $buf_ref and
# reports how many bytes were consumed (libcurl's callback contract).
sub body_scalar {
	my ($chunk, $buf_ref) = @_;
	${$buf_ref} .= $chunk;
	return length $chunk;
}
+
# Sink for response headers we do not care about (lexical handle,
# checked open — the original used an unchecked bareword NULL handle).
open my $null_fh, '>', '/dev/null'
	or die "can't open /dev/null: $!\n";

# Perform one HTTP request with libcurl.
#
# Arguments: the URL, then options as key => value pairs:
#   referer => string - send this Referer header
#   post    => string - POST these urlencoded fields instead of GET
#   file    => path   - stream the body to this file, with progress output
#   net     => string - tag prepended to the file name in progress lines
#
# Returns the response body as a string or, when saving to a file, the
# content length reported by curl.  Returns undef on any error.
sub curl
{
	my $url = shift;
	my %opts = @_;

	my $curl = WWW::Curl::Easy->new;

	$curl->setopt(CURLOPT_WRITEHEADER, $null_fh);
	$curl->setopt(CURLOPT_MAXREDIRS, 10);
	$curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
	$curl->setopt(CURLOPT_HTTPHEADER, $curl_headers);
	$curl->setopt(CURLOPT_URL, $url);
	$curl->setopt(CURLOPT_REFERER, $opts{referer})
		if $opts{referer};
	$curl->setopt(CURLOPT_ENCODING, 'gzip,deflate');

	if ( $opts{post} ) {
		$curl->setopt(CURLOPT_POST, 1);
		$curl->setopt(CURLOPT_POSTFIELDS, $opts{post});
	}

	my $body;
	if ( $opts{file} ) {
		# Stream the body straight to disk; body_file handles the
		# progress display.  Fail early if the target can't be opened
		# (the original silently printed to a dead handle).
		open my $f_out, '>', $opts{file} or do {
			print "error: can't write $opts{file}: $!\n";
			return undef;
		};
		$curl->setopt(CURLOPT_WRITEFUNCTION, \&body_file);
		$curl->setopt(CURLOPT_FILE, {
			curl => $curl,
			got  => 0,
			file => $f_out,
			fn   => ($opts{net} || "") . $opts{file},
		});
	} else {
		# Collect the body into a scalar and return it.
		$curl->setopt(CURLOPT_WRITEFUNCTION, \&body_scalar);
		$curl->setopt(CURLOPT_FILE, \$body);
	}

	if ( $curl->perform != 0 ) {
		my $err = $curl->errbuf;
		print "error: $err\n";
		return undef;
	}

	if ( $opts{file} ) {
		return $curl->getinfo(
			CURLINFO_CONTENT_LENGTH_DOWNLOAD
		);
	}
	return $body;
}
+
# Download one rapidshare.com file as a free user.
#
# Walks the free-download form chain: fetch the landing page, submit the
# "Free" form, honour either the "download limit reached" waiting period
# or the mandatory countdown, then fetch the file itself.  Retries up to
# 10 times on transfer errors or unexpected pages (the original used a
# goto label for this and used $1 from possibly-failed matches, which
# could yield a stale link or `sleep undef`).
#
# Returns the local file name on success, undef when all tries fail.
# Dies if rapidshare refuses because another download is in progress.
sub rsget
{
	my $file = shift;
	(my $fn = $file) =~ s#^.*/##;	# local name: URL minus the path

	for my $try ( 1 .. 10 ) {
		print "\r[RS] $fn: (re)starting...   ";
		my $body = curl( $file );
		next unless $body;

		# The landing page contains the free/premium chooser form.
		next unless $body =~ /form id="ff" action="(.*?)"/m;
		my $link = $1;

		sleep 1 + rand 5;	# look a little less like a bot

		$body = curl( $link, post => 'dl.start=Free' );
		next unless $body;

		if ( $body =~ /Please wait until the download is completed/m ) {
			die "Multi-download not allowed\n";
		}
		if ( $body =~ /You have reached the download limit for free-users\./m ) {
			# The server says how long until the next free slot;
			# fall back to 1 minute if the number can't be found.
			my ($m) = $body =~ /Instant download access! Or try again in about ([0-9]+) minutes\./m;
			for ( my $i = $m || 1; $i > 0; $i-- ) {
				print "\r[RS] $fn: waiting $i minutes   ";
				sleep 60;
			}
			sleep 20;
			next;
		}

		# Mandatory free-user countdown before the link becomes valid.
		if ( $body =~ /var c=([0-9]+);/m ) {
			my $wait = $1;
			print "\r[RS] $fn: waiting $wait seconds   ";
			sleep $wait;
		}

		# The countdown page carries the real download form.
		next unless $body =~ /form name="dlf" action="(.*?)"/m;
		$link = $1;

		curl( $link, post => 'mirror=on', file => $fn, net => '[RS] ' );
		print "DONE!\n";

		return $fn;
	}
	return undef;
}
+
# --- main loop ----------------------------------------------------------
# The list file holds one rapidshare URL per line.  On every pass the
# list is rewritten (finished/invalid entries commented out), then the
# first pending URL is downloaded; with nothing pending we block until
# the list file's mtime changes.
my $get_list = shift @ARGV || 'get.list';
print "Using '$get_list' file list\n";
-r $get_list || die "Can't read the list\n";
my %gotlist;	# url => downloaded file name

for (;;) {
	my @rewritten;
	my $list_mtime = (stat $get_list)[9];
	my $pending_url;
	my $handler;

	open my $in, '<', $get_list;
	while ( my $line = <$in> ) {
		# Blank lines and comments always pass through; once a URL
		# has been picked, the rest of the file is copied untouched.
		if ( $handler or $line =~ /^\s*(#.*)?$/ ) {
			push @rewritten, $line;
			next;
		}
		if ( $line =~ m/^\s*(http:\/\/rapidshare\.com\/.*?)\s*$/ ) {
			my $url = $1;
			if ( exists $gotlist{$url} ) {
				# Fetched on an earlier pass: comment it out.
				push @rewritten, "# " . $line;
			} else {
				$pending_url = $url;
				$handler = \&rsget;
				push @rewritten, $url . "\n";
			}
			next;
		}
		push @rewritten, "# invalid url: $line";
		print "\rinvalid url: $line";
	}
	close $in;

	if ( -e ".${get_list}.swp" ) {
		# Somebody is editing the list in vim: leave the file alone.
		print "\rvim swap exists, not rewriting\n";
	} else {
		open my $out, '>', $get_list;
		print $out @rewritten;
		close $out;
	}

	if ( $pending_url and $handler ) {
		if ( my $saved = $handler->( $pending_url ) ) {
			$gotlist{$pending_url} = $saved;
		}
	} else {
		# Nothing to download: wait until the list file changes.
		print "\rwaiting for urls";
		sleep 5 while $list_mtime == (stat $get_list)[9];
	}
}
More information about the pld-cvs-commit
mailing list