SVN: toys/fun/rsget.pl
sparky
sparky at pld-linux.org
Mon Jan 5 17:30:07 CET 2009
Author: sparky
Date: Mon Jan 5 17:30:06 2009
New Revision: 10049
Modified:
toys/fun/rsget.pl
Log:
- parrallel download
- added netload support (needs gocr and ocrad for captchas)
Modified: toys/fun/rsget.pl
==============================================================================
--- toys/fun/rsget.pl (original)
+++ toys/fun/rsget.pl Mon Jan 5 17:30:06 2009
@@ -5,9 +5,67 @@
#
use strict;
use warnings;
-use WWW::Curl::Easy;
+use Time::HiRes;
+
+my $checklist = 1;
+my %gotlist;
+
+package Line; # {{{
$| = 1;
+my $actual_line = 0;
+my $max_line = 0;
+my $columns = $ENV{COLUMNS} || 124; # XXX
+
+sub new
+{
+ my $proto = shift;
+ my $class = ref( $proto ) || $proto;
+
+ my $steps = $max_line - $actual_line;
+ $actual_line = $max_line;
+ my $move = "";
+
+ if ( $steps < 0 ) {
+ return undef;
+ } elsif ( $steps > 0 ) {
+ $move = "\033[" . $steps . "B";
+ }
+
+ print $move . "\n\r\033[K";
+
+ my $line = $max_line++;
+ my $self = \$line;
+ return bless $self, $class;
+}
+
+sub print
+{
+ my $self = shift;
+ my $text = shift;
+ my $line = $$self;
+
+ return undef if $line >= $max_line;
+ my $steps = $line - $actual_line;
+ $actual_line = $line;
+ my $move = "";
+
+ if ( $steps < 0 ) {
+ $move = "\033[" . (-$steps) . "A";
+ } elsif ( $steps > 0 ) {
+ $move = "\033[" . $steps . "B";
+ }
+ if ( length $text > $columns ) {
+ $text = (substr $text, 0, $columns - 4) . '...';
+ }
+
+ print $move . "\r\033[K" . $text;
+}
+
+# }}}
+package Curl; # {{{
+use WWW::Curl::Easy;
+use WWW::Curl::Multi;
my $curl_headers = [
'User-Agent: Mozilla/5.0 (X11; U; Linux ppc; ca-AD; rv:1.8.1.17) Gecko/20080926 PLD/3.0 (Th) Iceape/1.1.12',
@@ -16,61 +74,106 @@
'Accept-Language: ca,en-us;q=0.7,en;q=0.3',
];
-sub body_file {
- my ($chunk, $self) = @_;
+sub file_init
+{
+ my $self = shift;
+ my $curl = $self->{curl};
- unless ( exists $self->{total} ) {
- my $info = $self->{curl}->getinfo( CURLINFO_CONTENT_LENGTH_DOWNLOAD );
- $self->{total} = $info || '?';
- $self->{start} = time;
+ {
+ my $f_len = $curl->getinfo( CURLINFO_CONTENT_LENGTH_DOWNLOAD );
+ $self->{size_total} = $f_len || 0;
+ $self->{size_got} = 0;
}
- my $len = length $chunk;
- $self->{got} += $len;
- my $file = $self->{file};
- print $file $chunk;
+ $self->{time_start} = time;
- my $size = "$self->{got} / $self->{total}";
- my $eta = "";
- my $time = time;
- if ( int $self->{total} > 0 ) {
- $size .= sprintf " [%.2f%%]", $self->{got} * 100 / $self->{total};
- if ( $time > $self->{start} ) {
- my $tleft = ($self->{total} - $self->{got}) / $self->{got} * ($time - $self->{start});
- $eta = sprintf "%2d:%.2d", $tleft / 60, $tleft % 60;
+ if ( $self->{head} =~ /^Content-Disposition:\s*attachment;\s*filename\s*=\s*"?(.*?)"?\s*$/im ) {
+ $self->{file_name} = $1;
+ } else {
+ my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
+ $eurl =~ s#^.*/##;
+ $self->{file_name} = $eurl;
+ }
+
+ {
+ my $fn = $self->{file_name};
+ if ( length $fn >= $columns - 30 ) {
+ my $l = int ( ( $columns - 34 ) / 2 );
+ $fn =~ s/^(.{$l}).*(.{$l})$/$1...$2/;
}
+
+ my $net = $self->{obj}->{net};
+ $self->{obj}->{netname} = "[$net] $fn: ";
}
- my $speed = "???";
- $speed = sprintf "%.2f", $self->{got} / ($time - $self->{start}) / 1024
- if $time > $self->{start};
- print "\r$self->{fn}: $size; ${speed}KB/s $eta ";
+ {
+ open my $f_out, '>', $self->{file_name};
+ $self->{file} = $f_out;
+ }
+}
+
+sub body_file
+{
+ my ($chunk, $self) = @_;
+
+ file_init( $self ) unless exists $self->{size_total};
+
+ my $len = length $chunk;
+ $self->{size_got} += $len;
+
+ my $file = $self->{file};
+ print $file $chunk;
return $len;
}
-sub body_scalar {
+sub body_scalar
+{
my ($chunk, $scalar) = @_;
$$scalar .= $chunk;
return length $chunk;
}
-open NULL, '>', '/dev/null';
-sub curl
+
+my $mcurl = new WWW::Curl::Multi;
+my %curling;
+
+sub start
{
my $url = shift;
+ my $obj = shift;
my %opts = @_;
my $curl = new WWW::Curl::Easy;
- $curl->setopt( CURLOPT_WRITEHEADER, \*NULL);
- $curl->setopt( CURLOPT_MAXREDIRS, 10);
- $curl->setopt( CURLOPT_FOLLOWLOCATION, 1);
- $curl->setopt( CURLOPT_HTTPHEADER, $curl_headers);
- $curl->setopt( CURLOPT_URL, $url);
- $curl->setopt( CURLOPT_REFERER, $opts{referer})
+ my $id = 1;
+ ++$id while exists $curling{ $id };
+
+ my $ecurl = {
+ curl => $curl,
+ id => $id,
+ got => 0,
+ head => "",
+ body => "",
+ obj => $obj,
+ };
+
+ $curl->setopt( CURLOPT_PRIVATE, $id );
+
+ if ( $opts{cookies} ) {
+ $curl->setopt( CURLOPT_COOKIEJAR, $opts{cookies} );
+ $curl->setopt( CURLOPT_COOKIEFILE, $opts{cookies} )
+ if -r $opts{cookies};
+ }
+ $curl->setopt( CURLOPT_HEADERFUNCTION, \&body_scalar );
+ $curl->setopt( CURLOPT_WRITEHEADER, \$ecurl->{head} );
+ $curl->setopt( CURLOPT_MAXREDIRS, 10 );
+ $curl->setopt( CURLOPT_FOLLOWLOCATION, 1 );
+ $curl->setopt( CURLOPT_HTTPHEADER, $curl_headers );
+ $curl->setopt( CURLOPT_URL, $url );
+ $curl->setopt( CURLOPT_REFERER, $opts{referer} )
if $opts{referer};
- $curl->setopt( CURLOPT_ENCODING, 'gzip,deflate');
+ $curl->setopt( CURLOPT_ENCODING, 'gzip,deflate' );
if ( $opts{post} ) {
my $post = $opts{post};
@@ -78,103 +181,654 @@
$curl->setopt( CURLOPT_POSTFIELDS, $post );
}
- my $body;
- if ( $opts{file} ) {
- $curl->setopt( CURLOPT_WRITEFUNCTION, \&body_file);
- $body = {
- curl => $curl,
- got => 0,
- fn => ($opts{net} || "").$opts{file}
- };
- open my $f_out, '>', $opts{file};
- $body->{file} = $f_out;
- $curl->setopt( CURLOPT_FILE, $body );
+ if ( $opts{save} ) {
+ $curl->setopt( CURLOPT_WRITEFUNCTION, \&body_file );
+ $curl->setopt( CURLOPT_WRITEDATA, $ecurl );
} else {
- $curl->setopt( CURLOPT_WRITEFUNCTION, \&body_scalar);
- $curl->setopt( CURLOPT_FILE, \$body);
+ $curl->setopt( CURLOPT_WRITEFUNCTION, \&body_scalar );
+ $curl->setopt( CURLOPT_WRITEDATA, \$ecurl->{body} );
}
- if ( $curl->perform != 0 ) {
- my $err = $curl->errbuf;
- print "error: $err\n";
+ $curling{ $id } = $ecurl;
+ $mcurl->add_handle( $curl );
+}
+
+sub finish
+{
+ my $id = shift;
+ my $err = shift;
+
+ my $ecurl = $curling{ $id };
+ delete $curling{ $id };
+
+ my $curl = $ecurl->{curl};
+ delete $ecurl->{curl}; # remove circular dep
+
+ my $obj = $ecurl->{obj};
+ delete $ecurl->{obj};
+
+ if ( $ecurl->{file} ) {
+ close $ecurl->{file};
+ $obj->print( donemsg( $ecurl ) );
+ }
+
+ if ( $err ) {
+ my $error = $curl->errbuf;
+ $obj->print( "error: $err" );
+ $obj->stage1();
return undef;
}
- if ( $opts{file} ) {
- close $body->{file};
- delete $body->{curl}; # remove circular dep
- return $curl->getinfo( CURLINFO_CONTENT_LENGTH_DOWNLOAD );
+ if ( $obj->{curl_next} ) {
+ my $func = $obj->{curl_next};
+ my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
+ my $body = $ecurl->{body};
+
+ &$func( $obj, $body, $eurl );
}
- return $body;
}
-sub rsget
+sub perform
{
- my $file = shift;
- (my $fn = $file) =~ s#^.*/##;
- my $try = 10;
+ my $running = scalar keys %curling;
+ return unless $running;
+ my $act = $mcurl->perform();
+ return if $act == $running;
+
+ while ( my ($id, $rv) = $mcurl->info_read() ) {
+ next unless $id;
-rsget_restart:
- $try -= 1;
- if ( $try <= 0 ) {
- return "problems";
+ finish( $id, $rv );
}
+}
+
+sub print_status
+{
+ my $time = time;
+
+ foreach my $ecurl ( values %curling ) {
+ next unless exists $ecurl->{size_total};
+ my $size_got = $ecurl->{size_got};
+ my $size_total = $ecurl->{size_total};
+
+ my $size = "$size_got / $size_total";
+ my $eta = "";
+ my $time_diff = $time - $ecurl->{time_start};
+ if ( $size_total > 0 ) {
+ $size .= sprintf " [%.2f%%]", $size_got * 100 / $size_total;
+ if ( $time_diff > 0 ) {
+ my $tleft = ($size_total - $size_got) * $time_diff / $size_got;
+ $eta = main::s2string( $tleft );
+ }
+ }
+ my $speed = "???";
+ $speed = sprintf "%.2f", $size_got / ( $time_diff * 1024 )
+ if $time_diff > 0;
+
+ $ecurl->{obj}->print( "$size; ${speed}KB/s $eta" );
+ }
+}
+
+sub donemsg
+{
+ my $ecurl = shift;
+
+ my $size_total = $ecurl->{size_total};
+
+ my $time_diff = time() - $ecurl->{time_start};
+ my $eta = main::s2string( $time_diff );
+ my $speed = sprintf "%.2f", $size_total / ( $time_diff * 1024 );
+
+ return "DONE $size_total; ${speed}KB/s $eta";
+}
+
+
+# }}}
+package Wait; # {{{
+
+my %waiting;
+
+sub start
+{
+ my $obj = shift;
+ my $time = shift;
+ $obj->{wait_until} = time + $time;
+
+ my $id = 0;
+ ++$id while exists $waiting{ $id };
+ $waiting{ $id } = $obj;
+}
+
+sub finish
+{
+ my $id = shift;
+
+ my $obj = $waiting{ $id };
+ delete $waiting{ $id };
+
+ my $func = $obj->{wait_next};
+
+ &$func( $obj );
+}
+
+sub perform
+{
+ my $time = time;
+
+ foreach my $id ( keys %waiting ) {
+ my $obj = $waiting{ $id };
+ my $left = $obj->{wait_until} - $time;
+ if ( $left <= 0 ) {
+ finish( $id );
+ } else {
+ $obj->print( $obj->{wait_msg} . main::s2string( $left ) );
+ }
+ }
+}
+
+# }}}
+package Get; # {{{
+
+my $slots = 1;
+my %running;
+sub makenew
+{
+ my $net = shift;
+ my $class = shift;
+ my $url = shift;
+
+ return {} if $gotlist{ $url };
+ $running{ $net } = {} unless exists $running{ $net };
+ my $rn = $running{ $net };
+ return {} if $slots <= scalar keys %$rn;
+
+ my $id = 1;
+ ++$id while exists $rn->{ $id };
+
+ my $line = new Line;
+
+ ( my $fn = $url ) =~ s#/+$##; ## vim
+ $fn =~ s#^.*/##;
+
+ my $self = {
+ @_,
+ url => $url,
+ id => $id,
+ try => 10,
+ line => $line,
+ net => $net,
+ netname => "[$net] $fn: ",
+ };
+
+ $rn->{ $id } = bless $self, $class;
+
+ $self->stage1();
+ return $self;
+}
+
+sub print
+{
+ my $self = shift;
+ my $text = shift;
+ my $line = $self->{line};
+ $line->print( $self->{netname} . $text );
+}
+
+sub curl
+{
+ my $self = shift;
+ my $url = shift;
+ my $next_stage = shift;
+
+ $self->{curl_next} = $next_stage;
+ Curl::start( $url, $self, @_ );
+}
+
+sub wait
+{
+ my $self = shift;
+ my $time = shift;
+ my $next_stage = shift;
+ my $msg = shift || "waiting";
+
+ $self->{wait_next} = $next_stage;
+ $self->{wait_msg} = $msg . " ";
+ Wait::start( $self, $time );
+}
+
+sub finish
+{
+ my $self = shift;
+
+ my $url = $self->{url};
+ $gotlist{ $url } = 1;
+
+ my $net = $self->{net};
+ my $id = $self->{id};
+ delete $running{ $net }->{ $id };
+
+ $checklist = 1;
+}
+
+sub download
+{
+ my $self = shift;
+
+ $self->{curl_next} = \&finish;
+ $self->print("starting download");
+ Curl::start( $self->{file_url}, $self, save => 1, @_ );
+}
+
+sub error
+{
+ my $self = shift;
+ my $msg = shift;
+
+ $self->print( $msg );
+}
+
+# }}}
+package Get::RapidShare; # {{{
+
+BEGIN {
+ our @ISA;
+ @ISA = qw(Get);
+}
+
+sub new
+{
+ my $proto = shift;
+ my $class = ref $proto || $proto;
+ my $url = shift;
<<diff output has been trimmed to 500 lines, 509 line(s) remained.>>
More information about the pld-cvs-commit
mailing list