SVN: toys/fun/rsget.pl

sparky sparky at pld-linux.org
Mon Jan 5 17:30:07 CET 2009


Author: sparky
Date: Mon Jan  5 17:30:06 2009
New Revision: 10049

Modified:
   toys/fun/rsget.pl
Log:
- parrallel download
- added netload support (needs gocr and ocrad for captchas)


Modified: toys/fun/rsget.pl
==============================================================================
--- toys/fun/rsget.pl	(original)
+++ toys/fun/rsget.pl	Mon Jan  5 17:30:06 2009
@@ -5,9 +5,67 @@
 #
 use strict;
 use warnings;
-use WWW::Curl::Easy;
+use Time::HiRes;
+
+my $checklist = 1;
+my %gotlist;
+
+package Line; # {{{
 
 $| = 1;
+my $actual_line = 0;
+my $max_line = 0;
+my $columns = $ENV{COLUMNS} || 124; # XXX
+
+sub new
+{
+	my $proto = shift;
+    my $class = ref( $proto ) || $proto;
+
+	my $steps = $max_line - $actual_line;
+	$actual_line = $max_line;
+	my $move = "";
+
+	if ( $steps < 0 ) {
+		return undef;
+	} elsif ( $steps > 0 ) {
+		$move = "\033[" . $steps . "B";
+	}
+
+	print $move . "\n\r\033[K";
+
+	my $line = $max_line++;
+	my $self = \$line;
+	return bless $self, $class;
+}
+
+sub print
+{
+	my $self = shift;
+	my $text = shift;
+	my $line = $$self;
+
+	return undef if $line >= $max_line;
+	my $steps = $line - $actual_line;
+	$actual_line = $line;
+	my $move = "";
+
+	if ( $steps < 0 ) {
+		$move = "\033[" . (-$steps) . "A";
+	} elsif ( $steps > 0 ) {
+		$move = "\033[" . $steps . "B";
+	}
+	if ( length $text > $columns ) {
+		$text = (substr $text, 0, $columns - 4) . '...';
+	}
+	
+	print $move . "\r\033[K" . $text;
+}
+
+# }}}
+package Curl; # {{{
+use WWW::Curl::Easy;
+use WWW::Curl::Multi;
 
 my $curl_headers = [
 	'User-Agent: Mozilla/5.0 (X11; U; Linux ppc; ca-AD; rv:1.8.1.17) Gecko/20080926 PLD/3.0 (Th) Iceape/1.1.12',
@@ -16,61 +74,106 @@
 	'Accept-Language: ca,en-us;q=0.7,en;q=0.3',
 	];
 
-sub body_file {
-	my ($chunk, $self) = @_;
+sub file_init
+{
+	my $self = shift;
+	my $curl = $self->{curl};
 
-	unless ( exists $self->{total} ) {
-		my $info = $self->{curl}->getinfo( CURLINFO_CONTENT_LENGTH_DOWNLOAD );
-		$self->{total} = $info || '?';
-		$self->{start} = time;
+	{
+		my $f_len = $curl->getinfo( CURLINFO_CONTENT_LENGTH_DOWNLOAD );
+		$self->{size_total} = $f_len || 0;
+		$self->{size_got} = 0;
 	}
-	my $len = length $chunk;
-	$self->{got} += $len;
 
-	my $file = $self->{file};
-	print $file $chunk;
+	$self->{time_start} = time;
 
-	my $size = "$self->{got} / $self->{total}";
-	my $eta = "";
-	my $time = time;
-	if ( int $self->{total} > 0 ) {
-		$size .= sprintf " [%.2f%%]", $self->{got} * 100 / $self->{total};
-		if ( $time > $self->{start} ) {
-			my $tleft = ($self->{total} - $self->{got}) / $self->{got} * ($time - $self->{start});
-			$eta = sprintf "%2d:%.2d", $tleft / 60, $tleft % 60;
+	if ( $self->{head} =~ /^Content-Disposition:\s*attachment;\s*filename\s*=\s*"?(.*?)"?\s*$/im ) {
+		$self->{file_name} = $1;
+	} else {
+		my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
+		$eurl =~ s#^.*/##;
+		$self->{file_name} = $eurl;
+	}
+
+	{
+		my $fn = $self->{file_name};
+		if ( length $fn >= $columns - 30 ) {
+			my $l = int ( ( $columns - 34 ) / 2 );
+			$fn =~ s/^(.{$l}).*(.{$l})$/$1...$2/;
 		}
+
+		my $net = $self->{obj}->{net};
+		$self->{obj}->{netname} = "[$net] $fn: ";
 	}
-	my $speed = "???";
-	$speed = sprintf "%.2f", $self->{got} / ($time - $self->{start}) / 1024
-		if $time > $self->{start};
 
-	print "\r$self->{fn}: $size; ${speed}KB/s $eta  ";
+	{
+		open my $f_out, '>', $self->{file_name};
+		$self->{file} = $f_out;
+	}
+}
+
+sub body_file
+{
+	my ($chunk, $self) = @_;
+
+	file_init( $self ) unless exists $self->{size_total};
+
+	my $len = length $chunk;
+	$self->{size_got} += $len;
+
+	my $file = $self->{file};
+	print $file $chunk;
 
 	return $len;
 }
 
-sub body_scalar {
+sub body_scalar
+{
 	my ($chunk, $scalar) = @_;
 	$$scalar .= $chunk;
 	return length $chunk;
 }
 
-open NULL, '>', '/dev/null';
-sub curl
+
+my $mcurl = new WWW::Curl::Multi;
+my %curling;
+
+sub start
 {
 	my $url = shift;
+	my $obj = shift;
 	my %opts = @_;
 
 	my $curl = new WWW::Curl::Easy;
 
-	$curl->setopt( CURLOPT_WRITEHEADER, \*NULL);
-	$curl->setopt( CURLOPT_MAXREDIRS, 10);
-	$curl->setopt( CURLOPT_FOLLOWLOCATION, 1);
-	$curl->setopt( CURLOPT_HTTPHEADER, $curl_headers);
-	$curl->setopt( CURLOPT_URL, $url);
-	$curl->setopt( CURLOPT_REFERER, $opts{referer})
+	my $id = 1;
+	++$id while exists $curling{ $id };
+
+	my $ecurl = {
+		curl => $curl,
+		id => $id,
+		got => 0,
+		head => "",
+		body => "",
+		obj => $obj,
+	};
+
+    $curl->setopt( CURLOPT_PRIVATE, $id );
+
+	if ( $opts{cookies} ) {
+		$curl->setopt( CURLOPT_COOKIEJAR, $opts{cookies} );
+		$curl->setopt( CURLOPT_COOKIEFILE, $opts{cookies} )
+			if -r $opts{cookies};
+	}
+	$curl->setopt( CURLOPT_HEADERFUNCTION, \&body_scalar );
+	$curl->setopt( CURLOPT_WRITEHEADER, \$ecurl->{head} );
+	$curl->setopt( CURLOPT_MAXREDIRS, 10 );
+	$curl->setopt( CURLOPT_FOLLOWLOCATION, 1 );
+	$curl->setopt( CURLOPT_HTTPHEADER, $curl_headers );
+	$curl->setopt( CURLOPT_URL, $url );
+	$curl->setopt( CURLOPT_REFERER, $opts{referer} )
 		if $opts{referer};
-	$curl->setopt( CURLOPT_ENCODING, 'gzip,deflate');
+	$curl->setopt( CURLOPT_ENCODING, 'gzip,deflate' );
 
 	if ( $opts{post} ) {
 		my $post = $opts{post};
@@ -78,103 +181,654 @@
 		$curl->setopt( CURLOPT_POSTFIELDS, $post );
 	}
 
-	my $body;
-	if ( $opts{file} ) {
-		$curl->setopt( CURLOPT_WRITEFUNCTION, \&body_file);
-		$body = {
-			curl => $curl,
-			got => 0,
-			fn => ($opts{net} || "").$opts{file}
-		};
-		open my $f_out, '>', $opts{file};
-		$body->{file} = $f_out;
-		$curl->setopt( CURLOPT_FILE, $body );
+	if ( $opts{save} ) {
+		$curl->setopt( CURLOPT_WRITEFUNCTION, \&body_file );
+		$curl->setopt( CURLOPT_WRITEDATA, $ecurl );
 	} else {
-		$curl->setopt( CURLOPT_WRITEFUNCTION, \&body_scalar);
-		$curl->setopt( CURLOPT_FILE, \$body);
+		$curl->setopt( CURLOPT_WRITEFUNCTION, \&body_scalar );
+		$curl->setopt( CURLOPT_WRITEDATA, \$ecurl->{body} );
 	}
 
-	if ( $curl->perform != 0 ) {
-		my $err = $curl->errbuf;
-		print "error: $err\n";
+	$curling{ $id } = $ecurl;
+    $mcurl->add_handle( $curl );
+}
+
+sub finish
+{
+	my $id = shift;
+	my $err = shift;
+
+	my $ecurl = $curling{ $id };
+	delete $curling{ $id };
+
+	my $curl = $ecurl->{curl};
+	delete $ecurl->{curl}; # remove circular dep
+
+	my $obj = $ecurl->{obj};
+	delete $ecurl->{obj};
+
+	if ( $ecurl->{file} ) {
+		close $ecurl->{file};
+		$obj->print( donemsg( $ecurl ) );
+	}
+
+	if ( $err ) {
+		my $error = $curl->errbuf;
+		$obj->print( "error: $err" );
+		$obj->stage1();
 		return undef;
 	}
 
-	if ( $opts{file} ) {
-		close $body->{file};
-		delete $body->{curl}; # remove circular dep
-		return $curl->getinfo( CURLINFO_CONTENT_LENGTH_DOWNLOAD );
+	if ( $obj->{curl_next} ) {
+		my $func = $obj->{curl_next};
+		my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
+		my $body = $ecurl->{body};
+		
+		&$func( $obj, $body, $eurl );
 	}
-	return $body;
 }
 
-sub rsget
+sub perform
 {
-	my $file = shift;
-	(my $fn = $file) =~ s#^.*/##;
-	my $try = 10;
+	my $running = scalar keys %curling;
+	return unless $running;
+	my $act = $mcurl->perform();
+	return if $act == $running;
+
+	while ( my ($id, $rv) = $mcurl->info_read() ) {
+		next unless $id;
 
-rsget_restart:
-	$try -= 1;
-	if ( $try <= 0 ) {
-		return "problems";
+		finish( $id, $rv );
 	}
+}
+
+sub print_status
+{
+	my $time = time;
+
+	foreach my $ecurl ( values %curling ) {
+		next unless exists $ecurl->{size_total};
+		my $size_got = $ecurl->{size_got};
+		my $size_total = $ecurl->{size_total};
+
+		my $size = "$size_got / $size_total";
+		my $eta = "";
+		my $time_diff = $time - $ecurl->{time_start};
+		if ( $size_total > 0 ) {
+			$size .= sprintf " [%.2f%%]", $size_got * 100 / $size_total;
+			if ( $time_diff > 0 ) {
+				my $tleft = ($size_total - $size_got) * $time_diff / $size_got;
+				$eta = main::s2string( $tleft );
+			}
+		}
+		my $speed = "???";
+		$speed = sprintf "%.2f", $size_got / ( $time_diff * 1024 )
+			if $time_diff > 0;
+
+		$ecurl->{obj}->print( "$size; ${speed}KB/s $eta" );
+	}
+}
+
+sub donemsg
+{
+	my $ecurl = shift;
+
+	my $size_total = $ecurl->{size_total};
+
+	my $time_diff = time() - $ecurl->{time_start};
+	my $eta = main::s2string( $time_diff );
+	my $speed = sprintf "%.2f", $size_total / ( $time_diff * 1024 );
+
+	return "DONE $size_total; ${speed}KB/s $eta";
+}
+
+
+# }}}
+package Wait; # {{{
+
+my %waiting;
+
+sub start
+{
+	my $obj = shift;
+	my $time = shift;
+	$obj->{wait_until} = time + $time;
+
+	my $id = 0;
+	++$id while exists $waiting{ $id };
+	$waiting{ $id } = $obj;
+}
+
+sub finish
+{
+	my $id = shift;
+
+	my $obj = $waiting{ $id };
+	delete $waiting{ $id };
+
+	my $func = $obj->{wait_next};
+		
+	&$func( $obj );
+}
+
+sub perform
+{
+	my $time = time;
+
+	foreach my $id ( keys %waiting ) {
+		my $obj = $waiting{ $id };
+		my $left = $obj->{wait_until} - $time;
+		if ( $left <= 0 ) {
+			finish( $id );
+		} else {
+			$obj->print( $obj->{wait_msg} . main::s2string( $left ) );
+		}
+	}
+}
+
+# }}}
+package Get; # {{{
+
+my $slots = 1;
+my %running;
+sub makenew
+{
+	my $net = shift;
+	my $class = shift;
+	my $url = shift;
+
+	return {} if $gotlist{ $url };
+	$running{ $net } = {} unless exists $running{ $net };
+	my $rn = $running{ $net };
+	return {} if $slots <= scalar keys %$rn;
+
+	my $id = 1;
+	++$id while exists $rn->{ $id };
+
+	my $line = new Line;
+
+	( my $fn = $url ) =~ s#/+$##; ## vim
+	$fn =~ s#^.*/##;
+
+	my $self = {
+		@_,
+		url => $url,
+		id => $id,
+		try => 10,
+		line => $line,
+		net => $net,
+		netname => "[$net] $fn: ",
+	};
+
+	$rn->{ $id } = bless $self, $class;
+
+	$self->stage1();
+	return $self;
+}
+
+sub print
+{
+	my $self = shift;
+	my $text = shift;
+	my $line = $self->{line};
+	$line->print( $self->{netname} . $text );
+}
+
+sub curl
+{
+	my $self = shift;
+	my $url = shift;
+	my $next_stage = shift;
+
+	$self->{curl_next} = $next_stage;
+	Curl::start( $url, $self, @_ );
+}
+
+sub wait
+{
+	my $self = shift;
+	my $time = shift;
+	my $next_stage = shift;
+	my $msg = shift || "waiting";
+
+	$self->{wait_next} = $next_stage;
+	$self->{wait_msg} = $msg . " ";
+	Wait::start( $self, $time );
+}
+
+sub finish
+{
+	my $self = shift;
+
+	my $url = $self->{url};
+	$gotlist{ $url } = 1;
+
+	my $net = $self->{net};
+	my $id = $self->{id};
+	delete $running{ $net }->{ $id };
+
+	$checklist = 1;
+}
+
+sub download
+{
+	my $self = shift;
+
+	$self->{curl_next} = \&finish;
+	$self->print("starting download");
+	Curl::start( $self->{file_url}, $self, save => 1, @_ );
+}
+
+sub error
+{
+	my $self = shift;
+	my $msg = shift;
+
+	$self->print( $msg );
+}
+
+# }}}
+package Get::RapidShare; # {{{
+
+BEGIN {
+	our @ISA;
+	@ISA = qw(Get);
+}
+
+sub new
+{
+	my $proto = shift;
+	my $class = ref $proto || $proto;
+	my $url = shift;
<<diff output has been trimmed to 500 lines, 509 line(s) remained.>>


More information about the pld-cvs-commit mailing list