SVN: toys/rsget.pl: RSGet RSGet/Curl.pm RSGet/Dispatch.pm RSGet/FileList.pm RSGet/Get.pm RSGet/Line....

sparky sparky at pld-linux.org
Thu Aug 27 15:23:20 CEST 2009


Author: sparky
Date: Thu Aug 27 15:23:19 2009
New Revision: 10494

Added:
   toys/rsget.pl/RSGet/
   toys/rsget.pl/RSGet/Curl.pm
   toys/rsget.pl/RSGet/Dispatch.pm
   toys/rsget.pl/RSGet/FileList.pm
   toys/rsget.pl/RSGet/Get.pm
   toys/rsget.pl/RSGet/Line.pm
   toys/rsget.pl/RSGet/MicroHTTP.pm
   toys/rsget.pl/RSGet/Processor.pm
   toys/rsget.pl/RSGet/Tools.pm
Modified:
   toys/rsget.pl/rsget.pl
Log:
- mostly rewritten, with quite a few cool features
  and even more interesting bugs


Added: toys/rsget.pl/RSGet/Curl.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/Curl.pm	Thu Aug 27 15:23:19 2009
@@ -0,0 +1,380 @@
+package RSGet::Curl;
+
+use strict;
+use warnings;
+use RSGet::Tools;
+use RSGet::Line;
+use WWW::Curl::Easy;
+use WWW::Curl::Multi;
+use URI::Escape;
+use File::Copy;
+use Fcntl qw(SEEK_SET);
+
+my $curl_multi = new WWW::Curl::Multi;
+
+my $curl_headers = [
+	'User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10',
+	'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
+	'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+	'Accept-Language: en-us,en;q=0.5',
+	];
+
+my %active_curl;
+
+sub new
+{
+	my $uri = shift;
+	my $get_obj = shift;
+	my %opts = @_;
+
+	my $curl = new WWW::Curl::Easy;
+
+	my $id = 1;
+	++$id while exists $active_curl{ $id };
+
+	my $supercurl = {
+		curl => $curl,
+		id => $id,
+		get_obj => $get_obj,
+		got => 0,
+		head => "",
+		body => "",
+	};
+
+	$curl->setopt( CURLOPT_PRIVATE, $id );
+	$curl->setopt( CURLOPT_INTERFACE, $get_obj->{_outif} )
+		if $get_obj->{_outif};
+
+	if ( defined $get_obj->{_cookie} ) {
+		$curl->setopt( CURLOPT_COOKIEJAR, $get_obj->{_cookie} );
+		$curl->setopt( CURLOPT_COOKIEFILE, $get_obj->{_cookie} );
+	}
+	$curl->setopt( CURLOPT_HEADERFUNCTION, \&body_scalar );
+	$curl->setopt( CURLOPT_WRITEHEADER, \$supercurl->{head} );
+	$curl->setopt( CURLOPT_MAXREDIRS, 10 );
+	$curl->setopt( CURLOPT_FOLLOWLOCATION, 1 );
+	$curl->setopt( CURLOPT_HTTPHEADER, $curl_headers );
+	$curl->setopt( CURLOPT_URL, $uri );
+	$curl->setopt( CURLOPT_REFERER, $get_obj->{_referer} )
+		if defined $get_obj->{_referer};
+	$curl->setopt( CURLOPT_ENCODING, 'gzip,deflate' );
+	$curl->setopt( CURLOPT_CONNECTTIMEOUT, 20 );
+
+	if ( $opts{post} ) {
+		my $post = $opts{post};
+		$curl->setopt( CURLOPT_POST, 1 );
+		if ( ref $post and ref $post eq "HASH" ) {
+			$post = join "&",
+				map { uri_escape( $_ ) . "=" . uri_escape( $post->{$_} ) }
+				sort keys %$post;
+		}
+		$curl->setopt( CURLOPT_POSTFIELDS, $post );
+	}
+
+	if ( $opts{save} ) {
+		$curl->setopt( CURLOPT_WRITEFUNCTION, \&body_file );
+		$curl->setopt( CURLOPT_WRITEDATA, $supercurl );
+
+		# if file exists try to continue
+		my $fn = $get_obj->{_opts}->{fname};
+		if ( $fn and -r $fn ) {
+			my $got = (stat(_))[7];
+			#p "File '$fn' already exists, trying to continue at $got";
+			$curl->setopt( CURLOPT_RANGE, "$got-" );
+
+			$supercurl->{fname} = $fn;
+		}
+		delete $get_obj->{is_html};
+	} else {
+		$get_obj->{is_html} = 1;
+		$curl->setopt( CURLOPT_WRITEFUNCTION, \&body_scalar );
+		$curl->setopt( CURLOPT_WRITEDATA, \$supercurl->{body} );
+	}
+
+	$active_curl{ $id } = $supercurl;
+	$curl_multi->add_handle( $curl );
+}
+
+sub file_backup
+{
+	my $fn = shift;
+	return undef if $settings{no_backup};
+	return undef unless -r $fn;
+
+	if ( my $s = $settings{backup_suf} ) {
+		my $i = 1;
+		++$i while -r $fn . $s . $i;
+		return $fn . $s . $i;
+	}
+
+	my $ext = "";
+	$ext = $1 if $fn =~ s/(\..{3,5})$//;
+	my $i = 1;
+	++$i while -r "$fn-$i$ext";
+
+	return "$fn-$i$ext";
+}
+
+sub file_init
+{
+	my $supercurl = shift;
+	my $curl = $supercurl->{curl};
+	my $time = time;
+
+	hadd $supercurl,
+		time_start => $time,
+		time_stamp => [ $time, 0, $time, 0, $time, 0 ],
+		size_start => 0,
+		size_got => 0,
+		size_total => 0;
+
+	{
+		my $mime = $curl->getinfo( CURLINFO_CONTENT_TYPE );
+		if ( $mime =~ m#^text/html# ) {
+			$supercurl->{get_obj}->{is_html} = 1;
+			return;
+		}
+	}
+
+	if ( my $f_len = $curl->getinfo( CURLINFO_CONTENT_LENGTH_DOWNLOAD ) ) {
+		$supercurl->{size_total} = $f_len;
+	}
+
+	if ( my $fn = $supercurl->{fname} ) {
+		if ( $supercurl->{head} =~ m{^Content-Range:\s*bytes\s*(\d+)-(\d+)(/(\d+))?\s*$}im ) {
+			my ( $start, $stop ) = ( +$1, +$2 );
+			$supercurl->{size_total} = +$4 if $3;
+
+			my $old = file_backup( $fn );
+			my $old_msg = "";
+			if ( $old ) {
+				rename $fn, $old;
+				copy( $old, $fn ) || die "Cannot create backup file: $!";
+				$old_msg = ", backup saved as '$old'";
+			}
+
+			open my $f_out, '+<', $fn;
+			seek $f_out, $start, SEEK_SET;
+			p "Continuing '$fn' at " . bignum( $start ) . $old_msg;
+
+
+			hadd $supercurl,
+				file => $f_out,
+				size_start => $start,
+				size_got => $start,
+				time_stamp => [ $time, $start, $time, $start, $time, $start ];
+			return;
+		}
+	} elsif ( $supercurl->{head} =~ /^Content-Disposition:\s*attachment;\s*filename\s*=\s*"?(.*?)"?\s*$/im ) {
+		$supercurl->{fname} = de_ml( uri_unescape( $1 ) );
+	} else {
+		my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
+		$eurl =~ s#^.*/##;
+		$supercurl->{fname} = de_ml( uri_unescape( $eurl ) );
+	}
+
+	$supercurl->{get_obj}->set_fname( $supercurl->{fname} );
+
+	{
+		my $fn = $supercurl->{fname};
+		my $old = file_backup( $fn );
+		if ( $old ) {
+			p "'$fn' renamed to '$old'";
+			rename $fn, $old;
+		}
+		open my $f_out, '>', $fn;
+		$supercurl->{file} = $f_out;
+	}
+}
+
+sub body_file
+{
+	my ($chunk, $supercurl) = @_;
+
+	file_init( $supercurl ) unless exists $supercurl->{size_total};
+
+	my $len = length $chunk;
+	$supercurl->{size_got} += $len;
+
+	if ( my $file = $supercurl->{file} ) {
+		my $p = print $file $chunk;
+		die "\nCannot write data: $!\n" unless $p;
+	} else {
+		$supercurl->{body} .= $chunk;
+	}
+
+	return $len;
+}
+
+sub body_scalar
+{
+	my ($chunk, $scalar) = @_;
+	$$scalar .= $chunk;
+	return length $chunk;
+}
+
+
+
+
+sub finish
+{
+	my $id = shift;
+	my $err = shift;
+
+	my $supercurl = $active_curl{ $id };
+	delete $active_curl{ $id };
+
+	my $curl = $supercurl->{curl};
+	delete $supercurl->{curl}; # remove circular dep
+
+	my $get_obj = $supercurl->{get_obj};
+	delete $supercurl->{get_obj};
+
+	if ( $supercurl->{file} ) {
+		close $supercurl->{file};
+		$get_obj->print( donemsg( $supercurl ) );
+	}
+
+	if ( $err ) {
+		my $error = $curl->errbuf;
+		warn "error($err): $error\n";
+		$get_obj->print( "error($err): $error" );
+		if ( $error =~ /Couldn't bind to '(.*)'/ ) {
+			my $if = $1;
+			RSGet::Dispatch::remove_interface( $if, "Interface $if is dead" );
+			$get_obj->{_abort} = "Interface $if is dead";
+		} elsif ( $error =~ /transfer closed with (\d+) bytes remaining to read/ ) {
+			$get_obj->{_abort} = "PARTIAL, " . bignum( $1 ) . " bytes left";
+		}
+		$get_obj->problem();
+		return undef;
+	}
+
+	return unless $get_obj->{after_curl};
+
+	my $func = $get_obj->{after_curl};
+	if ( $supercurl->{file} ) {
+		$get_obj->{dlinfo} = sprintf 'DONE %s %s / %s',
+			$supercurl->{fname},
+			bignum( $supercurl->{size_got} ),
+			bignum( $supercurl->{size_total} );
+	} else {
+		$get_obj->{body} = $supercurl->{body};
+	}
+
+	$get_obj->get_finish( $curl->getinfo( CURLINFO_EFFECTIVE_URL ) );
+}
+
+sub need_run
+{
+	return scalar keys %active_curl;
+}
+
+sub maybe_abort
+{
+	my $time = time;
+	my $stall_time = $time - 120;
+	foreach my $id ( keys %active_curl ) {
+		my $supercurl = $active_curl{ $id };
+		my $get_obj = $supercurl->{get_obj};
+		if ( $get_obj->{_abort} ) {
+			my $curl = $supercurl->{curl};
+			$curl_multi->remove_handle( $curl );
+			finish( $id, "aborted" );
+		}
+		if ( ( $supercurl->{stalled_since} || $time ) < $stall_time ) {
+			my $curl = $supercurl->{curl};
+			$curl_multi->remove_handle( $curl );
+			finish( $id, "timeout" );
+		}
+	}
+}
+
+sub perform
+{
+	my $running = scalar keys %active_curl;
+	return unless $running;
+	my $act = $curl_multi->perform();
+	return if $act == $running;
+
+	while ( my ($id, $rv) = $curl_multi->info_read() ) {
+		next unless $id;
+
+		finish( $id, $rv );
+	}
+}
+
+my $avg_speed = 0;
+sub update_status
+{
+	my $time = time;
+	my $total_speed = 0;
+
+	foreach my $supercurl ( values %active_curl ) {
+		next unless exists $supercurl->{size_total};
+		my ($size_got, $size_total, $time_stamp ) =
+			@$supercurl{ qw(size_got size_total time_stamp) };
+
+		my $size = bignum( $size_got ) . " / " . bignum( $size_total );
+		$size .= sprintf " [%.2f%%]", $size_got * 100 / $size_total
+			if $size_total > 0;
+
+		if ( $time_stamp->[4] + 30 <= $time ) {
+			@$time_stamp[0..3] = @$time_stamp[2..5];
+			$time_stamp->[4] = $time;
+			$time_stamp->[5] = $size_got;
+		}
+
+		my $time_diff = $time - $time_stamp->[0];
+		my $size_diff = $size_got - $time_stamp->[1];
+
+		if ( $time_diff > 0 and $size_diff == 0 ) {
+			$supercurl->{stalled_since} ||= $time;
+			my $stime = s2string( $time - $supercurl->{stalled_since} );
+			$supercurl->{get_obj}->print( "$size; STALLED $stime" );
+			next;
+		}
+
+		my $speed = "???";
+		if ( $time_diff > 0 ) {
+			my $s = $size_diff / ( $time_diff * 1024 );
+			$speed = sprintf "%.2f", $s;
+			$total_speed += $s;
+		}
+
+		my $eta = "";
+		if ( $size_total > 0 and $time_diff > 0 and $size_diff > 0 ) {
+			my $tleft = ($size_total - $size_got) * $time_diff / $size_diff;
+			$eta = " " . s2string( $tleft );
+			delete $supercurl->{stalled_since}
+		}
+
+		$supercurl->{get_obj}->print( "$size; ${speed}KB/s$eta" );
+	}
+	$avg_speed = ($avg_speed * 9 + $total_speed) / 10;
+
+	my $running = scalar keys %active_curl;
+	RSGet::Line::status(
+		'running cURL' => $running,
+		'total speed' => ( sprintf '%.2fKB/s', $avg_speed )
+	);
+	return;
+}
+
+sub donemsg
+{
+	my $supercurl = shift;
+
+	my $size_diff = $supercurl->{size_got} - $supercurl->{size_start};
+	my $time_diff = time() - $supercurl->{time_start};
+	$time_diff = 1 unless $time_diff;
+	my $eta = s2string( $time_diff );
+	my $speed = sprintf "%.2f", $size_diff / ( $time_diff * 1024 );
+
+	return "DONE " . bignum( $supercurl->{size_got} ) . "; ${speed}KB/s $eta";
+}
+
+
+1;
+
+# vim:ts=4:sw=4

Added: toys/rsget.pl/RSGet/Dispatch.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/Dispatch.pm	Thu Aug 27 15:23:19 2009
@@ -0,0 +1,248 @@
+package RSGet::Dispatch;
+
+use strict;
+use warnings;
+use RSGet::Tools;
+
+our %downloading;
+our %downloaded;
+our %checking;
+our %checked; # HASH for valid, SCALAR if error
+our %resolving;
+our %resolved;
+
+my %working = (
+	get => \%downloading,
+	check => \%checking,
+	link => \%resolving,
+);
+my %finished = (
+	get => \%downloaded,
+	check => \%checked,
+	link => \%resolved,
+);
+
+my @interfaces;
+sub add_interface
+{
+	my $newifs = shift;
+	NEW_IP: foreach my $new_if ( split /[ ,]+/, $newifs ) {
+		foreach my $old_if ( @interfaces ) {
+			if ( $new_if eq $old_if ) {
+				print "Address $new_if already on the list\n";
+				next NEW_IP;
+			}
+		}
+		print "Adding $new_if interface/address\n";
+		push @interfaces, $new_if;
+	}
+}
+
+sub remove_interface
+{
+	my $if = shift;
+	my $reason = shift;
+	for ( my $i = 0; $i < @interfaces; $i++ ) {
+		next unless $interfaces[ $i ] eq $if;
+		my $removed = splice @interfaces, $i, 1;
+		warn "Removed interface '$removed': $reason\n";
+	}
+
+	die "No working interfaces left\n" unless @interfaces;
+}
+
+my %last_used;
+
+sub find_free_if
+{
+	my $pkg = shift;
+	my $working = shift;
+	my $slots = shift;
+
+	unless ( scalar @interfaces ) {
+		my $running = 0;
+		foreach ( values %$working ) {
+			$running++ if $_->{_pkg} eq $pkg
+		}
+		#p "running: $running / $slots";
+		return undef if $running >= $slots;
+		return "";
+	}
+
+	my %by_if = map { $_ => 0 } @interfaces;
+	foreach ( values %$working ) {
+		next unless $_->{_pkg} eq $pkg;
+		$by_if{ $_->{_outif} }++;
+	}
+
+	my $min = $slots;
+	grep { $min = $_ if $_ < $min } values %by_if;
+	return undef if $min >= $slots;
+
+	my $lu = $last_used{$pkg} ||= {};
+	my @min_if = sort {
+		my $_a = $lu->{ $a } || 0;
+		my $_b = $lu->{ $b } || 0;
+		$_a <=> $_b
+	} grep { $by_if{ $_ } <= $min } keys %by_if;
+	return $min_if[ 0 ];
+}
+
+sub mark_used
+{
+	my $obj = shift;
+	my $if = $obj->{_outif};
+	return unless $if;
+	my $pkg = $obj->{_pkg};
+	my $lu = $last_used{$pkg} ||= {};
+	$lu->{$if} = time;
+}
+
+sub is_error
+{
+	my $uri = shift;
+	my $c = $checked{ $uri };
+	return undef unless defined $c;
+	if ( $c and not ref $c ) {
+		return $c;
+	}
+	return 0;
+}
+sub is_ok
+{
+	my $uri = shift;
+	my $c = $checked{ $uri };
+	return undef unless defined $c;
+	if ( $c and ref $c and ref $c eq "HASH" ) {
+		return $c;
+	}
+	return 0;
+}
+
+sub finished
+{
+	my $obj = shift;
+	my $status = shift;
+
+	my ( $uri, $cmd ) = @$obj{ qw(_uri _cmd) };
+	my $working = $working{ $cmd };
+	delete $working->{ $uri };
+
+	if ( $status ) {
+		my $finished = $finished{ $cmd };
+		$finished->{ $uri } = $status;
+	}
+
+	$RSGet::FileList::reread = 1;
+}
+
+sub run
+{
+	my ( $cmd, $uri, $getter, $options ) = @_;
+	my $class = $getter->{class};
+	$cmd = "link" if $class eq "Link";
+	#p "run( $cmd, $uri, ... )";
+
+	my $finished = $finished{ $cmd };
+	my $f = $finished->{ $uri };
+	return $f if defined $f;
+	#p "-> not finished";
+
+	my $working = $working{ $cmd };
+	my $w = $working->{ $uri };
+	return $w if defined $w;
+	#p "-> not working";
+
+	my $pkg = $getter->{pkg};
+	my $outif = find_free_if( $pkg, $working, ($cmd eq "get" ? ($getter->{slots} || 1) : 5) );
+	return unless defined $outif;
+	#p "-> got if";
+
+	my $obj = RSGet::Get::new( $pkg, $cmd, $uri, $options, $outif );
+	$working->{ $uri } = $obj if $obj;
+	#p "run( $cmd, $uri, ... ) -> $obj" if $obj;
+	
+	$RSGet::FileList::reread = 1;
+
+	return $obj;
+}
+
+sub process
+{
+	my $getlist = shift;
+
+	my %num_by_pkg;
+	my %all_uris;
+	foreach my $line ( @$getlist ) {
+		my ( $uris, $options ) = @$line;
+		foreach my $uri ( keys %$uris ) {
+			my $getter = $uris->{ $uri };
+			$all_uris{ $uri } = 1;
+			my $pkg = $getter->{pkg};
+			$num_by_pkg{ $pkg } ||= 0;
+			$num_by_pkg{ $pkg }++;
+		}
+	}
+	abort_missing( \%all_uris, $_ ) foreach values %working;
+	RSGet::Line::status(
+		'to download' => scalar @$getlist,
+		'downloading' => scalar keys %downloading,
+		'resolving links' => scalar keys %resolving,
+		'checking URIs' => scalar keys %checking,
+	);
+
+	my $all_valid = 1;
+	foreach my $line ( @$getlist ) {
+		my ( $uris, $options ) = @$line;
+		foreach my $uri ( keys %$uris ) {
+			my $getter = $uris->{ $uri };
+			my $ok = is_ok( $uri );
+			#p "$uri - $ok";
+			if ( not defined $ok ) {
+				run( "check", $uri, $getter, $options );
+				$all_valid = 0;
+			} elsif ( not $ok ) {
+				$all_valid = 0;
+			}
+		}
+
+		next unless $all_valid;
+
+		foreach my $uri ( sort {
+					my $a_pkg = $uris->{ $a }->{pkg};
+					my $b_pkg = $uris->{ $b }->{pkg};
+					$num_by_pkg{ $a_pkg } <=> $num_by_pkg{ $b_pkg }
+				} keys %$uris ) {
+			my $getter = $uris->{ $uri };
+			last if run( "get", $uri, $getter, $options );
+		}
+	}
+}
+
+sub abort_missing
+{
+	my $all = shift;
+	my $running = shift;
+	foreach ( keys %$running ) {
+		next if exists $all->{$_};
+		my $obj = $running->{$_};
+		$obj->{_abort} = "Removed from the list!";
+	}
+}
+
+sub done
+{
+	my $uri = shift;
+	my $getter = shift;
+
+	my $class = $getter->{class};
+	my $cmd = $class eq "Link" ? "link" : "get";
+
+	my $f = $finished{ $cmd }->{ $uri };
+	return $f if defined $f;
+	return undef;
+}
+
+1;
+
+# vim:ts=4:sw=4

Added: toys/rsget.pl/RSGet/FileList.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/FileList.pm	Thu Aug 27 15:23:19 2009
@@ -0,0 +1,212 @@
+package RSGet::FileList;
+
+use strict;
+use warnings;
+use URI::Escape;
+use RSGet::Tools;
+our $file = 'get.list';
+our $reread = 1;
+our %uri_options; # options to be saved
+
+sub need_update
+{
+	$reread = 1;
+}
+
+sub words
+{
+	my $pre = shift;
+	my $before = shift;
+	return () unless @_;
+	my $line = "";
+	my $lline = $pre . $before . shift;
+	foreach ( @_ ) {
+		if ( length $lline . $_ > 76 ) {
+			$line .= "$lline \\\n";
+			$lline = $pre;
+		}
+		$lline .= " " . $_;
+	}
+
+	$lline = $line.$lline if $line;
+	return $lline."\n";
+}
+
+
+sub h2a($)
+{
+	my $h = shift;
+	return map { $_ . "=" . uri_escape( $h->{$_} ) } sort keys %$h;
+}
+
+sub getter
+{
+	my $uri = shift;
+	my @g = grep { $uri =~ m/^http:\/\/(:?www\.)?$_->{uri}/ } values %getters;
+	return undef unless @g;
+	return $g[0];
+}
+
+my @added_text;
+sub add_text
+{
+	my $type = shift;
+	my $text = shift;
+
+	return unless $text;
+	if ( $type eq "links" ) {
+		my @words = split /\s/s, $text;
+
+		foreach ( @words ) {
+			next unless m{^(http://)?(.*?)$};
+			my $proto = $1 || "http://";
+			my $uri = $proto . $2;
+			push @added_text, $uri . "\n" if getter($uri);
+		}
+
+		$reread = 2;
+	} elsif ( $type eq "comment" ) {
+		foreach ( split /\n/, $text ) {
+			s/\s+$//;
+			push @added_text, "# $_\n";
+		}
+	}
+	return \@added_text;
+}
+
+my $listmtime = 0;
+sub readlist
+{
+	return unless -r $file;
+	my $mtime = (stat _)[9];
+	return unless $reread or $mtime != $listmtime;
+	#p "readlist()";
+
+	my @getlist;
+	my @newlist;
+	open my $list, '<', $file;
+	while ( my $line = <$list> ) {
+		chomp $line;
+		if ( $line =~ /^\s*(#.*)?$/ ) { # comments and empty lines
+			push @newlist, $line . "\n";
+			next;
+		} elsif ( $line =~ /^__END__\s*$/ ) { # end of list
+			push @newlist, $line . "\n";
+			push @newlist, <$list>;
+			last;
+		}
+		while ( $line =~ s/\\$/ / ) { # stitch broken lines together
+			$line .= <$list>;
+			chomp $line;
+		}
+
+		$line =~ s/^\s+//;
+		$line =~ s/\s+$//;
+
+
+		my %uris;
+		my %options;
+		my @invalid;
+		my @invalid_uri;
+
+		# split line into words
+		foreach ( split /\s+/, $line ) {
+			if ( /^([a-z_]+)=(.*)$/ ) {
+				$options{$1} = uri_unescape( $2 );
+			} elsif ( m{^(http://)?(.*?)$} ) {
+				my $proto = $1 || "http://";
+				my $uri = $proto . $2;
+				if ( my $getter = getter($uri) ) {
+					$uris{ $uri } = $getter;
+				} elsif ( $uri =~ m{.+\.[a-z]{2,4}/.+} ) {
+					push @invalid_uri, $uri;
+				} else {
+					push @invalid, $_;
+				}
+			} else {
+				push @invalid, $_;
+			}
+		}
+
+		if ( not scalar keys %uris ) {
+			push @newlist, words(
+				"# ", "invalid line: ",
+				@invalid, @invalid_uri, h2a( \%options ),
+			);
+			next;
+		} elsif ( @invalid ) {
+			push @newlist, words(
+				"# ", "invalid words: ",
+				@invalid, @invalid_uri
+			);
+		} elsif ( @invalid_uri ) {
+			push @newlist, words(
+				"# ", "invalid uri: ",
+				@invalid_uri,
+			);
+		}
+
+		foreach my $uri ( sort keys %uris ) {
+			my $error = RSGet::Dispatch::is_error( $uri );
+			next unless $error;
+			delete $uris{ $uri };
+			push @newlist, "# $error:\n# $uri\n";
+		}
+
+		unless ( keys %uris ) {
+			push @newlist, words(
+				"#", "", h2a( \%options )
+			) if keys %options;
+			next;
+		}
+
+		foreach my $uri ( sort keys %uris ) {
+			hadd \%options, %{$uri_options{ $uri }} if $uri_options{ $uri };
+		}
+
+		my $status;
+		foreach my $uri ( sort keys %uris ) {
+			next unless $status = RSGet::Dispatch::done( $uri, $uris{ $uri } );
+			$uri = "*" . $uri;
+			if ( ref $status and ref $status eq "ARRAY" ) {
+				push @newlist, words(
+					"#", " Link: ",
+					(sort keys %uris), h2a( \%options )
+				);
+				push @newlist, words( '', '', @$status );
+			} else {
+				push @newlist, words(
+					"# ", "$status:\n# ",
+					(sort keys %uris), h2a( \%options )
+				);
+			}
+			$reread = 2;
+			last;
+		}
+		next if $status;
+
+		push @newlist, words( '', '', (sort keys %uris), h2a( \%options ) );
+
+		push @getlist, [ \%uris, \%options ];
+	}
+	close $list;
+
+	unless ( -e ".${file}.swp" ) {
+		open my $newlist, '>', $file . ".tmp";
+		print $newlist @newlist;
+		print $newlist @added_text;
+		@added_text = ();
+		close $newlist || die "\nCannot update $file file: $!\n";
+		unlink $file;
+		rename $file . ".tmp", $file;
+	}
+
+	$reread = $reread == 2 ? 1 : 0;
+	$listmtime = (stat $file)[9];
+
+	return \@getlist;
+}
+
+1;
+
+# vim:ts=4:sw=4

Added: toys/rsget.pl/RSGet/Get.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/Get.pm	Thu Aug 27 15:23:19 2009
@@ -0,0 +1,301 @@
+package RSGet::Get;
+
+use strict;
+use warnings;
+use RSGet::Tools;
+use URI;
+
+my %cookies;
+sub make_cookie
+{
+	my $c = shift;
+	return () unless $c;
+	$cookies{ $c } = 1 unless $cookies{ $c };
+	my $n = $cookies{ $c }++;
+
+	local $_ = ".cookie.$c.$n.txt";
+	unlink $_ if -e $_;
+	return _cookie => $_;
+}
+
+
+sub new
+{
+	my ( $pkg, $cmd, $uri, $options, $outif ) = @_;
+	my $getter = $getters{ $pkg };
+
+	my $self = {
+		_uri => $uri,
+		_opts => $options,
+		_try => 0,
+		_cmd => $cmd,
+		_pkg => $pkg,
+		_outif => $outif,
+		make_cookie( $getter->{cookie} ),
+	};
+	bless $self, $pkg;
+
+	if ( $cmd eq "get" ) {
+		my $outifstr = $outif ? "[$outif]" :  "";
+		hadd $self,
+			_line => new RSGet::Line( "[$getter->{short}]$outifstr " ),
+			_name => $options->{fname} || ($uri =~ m{([^/]+)/*$})[0];
+		$self->print( "start" );
+	}
+
+	$self->start();
+	return $self;
+}
+
+sub DESTROY
+{
+	my $self = shift;
+	if ( my $c = $self->{_cookie} ) {
+		unlink $c;
+	}
+}
+
+sub print
+{
+	my $self = shift;
+	my $text = shift;
+	my $line = $self->{_line};
+	return unless $line;
+	$line->print( $self->{_name} . ": " . $text );
+}
+
+sub get
+{
+	my $self = shift;
+	$self->{after_curl} = shift;
+	my $uri = shift;
+
+	$uri = URI->new( $uri )->abs( $self->{_referer} )->as_string
+		if $self->{_referer};
+
+	RSGet::Curl::new( $uri, $self, @_ );
+}
+
+sub get_finish
+{
+	my $self = shift;
+	$self->{_referer} = shift;
+
+	my $func = $self->{after_curl};
+	$_ = $self->{body};
+	&$func( $self );
+}
+
+sub download
+{
+	my $self = shift;
+	my $uri = shift;
+	$self->print("starting download");
+	$self->get( \&finish, $uri, save => 1, @_ );
+}
+
+sub restart
+{
+	my $self = shift;
+	my $time = shift || 1;
+	my $msg = shift || "restarting";
+
+	return $self->wait( \&start, $time, $msg );
+}
+
+sub multi
+{
+	my $self = shift;
+	return $self->wait( 60 + 240 * rand, \&start, "multi-download not allowed, waiting" );
+}
+
+sub finish
+{
+	my $self = shift;
+
+	if ( $self->{is_html} ) {
+		$self->print( "is HTML" );
+		$_ = $self->{body};
+		return $self->stage_is_html();
+	}
+
+	RSGet::Dispatch::mark_used( $self );
+	RSGet::Dispatch::finished( $self, $self->{dlinfo} );
+}
+
+sub abort
+{
+	my $self = shift;
+	$self->print( $self->{_abort} || "aborted" );
+	RSGet::Dispatch::finished( $self );
+}
+
+sub error
+{
+	my $self = shift;
+	my $msg = shift;
+	if ( $self->{body} ) {
+		my $n = 0;
+		my $name;
+		do {
+			$name = "errorlog." . (++$n) . ".html";
+		} while ( -r $name );
+		open ERR_OUT, '>', $name;
+		print ERR_OUT $self->{body};
+		close ERR_OUT;
+
+		$msg .= "; saved $name";
+	}
+
+	$self->print( $msg );
+	RSGet::Dispatch::finished( $self, $msg );
+}
+
+sub start
+{
+	my $self = shift;
+	$self->clean();
+	return $self->stage0();
+}
+
+sub problem
+{
+	my $self = shift;
+	my $line = shift;
+	my $msg = $line ? "problem at line: $line" : "unknown problem";
+	if ( ++$self->{_try} < 8 ) {
+		return $self->wait( \&start, 2 ** $self->{_try}, $msg . ", waiting" );
+	} else {
+		return $self->error( $msg . ", aborting" );
+	}
+}
+
+sub clean
+{
+	my $self = shift;
+	foreach ( keys %$self ) {
+		delete $self->{$_} unless /^_/;
+	}
+	delete $self->{_referer};
+}
+
+sub info
+{
+	my $self = shift;
+	my %info = @_;
+	$info{name} = de_ml( $info{name} );
+	$info{kilo} ||= 1024;
+
+	$self->{_name} = $self->{_opts}->{fname} || $info{name};
+	return 0 unless $self->{_cmd} eq "check";
+	#p "info( $self->{_uri} ): $info{name}, $info{size}\n";
+	RSGet::Dispatch::finished( $self, \%info );
+	return 1;
+}
+
+sub search
+{
+	my $self = shift;
+	my %search = @_;
+
+	foreach my $name ( keys %search ) {
+		my $search = $search{$name};
+		if ( m/$search/ ) {
+			$self->{$name} = $1;
+		} else {
+			$self->problem( "Can't find '$name': $search" );
+			return 1;
+		}
+	}
+	return 0;
+}
+
+sub link
+{
+	my $self = shift;
+	my $links = [ @_ ];
+	RSGet::Dispatch::finished( $self, $links );
+	return 1;
+}
+
+sub set_fname
+{
+	my $self = shift;
+	my $fname = shift;
+	$self->{_name} = $fname;
+
+	my $opts = $RSGet::FileList::uri_options{ $self->{_uri} } ||= {};
+	hadd $opts,
+		fname => $fname;
+
+	$RSGet::FileList::reread = 1;
+}
+
+my %waiting;
+sub wait
+{
+	my $self = shift;
+	my $next_stage = shift;
+	my $wait = shift() + int rand 10;
+	my $msg = shift || "???";
+
+	my $time = time;
+	delete $self->{wait_until_should};
+
+	my $rnd_wait = int rand ( 5 * 60 ) + 2 * 60;
+	if ( $wait > $rnd_wait + 1 * 60 ) {
+		$self->{wait_until_should} = $time + $wait;
+		$wait = $rnd_wait;
+	}
+
+	$self->{wait_next} = $next_stage;
+	$self->{wait_msg} = $msg;
+	$self->{wait_until} = $time + $wait;
+
+	my $id = 0;
+	++$id while exists $waiting{ $id };
+	$waiting{ $id } = $self;
+}
+
+sub wait_finish
+{
+	my $self = shift;;
+
+	delete $self->{body};
+	$_ = undef;
+
+	my $func = $self->{wait_next};
+	&$func( $self );
+}
+
+sub wait_update
+{
+	my $time = time;
+
+	foreach my $id ( keys %waiting ) {
+		my $obj = $waiting{ $id };
+		my $left = $obj->{wait_until} - $time;
+		if ( $left <= 0 ) {
+			delete $waiting{ $id };
+			$obj->print( $obj->{wait_msg} . "; done waiting" );
+			wait_finish( $obj );
+		} elsif ( $obj->{_abort} ) {
+			delete $waiting{ $id };
+			$obj->abort();
+		} else {
+			if ( $obj->{wait_until_should} ) {
+				$obj->print( sprintf "%s; should wait %s, retrying in %s",
+					$obj->{wait_msg},
+					s2string( $obj->{wait_until_should} - $time),
+					s2string( $left ) );
+			} else {
+				$obj->print( $obj->{wait_msg} . "; waiting " . s2string( $left ) );
+			}
+		}
+	}
+	RSGet::Line::status( 'waiting' => scalar keys %waiting );
+}
+
+1;
+
+# vim:ts=4:sw=4

Added: toys/rsget.pl/RSGet/Line.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/Line.pm	Thu Aug 27 15:23:19 2009
@@ -0,0 +1,192 @@
+package RSGet::Line;
+use strict;
+use warnings;
+use RSGet::Tools;
+use Term::Size;
+
+$| = 1;
+
+my %active;
+my %dead;
+my %status;
+our @active;
+our @dead;
+my $last_line = 0;
+
+my $last_day = -1;
+sub print_dead_lines
+{
+	my @l = localtime;
+	my $time = sprintf "[%.2d:%.2d:%.2d] ", @l[(2,1,0)];
+
+	my @print;
+
+	if ( $last_day != $l[3] ) {
+		$last_day = $l[3];
+		my $date = sprintf "[Actual date: %d-%.2d-%.2d]", $l[5] + 1900, $l[4] + 1, $l[3];
+		push @print, "\r" . $date . "\033[J\n";
+		push @dead, $date;
+	}
+
+
+	foreach my $key ( sort { $a <=> $b } keys %dead ) {
+		my $line = $dead{$key};
+		my $text = $line->[0] . $line->[1];
+		$text = $time . $text if $text =~ /\S/;
+
+		push @print, "\r" . $text . "\033[J\n";
+		push @dead, $text;
+	}
+
+	print @print;
+
+	my $max = 1000;
+	if ( scalar @dead > $max ) {
+		splice @dead, 0, $max - scalar @dead;
+	}
+
+	%dead = ();
+}
+
+sub print_status_lines
+{
+    my $columns = shift();
+	my $horiz = "-" x ($columns - 4);
+
+	my $date = "< ".isotime()." >";
+	my $date_l = length $date;
+	my $h = $horiz;
+	substr $h, int( (length($horiz) - $date_l ) / 2 ), $date_l, $date;
+	@active = ( " \\$h/ " );
+
+	my @status = ( "rsget.pl -- " );
+	foreach my $name ( sort keys %status ) {
+		my $value = $status{$name};
+		next unless $value;
+		my $s = "$name: $value; ";
+		if ( length $status[ $#status ] . $s > $columns - 5 ) {
+			push @status, $s;
+		} else {
+			$status[ $#status ] .= $s;
+		}
+	}
+	foreach ( @status ) {
+		my $l = " |" . ( " " x ($columns - 4 - length $_ )) . $_ . "| ";
+		push @active, $l;
+	}
+	push @active, " /$horiz\\ ";
+	my @print = map { "\r\n$_\033[K" } @active;
+	print @print;
+	return scalar @print;
+}
+
+
+sub print_active_lines
+{
+    my $columns = shift;
+	my @print;
+
+	foreach my $key ( sort { $a <=> $b } keys %active ) {
+		my $line = $active{$key};
+
+		my $text = $line->[1];
+		my $tl = length $line->[0] . $text;
+		substr $text, 4, $tl - $columns + 3, '...'
+			if $tl > $columns;
+		push @print, "\r\n" . $line->[0] . $text . "\033[K";
+		push @active, $line->[0] . $line->[1];
+	}
+
+	print @print;
+	return scalar @print;
+}
+
+sub print_all_lines
+{
+	my ( $columns, $rows ) = Term::Size::chars;
+	my $added = 0;
+	print_dead_lines();
+	$added += print_status_lines( $columns );
+	$added += print_active_lines( $columns );
+	return $added;
+}
+
+sub update
+{
+	my $added = print_all_lines();
+	print "\033[J\033[" . $added . "A\r" if $added;
+}
+
+sub new
+{
+    my $class = shift;
+	my $head = shift;
+	my $text = shift;
+	$head = "" unless defined $head;
+
+	my $line = "" . ($last_line++);
+	$active{ $line } = [ $head, "" ];
+
+	my $self = \$line;
+	bless $self, $class;
+	$self->print( $text );
+
+	return $self;
+}
+
+sub print
+{
+	my $self = shift;
+	my $line = $$self;
+	my $text = shift;
+	$text = "" unless defined $text;
+	$text =~ s/\n+$//sg;
+	$text =~ s/\n/ /sg;
+	$active{ $line }->[1] = $text;
+
+	return length $text;
+}
+
+sub DESTROY
+{
+	my $self = shift;
+	my $line = $$self;
+	$dead{ $line } = $active{ $line };
+	delete $active{ $line };
+}
+
+sub status
+{
+	hadd( \%status, @_ );
+}
+
+$SIG{INT} = sub {
+	print_all_lines();
+	print "\nTERMINATED\n";
+	exit 0;
+};
+
+$SIG{WINCH} = sub {
+	print "\033[2J\033[1;1H\n";
+	my ( $columns, $rows ) = Term::Size::chars;
+	my $start = $#dead - $rows;
+	$start = 0 if $start < 0;
+	print join( "\n", @dead[($start..$#dead)] ), "\n";
+	update();
+};
+
+$SIG{__WARN__} = sub {
+	new RSGet::Line( "WARNING: ", shift );
+	update();
+};
+
+$SIG{__DIE__} = sub {
+	print_all_lines();
+	print "\n";
+	print "DIED: ", shift, "\n\n";
+	exit 1;
+};
+
+1;
+
+# vim:ts=4:sw=4

Added: toys/rsget.pl/RSGet/MicroHTTP.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/MicroHTTP.pm	Thu Aug 27 15:23:19 2009
@@ -0,0 +1,254 @@
+package RSGet::MicroHTTP;
+
+use strict;
+use warnings;
+use IO::Socket;
+use RSGet::Tools;
+
+my @template = <DATA>;
+our %data = (
+	last_lines => '',
+	status => '',
+	dl_list => '',
+);
+
+sub new
+{
+	my $class = shift;
+	my $port = shift;
+	my $socket = IO::Socket::INET->new(
+		Proto => 'tcp',
+		LocalPort => $port,
+		Listen => SOMAXCONN,
+		Reuse => 1,
+		Blocking => 0,
+	) || return undef;
+
+	my $self = \$socket;
+	return bless $self, $class;
+}
+
+sub perform
+{
+	my $self = shift;
+	my $socket = $$self;
+
+	my @ret;
+
+	my $client = $socket->accept();
+	return () unless $client;
+
+	u_last_lines();
+	u_status();
+	u_dl_list();
+
+	push @ret, request( $client );
+
+	for ( my $i = 0; $i < 5; $i++ ) {
+		my $client = $socket->accept() or last;
+		push @ret, request( $client );
+	}
+
+	return @ret;
+}
+
+my $actual_client;
+
+sub request
+{
+	my $client = shift;
+	$actual_client = $client;
+	local $SIG{ALRM} = sub {
+		print $actual_client "Close !\n";
+		die "HTTP Frozen !\n";
+	};
+	alarm 5; # XXX: this must be fixed
+	my $request = <$client>;
+	unless ( $request ) {
+		close $client;
+		alarm 0;
+		return;
+	}
+	chomp $request;
+
+	my( $method, $file, $ignore ) = split /\s+/, $request;
+	p "HTTP request: $method: $file";
+
+	my $len = 0;
+	while ( $_ = <$client> ) {
+		$len = $1 if /^Content-Length:\s*(\d+)/i;
+		last if /^\s*$/;
+	}
+	if ( $len and $method =~ /^POST$/i ) {
+		my $r;
+		$client->read( $r, $len );
+		foreach ( split /&/, $r ) {
+			s/^(.*?)=//;
+			my $key = $1;
+			tr/+/ /;
+			s/%(..)/chr hex $1/eg;
+			RSGet::FileList::add_text( $key, $_ );
+		}
+	}
+
+	print $client "HTTP/1.1 200 OK\r\n";
+	print $client "Content-Type: text/html; charset=utf-8\r\n";
+	print $client "\r\n";
+	foreach my $line ( @template ) {
+		local $_ = $line;
+		s/\${([a-z_]+)}/$data{$1}/g;
+		print $client $_;
+	}
+	close $client;
+	alarm 0;
+
+	return 1;
+}
+
+sub u_last_lines
+{
+	my $out = "";
+	foreach my $line ( @RSGet::Line::dead ) {
+		local $_ = $line;
+		s/&/&amp;/g;
+		s/</&lt;/g;
+		s/>/&gt;/g;
+		s#(^|\s)(http://\S*)#$1<a href="$2">$2</a>#g;
+		$out .= "<li>$_</li>\n";
+	}
+	$data{last_lines} = $out;
+	return;
+}
+
+sub u_status
+{
+	my $out = "";
+	foreach my $line ( @RSGet::Line::active ) {
+		local $_ = $line;
+		s/&/&amp;/g;
+		s/</&lt;/g;
+		s/>/&gt;/g;
+		s#(^|\s)(http://\S*)#$1<a href="$2">$2</a>#g;
+		$out .= "<li>$_</li>\n";
+	}
+	$data{status} = $out;
+	return;
+}
+
+sub u_dl_list
+{
+	unless ( -r $RSGet::FileList::file ) {
+		$data{dl_list} = '<li></li>';
+	}
+
+	my $out = "";
+	open my $list, '<', $RSGet::FileList::file;
+	while ( $_ = <$list> ) {
+		chomp;
+		my $class = "";
+		$class = ' class="comment"' if /^\s*#/;
+		s/&/&amp;/g;
+		s/</&lt;/g;
+		s/>/&gt;/g;
+		s{(^|\s|#)(http://\S*)}{$1<a href="$2">$2</a>}g;
+		$out .= "<li$class>$_</li>\n";
+	}
+	close $list;
+
+	$data{dl_list} = $out;
+	return;
+}
+1;
+
+__DATA__
+<html>
+<head>
+	<title>rsget.pl</title>
+<style>
+html {
+	background: #333;
+}
+body {
+	width: 900px;
+	margin: 10px;
+	margin-left: auto;
+	margin-right: auto;
+	border: 10px solid #555;
+	padding: 5px;
+	background: #777;
+	font-family: monospace;
+}
+fieldset {
+	border: 10px solid #999;
+	padding: 5px;
+	margin: 5px;
+	background: #bbb;
+}
+input, textarea {
+	border: 10px solid #ddd;
+	padding: 5px;
+	margin: 5px;
+	background: #fff;
+}
+input {
+	width: 150px;
+	margin-left: 700px;
+}
+legend {
+	border: 10px solid #999;
+	border-top: 0;
+	border-bottom: 0;
+	background: #bbb;
+}
+ul {
+	border: 10px solid #ddd;
+	padding: 5px;
+	margin: 5px;
+	background: #fff;
+	list-style: none;
+}
+li:first-child {
+	border-top: 0;
+}
+li {
+	border-top: 2px solid #ddd;
+	white-space: pre;
+}
+li.comment {
+	color: #00F;
+}
+a, a:visited {
+	color: inherit;
+}
+</style>
+</head>
+<body>
+	<fieldset>
+		<legend>Last lines</legend>
+		<ul>${last_lines}</ul>
+	</fieldset>
+
+	<fieldset>
+		<legend>Status</legend>
+		<ul>${status}</ul>
+	</fieldset>
+
+	<fieldset>
+		<legend>Download list</legend>
+		<ul>${dl_list}</ul>
+	</fieldset>
+
+	<form action="" method="post">
+		<fieldset>
+			<legend>Extract links from text</legend>
+			<textarea cols="100" rows="16" name="links"></textarea>
+			<input type="submit" value="OK" />
+		</fieldset>
+		<fieldset>
+			<legend>Append whole text to download list</legend>
+			<textarea cols="100" rows="16" name="text"></textarea>
+			<input type="submit" value="OK" />
+		</fieldset>
+	</form>
+</body>
+</html>

Added: toys/rsget.pl/RSGet/Processor.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/Processor.pm	Thu Aug 27 15:23:19 2009
@@ -0,0 +1,260 @@
+package RSGet::Processor;
+
+use strict;
+use warnings;
+use RSGet::Tools;
+
+my $options = "name|short|uri|slots|cookie|status";
+my $parts = "pre|start|perl";
+
+my $processed = "";
+sub pr(@)
+{
+	my $line = join "", @_;
+	$processed .= $line;
+	return length $line;
+}
+
+my $is_sub = 0;
+sub p_sub
+{
+	my $sub = shift;
+	pr "sub $sub {\n";
+	pr "\tmy \$self = shift;\n";
+	foreach ( @_ ) {
+		pr "\t$_;\n";
+	}
+	$is_sub++;
+}
+sub p_subend
+{
+	return unless $is_sub;
+	$is_sub--;
+	pr "\treturn \${self}->error( 'file is a html page' );\n}\n";
+}
+
+my $space;
+sub p_ret
+{
+	my $ret = shift;
+	my @opts = @_;
+	pr $space . "return \${self}->${ret}( ";
+	pr join( ", ", @opts ) . ", " if @opts;
+}
+
+sub p_line
+{
+	s/\$-{/\$self->{/g;
+	pr $_ . "\n";
+}
+
+
+sub read_file
+{
+	my $class = shift;
+	my $file = shift;
+
+	open F_IN, '<', $file;
+
+	my %opts;
+	my %parts = (
+		pre => [],
+		start => [],
+		perl => [],
+	);
+	my $part = undef;
+	while ( <F_IN> ) {
+		chomp;
+		next unless length;
+		next if /^\s*#/;
+
+		if ( $part ) {
+			unless ( /^\S+/ ) {
+				push @{$parts{$part}}, $_;
+				next;
+			}
+			if ( $part eq "perl" ) {
+				push @{$parts{perl}}, $_, <F_IN>;
+				last;
+			} elsif ( $part eq "start" and /^stage_.*?:/ ) {
+				push @{$parts{start}}, $_;
+				next;
+			}
+			$part = undef;
+		}
+
+		if ( /^($parts)\s*:/ ) {
+			$part = $1;
+		} elsif ( /^($options)\s*:\s+(.*)$/ ) {
+			$opts{$1} = $2;
+		}
+	}
+
+	close F_IN;
+	unless ( scalar @{$parts{start}} ) {
+		p "Can't find 'start:'\n";
+		return undef;
+	}
+	foreach ( qw(name short uri) ) {
+		next if $opts{$_};
+		p "Can't find '$_:'\n";
+		return undef;
+	}
+	$file =~ m{.*/(.*?)$};
+	my $fname = $1;
+	if ( $fname ne $opts{name} ) {
+		p "Name field: '$opts{name}' differs from file name: '$fname'\n";
+		return undef;
+	}
+	if ( $opts{status} and $opts{status} !~ /^OK(\s+.*)?$/ ) {
+		p "Marked as '$opts{status}'\n";
+		return undef;
+	}
+
+	$processed = "";
+	$space = "";
+	$is_sub = 0;
+
+	$opts{uri} = eval $opts{uri};
+	$opts{class} = ${class};
+	$opts{pkg} = "${class}::$opts{name}";
+
+	pr "package $opts{pkg};\n\n";
+	pr <<'EOF';
+	use strict;
+	use warnings;
+	use RSGet::Get;
+	use RSGet::Tools;
+
+	BEGIN {
+		our @ISA;
+		@ISA = qw(RSGet::Get);
+	}
+EOF
+
+	pr join "\n", @{$parts{pre}}, "\n";
+
+	my $stage = 0;
+	p_sub( "stage0" );
+	my @machine = @{$parts{start}};
+	while ( $_ = shift @machine ) {
+		s/^(\s+)//;
+		$space = $1;
+
+		if ( s/^GET\s*\(// ) {
+			my $next_stage = "stage" . ++$stage;
+			my @skip;
+			push @skip, $_;
+			until ( /;\s*$/ ) {
+				$_ = shift @machine;
+				push @skip, $_;
+			}
+			if ( $machine[0] =~ s/^(stage_.*?):\s*$// ) {
+				$next_stage = $1;
+				shift @machine;
+			}
+			p_ret( "get", "\\&$next_stage" );
+			foreach ( @skip ) {
+				p_line();
+			}
+			p_subend();
+			p_sub( $next_stage );
+		} elsif ( s/^GET_NEXT\s*\(\s*(.*?)\s*,// ) {
+			my $next_stage = $1;
+			p_ret( "get", "\\&$1" );
+			p_line();
+		} elsif ( s/^ERROR\s*\(// ) {
+			p_ret( "error" );
+			p_line();
+		} elsif ( s/^INFO\s*\(// ) {
+			pr $space . 'return "info" if $self->info( ';
+			p_line();
+		} elsif ( s/^SEARCH\s*\(// ) {
+			pr $space . 'return if $self->search( ';
+			p_line();
+		} elsif ( s/^WAIT\s*\(// ) {
+			my $next_stage = "stage" . ++$stage;
+			my @skip;
+			push @skip, $_;
+			until ( /;\s*$/ ) {
+				$_ = shift @machine;
+				push @skip, $_;
+			}
+			if ( $machine[0] =~ s/^(stage_.*?):\s*$// ) {
+				$next_stage = $1;
+				shift @machine;
+			}
+			p_ret( "wait", "\\&$next_stage" );
+			foreach ( @skip ) {
+				p_line();
+			}
+			p_subend();
+			p_sub( $next_stage );
+		} elsif ( s/^WAIT_NEXT\s*\(\s*(.*?)\s*,// ) {
+			my $next_stage = $1;
+			p_ret( "wait", "\\&$next_stage" );
+			p_line();
+		} elsif ( s/^RESTART\s*\(\s*// ) {
+			p_ret( "restart" );
+			p_line();
+		} elsif ( s/^DOWNLOAD\s*\(\s*// ) {
+			p_ret( "download" );
+			p_line();
+			until ( /;\s*$/ ) {
+				$_ = shift @machine;
+				p_line();
+			}
+			p_subend();
+			p_sub( "stage_is_html" );
+		} elsif ( s/^LINK\s*\(\s*// ) {
+			p_ret( "link" );
+			p_line();
+			until ( /;\s*$/ ) {
+				$_ = shift @machine;
+				p_line();
+			}
+			p_subend();
+		} elsif ( s/^MULTI\s*\(// ) {
+			p_ret( "multi" );
+			p_line();
+		} elsif ( s/^!\s+// ) {
+			my $line = quotemeta $_;
+			pr $space . 'return $self->problem( "'. $line .'" ) unless ';
+			p_line();
+		} else {
+			pr $space;
+			p_line();
+		}
+	}
+	p_subend();
+
+	pr @{$parts{perl}};
+	pr "1;";
+
+	my $ret;
+	{
+		local $SIG{__DIE__} = undef;
+		$ret = eval $processed;
+	}
+
+	if ( $@ ) {
+		p "Error(s): $@\n";
+		my $err = $@;
+		return undef unless $err =~ /line \d+/;
+		my @p = split /\n/, $processed;
+		for ( my $i = 0; $i < scalar @p; $i++ ) {
+			my $n = $i + 1;
+			p sprintf "%s%4d: %s\n",
+				($err =~ /line $n[^\d]/ ? "!" : " "),
+				$n,
+				$p[ $i ];
+		}
+		return undef;
+	}
+
+	return $opts{pkg} => \%opts if $ret and $ret == 1;
+	return ();
+}
+
+1;
+# vim:ts=4:sw=4

Added: toys/rsget.pl/RSGet/Tools.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/Tools.pm	Thu Aug 27 15:23:19 2009
@@ -0,0 +1,97 @@
+package RSGet::Tools;
+
+use strict;
+use warnings;
+use vars qw(@ISA @EXPORT @EXPORT_OK);
+
+require Exporter;
+ at ISA = qw(Exporter);
+ at EXPORT = qw(s2string bignum de_ml hadd hprint p isotime require_prog
+			randomize %getters %settings);
+ at EXPORT_OK = qw();
+
+our %settings;
+our %getters;
+
+sub s2string($)
+{
+	my $s = shift;
+	my $minutes = int( $s / 60 );
+	my $seconds = $s % 60;
+
+	if ( $minutes >= 60 ) {
+		my $hours = int( $minutes / 60 );
+		$minutes %= 60;
+		return sprintf '%d:%.2d:%.2d', $hours, $minutes, $seconds;
+	} else {
+		return sprintf '%d:%.2d', $minutes, $seconds;
+	}
+}
+
+sub bignum($)
+{
+	local $_ = shift;
+	return $_ if /[^\d]/;
+	s/(..?.?)(?=(...)+$)/$1_/g;
+	return $_;
+}
+
+sub hadd(%@)
+{
+	my $h = shift;
+	my %new = @_;
+	foreach ( keys %new ) {
+		$h->{$_} = $new{$_};
+	}
+}
+
+
+sub p($)
+{
+	require RSGet::Line;
+	new RSGet::Line( "INFO: ", shift );
+}
+
+sub hprint(%)
+{
+	my $h = shift;
+	foreach ( keys %$h ) {
+		p "$_ => $h->{$_}";
+	}
+}
+
+sub randomize
+{
+	# not really good, but works
+	return sort { 0.5 <=> rand } @_;
+}
+
+
+sub isotime()
+{
+	my @l = localtime;
+	return sprintf "%d-%.2d-%.2d %2d:%.2d:%.2d", $l[5] + 1900, $l[4] + 1, @l[(3,2,1,0)];
+}
+
+sub de_ml
+{
+	local $_ = shift;
+	s/&le;/</g;
+	s/&ge;/>/g;
+	s/&quot;/"/g;
+	s/&amp;/&/g;
+	return $_;
+}
+
+sub require_prog
+{
+	my $prog = shift;
+	foreach my $dir ( split /:+/, $ENV{PATH} ) {
+		my $full = "$dir/$prog";
+		return $full if -x $full;
+	}
+	return undef;
+}
+
+1;
+# vim:ts=4:sw=4

Modified: toys/rsget.pl/rsget.pl
==============================================================================
--- toys/rsget.pl/rsget.pl	(original)
+++ toys/rsget.pl/rsget.pl	Thu Aug 27 15:23:19 2009
@@ -1,2134 +1,98 @@
 #!/usr/bin/perl
 #
-# 2008 (c) Przemysław Iskra <sparky at pld-linux.org>
+# 2009 (c) Przemysław Iskra <sparky at pld-linux.org>
 # Use/modify/distribute under GPL v2 or newer.
 #
-=item TODO:
-
-- removing URI from list should stop download
-- new URI higher in the list should replace any connection
-  to the same network if still in the wait stage
-- check all the URIs just after finding them in the list
-  (catch non-existing files quickly)
-- OdSiebie: there is a captcha now
-
-=item Status:
-- RS: 2009-08-12 OK
-- NL: 2009-08-12 OK, captcha works
-- OS: not working, captcha not supported
-- MU: 2009-08-12 OK, captcha works, requires mu_font_db.png
-- UT: 2009-06-07 OK
-- HF: captcha not supported
-- FF: 2009-08-12 OK
-- DF: 2009-08-12 OK
-- TU: 2009-08-12 OK
-- ST: 2009-08-12 OK
-
-=item Wishlist:
-- handle multiple alternatives for same file
-- add more servers
-
-=cut
-use strict;
-use warnings;
-use Time::HiRes;
-
-our $data_path = $ENV{PWD};
-
-my $checklist = 1;
-my %gotlist;
-$SIG{CHLD} = "IGNORE";
-
-my %getters;
-
-package Line; # {{{
-use Term::Size;
-
-$| = 1;
-my $actual_line = 0;
-my $max_line = 0;
-
-my $columns = Term::Size::chars;
-
-sub new
-{
-	my $proto = shift;
-    my $class = ref( $proto ) || $proto;
-
-	my $steps = $max_line - $actual_line;
-	$actual_line = $max_line;
-	my $move = "";
-
-	if ( $steps < 0 ) {
-		return undef;
-	} elsif ( $steps > 0 ) {
-		$move = "\033[" . $steps . "B";
-	}
-
-	print $move . "\n\r\033[K";
-
-	my $line = $max_line++;
-	my $self = \$line;
-	return bless $self, $class;
-}
-
-sub print
-{
-	my $self = shift;
-	my $text = shift;
-	my $line = $$self;
-
-	return undef if $line >= $max_line;
-	my $steps = $line - $actual_line;
-	$actual_line = $line;
-	my $move = "";
-
-	if ( $steps < 0 ) {
-		$move = "\033[" . (-$steps) . "A";
-	} elsif ( $steps > 0 ) {
-		$move = "\033[" . $steps . "B";
-	}
-	my $tl = length $text;
-	substr $text, 22, $tl - $columns + 3, '...'
-		if $tl > $columns;
-	
-	print $move . "\r\033[K" . $text;
-}
-
-# }}}
-package Curl; # {{{
-use WWW::Curl::Easy;
-use WWW::Curl::Multi;
-use URI::Escape;
-
-my $curl_headers = [
-	'User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10',
-	'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
-	'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7',
-	'Accept-Language: en-us,en;q=0.5',
-	];
-
-sub file_init
-{
-	my $self = shift;
-	my $curl = $self->{curl};
-
-	$self->{time_start} = time;
-
-	{
-		my $mime = $curl->getinfo( 	CURLINFO_CONTENT_TYPE );
-		if ( $mime =~ m#^text/html# ) {
-			$self->{is_html} = 1;
-			$self->{size_total} = 0;
-			$self->{size_got} = 0;
-			return;
-		}
-	}
-
-	{
-		my $f_len = $curl->getinfo( CURLINFO_CONTENT_LENGTH_DOWNLOAD );
-		$self->{size_total} = $f_len || 0;
-		$self->{size_got} = 0;
-	}
-
-	if ( $self->{head} =~ /^Content-Disposition:\s*attachment;\s*filename\s*=\s*"?(.*?)"?\s*$/im ) {
-		$self->{file_name} = $1;
-	} else {
-		my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
-		$eurl =~ s#^.*/##;
-		$self->{file_name} = uri_unescape( $eurl );
-	}
-
-	{
-		my $fn = $self->{file_name};
-		if ( -r $fn ) {
-			my $fn_old = $fn;
-			my $ext = "";
-			$ext = $1 if $fn_old =~ s/(\..{3,5})$//;
-			my $i = 1;
-			while ( -r "$fn_old-$i$ext" ) {
-				$i++;
-			}
-			$fn_old .= "-$i$ext";
-			rename $fn, $fn_old;
-		}
-		my $net = $self->{obj}->{net};
-		$self->{obj}->{netname} =~ s/] .*/] $fn: /;
-	}
-
-	{
-		open my $f_out, '>', $self->{file_name};
-		$self->{file} = $f_out;
-	}
-}
-
-sub body_file
-{
-	my ($chunk, $self) = @_;
-
-	file_init( $self ) unless exists $self->{size_total};
-
-	my $len = length $chunk;
-	$self->{size_got} += $len;
-
-	if ( $self->{file} ) {
-		my $file = $self->{file};
-		my $p = print $file $chunk;
-		die "\nCannot write data: $!\n" unless $p;
-	} else {
-		$self->{body} .= $chunk;
-	}
-
-	return $len;
-}
-
-sub body_scalar
-{
-	my ($chunk, $scalar) = @_;
-	$$scalar .= $chunk;
-	return length $chunk;
-}
-
-
-my $mcurl = new WWW::Curl::Multi;
-my %curling;
-
-sub start
-{
-	my $url = shift;
-	my $obj = shift;
-	my %opts = @_;
-
-	my $curl = new WWW::Curl::Easy;
-
-	my $id = 1;
-	++$id while exists $curling{ $id };
-
-	my $ecurl = {
-		curl => $curl,
-		id => $id,
-		got => 0,
-		head => "",
-		body => "",
-		obj => $obj,
-	};
-
-    $curl->setopt( CURLOPT_PRIVATE, $id );
-	if ( $obj->{outaddr} ) {
-		$curl->setopt( CURLOPT_INTERFACE, $obj->{outaddr} );
-	}
-	
-	if ( defined $opts{cookies} ) {
-		$curl->setopt( CURLOPT_COOKIEJAR, $opts{cookies} );
-		$curl->setopt( CURLOPT_COOKIEFILE, $opts{cookies} );
-	}
-	$curl->setopt( CURLOPT_HEADERFUNCTION, \&body_scalar );
-	$curl->setopt( CURLOPT_WRITEHEADER, \$ecurl->{head} );
-	$curl->setopt( CURLOPT_MAXREDIRS, 10 );
-	$curl->setopt( CURLOPT_FOLLOWLOCATION, 1 );
-	$curl->setopt( CURLOPT_HTTPHEADER, $curl_headers );
-	$curl->setopt( CURLOPT_URL, $url );
-	$curl->setopt( CURLOPT_REFERER, $opts{referer} )
-		if defined $opts{referer};
-	$curl->setopt( CURLOPT_ENCODING, 'gzip,deflate' );
-	$curl->setopt( CURLOPT_CONNECTTIMEOUT, 20 );
-
-	if ( $opts{post} ) {
-		my $post = $opts{post};
-		$curl->setopt( CURLOPT_POST, 1 );
-		if ( ref $post and ref $post eq "HASH" ) {
-			$post = join "&",
-				map { uri_escape( $_ ) . "=" . uri_escape( $post->{$_} ) }
-				sort keys %$post;
-		}
-		$curl->setopt( CURLOPT_POSTFIELDS, $post );
-	}
-
-	if ( $opts{save} ) {
-		$curl->setopt( CURLOPT_WRITEFUNCTION, \&body_file );
-		$curl->setopt( CURLOPT_WRITEDATA, $ecurl );
-	} else {
-		$ecurl->{is_html} = 1;
-		$curl->setopt( CURLOPT_WRITEFUNCTION, \&body_scalar );
-		$curl->setopt( CURLOPT_WRITEDATA, \$ecurl->{body} );
-	}
-
-	$curling{ $id } = $ecurl;
-    $mcurl->add_handle( $curl );
-}
-
-sub finish
-{
-	my $id = shift;
-	my $err = shift;
-
-	my $ecurl = $curling{ $id };
-	delete $curling{ $id };
-
-	my $curl = $ecurl->{curl};
-	delete $ecurl->{curl}; # remove circular dep
-
-	my $obj = $ecurl->{obj};
-	delete $ecurl->{obj};
-
-	if ( $ecurl->{file} ) {
-		close $ecurl->{file};
-		$obj->print( donemsg( $ecurl ) );
-	}
-
-	if ( $err ) {
-		my $error = $curl->errbuf;
-		$obj->print( "error($err): $error" );
-		$obj->problem();
-		return undef;
-	}
-
-	if ( $obj->{curl_next} ) {
-		my $func = $obj->{curl_next};
-		my $body = $ecurl->{file}
-			? "DONE $ecurl->{file_name} " . main::bignum( $ecurl->{size_got} )
-				. " / " . main::bignum( $ecurl->{size_total} )
-			: $ecurl->{body};
-		my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
-		
-		&$func( $obj, $body, $eurl, $ecurl->{is_html} );
-	}
-}
-
-sub perform
-{
-	my $running = scalar keys %curling;
-	return unless $running;
-	my $act = $mcurl->perform();
-	return if $act == $running;
-
-	while ( my ($id, $rv) = $mcurl->info_read() ) {
-		next unless $id;
-
-		finish( $id, $rv );
-	}
-}
-
-sub print_status
-{
-	my $time = time;
-
-	foreach my $ecurl ( values %curling ) {
-		next unless exists $ecurl->{size_total};
-		my $size_got = $ecurl->{size_got};
-		my $size_total = $ecurl->{size_total};
-
-		my $size = main::bignum( $size_got ) . " / " . main::bignum( $size_total );
-		my $eta = "";
-		my $time_diff = $time - $ecurl->{time_start};
-		if ( $size_total > 0 ) {
-			$size .= sprintf " [%.2f%%]", $size_got * 100 / $size_total;
-			if ( $time_diff > 0 ) {
-				my $tleft = ($size_total - $size_got) * $time_diff / $size_got;
-				$eta = main::s2string( $tleft );
-			}
-		}
-		my $speed = "???";
-		$speed = sprintf "%.2f", $size_got / ( $time_diff * 1024 )
-			if $time_diff > 0;
-
-		$ecurl->{obj}->print( "$size; ${speed}KB/s $eta" );
-	}
-}
-
-sub donemsg
-{
-	my $ecurl = shift;
-
-	my $size_total = $ecurl->{size_got};
-
-	my $time_diff = time() - $ecurl->{time_start};
-	$time_diff = 1 unless $time_diff;
-	my $eta = main::s2string( $time_diff );
-	my $speed = sprintf "%.2f", $size_total / ( $time_diff * 1024 );
-
-	my @l = localtime;
-	my $date = sprintf "%d-%.2d-%.2d %2d:%.2d:%.2d", $l[5] + 1900, $l[4] + 1, @l[(3,2,1,0)];
-	return "DONE " . main::bignum( $size_total ) . "; ${speed}KB/s $eta @ $date";
-}
-
-
-# }}}
-package Wait; # {{{
-
-my %waiting;
-
-sub start
-{
-	my $obj = shift;
-	my $time = shift;
-	$obj->{wait_until} = time + $time;
-
-	my $id = 0;
-	++$id while exists $waiting{ $id };
-	$waiting{ $id } = $obj;
-}
-
-sub finish
-{
-	my $id = shift;
-
-	my $obj = $waiting{ $id };
-	delete $waiting{ $id };
-
-	my $func = $obj->{wait_next};
-		
-	&$func( $obj );
-}
-
-sub perform
-{
-	my $time = time;
-
-	foreach my $id ( keys %waiting ) {
-		my $obj = $waiting{ $id };
-		my $left = $obj->{wait_until} - $time;
-		if ( $left <= 0 ) {
-			finish( $id );
-		} else {
-			$obj->print( $obj->{wait_msg} . main::s2string( $left ) );
-		}
-	}
-}
-
-# }}}
-package Get; # {{{
-
-use URI;
-my @outaddr;
-
-sub add_outaddr
-{
-	my $newaddr = shift;
-	NEW_IP: foreach my $ip ( split /[ ,]+/, $newaddr ) {
-		foreach my $outaddr ( @outaddr ) {
-			if ( $ip eq $outaddr ) {
-				print "Address $ip already on the list\n";
-				next NEW_IP;
-			}
-		}
-		print "Adding $ip address\n";
-		push @outaddr, $ip;
-	}
-}
-
-my %running;
-sub makenew
-{
-	my $net = shift;
-	my $class = shift;
-	my $url = shift;
-	my $slots = 1;
-	if ( scalar @_ and $_[0] eq "slots" ) {
-		shift;
-		$slots = shift;
-	}
-	if ( scalar @outaddr > $slots ) {
-		$slots = scalar @outaddr;
-	}
-
-	my @opts = split /\s+/, $url;
-	$url = shift @opts;
-	my %opts = map { /(.*?)=(.*)/ ? ( $1, $2 ) : ( $_, 1 ) } @opts;
-
-	return {} if $gotlist{ $url };
-	$running{ $net } = {} unless exists $running{ $net };
-	my $rn = $running{ $net };
-	return {} if $slots <= scalar keys %$rn;
-	foreach my $id ( keys %$rn ) {
-		if ( $rn->{ $id }->{url} eq $url ) {
-			return {};
-		}
-	}
-
-	my $outaddr = undef;
-	if ( scalar @outaddr ) {
-		FIND_IP: foreach my $maybe_outaddr ( @outaddr ) {
-			foreach my $id ( keys %$rn ) {
-				if ( $rn->{ $id }->{outaddr} eq $maybe_outaddr ) {
-					next FIND_IP;
-				}
-			}
-			$outaddr = $maybe_outaddr;
-			last;
-		}
-		# no IP found ?
-		return {}
-			unless defined $outaddr;
-	}
-	my $outaddrstr = $outaddr ? "[$outaddr]" :  "";
-
-	my $id = 1;
-	++$id while exists $rn->{ $id };
-
-	my $line = new Line;
-
-	( my $fn = $url ) =~ s{/+$}{};
-	$fn =~ s#^.*/##;
-
-	my $self = {
-		@_,
-		url => $url,
-		opts => \%opts,
-		id => $id,
-		try => 0,
-		line => $line,
-		net => $net,
-		netname => "[$net]$outaddrstr $fn: ",
-		outaddr => $outaddr,
-	};
-
-	$rn->{ $id } = bless $self, $class;
-
-	$self->stage1();
-	return $self;
-}
-
-sub print
-{
-	my $self = shift;
-	my $text = shift;
-	my $line = $self->{line};
-	$line->print( $self->{netname} . $text );
-}
<<diff output has been trimmed to 500 lines, 1707 line(s) remained.>>


More information about the pld-cvs-commit mailing list