SVN: toys/rsget.pl: RSGet RSGet/Curl.pm RSGet/Dispatch.pm RSGet/FileList.pm RSGet/Get.pm RSGet/Line....
sparky
sparky at pld-linux.org
Thu Aug 27 15:23:20 CEST 2009
Author: sparky
Date: Thu Aug 27 15:23:19 2009
New Revision: 10494
Added:
toys/rsget.pl/RSGet/
toys/rsget.pl/RSGet/Curl.pm
toys/rsget.pl/RSGet/Dispatch.pm
toys/rsget.pl/RSGet/FileList.pm
toys/rsget.pl/RSGet/Get.pm
toys/rsget.pl/RSGet/Line.pm
toys/rsget.pl/RSGet/MicroHTTP.pm
toys/rsget.pl/RSGet/Processor.pm
toys/rsget.pl/RSGet/Tools.pm
Modified:
toys/rsget.pl/rsget.pl
Log:
- mostly rewritten, with quite a few cool features
and even more interesting bugs
Added: toys/rsget.pl/RSGet/Curl.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/Curl.pm Thu Aug 27 15:23:19 2009
@@ -0,0 +1,380 @@
+package RSGet::Curl;
+
+use strict;
+use warnings;
+use RSGet::Tools;
+use RSGet::Line;
+use WWW::Curl::Easy;
+use WWW::Curl::Multi;
+use URI::Escape;
+use File::Copy;
+use Fcntl qw(SEEK_SET);
+
+my $curl_multi = new WWW::Curl::Multi;
+
+my $curl_headers = [
+ 'User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10',
+ 'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
+ 'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+ 'Accept-Language: en-us,en;q=0.5',
+ ];
+
+my %active_curl;
+
+sub new
+{
+ my $uri = shift;
+ my $get_obj = shift;
+ my %opts = @_;
+
+ my $curl = new WWW::Curl::Easy;
+
+ my $id = 1;
+ ++$id while exists $active_curl{ $id };
+
+ my $supercurl = {
+ curl => $curl,
+ id => $id,
+ get_obj => $get_obj,
+ got => 0,
+ head => "",
+ body => "",
+ };
+
+ $curl->setopt( CURLOPT_PRIVATE, $id );
+ $curl->setopt( CURLOPT_INTERFACE, $get_obj->{_outif} )
+ if $get_obj->{_outif};
+
+ if ( defined $get_obj->{_cookie} ) {
+ $curl->setopt( CURLOPT_COOKIEJAR, $get_obj->{_cookie} );
+ $curl->setopt( CURLOPT_COOKIEFILE, $get_obj->{_cookie} );
+ }
+ $curl->setopt( CURLOPT_HEADERFUNCTION, \&body_scalar );
+ $curl->setopt( CURLOPT_WRITEHEADER, \$supercurl->{head} );
+ $curl->setopt( CURLOPT_MAXREDIRS, 10 );
+ $curl->setopt( CURLOPT_FOLLOWLOCATION, 1 );
+ $curl->setopt( CURLOPT_HTTPHEADER, $curl_headers );
+ $curl->setopt( CURLOPT_URL, $uri );
+ $curl->setopt( CURLOPT_REFERER, $get_obj->{_referer} )
+ if defined $get_obj->{_referer};
+ $curl->setopt( CURLOPT_ENCODING, 'gzip,deflate' );
+ $curl->setopt( CURLOPT_CONNECTTIMEOUT, 20 );
+
+ if ( $opts{post} ) {
+ my $post = $opts{post};
+ $curl->setopt( CURLOPT_POST, 1 );
+ if ( ref $post and ref $post eq "HASH" ) {
+ $post = join "&",
+ map { uri_escape( $_ ) . "=" . uri_escape( $post->{$_} ) }
+ sort keys %$post;
+ }
+ $curl->setopt( CURLOPT_POSTFIELDS, $post );
+ }
+
+ if ( $opts{save} ) {
+ $curl->setopt( CURLOPT_WRITEFUNCTION, \&body_file );
+ $curl->setopt( CURLOPT_WRITEDATA, $supercurl );
+
+ # if file exists try to continue
+ my $fn = $get_obj->{_opts}->{fname};
+ if ( $fn and -r $fn ) {
+ my $got = (stat(_))[7];
+ #p "File '$fn' already exists, trying to continue at $got";
+ $curl->setopt( CURLOPT_RANGE, "$got-" );
+
+ $supercurl->{fname} = $fn;
+ }
+ delete $get_obj->{is_html};
+ } else {
+ $get_obj->{is_html} = 1;
+ $curl->setopt( CURLOPT_WRITEFUNCTION, \&body_scalar );
+ $curl->setopt( CURLOPT_WRITEDATA, \$supercurl->{body} );
+ }
+
+ $active_curl{ $id } = $supercurl;
+ $curl_multi->add_handle( $curl );
+}
+
+sub file_backup
+{
+ my $fn = shift;
+ return undef if $settings{no_backup};
+ return undef unless -r $fn;
+
+ if ( my $s = $settings{backup_suf} ) {
+ my $i = 1;
+ ++$i while -r $fn . $s . $i;
+ return $fn . $s . $i;
+ }
+
+ my $ext = "";
+ $ext = $1 if $fn =~ s/(\..{3,5})$//;
+ my $i = 1;
+ ++$i while -r "$fn-$i$ext";
+
+ return "$fn-$i$ext";
+}
+
+sub file_init
+{
+ my $supercurl = shift;
+ my $curl = $supercurl->{curl};
+ my $time = time;
+
+ hadd $supercurl,
+ time_start => $time,
+ time_stamp => [ $time, 0, $time, 0, $time, 0 ],
+ size_start => 0,
+ size_got => 0,
+ size_total => 0;
+
+ {
+ my $mime = $curl->getinfo( CURLINFO_CONTENT_TYPE );
+ if ( $mime =~ m#^text/html# ) {
+ $supercurl->{get_obj}->{is_html} = 1;
+ return;
+ }
+ }
+
+ if ( my $f_len = $curl->getinfo( CURLINFO_CONTENT_LENGTH_DOWNLOAD ) ) {
+ $supercurl->{size_total} = $f_len;
+ }
+
+ if ( my $fn = $supercurl->{fname} ) {
+ if ( $supercurl->{head} =~ m{^Content-Range:\s*bytes\s*(\d+)-(\d+)(/(\d+))?\s*$}im ) {
+ my ( $start, $stop ) = ( +$1, +$2 );
+ $supercurl->{size_total} = +$4 if $3;
+
+ my $old = file_backup( $fn );
+ my $old_msg = "";
+ if ( $old ) {
+ rename $fn, $old;
+ copy( $old, $fn ) || die "Cannot create backup file: $!";
+ $old_msg = ", backup saved as '$old'";
+ }
+
+ open my $f_out, '+<', $fn;
+ seek $f_out, $start, SEEK_SET;
+ p "Continuing '$fn' at " . bignum( $start ) . $old_msg;
+
+
+ hadd $supercurl,
+ file => $f_out,
+ size_start => $start,
+ size_got => $start,
+ time_stamp => [ $time, $start, $time, $start, $time, $start ];
+ return;
+ }
+ } elsif ( $supercurl->{head} =~ /^Content-Disposition:\s*attachment;\s*filename\s*=\s*"?(.*?)"?\s*$/im ) {
+ $supercurl->{fname} = de_ml( uri_unescape( $1 ) );
+ } else {
+ my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
+ $eurl =~ s#^.*/##;
+ $supercurl->{fname} = de_ml( uri_unescape( $eurl ) );
+ }
+
+ $supercurl->{get_obj}->set_fname( $supercurl->{fname} );
+
+ {
+ my $fn = $supercurl->{fname};
+ my $old = file_backup( $fn );
+ if ( $old ) {
+ p "'$fn' renamed to '$old'";
+ rename $fn, $old;
+ }
+ open my $f_out, '>', $fn;
+ $supercurl->{file} = $f_out;
+ }
+}
+
+sub body_file
+{
+ my ($chunk, $supercurl) = @_;
+
+ file_init( $supercurl ) unless exists $supercurl->{size_total};
+
+ my $len = length $chunk;
+ $supercurl->{size_got} += $len;
+
+ if ( my $file = $supercurl->{file} ) {
+ my $p = print $file $chunk;
+ die "\nCannot write data: $!\n" unless $p;
+ } else {
+ $supercurl->{body} .= $chunk;
+ }
+
+ return $len;
+}
+
+sub body_scalar
+{
+ my ($chunk, $scalar) = @_;
+ $$scalar .= $chunk;
+ return length $chunk;
+}
+
+
+
+
+sub finish
+{
+ my $id = shift;
+ my $err = shift;
+
+ my $supercurl = $active_curl{ $id };
+ delete $active_curl{ $id };
+
+ my $curl = $supercurl->{curl};
+ delete $supercurl->{curl}; # remove circular dep
+
+ my $get_obj = $supercurl->{get_obj};
+ delete $supercurl->{get_obj};
+
+ if ( $supercurl->{file} ) {
+ close $supercurl->{file};
+ $get_obj->print( donemsg( $supercurl ) );
+ }
+
+ if ( $err ) {
+ my $error = $curl->errbuf;
+ warn "error($err): $error\n";
+ $get_obj->print( "error($err): $error" );
+ if ( $error =~ /Couldn't bind to '(.*)'/ ) {
+ my $if = $1;
+ RSGet::Dispatch::remove_interface( $if, "Interface $if is dead" );
+ $get_obj->{_abort} = "Interface $if is dead";
+ } elsif ( $error =~ /transfer closed with (\d+) bytes remaining to read/ ) {
+ $get_obj->{_abort} = "PARTIAL, " . bignum( $1 ) . " bytes left";
+ }
+ $get_obj->problem();
+ return undef;
+ }
+
+ return unless $get_obj->{after_curl};
+
+ my $func = $get_obj->{after_curl};
+ if ( $supercurl->{file} ) {
+ $get_obj->{dlinfo} = sprintf 'DONE %s %s / %s',
+ $supercurl->{fname},
+ bignum( $supercurl->{size_got} ),
+ bignum( $supercurl->{size_total} );
+ } else {
+ $get_obj->{body} = $supercurl->{body};
+ }
+
+ $get_obj->get_finish( $curl->getinfo( CURLINFO_EFFECTIVE_URL ) );
+}
+
+sub need_run
+{
+ return scalar keys %active_curl;
+}
+
+sub maybe_abort
+{
+ my $time = time;
+ my $stall_time = $time - 120;
+ foreach my $id ( keys %active_curl ) {
+ my $supercurl = $active_curl{ $id };
+ my $get_obj = $supercurl->{get_obj};
+ if ( $get_obj->{_abort} ) {
+ my $curl = $supercurl->{curl};
+ $curl_multi->remove_handle( $curl );
+ finish( $id, "aborted" );
+ }
+ if ( ( $supercurl->{stalled_since} || $time ) < $stall_time ) {
+ my $curl = $supercurl->{curl};
+ $curl_multi->remove_handle( $curl );
+ finish( $id, "timeout" );
+ }
+ }
+}
+
+sub perform
+{
+ my $running = scalar keys %active_curl;
+ return unless $running;
+ my $act = $curl_multi->perform();
+ return if $act == $running;
+
+ while ( my ($id, $rv) = $curl_multi->info_read() ) {
+ next unless $id;
+
+ finish( $id, $rv );
+ }
+}
+
+my $avg_speed = 0;
+sub update_status
+{
+ my $time = time;
+ my $total_speed = 0;
+
+ foreach my $supercurl ( values %active_curl ) {
+ next unless exists $supercurl->{size_total};
+ my ($size_got, $size_total, $time_stamp ) =
+ @$supercurl{ qw(size_got size_total time_stamp) };
+
+ my $size = bignum( $size_got ) . " / " . bignum( $size_total );
+ $size .= sprintf " [%.2f%%]", $size_got * 100 / $size_total
+ if $size_total > 0;
+
+ if ( $time_stamp->[4] + 30 <= $time ) {
+ @$time_stamp[0..3] = @$time_stamp[2..5];
+ $time_stamp->[4] = $time;
+ $time_stamp->[5] = $size_got;
+ }
+
+ my $time_diff = $time - $time_stamp->[0];
+ my $size_diff = $size_got - $time_stamp->[1];
+
+ if ( $time_diff > 0 and $size_diff == 0 ) {
+ $supercurl->{stalled_since} ||= $time;
+ my $stime = s2string( $time - $supercurl->{stalled_since} );
+ $supercurl->{get_obj}->print( "$size; STALLED $stime" );
+ next;
+ }
+
+ my $speed = "???";
+ if ( $time_diff > 0 ) {
+ my $s = $size_diff / ( $time_diff * 1024 );
+ $speed = sprintf "%.2f", $s;
+ $total_speed += $s;
+ }
+
+ my $eta = "";
+ if ( $size_total > 0 and $time_diff > 0 and $size_diff > 0 ) {
+ my $tleft = ($size_total - $size_got) * $time_diff / $size_diff;
+ $eta = " " . s2string( $tleft );
+ delete $supercurl->{stalled_since}
+ }
+
+ $supercurl->{get_obj}->print( "$size; ${speed}KB/s$eta" );
+ }
+ $avg_speed = ($avg_speed * 9 + $total_speed) / 10;
+
+ my $running = scalar keys %active_curl;
+ RSGet::Line::status(
+ 'running cURL' => $running,
+ 'total speed' => ( sprintf '%.2fKB/s', $avg_speed )
+ );
+ return;
+}
+
+sub donemsg
+{
+ my $supercurl = shift;
+
+ my $size_diff = $supercurl->{size_got} - $supercurl->{size_start};
+ my $time_diff = time() - $supercurl->{time_start};
+ $time_diff = 1 unless $time_diff;
+ my $eta = s2string( $time_diff );
+ my $speed = sprintf "%.2f", $size_diff / ( $time_diff * 1024 );
+
+ return "DONE " . bignum( $supercurl->{size_got} ) . "; ${speed}KB/s $eta";
+}
+
+
+1;
+
+# vim:ts=4:sw=4
Added: toys/rsget.pl/RSGet/Dispatch.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/Dispatch.pm Thu Aug 27 15:23:19 2009
@@ -0,0 +1,248 @@
+package RSGet::Dispatch;
+
+use strict;
+use warnings;
+use RSGet::Tools;
+
+our %downloading;
+our %downloaded;
+our %checking;
+our %checked; # HASH for valid, SCALAR if error
+our %resolving;
+our %resolved;
+
+my %working = (
+ get => \%downloading,
+ check => \%checking,
+ link => \%resolving,
+);
+my %finished = (
+ get => \%downloaded,
+ check => \%checked,
+ link => \%resolved,
+);
+
+my @interfaces;
+sub add_interface
+{
+ my $newifs = shift;
+ NEW_IP: foreach my $new_if ( split /[ ,]+/, $newifs ) {
+ foreach my $old_if ( @interfaces ) {
+ if ( $new_if eq $old_if ) {
+ print "Address $new_if already on the list\n";
+ next NEW_IP;
+ }
+ }
+ print "Adding $new_if interface/address\n";
+ push @interfaces, $new_if;
+ }
+}
+
+sub remove_interface
+{
+ my $if = shift;
+ my $reason = shift;
+ for ( my $i = 0; $i < @interfaces; $i++ ) {
+ next unless $interfaces[ $i ] eq $if;
+ my $removed = splice @interfaces, $i, 1;
+ warn "Removed interface '$removed': $reason\n";
+ }
+
+ die "No working interfaces left\n" unless @interfaces;
+}
+
+my %last_used;
+
+sub find_free_if
+{
+ my $pkg = shift;
+ my $working = shift;
+ my $slots = shift;
+
+ unless ( scalar @interfaces ) {
+ my $running = 0;
+ foreach ( values %$working ) {
+ $running++ if $_->{_pkg} eq $pkg
+ }
+ #p "running: $running / $slots";
+ return undef if $running >= $slots;
+ return "";
+ }
+
+ my %by_if = map { $_ => 0 } @interfaces;
+ foreach ( values %$working ) {
+ next unless $_->{_pkg} eq $pkg;
+ $by_if{ $_->{_outif} }++;
+ }
+
+ my $min = $slots;
+ grep { $min = $_ if $_ < $min } values %by_if;
+ return undef if $min >= $slots;
+
+ my $lu = $last_used{$pkg} ||= {};
+ my @min_if = sort {
+ my $_a = $lu->{ $a } || 0;
+ my $_b = $lu->{ $b } || 0;
+ $_a <=> $_b
+ } grep { $by_if{ $_ } <= $min } keys %by_if;
+ return $min_if[ 0 ];
+}
+
+sub mark_used
+{
+ my $obj = shift;
+ my $if = $obj->{_outif};
+ return unless $if;
+ my $pkg = $obj->{_pkg};
+ my $lu = $last_used{$pkg} ||= {};
+ $lu->{$if} = time;
+}
+
+sub is_error
+{
+ my $uri = shift;
+ my $c = $checked{ $uri };
+ return undef unless defined $c;
+ if ( $c and not ref $c ) {
+ return $c;
+ }
+ return 0;
+}
+sub is_ok
+{
+ my $uri = shift;
+ my $c = $checked{ $uri };
+ return undef unless defined $c;
+ if ( $c and ref $c and ref $c eq "HASH" ) {
+ return $c;
+ }
+ return 0;
+}
+
+sub finished
+{
+ my $obj = shift;
+ my $status = shift;
+
+ my ( $uri, $cmd ) = @$obj{ qw(_uri _cmd) };
+ my $working = $working{ $cmd };
+ delete $working->{ $uri };
+
+ if ( $status ) {
+ my $finished = $finished{ $cmd };
+ $finished->{ $uri } = $status;
+ }
+
+ $RSGet::FileList::reread = 1;
+}
+
+sub run
+{
+ my ( $cmd, $uri, $getter, $options ) = @_;
+ my $class = $getter->{class};
+ $cmd = "link" if $class eq "Link";
+ #p "run( $cmd, $uri, ... )";
+
+ my $finished = $finished{ $cmd };
+ my $f = $finished->{ $uri };
+ return $f if defined $f;
+ #p "-> not finished";
+
+ my $working = $working{ $cmd };
+ my $w = $working->{ $uri };
+ return $w if defined $w;
+ #p "-> not working";
+
+ my $pkg = $getter->{pkg};
+ my $outif = find_free_if( $pkg, $working, ($cmd eq "get" ? ($getter->{slots} || 1) : 5) );
+ return unless defined $outif;
+ #p "-> got if";
+
+ my $obj = RSGet::Get::new( $pkg, $cmd, $uri, $options, $outif );
+ $working->{ $uri } = $obj if $obj;
+ #p "run( $cmd, $uri, ... ) -> $obj" if $obj;
+
+ $RSGet::FileList::reread = 1;
+
+ return $obj;
+}
+
+sub process
+{
+ my $getlist = shift;
+
+ my %num_by_pkg;
+ my %all_uris;
+ foreach my $line ( @$getlist ) {
+ my ( $uris, $options ) = @$line;
+ foreach my $uri ( keys %$uris ) {
+ my $getter = $uris->{ $uri };
+ $all_uris{ $uri } = 1;
+ my $pkg = $getter->{pkg};
+ $num_by_pkg{ $pkg } ||= 0;
+ $num_by_pkg{ $pkg }++;
+ }
+ }
+ abort_missing( \%all_uris, $_ ) foreach values %working;
+ RSGet::Line::status(
+ 'to download' => scalar @$getlist,
+ 'downloading' => scalar keys %downloading,
+ 'resolving links' => scalar keys %resolving,
+ 'checking URIs' => scalar keys %checking,
+ );
+
+ my $all_valid = 1;
+ foreach my $line ( @$getlist ) {
+ my ( $uris, $options ) = @$line;
+ foreach my $uri ( keys %$uris ) {
+ my $getter = $uris->{ $uri };
+ my $ok = is_ok( $uri );
+ #p "$uri - $ok";
+ if ( not defined $ok ) {
+ run( "check", $uri, $getter, $options );
+ $all_valid = 0;
+ } elsif ( not $ok ) {
+ $all_valid = 0;
+ }
+ }
+
+ next unless $all_valid;
+
+ foreach my $uri ( sort {
+ my $a_pkg = $uris->{ $a }->{pkg};
+ my $b_pkg = $uris->{ $b }->{pkg};
+ $num_by_pkg{ $a_pkg } <=> $num_by_pkg{ $b_pkg }
+ } keys %$uris ) {
+ my $getter = $uris->{ $uri };
+ last if run( "get", $uri, $getter, $options );
+ }
+ }
+}
+
+sub abort_missing
+{
+ my $all = shift;
+ my $running = shift;
+ foreach ( keys %$running ) {
+ next if exists $all->{$_};
+ my $obj = $running->{$_};
+ $obj->{_abort} = "Removed from the list!";
+ }
+}
+
+sub done
+{
+ my $uri = shift;
+ my $getter = shift;
+
+ my $class = $getter->{class};
+ my $cmd = $class eq "Link" ? "link" : "get";
+
+ my $f = $finished{ $cmd }->{ $uri };
+ return $f if defined $f;
+ return undef;
+}
+
+1;
+
+# vim:ts=4:sw=4
Added: toys/rsget.pl/RSGet/FileList.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/FileList.pm Thu Aug 27 15:23:19 2009
@@ -0,0 +1,212 @@
+package RSGet::FileList;
+
+use strict;
+use warnings;
+use URI::Escape;
+use RSGet::Tools;
+our $file = 'get.list';
+our $reread = 1;
+our %uri_options; # options to be saved
+
+sub need_update
+{
+ $reread = 1;
+}
+
+sub words
+{
+ my $pre = shift;
+ my $before = shift;
+ return () unless @_;
+ my $line = "";
+ my $lline = $pre . $before . shift;
+ foreach ( @_ ) {
+ if ( length $lline . $_ > 76 ) {
+ $line .= "$lline \\\n";
+ $lline = $pre;
+ }
+ $lline .= " " . $_;
+ }
+
+ $lline = $line.$lline if $line;
+ return $lline."\n";
+}
+
+
+sub h2a($)
+{
+ my $h = shift;
+ return map { $_ . "=" . uri_escape( $h->{$_} ) } sort keys %$h;
+}
+
+sub getter
+{
+ my $uri = shift;
+ my @g = grep { $uri =~ m/^http:\/\/(:?www\.)?$_->{uri}/ } values %getters;
+ return undef unless @g;
+ return $g[0];
+}
+
+my @added_text;
+sub add_text
+{
+ my $type = shift;
+ my $text = shift;
+
+ return unless $text;
+ if ( $type eq "links" ) {
+ my @words = split /\s/s, $text;
+
+ foreach ( @words ) {
+ next unless m{^(http://)?(.*?)$};
+ my $proto = $1 || "http://";
+ my $uri = $proto . $2;
+ push @added_text, $uri . "\n" if getter($uri);
+ }
+
+ $reread = 2;
+ } elsif ( $type eq "comment" ) {
+ foreach ( split /\n/, $text ) {
+ s/\s+$//;
+ push @added_text, "# $_\n";
+ }
+ }
+ return \@added_text;
+}
+
+my $listmtime = 0;
+sub readlist
+{
+ return unless -r $file;
+ my $mtime = (stat _)[9];
+ return unless $reread or $mtime != $listmtime;
+ #p "readlist()";
+
+ my @getlist;
+ my @newlist;
+ open my $list, '<', $file;
+ while ( my $line = <$list> ) {
+ chomp $line;
+ if ( $line =~ /^\s*(#.*)?$/ ) { # comments and empty lines
+ push @newlist, $line . "\n";
+ next;
+ } elsif ( $line =~ /^__END__\s*$/ ) { # end of list
+ push @newlist, $line . "\n";
+ push @newlist, <$list>;
+ last;
+ }
+ while ( $line =~ s/\\$/ / ) { # stitch broken lines together
+ $line .= <$list>;
+ chomp $line;
+ }
+
+ $line =~ s/^\s+//;
+ $line =~ s/\s+$//;
+
+
+ my %uris;
+ my %options;
+ my @invalid;
+ my @invalid_uri;
+
+ # split line into words
+ foreach ( split /\s+/, $line ) {
+ if ( /^([a-z_]+)=(.*)$/ ) {
+ $options{$1} = uri_unescape( $2 );
+ } elsif ( m{^(http://)?(.*?)$} ) {
+ my $proto = $1 || "http://";
+ my $uri = $proto . $2;
+ if ( my $getter = getter($uri) ) {
+ $uris{ $uri } = $getter;
+ } elsif ( $uri =~ m{.+\.[a-z]{2,4}/.+} ) {
+ push @invalid_uri, $uri;
+ } else {
+ push @invalid, $_;
+ }
+ } else {
+ push @invalid, $_;
+ }
+ }
+
+ if ( not scalar keys %uris ) {
+ push @newlist, words(
+ "# ", "invalid line: ",
+ @invalid, @invalid_uri, h2a( \%options ),
+ );
+ next;
+ } elsif ( @invalid ) {
+ push @newlist, words(
+ "# ", "invalid words: ",
+ @invalid, @invalid_uri
+ );
+ } elsif ( @invalid_uri ) {
+ push @newlist, words(
+ "# ", "invalid uri: ",
+ @invalid_uri,
+ );
+ }
+
+ foreach my $uri ( sort keys %uris ) {
+ my $error = RSGet::Dispatch::is_error( $uri );
+ next unless $error;
+ delete $uris{ $uri };
+ push @newlist, "# $error:\n# $uri\n";
+ }
+
+ unless ( keys %uris ) {
+ push @newlist, words(
+ "#", "", h2a( \%options )
+ ) if keys %options;
+ next;
+ }
+
+ foreach my $uri ( sort keys %uris ) {
+ hadd \%options, %{$uri_options{ $uri }} if $uri_options{ $uri };
+ }
+
+ my $status;
+ foreach my $uri ( sort keys %uris ) {
+ next unless $status = RSGet::Dispatch::done( $uri, $uris{ $uri } );
+ $uri = "*" . $uri;
+ if ( ref $status and ref $status eq "ARRAY" ) {
+ push @newlist, words(
+ "#", " Link: ",
+ (sort keys %uris), h2a( \%options )
+ );
+ push @newlist, words( '', '', @$status );
+ } else {
+ push @newlist, words(
+ "# ", "$status:\n# ",
+ (sort keys %uris), h2a( \%options )
+ );
+ }
+ $reread = 2;
+ last;
+ }
+ next if $status;
+
+ push @newlist, words( '', '', (sort keys %uris), h2a( \%options ) );
+
+ push @getlist, [ \%uris, \%options ];
+ }
+ close $list;
+
+ unless ( -e ".${file}.swp" ) {
+ open my $newlist, '>', $file . ".tmp";
+ print $newlist @newlist;
+ print $newlist @added_text;
+ @added_text = ();
+ close $newlist || die "\nCannot update $file file: $!\n";
+ unlink $file;
+ rename $file . ".tmp", $file;
+ }
+
+ $reread = $reread == 2 ? 1 : 0;
+ $listmtime = (stat $file)[9];
+
+ return \@getlist;
+}
+
+1;
+
+# vim:ts=4:sw=4
Added: toys/rsget.pl/RSGet/Get.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/Get.pm Thu Aug 27 15:23:19 2009
@@ -0,0 +1,301 @@
+package RSGet::Get;
+
+use strict;
+use warnings;
+use RSGet::Tools;
+use URI;
+
+my %cookies;
+sub make_cookie
+{
+ my $c = shift;
+ return () unless $c;
+ $cookies{ $c } = 1 unless $cookies{ $c };
+ my $n = $cookies{ $c }++;
+
+ local $_ = ".cookie.$c.$n.txt";
+ unlink $_ if -e $_;
+ return _cookie => $_;
+}
+
+
+sub new
+{
+ my ( $pkg, $cmd, $uri, $options, $outif ) = @_;
+ my $getter = $getters{ $pkg };
+
+ my $self = {
+ _uri => $uri,
+ _opts => $options,
+ _try => 0,
+ _cmd => $cmd,
+ _pkg => $pkg,
+ _outif => $outif,
+ make_cookie( $getter->{cookie} ),
+ };
+ bless $self, $pkg;
+
+ if ( $cmd eq "get" ) {
+ my $outifstr = $outif ? "[$outif]" : "";
+ hadd $self,
+ _line => new RSGet::Line( "[$getter->{short}]$outifstr " ),
+ _name => $options->{fname} || ($uri =~ m{([^/]+)/*$})[0];
+ $self->print( "start" );
+ }
+
+ $self->start();
+ return $self;
+}
+
+sub DESTROY
+{
+ my $self = shift;
+ if ( my $c = $self->{_cookie} ) {
+ unlink $c;
+ }
+}
+
+sub print
+{
+ my $self = shift;
+ my $text = shift;
+ my $line = $self->{_line};
+ return unless $line;
+ $line->print( $self->{_name} . ": " . $text );
+}
+
+sub get
+{
+ my $self = shift;
+ $self->{after_curl} = shift;
+ my $uri = shift;
+
+ $uri = URI->new( $uri )->abs( $self->{_referer} )->as_string
+ if $self->{_referer};
+
+ RSGet::Curl::new( $uri, $self, @_ );
+}
+
+sub get_finish
+{
+ my $self = shift;
+ $self->{_referer} = shift;
+
+ my $func = $self->{after_curl};
+ $_ = $self->{body};
+ &$func( $self );
+}
+
+sub download
+{
+ my $self = shift;
+ my $uri = shift;
+ $self->print("starting download");
+ $self->get( \&finish, $uri, save => 1, @_ );
+}
+
+sub restart
+{
+ my $self = shift;
+ my $time = shift || 1;
+ my $msg = shift || "restarting";
+
+ return $self->wait( \&start, $time, $msg );
+}
+
+sub multi
+{
+ my $self = shift;
+ return $self->wait( 60 + 240 * rand, \&start, "multi-download not allowed, waiting" );
+}
+
+sub finish
+{
+ my $self = shift;
+
+ if ( $self->{is_html} ) {
+ $self->print( "is HTML" );
+ $_ = $self->{body};
+ return $self->stage_is_html();
+ }
+
+ RSGet::Dispatch::mark_used( $self );
+ RSGet::Dispatch::finished( $self, $self->{dlinfo} );
+}
+
+sub abort
+{
+ my $self = shift;
+ $self->print( $self->{_abort} || "aborted" );
+ RSGet::Dispatch::finished( $self );
+}
+
+sub error
+{
+ my $self = shift;
+ my $msg = shift;
+ if ( $self->{body} ) {
+ my $n = 0;
+ my $name;
+ do {
+ $name = "errorlog." . (++$n) . ".html";
+ } while ( -r $name );
+ open ERR_OUT, '>', $name;
+ print ERR_OUT $self->{body};
+ close ERR_OUT;
+
+ $msg .= "; saved $name";
+ }
+
+ $self->print( $msg );
+ RSGet::Dispatch::finished( $self, $msg );
+}
+
+sub start
+{
+ my $self = shift;
+ $self->clean();
+ return $self->stage0();
+}
+
+sub problem
+{
+ my $self = shift;
+ my $line = shift;
+ my $msg = $line ? "problem at line: $line" : "unknown problem";
+ if ( ++$self->{_try} < 8 ) {
+ return $self->wait( \&start, 2 ** $self->{_try}, $msg . ", waiting" );
+ } else {
+ return $self->error( $msg . ", aborting" );
+ }
+}
+
+sub clean
+{
+ my $self = shift;
+ foreach ( keys %$self ) {
+ delete $self->{$_} unless /^_/;
+ }
+ delete $self->{_referer};
+}
+
+sub info
+{
+ my $self = shift;
+ my %info = @_;
+ $info{name} = de_ml( $info{name} );
+ $info{kilo} ||= 1024;
+
+ $self->{_name} = $self->{_opts}->{fname} || $info{name};
+ return 0 unless $self->{_cmd} eq "check";
+ #p "info( $self->{_uri} ): $info{name}, $info{size}\n";
+ RSGet::Dispatch::finished( $self, \%info );
+ return 1;
+}
+
+sub search
+{
+ my $self = shift;
+ my %search = @_;
+
+ foreach my $name ( keys %search ) {
+ my $search = $search{$name};
+ if ( m/$search/ ) {
+ $self->{$name} = $1;
+ } else {
+ $self->problem( "Can't find '$name': $search" );
+ return 1;
+ }
+ }
+ return 0;
+}
+
+sub link
+{
+ my $self = shift;
+ my $links = [ @_ ];
+ RSGet::Dispatch::finished( $self, $links );
+ return 1;
+}
+
+sub set_fname
+{
+ my $self = shift;
+ my $fname = shift;
+ $self->{_name} = $fname;
+
+ my $opts = $RSGet::FileList::uri_options{ $self->{_uri} } ||= {};
+ hadd $opts,
+ fname => $fname;
+
+ $RSGet::FileList::reread = 1;
+}
+
+my %waiting;
+sub wait
+{
+ my $self = shift;
+ my $next_stage = shift;
+ my $wait = shift() + int rand 10;
+ my $msg = shift || "???";
+
+ my $time = time;
+ delete $self->{wait_until_should};
+
+ my $rnd_wait = int rand ( 5 * 60 ) + 2 * 60;
+ if ( $wait > $rnd_wait + 1 * 60 ) {
+ $self->{wait_until_should} = $time + $wait;
+ $wait = $rnd_wait;
+ }
+
+ $self->{wait_next} = $next_stage;
+ $self->{wait_msg} = $msg;
+ $self->{wait_until} = $time + $wait;
+
+ my $id = 0;
+ ++$id while exists $waiting{ $id };
+ $waiting{ $id } = $self;
+}
+
+sub wait_finish
+{
+ my $self = shift;;
+
+ delete $self->{body};
+ $_ = undef;
+
+ my $func = $self->{wait_next};
+ &$func( $self );
+}
+
+sub wait_update
+{
+ my $time = time;
+
+ foreach my $id ( keys %waiting ) {
+ my $obj = $waiting{ $id };
+ my $left = $obj->{wait_until} - $time;
+ if ( $left <= 0 ) {
+ delete $waiting{ $id };
+ $obj->print( $obj->{wait_msg} . "; done waiting" );
+ wait_finish( $obj );
+ } elsif ( $obj->{_abort} ) {
+ delete $waiting{ $id };
+ $obj->abort();
+ } else {
+ if ( $obj->{wait_until_should} ) {
+ $obj->print( sprintf "%s; should wait %s, retrying in %s",
+ $obj->{wait_msg},
+ s2string( $obj->{wait_until_should} - $time),
+ s2string( $left ) );
+ } else {
+ $obj->print( $obj->{wait_msg} . "; waiting " . s2string( $left ) );
+ }
+ }
+ }
+ RSGet::Line::status( 'waiting' => scalar keys %waiting );
+}
+
+1;
+
+# vim:ts=4:sw=4
Added: toys/rsget.pl/RSGet/Line.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/Line.pm Thu Aug 27 15:23:19 2009
@@ -0,0 +1,192 @@
+package RSGet::Line;
+use strict;
+use warnings;
+use RSGet::Tools;
+use Term::Size;
+
+$| = 1;
+
+my %active;
+my %dead;
+my %status;
+our @active;
+our @dead;
+my $last_line = 0;
+
+my $last_day = -1;
+sub print_dead_lines
+{
+ my @l = localtime;
+ my $time = sprintf "[%.2d:%.2d:%.2d] ", @l[(2,1,0)];
+
+ my @print;
+
+ if ( $last_day != $l[3] ) {
+ $last_day = $l[3];
+ my $date = sprintf "[Actual date: %d-%.2d-%.2d]", $l[5] + 1900, $l[4] + 1, $l[3];
+ push @print, "\r" . $date . "\033[J\n";
+ push @dead, $date;
+ }
+
+
+ foreach my $key ( sort { $a <=> $b } keys %dead ) {
+ my $line = $dead{$key};
+ my $text = $line->[0] . $line->[1];
+ $text = $time . $text if $text =~ /\S/;
+
+ push @print, "\r" . $text . "\033[J\n";
+ push @dead, $text;
+ }
+
+ print @print;
+
+ my $max = 1000;
+ if ( scalar @dead > $max ) {
+ splice @dead, 0, $max - scalar @dead;
+ }
+
+ %dead = ();
+}
+
+sub print_status_lines
+{
+ my $columns = shift();
+ my $horiz = "-" x ($columns - 4);
+
+ my $date = "< ".isotime()." >";
+ my $date_l = length $date;
+ my $h = $horiz;
+ substr $h, int( (length($horiz) - $date_l ) / 2 ), $date_l, $date;
+ @active = ( " \\$h/ " );
+
+ my @status = ( "rsget.pl -- " );
+ foreach my $name ( sort keys %status ) {
+ my $value = $status{$name};
+ next unless $value;
+ my $s = "$name: $value; ";
+ if ( length $status[ $#status ] . $s > $columns - 5 ) {
+ push @status, $s;
+ } else {
+ $status[ $#status ] .= $s;
+ }
+ }
+ foreach ( @status ) {
+ my $l = " |" . ( " " x ($columns - 4 - length $_ )) . $_ . "| ";
+ push @active, $l;
+ }
+ push @active, " /$horiz\\ ";
+ my @print = map { "\r\n$_\033[K" } @active;
+ print @print;
+ return scalar @print;
+}
+
+
+sub print_active_lines
+{
+ my $columns = shift;
+ my @print;
+
+ foreach my $key ( sort { $a <=> $b } keys %active ) {
+ my $line = $active{$key};
+
+ my $text = $line->[1];
+ my $tl = length $line->[0] . $text;
+ substr $text, 4, $tl - $columns + 3, '...'
+ if $tl > $columns;
+ push @print, "\r\n" . $line->[0] . $text . "\033[K";
+ push @active, $line->[0] . $line->[1];
+ }
+
+ print @print;
+ return scalar @print;
+}
+
+sub print_all_lines
+{
+ my ( $columns, $rows ) = Term::Size::chars;
+ my $added = 0;
+ print_dead_lines();
+ $added += print_status_lines( $columns );
+ $added += print_active_lines( $columns );
+ return $added;
+}
+
+sub update
+{
+ my $added = print_all_lines();
+ print "\033[J\033[" . $added . "A\r" if $added;
+}
+
+sub new
+{
+ my $class = shift;
+ my $head = shift;
+ my $text = shift;
+ $head = "" unless defined $head;
+
+ my $line = "" . ($last_line++);
+ $active{ $line } = [ $head, "" ];
+
+ my $self = \$line;
+ bless $self, $class;
+ $self->print( $text );
+
+ return $self;
+}
+
+sub print
+{
+ my $self = shift;
+ my $line = $$self;
+ my $text = shift;
+ $text = "" unless defined $text;
+ $text =~ s/\n+$//sg;
+ $text =~ s/\n/ /sg;
+ $active{ $line }->[1] = $text;
+
+ return length $text;
+}
+
+sub DESTROY
+{
+ my $self = shift;
+ my $line = $$self;
+ $dead{ $line } = $active{ $line };
+ delete $active{ $line };
+}
+
+sub status
+{
+ hadd( \%status, @_ );
+}
+
+$SIG{INT} = sub {
+ print_all_lines();
+ print "\nTERMINATED\n";
+ exit 0;
+};
+
+$SIG{WINCH} = sub {
+ print "\033[2J\033[1;1H\n";
+ my ( $columns, $rows ) = Term::Size::chars;
+ my $start = $#dead - $rows;
+ $start = 0 if $start < 0;
+ print join( "\n", @dead[($start..$#dead)] ), "\n";
+ update();
+};
+
+$SIG{__WARN__} = sub {
+ new RSGet::Line( "WARNING: ", shift );
+ update();
+};
+
+$SIG{__DIE__} = sub {
+ print_all_lines();
+ print "\n";
+ print "DIED: ", shift, "\n\n";
+ exit 1;
+};
+
+1;
+
+# vim:ts=4:sw=4
Added: toys/rsget.pl/RSGet/MicroHTTP.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/MicroHTTP.pm Thu Aug 27 15:23:19 2009
@@ -0,0 +1,254 @@
+package RSGet::MicroHTTP;
+
+use strict;
+use warnings;
+use IO::Socket;
+use RSGet::Tools;
+
+my @template = <DATA>;
+our %data = (
+ last_lines => '',
+ status => '',
+ dl_list => '',
+);
+
+sub new
+{
+ my $class = shift;
+ my $port = shift;
+ my $socket = IO::Socket::INET->new(
+ Proto => 'tcp',
+ LocalPort => $port,
+ Listen => SOMAXCONN,
+ Reuse => 1,
+ Blocking => 0,
+ ) || return undef;
+
+ my $self = \$socket;
+ return bless $self, $class;
+}
+
+sub perform
+{
+ my $self = shift;
+ my $socket = $$self;
+
+ my @ret;
+
+ my $client = $socket->accept();
+ return () unless $client;
+
+ u_last_lines();
+ u_status();
+ u_dl_list();
+
+ push @ret, request( $client );
+
+ for ( my $i = 0; $i < 5; $i++ ) {
+ my $client = $socket->accept() or last;
+ push @ret, request( $client );
+ }
+
+ return @ret;
+}
+
+my $actual_client;
+
+sub request
+{
+ my $client = shift;
+ $actual_client = $client;
+ local $SIG{ALRM} = sub {
+ print $actual_client "Close !\n";
+ die "HTTP Frozen !\n";
+ };
+ alarm 5; # XXX: this must be fixed
+ my $request = <$client>;
+ unless ( $request ) {
+ close $client;
+ alarm 0;
+ return;
+ }
+ chomp $request;
+
+ my( $method, $file, $ignore ) = split /\s+/, $request;
+ p "HTTP request: $method: $file";
+
+ my $len = 0;
+ while ( $_ = <$client> ) {
+ $len = $1 if /^Content-Length:\s*(\d+)/i;
+ last if /^\s*$/;
+ }
+ if ( $len and $method =~ /^POST$/i ) {
+ my $r;
+ $client->read( $r, $len );
+ foreach ( split /&/, $r ) {
+ s/^(.*?)=//;
+ my $key = $1;
+ tr/+/ /;
+ s/%(..)/chr hex $1/eg;
+ RSGet::FileList::add_text( $key, $_ );
+ }
+ }
+
+ print $client "HTTP/1.1 200 OK\r\n";
+ print $client "Content-Type: text/html; charset=utf-8\r\n";
+ print $client "\r\n";
+ foreach my $line ( @template ) {
+ local $_ = $line;
+ s/\${([a-z_]+)}/$data{$1}/g;
+ print $client $_;
+ }
+ close $client;
+ alarm 0;
+
+ return 1;
+}
+
+sub u_last_lines
+{
+ my $out = "";
+ foreach my $line ( @RSGet::Line::dead ) {
+ local $_ = $line;
+ s/&/&/g;
+ s/</</g;
+ s/>/>/g;
+ s#(^|\s)(http://\S*)#$1<a href="$2">$2</a>#g;
+ $out .= "<li>$_</li>\n";
+ }
+ $data{last_lines} = $out;
+ return;
+}
+
+sub u_status
+{
+ my $out = "";
+ foreach my $line ( @RSGet::Line::active ) {
+ local $_ = $line;
+ s/&/&/g;
+ s/</</g;
+ s/>/>/g;
+ s#(^|\s)(http://\S*)#$1<a href="$2">$2</a>#g;
+ $out .= "<li>$_</li>\n";
+ }
+ $data{status} = $out;
+ return;
+}
+
+sub u_dl_list
+{
+ unless ( -r $RSGet::FileList::file ) {
+ $data{dl_list} = '<li></li>';
+ }
+
+ my $out = "";
+ open my $list, '<', $RSGet::FileList::file;
+ while ( $_ = <$list> ) {
+ chomp;
+ my $class = "";
+ $class = ' class="comment"' if /^\s*#/;
+ s/&/&/g;
+ s/</</g;
+ s/>/>/g;
+ s{(^|\s|#)(http://\S*)}{$1<a href="$2">$2</a>}g;
+ $out .= "<li$class>$_</li>\n";
+ }
+ close $list;
+
+ $data{dl_list} = $out;
+ return;
+}
+1;
+
+__DATA__
+<html>
+<head>
+ <title>rsget.pl</title>
+<style>
+html {
+ background: #333;
+}
+body {
+ width: 900px;
+ margin: 10px;
+ margin-left: auto;
+ margin-right: auto;
+ border: 10px solid #555;
+ padding: 5px;
+ background: #777;
+ font-family: monospace;
+}
+fieldset {
+ border: 10px solid #999;
+ padding: 5px;
+ margin: 5px;
+ background: #bbb;
+}
+input, textarea {
+ border: 10px solid #ddd;
+ padding: 5px;
+ margin: 5px;
+ background: #fff;
+}
+input {
+ width: 150px;
+ margin-left: 700px;
+}
+legend {
+ border: 10px solid #999;
+ border-top: 0;
+ border-bottom: 0;
+ background: #bbb;
+}
+ul {
+ border: 10px solid #ddd;
+ padding: 5px;
+ margin: 5px;
+ background: #fff;
+ list-style: none;
+}
+li:first-child {
+ border-top: 0;
+}
+li {
+ border-top: 2px solid #ddd;
+ white-space: pre;
+}
+li.comment {
+ color: #00F;
+}
+a, a:visited {
+ color: inherit;
+}
+</style>
+</head>
+<body>
+ <fieldset>
+ <legend>Last lines</legend>
+ <ul>${last_lines}</ul>
+ </fieldset>
+
+ <fieldset>
+ <legend>Status</legend>
+ <ul>${status}</ul>
+ </fieldset>
+
+ <fieldset>
+ <legend>Download list</legend>
+ <ul>${dl_list}</ul>
+ </fieldset>
+
+ <form action="" method="post">
+ <fieldset>
+ <legend>Extract links from text</legend>
+ <textarea cols="100" rows="16" name="links"></textarea>
+ <input type="submit" value="OK" />
+ </fieldset>
+ <fieldset>
+ <legend>Append whole text to download list</legend>
+ <textarea cols="100" rows="16" name="text"></textarea>
+ <input type="submit" value="OK" />
+ </fieldset>
+ </form>
+</body>
+</html>
Added: toys/rsget.pl/RSGet/Processor.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/Processor.pm Thu Aug 27 15:23:19 2009
@@ -0,0 +1,260 @@
+package RSGet::Processor;
+
+use strict;
+use warnings;
+use RSGet::Tools;
+
+my $options = "name|short|uri|slots|cookie|status";
+my $parts = "pre|start|perl";
+
+my $processed = "";
+sub pr(@)
+{
+ my $line = join "", @_;
+ $processed .= $line;
+ return length $line;
+}
+
+my $is_sub = 0;
+sub p_sub
+{
+ my $sub = shift;
+ pr "sub $sub {\n";
+ pr "\tmy \$self = shift;\n";
+ foreach ( @_ ) {
+ pr "\t$_;\n";
+ }
+ $is_sub++;
+}
+sub p_subend
+{
+ return unless $is_sub;
+ $is_sub--;
+ pr "\treturn \${self}->error( 'file is a html page' );\n}\n";
+}
+
+my $space;
+sub p_ret
+{
+ my $ret = shift;
+ my @opts = @_;
+ pr $space . "return \${self}->${ret}( ";
+ pr join( ", ", @opts ) . ", " if @opts;
+}
+
+sub p_line
+{
+ s/\$-{/\$self->{/g;
+ pr $_ . "\n";
+}
+
+
+sub read_file
+{
+ my $class = shift;
+ my $file = shift;
+
+ open F_IN, '<', $file;
+
+ my %opts;
+ my %parts = (
+ pre => [],
+ start => [],
+ perl => [],
+ );
+ my $part = undef;
+ while ( <F_IN> ) {
+ chomp;
+ next unless length;
+ next if /^\s*#/;
+
+ if ( $part ) {
+ unless ( /^\S+/ ) {
+ push @{$parts{$part}}, $_;
+ next;
+ }
+ if ( $part eq "perl" ) {
+ push @{$parts{perl}}, $_, <F_IN>;
+ last;
+ } elsif ( $part eq "start" and /^stage_.*?:/ ) {
+ push @{$parts{start}}, $_;
+ next;
+ }
+ $part = undef;
+ }
+
+ if ( /^($parts)\s*:/ ) {
+ $part = $1;
+ } elsif ( /^($options)\s*:\s+(.*)$/ ) {
+ $opts{$1} = $2;
+ }
+ }
+
+ close F_IN;
+ unless ( scalar @{$parts{start}} ) {
+ p "Can't find 'start:'\n";
+ return undef;
+ }
+ foreach ( qw(name short uri) ) {
+ next if $opts{$_};
+ p "Can't find '$_:'\n";
+ return undef;
+ }
+ $file =~ m{.*/(.*?)$};
+ my $fname = $1;
+ if ( $fname ne $opts{name} ) {
+ p "Name field: '$opts{name}' differs from file name: '$fname'\n";
+ return undef;
+ }
+ if ( $opts{status} and $opts{status} !~ /^OK(\s+.*)?$/ ) {
+ p "Marked as '$opts{status}'\n";
+ return undef;
+ }
+
+ $processed = "";
+ $space = "";
+ $is_sub = 0;
+
+ $opts{uri} = eval $opts{uri};
+ $opts{class} = ${class};
+ $opts{pkg} = "${class}::$opts{name}";
+
+ pr "package $opts{pkg};\n\n";
+ pr <<'EOF';
+ use strict;
+ use warnings;
+ use RSGet::Get;
+ use RSGet::Tools;
+
+ BEGIN {
+ our @ISA;
+ @ISA = qw(RSGet::Get);
+ }
+EOF
+
+ pr join "\n", @{$parts{pre}}, "\n";
+
+ my $stage = 0;
+ p_sub( "stage0" );
+ my @machine = @{$parts{start}};
+ while ( $_ = shift @machine ) {
+ s/^(\s+)//;
+ $space = $1;
+
+ if ( s/^GET\s*\(// ) {
+ my $next_stage = "stage" . ++$stage;
+ my @skip;
+ push @skip, $_;
+ until ( /;\s*$/ ) {
+ $_ = shift @machine;
+ push @skip, $_;
+ }
+ if ( $machine[0] =~ s/^(stage_.*?):\s*$// ) {
+ $next_stage = $1;
+ shift @machine;
+ }
+ p_ret( "get", "\\&$next_stage" );
+ foreach ( @skip ) {
+ p_line();
+ }
+ p_subend();
+ p_sub( $next_stage );
+ } elsif ( s/^GET_NEXT\s*\(\s*(.*?)\s*,// ) {
+ my $next_stage = $1;
+ p_ret( "get", "\\&$1" );
+ p_line();
+ } elsif ( s/^ERROR\s*\(// ) {
+ p_ret( "error" );
+ p_line();
+ } elsif ( s/^INFO\s*\(// ) {
+ pr $space . 'return "info" if $self->info( ';
+ p_line();
+ } elsif ( s/^SEARCH\s*\(// ) {
+ pr $space . 'return if $self->search( ';
+ p_line();
+ } elsif ( s/^WAIT\s*\(// ) {
+ my $next_stage = "stage" . ++$stage;
+ my @skip;
+ push @skip, $_;
+ until ( /;\s*$/ ) {
+ $_ = shift @machine;
+ push @skip, $_;
+ }
+ if ( $machine[0] =~ s/^(stage_.*?):\s*$// ) {
+ $next_stage = $1;
+ shift @machine;
+ }
+ p_ret( "wait", "\\&$next_stage" );
+ foreach ( @skip ) {
+ p_line();
+ }
+ p_subend();
+ p_sub( $next_stage );
+ } elsif ( s/^WAIT_NEXT\s*\(\s*(.*?)\s*,// ) {
+ my $next_stage = $1;
+ p_ret( "wait", "\\&$next_stage" );
+ p_line();
+ } elsif ( s/^RESTART\s*\(\s*// ) {
+ p_ret( "restart" );
+ p_line();
+ } elsif ( s/^DOWNLOAD\s*\(\s*// ) {
+ p_ret( "download" );
+ p_line();
+ until ( /;\s*$/ ) {
+ $_ = shift @machine;
+ p_line();
+ }
+ p_subend();
+ p_sub( "stage_is_html" );
+ } elsif ( s/^LINK\s*\(\s*// ) {
+ p_ret( "link" );
+ p_line();
+ until ( /;\s*$/ ) {
+ $_ = shift @machine;
+ p_line();
+ }
+ p_subend();
+ } elsif ( s/^MULTI\s*\(// ) {
+ p_ret( "multi" );
+ p_line();
+ } elsif ( s/^!\s+// ) {
+ my $line = quotemeta $_;
+ pr $space . 'return $self->problem( "'. $line .'" ) unless ';
+ p_line();
+ } else {
+ pr $space;
+ p_line();
+ }
+ }
+ p_subend();
+
+ pr @{$parts{perl}};
+ pr "1;";
+
+ my $ret;
+ {
+ local $SIG{__DIE__} = undef;
+ $ret = eval $processed;
+ }
+
+ if ( $@ ) {
+ p "Error(s): $@\n";
+ my $err = $@;
+ return undef unless $err =~ /line \d+/;
+ my @p = split /\n/, $processed;
+ for ( my $i = 0; $i < scalar @p; $i++ ) {
+ my $n = $i + 1;
+ p sprintf "%s%4d: %s\n",
+ ($err =~ /line $n[^\d]/ ? "!" : " "),
+ $n,
+ $p[ $i ];
+ }
+ return undef;
+ }
+
+ return $opts{pkg} => \%opts if $ret and $ret == 1;
+ return ();
+}
+
+1;
+# vim:ts=4:sw=4
Added: toys/rsget.pl/RSGet/Tools.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/Tools.pm Thu Aug 27 15:23:19 2009
@@ -0,0 +1,97 @@
+package RSGet::Tools;
+
+use strict;
+use warnings;
+use vars qw(@ISA @EXPORT @EXPORT_OK);
+
+require Exporter;
+ at ISA = qw(Exporter);
+ at EXPORT = qw(s2string bignum de_ml hadd hprint p isotime require_prog
+ randomize %getters %settings);
+ at EXPORT_OK = qw();
+
+our %settings;
+our %getters;
+
+sub s2string($)
+{
+ my $s = shift;
+ my $minutes = int( $s / 60 );
+ my $seconds = $s % 60;
+
+ if ( $minutes >= 60 ) {
+ my $hours = int( $minutes / 60 );
+ $minutes %= 60;
+ return sprintf '%d:%.2d:%.2d', $hours, $minutes, $seconds;
+ } else {
+ return sprintf '%d:%.2d', $minutes, $seconds;
+ }
+}
+
+sub bignum($)
+{
+ local $_ = shift;
+ return $_ if /[^\d]/;
+ s/(..?.?)(?=(...)+$)/$1_/g;
+ return $_;
+}
+
+sub hadd(%@)
+{
+ my $h = shift;
+ my %new = @_;
+ foreach ( keys %new ) {
+ $h->{$_} = $new{$_};
+ }
+}
+
+
+sub p($)
+{
+ require RSGet::Line;
+ new RSGet::Line( "INFO: ", shift );
+}
+
+sub hprint(%)
+{
+ my $h = shift;
+ foreach ( keys %$h ) {
+ p "$_ => $h->{$_}";
+ }
+}
+
+sub randomize
+{
+ # not really good, but works
+ return sort { 0.5 <=> rand } @_;
+}
+
+
+sub isotime()
+{
+ my @l = localtime;
+ return sprintf "%d-%.2d-%.2d %2d:%.2d:%.2d", $l[5] + 1900, $l[4] + 1, @l[(3,2,1,0)];
+}
+
+sub de_ml
+{
+ local $_ = shift;
+ s/≤/</g;
+ s/≥/>/g;
+ s/"/"/g;
+ s/&/&/g;
+ return $_;
+}
+
+sub require_prog
+{
+ my $prog = shift;
+ foreach my $dir ( split /:+/, $ENV{PATH} ) {
+ my $full = "$dir/$prog";
+ return $full if -x $full;
+ }
+ return undef;
+}
+
+1;
+# vim:ts=4:sw=4
Modified: toys/rsget.pl/rsget.pl
==============================================================================
--- toys/rsget.pl/rsget.pl (original)
+++ toys/rsget.pl/rsget.pl Thu Aug 27 15:23:19 2009
@@ -1,2134 +1,98 @@
#!/usr/bin/perl
#
-# 2008 (c) Przemysław Iskra <sparky at pld-linux.org>
+# 2009 (c) Przemysław Iskra <sparky at pld-linux.org>
# Use/modify/distribute under GPL v2 or newer.
#
-=item TODO:
-
-- removing URI from list should stop download
-- new URI higher in the list should replace any connection
- to the same network if still in the wait stage
-- check all the URIs just after finding them in the list
- (catch non-existing files quickly)
-- OdSiebie: there is a captcha now
-
-=item Status:
-- RS: 2009-08-12 OK
-- NL: 2009-08-12 OK, captcha works
-- OS: not working, captcha not supported
-- MU: 2009-08-12 OK, captcha works, requires mu_font_db.png
-- UT: 2009-06-07 OK
-- HF: captcha not supported
-- FF: 2009-08-12 OK
-- DF: 2009-08-12 OK
-- TU: 2009-08-12 OK
-- ST: 2009-08-12 OK
-
-=item Wishlist:
-- handle multiple alternatives for same file
-- add more servers
-
-=cut
-use strict;
-use warnings;
-use Time::HiRes;
-
-our $data_path = $ENV{PWD};
-
-my $checklist = 1;
-my %gotlist;
-$SIG{CHLD} = "IGNORE";
-
-my %getters;
-
-package Line; # {{{
-use Term::Size;
-
-$| = 1;
-my $actual_line = 0;
-my $max_line = 0;
-
-my $columns = Term::Size::chars;
-
-sub new
-{
- my $proto = shift;
- my $class = ref( $proto ) || $proto;
-
- my $steps = $max_line - $actual_line;
- $actual_line = $max_line;
- my $move = "";
-
- if ( $steps < 0 ) {
- return undef;
- } elsif ( $steps > 0 ) {
- $move = "\033[" . $steps . "B";
- }
-
- print $move . "\n\r\033[K";
-
- my $line = $max_line++;
- my $self = \$line;
- return bless $self, $class;
-}
-
-sub print
-{
- my $self = shift;
- my $text = shift;
- my $line = $$self;
-
- return undef if $line >= $max_line;
- my $steps = $line - $actual_line;
- $actual_line = $line;
- my $move = "";
-
- if ( $steps < 0 ) {
- $move = "\033[" . (-$steps) . "A";
- } elsif ( $steps > 0 ) {
- $move = "\033[" . $steps . "B";
- }
- my $tl = length $text;
- substr $text, 22, $tl - $columns + 3, '...'
- if $tl > $columns;
-
- print $move . "\r\033[K" . $text;
-}
-
-# }}}
-package Curl; # {{{
-use WWW::Curl::Easy;
-use WWW::Curl::Multi;
-use URI::Escape;
-
-my $curl_headers = [
- 'User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10',
- 'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
- 'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7',
- 'Accept-Language: en-us,en;q=0.5',
- ];
-
-sub file_init
-{
- my $self = shift;
- my $curl = $self->{curl};
-
- $self->{time_start} = time;
-
- {
- my $mime = $curl->getinfo( CURLINFO_CONTENT_TYPE );
- if ( $mime =~ m#^text/html# ) {
- $self->{is_html} = 1;
- $self->{size_total} = 0;
- $self->{size_got} = 0;
- return;
- }
- }
-
- {
- my $f_len = $curl->getinfo( CURLINFO_CONTENT_LENGTH_DOWNLOAD );
- $self->{size_total} = $f_len || 0;
- $self->{size_got} = 0;
- }
-
- if ( $self->{head} =~ /^Content-Disposition:\s*attachment;\s*filename\s*=\s*"?(.*?)"?\s*$/im ) {
- $self->{file_name} = $1;
- } else {
- my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
- $eurl =~ s#^.*/##;
- $self->{file_name} = uri_unescape( $eurl );
- }
-
- {
- my $fn = $self->{file_name};
- if ( -r $fn ) {
- my $fn_old = $fn;
- my $ext = "";
- $ext = $1 if $fn_old =~ s/(\..{3,5})$//;
- my $i = 1;
- while ( -r "$fn_old-$i$ext" ) {
- $i++;
- }
- $fn_old .= "-$i$ext";
- rename $fn, $fn_old;
- }
- my $net = $self->{obj}->{net};
- $self->{obj}->{netname} =~ s/] .*/] $fn: /;
- }
-
- {
- open my $f_out, '>', $self->{file_name};
- $self->{file} = $f_out;
- }
-}
-
-sub body_file
-{
- my ($chunk, $self) = @_;
-
- file_init( $self ) unless exists $self->{size_total};
-
- my $len = length $chunk;
- $self->{size_got} += $len;
-
- if ( $self->{file} ) {
- my $file = $self->{file};
- my $p = print $file $chunk;
- die "\nCannot write data: $!\n" unless $p;
- } else {
- $self->{body} .= $chunk;
- }
-
- return $len;
-}
-
-sub body_scalar
-{
- my ($chunk, $scalar) = @_;
- $$scalar .= $chunk;
- return length $chunk;
-}
-
-
-my $mcurl = new WWW::Curl::Multi;
-my %curling;
-
-sub start
-{
- my $url = shift;
- my $obj = shift;
- my %opts = @_;
-
- my $curl = new WWW::Curl::Easy;
-
- my $id = 1;
- ++$id while exists $curling{ $id };
-
- my $ecurl = {
- curl => $curl,
- id => $id,
- got => 0,
- head => "",
- body => "",
- obj => $obj,
- };
-
- $curl->setopt( CURLOPT_PRIVATE, $id );
- if ( $obj->{outaddr} ) {
- $curl->setopt( CURLOPT_INTERFACE, $obj->{outaddr} );
- }
-
- if ( defined $opts{cookies} ) {
- $curl->setopt( CURLOPT_COOKIEJAR, $opts{cookies} );
- $curl->setopt( CURLOPT_COOKIEFILE, $opts{cookies} );
- }
- $curl->setopt( CURLOPT_HEADERFUNCTION, \&body_scalar );
- $curl->setopt( CURLOPT_WRITEHEADER, \$ecurl->{head} );
- $curl->setopt( CURLOPT_MAXREDIRS, 10 );
- $curl->setopt( CURLOPT_FOLLOWLOCATION, 1 );
- $curl->setopt( CURLOPT_HTTPHEADER, $curl_headers );
- $curl->setopt( CURLOPT_URL, $url );
- $curl->setopt( CURLOPT_REFERER, $opts{referer} )
- if defined $opts{referer};
- $curl->setopt( CURLOPT_ENCODING, 'gzip,deflate' );
- $curl->setopt( CURLOPT_CONNECTTIMEOUT, 20 );
-
- if ( $opts{post} ) {
- my $post = $opts{post};
- $curl->setopt( CURLOPT_POST, 1 );
- if ( ref $post and ref $post eq "HASH" ) {
- $post = join "&",
- map { uri_escape( $_ ) . "=" . uri_escape( $post->{$_} ) }
- sort keys %$post;
- }
- $curl->setopt( CURLOPT_POSTFIELDS, $post );
- }
-
- if ( $opts{save} ) {
- $curl->setopt( CURLOPT_WRITEFUNCTION, \&body_file );
- $curl->setopt( CURLOPT_WRITEDATA, $ecurl );
- } else {
- $ecurl->{is_html} = 1;
- $curl->setopt( CURLOPT_WRITEFUNCTION, \&body_scalar );
- $curl->setopt( CURLOPT_WRITEDATA, \$ecurl->{body} );
- }
-
- $curling{ $id } = $ecurl;
- $mcurl->add_handle( $curl );
-}
-
-sub finish
-{
- my $id = shift;
- my $err = shift;
-
- my $ecurl = $curling{ $id };
- delete $curling{ $id };
-
- my $curl = $ecurl->{curl};
- delete $ecurl->{curl}; # remove circular dep
-
- my $obj = $ecurl->{obj};
- delete $ecurl->{obj};
-
- if ( $ecurl->{file} ) {
- close $ecurl->{file};
- $obj->print( donemsg( $ecurl ) );
- }
-
- if ( $err ) {
- my $error = $curl->errbuf;
- $obj->print( "error($err): $error" );
- $obj->problem();
- return undef;
- }
-
- if ( $obj->{curl_next} ) {
- my $func = $obj->{curl_next};
- my $body = $ecurl->{file}
- ? "DONE $ecurl->{file_name} " . main::bignum( $ecurl->{size_got} )
- . " / " . main::bignum( $ecurl->{size_total} )
- : $ecurl->{body};
- my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
-
- &$func( $obj, $body, $eurl, $ecurl->{is_html} );
- }
-}
-
-sub perform
-{
- my $running = scalar keys %curling;
- return unless $running;
- my $act = $mcurl->perform();
- return if $act == $running;
-
- while ( my ($id, $rv) = $mcurl->info_read() ) {
- next unless $id;
-
- finish( $id, $rv );
- }
-}
-
-sub print_status
-{
- my $time = time;
-
- foreach my $ecurl ( values %curling ) {
- next unless exists $ecurl->{size_total};
- my $size_got = $ecurl->{size_got};
- my $size_total = $ecurl->{size_total};
-
- my $size = main::bignum( $size_got ) . " / " . main::bignum( $size_total );
- my $eta = "";
- my $time_diff = $time - $ecurl->{time_start};
- if ( $size_total > 0 ) {
- $size .= sprintf " [%.2f%%]", $size_got * 100 / $size_total;
- if ( $time_diff > 0 ) {
- my $tleft = ($size_total - $size_got) * $time_diff / $size_got;
- $eta = main::s2string( $tleft );
- }
- }
- my $speed = "???";
- $speed = sprintf "%.2f", $size_got / ( $time_diff * 1024 )
- if $time_diff > 0;
-
- $ecurl->{obj}->print( "$size; ${speed}KB/s $eta" );
- }
-}
-
-sub donemsg
-{
- my $ecurl = shift;
-
- my $size_total = $ecurl->{size_got};
-
- my $time_diff = time() - $ecurl->{time_start};
- $time_diff = 1 unless $time_diff;
- my $eta = main::s2string( $time_diff );
- my $speed = sprintf "%.2f", $size_total / ( $time_diff * 1024 );
-
- my @l = localtime;
- my $date = sprintf "%d-%.2d-%.2d %2d:%.2d:%.2d", $l[5] + 1900, $l[4] + 1, @l[(3,2,1,0)];
- return "DONE " . main::bignum( $size_total ) . "; ${speed}KB/s $eta @ $date";
-}
-
-
-# }}}
-package Wait; # {{{
-
-my %waiting;
-
-sub start
-{
- my $obj = shift;
- my $time = shift;
- $obj->{wait_until} = time + $time;
-
- my $id = 0;
- ++$id while exists $waiting{ $id };
- $waiting{ $id } = $obj;
-}
-
-sub finish
-{
- my $id = shift;
-
- my $obj = $waiting{ $id };
- delete $waiting{ $id };
-
- my $func = $obj->{wait_next};
-
- &$func( $obj );
-}
-
-sub perform
-{
- my $time = time;
-
- foreach my $id ( keys %waiting ) {
- my $obj = $waiting{ $id };
- my $left = $obj->{wait_until} - $time;
- if ( $left <= 0 ) {
- finish( $id );
- } else {
- $obj->print( $obj->{wait_msg} . main::s2string( $left ) );
- }
- }
-}
-
-# }}}
-package Get; # {{{
-
-use URI;
-my @outaddr;
-
-sub add_outaddr
-{
- my $newaddr = shift;
- NEW_IP: foreach my $ip ( split /[ ,]+/, $newaddr ) {
- foreach my $outaddr ( @outaddr ) {
- if ( $ip eq $outaddr ) {
- print "Address $ip already on the list\n";
- next NEW_IP;
- }
- }
- print "Adding $ip address\n";
- push @outaddr, $ip;
- }
-}
-
-my %running;
-sub makenew
-{
- my $net = shift;
- my $class = shift;
- my $url = shift;
- my $slots = 1;
- if ( scalar @_ and $_[0] eq "slots" ) {
- shift;
- $slots = shift;
- }
- if ( scalar @outaddr > $slots ) {
- $slots = scalar @outaddr;
- }
-
- my @opts = split /\s+/, $url;
- $url = shift @opts;
- my %opts = map { /(.*?)=(.*)/ ? ( $1, $2 ) : ( $_, 1 ) } @opts;
-
- return {} if $gotlist{ $url };
- $running{ $net } = {} unless exists $running{ $net };
- my $rn = $running{ $net };
- return {} if $slots <= scalar keys %$rn;
- foreach my $id ( keys %$rn ) {
- if ( $rn->{ $id }->{url} eq $url ) {
- return {};
- }
- }
-
- my $outaddr = undef;
- if ( scalar @outaddr ) {
- FIND_IP: foreach my $maybe_outaddr ( @outaddr ) {
- foreach my $id ( keys %$rn ) {
- if ( $rn->{ $id }->{outaddr} eq $maybe_outaddr ) {
- next FIND_IP;
- }
- }
- $outaddr = $maybe_outaddr;
- last;
- }
- # no IP found ?
- return {}
- unless defined $outaddr;
- }
- my $outaddrstr = $outaddr ? "[$outaddr]" : "";
-
- my $id = 1;
- ++$id while exists $rn->{ $id };
-
- my $line = new Line;
-
- ( my $fn = $url ) =~ s{/+$}{};
- $fn =~ s#^.*/##;
-
- my $self = {
- @_,
- url => $url,
- opts => \%opts,
- id => $id,
- try => 0,
- line => $line,
- net => $net,
- netname => "[$net]$outaddrstr $fn: ",
- outaddr => $outaddr,
- };
-
- $rn->{ $id } = bless $self, $class;
-
- $self->stage1();
- return $self;
-}
-
-sub print
-{
- my $self = shift;
- my $text = shift;
- my $line = $self->{line};
- $line->print( $self->{netname} . $text );
-}
<<diff output has been trimmed to 500 lines, 1707 line(s) remained.>>
More information about the pld-cvs-commit
mailing list