SVN: toys/rsget.pl/RSGet: Captcha.pm Dispatch.pm FileList.pm Get.pm HTTPRequest.pm ListAdder.pm Main...

sparky sparky at pld-linux.org
Thu Oct 29 03:48:11 CET 2009


Author: sparky
Date: Thu Oct 29 03:48:11 2009
New Revision: 10874

Added:
   toys/rsget.pl/RSGet/Plugin.pm
      - copied, changed from rev 10863, toys/rsget.pl/RSGet/Processor.pm
   toys/rsget.pl/RSGet/Processor.pm
Modified:
   toys/rsget.pl/RSGet/Captcha.pm
   toys/rsget.pl/RSGet/Dispatch.pm
   toys/rsget.pl/RSGet/FileList.pm
   toys/rsget.pl/RSGet/Get.pm
   toys/rsget.pl/RSGet/HTTPRequest.pm
   toys/rsget.pl/RSGet/ListAdder.pm
   toys/rsget.pl/RSGet/Main.pm
   toys/rsget.pl/RSGet/Tools.pm
Log:
- compile plugins on demand, lowers memory usage in typical cases


Modified: toys/rsget.pl/RSGet/Captcha.pm
==============================================================================
--- toys/rsget.pl/RSGet/Captcha.pm	(original)
+++ toys/rsget.pl/RSGet/Captcha.pm	Thu Oct 29 03:48:11 2009
@@ -109,7 +109,7 @@
 		return;
 	}
 
-	my $getter = $getters{ $self->{_pkg} };
+	my $getter = RSGet::Plugin::from_pkg( $self->{_pkg} );
 	my $dir = "captcha/$getter->{short}/$subdir";
 	mkpath( $dir ) unless -d $dir;
 

Modified: toys/rsget.pl/RSGet/Dispatch.pm
==============================================================================
--- toys/rsget.pl/RSGet/Dispatch.pm	(original)
+++ toys/rsget.pl/RSGet/Dispatch.pm	Thu Oct 29 03:48:11 2009
@@ -161,7 +161,11 @@
 	my $outif = find_free_if( $pkg, $working, get_slots( $cmd, $getter->{slots} ) );
 	return unless defined $outif;
 
-	my $obj = RSGet::Get::new( $pkg, $cmd, $uri, $options, $outif );
+	my $obj = $getter->start( $cmd, $uri, $options, $outif );
+	if ( not $obj and $getter->{error} ) {
+		$options->{error} = $getter->{error};
+		return;
+	}
 	$working->{ $uri } = $obj if $obj;
 	
 	return $obj;
@@ -275,31 +279,6 @@
 	}
 }
 
-sub getter
-{
-	my $uri = shift;
-	foreach my $getter ( values %getters ) {
-		foreach my $re ( @{ $getter->{uri} } ) {
-			return $getter
-				if $uri =~ m{^http://(?:www\.)?$re};
-		}
-	}
-	return undef;
-}
-
-sub unigetter
-{
-	my $uri = shift;
-	my $getter = getter( $uri );
-	if ( $getter ) {
-		my $unify = $getter->{unify};
-		$uri = &$unify( $uri );
-		return $getter, $uri;
-	}
-	return undef, $uri;
-}
-
-
 1;
 
 # vim: ts=4:sw=4

Modified: toys/rsget.pl/RSGet/FileList.pm
==============================================================================
--- toys/rsget.pl/RSGet/FileList.pm	(original)
+++ toys/rsget.pl/RSGet/FileList.pm	Thu Oct 29 03:48:11 2009
@@ -165,8 +165,10 @@
 				next;
 			} elsif ( m{^(http://)?(.*?)$} ) {
 				my $proto = $1 || "http://";
-				my ( $getter, $uri ) = RSGet::Dispatch::unigetter( $proto . $2 );
+				my $uri = $proto . $2;
+				my $getter = RSGet::Plugin::from_uri( $uri );
 				if ( $getter ) {
+					$uri = $getter->unify( $uri );
 					$options = {};
 					$decoded{ $uri } = [ $getter, $options ];
 					next;

Modified: toys/rsget.pl/RSGet/Get.pm
==============================================================================
--- toys/rsget.pl/RSGet/Get.pm	(original)
+++ toys/rsget.pl/RSGet/Get.pm	Thu Oct 29 03:48:11 2009
@@ -48,21 +48,20 @@
 
 sub new
 {
-	my ( $pkg, $cmd, $uri, $options, $outif ) = @_;
-	my $getter = $getters{ $pkg };
+	my ( $getter, $cmd, $uri, $options, $outif ) = @_;
 
 	my $self = {
 		_uri => $uri,
 		_opts => $options,
 		_try => 0,
 		_cmd => $cmd,
-		_pkg => $pkg,
+		_pkg => $getter->{pkg},
 		_outif => $outif,
 		_id => (sprintf "%.6x", int rand 1 << 24),
 		_last_dump => 0,
 		make_cookie( $getter->{cookie}, $cmd ),
 	};
-	bless $self, $pkg;
+	bless $self, $getter->{pkg};
 	$self->bestinfo();
 
 	if ( verbose( 2 ) or $cmd eq "get" ) {
@@ -94,7 +93,7 @@
 	return unless $line;
 
 	my $outifstr = $self->{_outif} ? "[$self->{_outif}]" :  "";
-	my $getter = $getters{ $self->{_pkg} };
+	my $getter = RSGet::Plugin::from_pkg( $self->{_pkg} );
 	new RSGet::Line( "[$getter->{short}]$outifstr ", $self->{_name} . ": " . $text );
 }
 

Modified: toys/rsget.pl/RSGet/HTTPRequest.pm
==============================================================================
--- toys/rsget.pl/RSGet/HTTPRequest.pm	(original)
+++ toys/rsget.pl/RSGet/HTTPRequest.pm	Thu Oct 29 03:48:11 2009
@@ -567,7 +567,7 @@
 	my $r = '<fieldset id="f_listask"><legend>Select clone</legend>'
 		. '<ul class="flist">';
 	my ( $uri, $options, $clones ) = @$ask;
-	my $getter = RSGet::Dispatch::getter( $uri );
+	my $getter = RSGet::Plugin::from_uri( $uri );
 
 	my $list_ids = $list->{ids};
 	$list_ids->{addclone} = { uri => $uri };
@@ -581,7 +581,7 @@
 				name => $ucd->[1],
 				size => $ucd->[3],
 			};
-			my $getter = RSGet::Dispatch::getter( $uri );
+			my $getter = RSGet::Plugin::from_uri( $uri );
 			$r .= file_info( $list_ids, "addclone", $uri, $getter, $options, ['SELECT'] );
 		}
 	}

Modified: toys/rsget.pl/RSGet/ListAdder.pm
==============================================================================
--- toys/rsget.pl/RSGet/ListAdder.pm	(original)
+++ toys/rsget.pl/RSGet/ListAdder.pm	Thu Oct 29 03:48:11 2009
@@ -73,8 +73,9 @@
 			}
 		}
 
-		(my $getter, $uri) = RSGet::Dispatch::unigetter( $uri );
+		my $getter = RSGet::Plugin::from_uri( $uri );
 		next unless $getter;
+		$uri = $getter->unify( $uri );
 		next if exists $all_uris{ $uri };
 		$all_uris{ $uri } = 1;
 		my $options = {};
@@ -132,8 +133,9 @@
 				if ( my $links = $save->{links} ) {
 					my @new;
 					foreach my $luri ( @$links ) {
-						my ($getter, $uri) = RSGet::Dispatch::unigetter( $luri );
+						my $getter = RSGet::Plugin::from_uri( $luri );
 						if ( $getter ) {
+							my $uri = $getter->unify( $luri );
 							push @new, { cmd => "ADD", globals => {}, uris => { $uri => [ $getter, {} ] } };
 						} else {
 							push @new, "# unsupported uri: $uri";

Modified: toys/rsget.pl/RSGet/Main.pm
==============================================================================
--- toys/rsget.pl/RSGet/Main.pm	(original)
+++ toys/rsget.pl/RSGet/Main.pm	Thu Oct 29 03:48:11 2009
@@ -16,7 +16,7 @@
 use RSGet::MortalObject;
 use RSGet::Line;
 use RSGet::ListManager;
-use RSGet::Processor;
+use RSGet::Plugin;
 use RSGet::Tools;
 use RSGet::Wait;
 use Time::HiRes;
@@ -239,32 +239,16 @@
 		foreach my $type ( qw(Get Link Video) ) {
 			my $dir = "$path/$type";
 			next unless -d $dir;
+			my $count = 0;
 			foreach ( sort glob "$path/$type/*" ) {
-				add_getter( $type, $_ );
+				$count += RSGet::Plugin::add( $type, $_ );
 			}
+			new RSGet::Line( "INIT: ", "$dir: found $count new plugins\n" )
+				if $count;
 		}
 	}
 }
 
-sub add_getter
-{
-	my $type = shift;
-	local $_ = shift;
-	return if /~$/;
-	return if m{/\.[^/]*$};
-	( my $file = $_ ) =~ s#.*/##;
-	return if exists $getters{ $type . "::" . $file };
-	my ( $pkg, $getter ) = RSGet::Processor::read_file( $type, $_ );
-	my $msg = "${type}/$file: failed";
-	if ( $pkg and $getter ) {
-		$getters{ $pkg } = $getter;
-		$msg = "$pkg: added\n";
-		new RSGet::Line( "INIT: ", $msg );
-	} else {
-		warn "$msg\n";
-	}
-}
-
 sub loop
 {
 	# main loop

Copied: toys/rsget.pl/RSGet/Plugin.pm (from rev 10863, toys/rsget.pl/RSGet/Processor.pm)
==============================================================================
--- toys/rsget.pl/RSGet/Processor.pm	(original)
+++ toys/rsget.pl/RSGet/Plugin.pm	Thu Oct 29 03:48:11 2009
@@ -1,4 +1,4 @@
-package RSGet::Processor;
+package RSGet::Plugin;
 # This file is an integral part of rsget.pl downloader.
 #
 # 2009 (c) Przemysław Iskra <sparky at pld-linux.org>
@@ -7,271 +7,274 @@
 
 use strict;
 use warnings;
+use RSGet::Processor;
 use RSGet::Tools;
 set_rev qq$Id$;
 
-my $options = "name|short|slots|cookie|status|min_ver";
+my %getters;
 
-my $processed = "";
-sub pr(@)
+sub read_file($)
 {
-	my $line = join "", @_;
-	$processed .= $line;
-	return length $line;
-}
-
-my $is_sub = 0;
-my $last_cmd = undef;
-sub p_sub
-{
-	my $sub = shift;
-	pr "sub $sub {\n";
-	pr "\tmy \$self = shift;\n";
-	foreach ( @_ ) {
-		pr "\t$_;\n";
-	}
-	$is_sub++;
-}
-sub p_subend
-{
-	return unless $is_sub;
-	$is_sub--;
-
-	my $error = 'unexpected end of script';
-	if ( $last_cmd and $last_cmd =~ /(?:click_)?download/ ) {
-		$error = 'download is a HTML page';
-	}
-	$last_cmd = undef;
-	pr "\treturn \${self}->error( '$error' );\n}\n";
-}
-
-my $space;
-sub p_ret
-{
-	my $ret = shift;
-	my @opts = @_;
-	pr $space . "return \${self}->${ret}( ";
-	pr join( ", ", @opts ) . ", " if @opts;
-}
-
-sub p_func
-{
-	my $f = shift;
-	pr $space . "\${self}->$f(";
-}
-
-sub p_line
-{
-	s/\$-{/\$self->{/g;
-	pr $_ . "\n";
-}
-
-
-sub read_file
-{
-	my $class = shift;
-	my $file = shift;
+	my $self = shift;
+	my $file = $self->{file};
 
-	open F_IN, '<', $file;
+	open F_IN, '<', $file or return;
 
 	my %opts = (
 		uri => [],
+		map { $_ => undef } qw(name short slots cookie status),
 	);
+	my $opts = join "|", keys %opts;
+
 	my %parts = (
-		unify => [],
-		pre => [],
-		start => [],
-		perl => [],
+		map { $_ => [] } qw(unify pre start perl),
 	);
 	my $parts = join "|", keys %parts;
 
-	my $part = undef;
+	my $part;
 	while ( <F_IN> ) {
 		chomp;
-		next unless length;
 		next if /^\s*#/;
+		next if /^\s*$/;
 
-		if ( $part ) {
-			unless ( /^\S+/ ) {
-				push @{$parts{$part}}, $_;
-				next;
-			}
-			if ( $part eq "perl" ) {
-				push @{$parts{perl}}, $_."\n", <F_IN>;
-				last;
-			} elsif ( $part eq "start" and /^stage_.*?:/ ) {
-				push @{$parts{start}}, $_;
-				next;
-			}
-			$part = undef;
+		if ( /^($parts)\s*:/ ) {
+			$part = $1;
+			last;
+		}
+
+		my ( $key, $value );
+		unless ( ($key, $value) = /^($opts)\s*:\s+(.*)$/ ) {
+			warn "$file: unrecognized line: $_\n";
+			next;
 		}
 
+		if ( ref $opts{ $key } ) {
+			push @{ $opts{ $key } }, $value;
+		} else {
+			warn "$file: $key overwritten (changed from '$opts{ $key }' to '$value')\n"
+				if defined $opts{ $key };
+			$opts{ $key } = $value;
+		}
+	}
+
+	while ( <F_IN> ) {
+		chomp;
+		next if /^\s*#/;
+		next if /^\s*$/;
+
 		if ( /^($parts)\s*:/ ) {
 			$part = $1;
-		} elsif ( /^uri\s*:\s+(.*)$/ ) {
-			push @{$opts{uri}}, $1;
-		} elsif ( /^($options)\s*:\s+(.*)$/ ) {
-			$opts{$1} = $2;
+			if ( $part eq "perl" ) {
+				my @perl = <F_IN>;
+				$parts{perl} = \@perl;
+			}
+			next;
 		}
+
+		push @{ $parts{ $part } }, $_;
 	}
 
 	close F_IN;
-	unless ( scalar @{$parts{start}} ) {
-		p "Can't find 'start:'\n";
-		return undef;
-	}
-	unless ( @{$opts{uri}} ) {
-		p "Can't find 'uri:'\n";
-		return undef;
+
+	foreach my $k ( keys %opts ) {
+		$self->{ $k } = $opts{ $k };
+	}
+
+	return \%parts;
+}
+
+sub check_opts
+{
+	my $self = shift;
+	my $file = shift;
+	my $plugin_class = shift;
+
+	unless ( @{$self->{uri}} ) {
+		return "Can't find 'uri:'\n";
 	}
+
 	foreach ( qw(name short) ) {
-		next if $opts{$_};
-		p "Can't find '$_:'\n";
-		return undef;
+		next if $self->{$_};
+		return "Can't find '$_:'\n";
 	}
+
 	$file =~ m{.*/(.*?)$};
 	my $fname = $1;
-	if ( $fname ne $opts{name} ) {
-		p "Name field: '$opts{name}' differs from file name: '$fname'\n";
-		return undef;
-	}
-	if ( $opts{status} and $opts{status} !~ /^OK(\s+.*)?$/ ) {
-		p "Marked as '$opts{status}'\n";
-		return undef;
-	}
-
-	$processed = "";
-	$space = "";
-	$last_cmd = undef;
-	$is_sub = 0;
-
-	$opts{uri} = [ map { eval $_ } @{$opts{uri}} ];
-	$opts{class} = ${class};
-	$opts{pkg} = "${class}::$opts{name}";
-	$opts{unify} = join "\n", @{ $parts{unify} };
-	$opts{unify} ||= 's/#.*//; s{/$}{};';
-
-	pr "package $opts{pkg};\n\n";
-	pr <<'EOF';
-	use strict;
-	use warnings;
-	use RSGet::Get;
-	use RSGet::Tools;
-	use URI::Escape;
-
-	BEGIN {
-		our @ISA;
-		@ISA = qw(RSGet::Get);
-	}
-
-	my $STDSIZE = qr/\d+(?:\.\d+)?\s*[kmg]?b/i;
-EOF
-
-	pr join "\n", @{$parts{pre}}, "\n";
-
-	my $stage = 0;
-	p_sub( "stage0" );
-	my @machine = @{$parts{start}};
-	while ( $_ = shift @machine ) {
-		$space = "";
-		$space = $1 if s/^(\s+)//;
-
-		if ( s/^(GET|WAIT|CAPTCHA|(?:CLICK_)?DOWNLOAD|CLICK)\s*\(// ) {
-			my $cmd = lc $1;
-			my $next_stage = "stage" . ++$stage;
-			my @skip;
-			push @skip, $_;
-			until ( /;\s*$/ ) {
-				$_ = shift @machine;
-				push @skip, $_;
-			}
-			p_ret( $cmd, "\\&$next_stage" );
-			foreach ( @skip ) {
-				p_line();
-			}
-			p_subend();
-			$last_cmd = $cmd;
-			p_sub( $next_stage );
-		} elsif ( s/^(GET|WAIT|CAPTCHA|CLICK)_NEXT\s*\(\s*(.*?)\s*,// ) {
-			my $cmd = lc $1;
-			my $next_stage = $2;
-			p_ret( $cmd, "\\&$next_stage" );
-			p_line();
-		} elsif ( s/^GOTO\s+(stage_[a-z0-9_]+)// ) {
-			p_ret( $1 );
-			pr ')';
-			p_line();
-		} elsif ( s/^(stage_[a-z0-9_]+)\s*:\s*(.*)$// ) {
-			my $next_stage = $1;
-			my $left = $_;
-			p_ret( $next_stage );
-			pr ');';
-			p_subend();
-			p_sub( $next_stage );
-			$_ = $left;
-			redo if /\S/;
-		} elsif ( s/^(ERROR|RESTART|LINK|MULTI)\s*\(// ) {
-			p_ret( lc $1 );
-			p_line();
-		} elsif ( s/^INFO\s*\(// ) {
-			pr $space . 'return "info" if $self->info( ';
-			p_line();
-		} elsif ( s/^SEARCH\s*\(// ) {
-			pr $space . 'return if $self->search( ';
-			p_line();
-		} elsif ( s/^(PRINT|LOG|COOKIE|CAPTCHA_RESULT)\s*\(// ) {
-			p_func( lc $1 );
-			p_line();
-		} elsif ( s/^!\s+// ) {
-			my $line = quotemeta $_;
-			pr $space . 'return $self->problem( "'. $line .'" ) unless ';
-			p_line();
+	if ( $fname eq $self->{name} ) {
+		$self->{pkg} = $plugin_class."::". $self->{name};
+	} else {
+		return "Name field: '$self->{name}' differs from file name\n";
+	}
+
+	if ( $self->{status} and $self->{status} =~ /^OK(\s+.*)?$/ ) {
+		return "";
+	}
+
+	return "Incorrect status\n";
+}
+
+sub check_parts
+{
+	my $class = shift;
+	my $parts = shift;
+
+	unless ( @{ $parts->{start} } ) {
+		return "Can't find start\n";
+	}
+
+	return "";
+}
+
+sub eval_uris
+{
+	my $self = shift;
+	my $in = $self->{uri};
+	my @out;
+
+	local $SIG{__DIE__};
+	delete $SIG{__DIE__};
+
+	foreach my $uri_text ( @$in ) {
+		my $re = eval $uri_text;
+		if ( $@ ) {
+			warn "Problem with uri $uri_text: $@\n";
+		} elsif ( not $re ) {
+			warn "Problem with uri $uri_text\n";
+		} elsif ( not ref $re or ref $re ne "Regexp" ) {
+			warn "URI $uri_text is not a regular expression\n";
 		} else {
-			pr $space;
-			p_line();
+			push @out, $re;
 		}
 	}
-	p_subend();
 
-	pr @{$parts{perl}};
+	$self->{uri} = \@out;
+}
 
-	pr "\npackage $opts{pkg};\n";
-	pr "sub unify { local \$_ = shift; $opts{unify};\nreturn \$_;\n};\n";
-	pr '\&unify;';
-
-	my $unify = eval_it( $processed );
-
-	if ( $@ ) {
-		p "Error(s): $@";
-		return undef unless verbose( 1 );
-		my $err = $@;
-		return undef unless $err =~ /line \d+/;
-		my @p = split /\n/, $processed;
-		for ( my $i = 0; $i < scalar @p; $i++ ) {
-			my $n = $i + 1;
-			p sprintf "%s%4d: %s\n",
-				($err =~ /line $n[^\d]/ ? "!" : " "),
-				$n,
-				$p[ $i ];
-		}
-		return undef;
+sub new
+{
+	my $class = shift;
+	my $type = shift;
+	my $file = shift;
+
+	my $self = {
+		file => $file,
+		class => $type,
+	};
+	bless $self, $class;
+
+	my $parts = $self->read_file();
+	return undef unless $parts;
+	my $error = "";
+	$error .= $self->check_opts( $file, $type );
+	$error .= $self->check_parts( $parts );
+
+	$self->eval_uris();
+	return undef unless @{ $self->{uri} };
+
+	$self->{error} = "$self->{pkg} plugin error: $error" if $error;
+	p $file . ": " . $self->{error} if $error;
+
+	return $self;
+}
+
+sub compile
+{
+	my $self = shift;
+	$self->{compiled} = 1;
+	return if $self->{error};
+	p "Compiling $self->{pkg} plugin";
+
+	my $parts = $self->read_file();
+	unless ( $parts ) {
+		$self->{error} = "$self->{pkg} compilation error: cannot read file $self->{file}";
+		p "Compilation failed";
+	}
+
+	my $unify = RSGet::Processor::compile( $self, $parts );
+
+	if ( ref $unify and ref $unify eq "CODE" ) {
+		$self->{unify} = $unify;
+		p "Compilation successful";
+	} else {
+		$self->{error} = "$self->{pkg} compilation error";
+		p "Compilation failed";
 	}
-	if ( not $unify or not ref $unify or ref $unify ne "CODE" ) {
-		my $ru = ref $unify || "undef";
-		p "Error: invalid, unify returned '$ru'";
-		return undef;
+}
+
+sub can_do
+{
+	my $self = shift;
+	my $uri = shift;
+
+	foreach my $re ( @{ $self->{uri} } ) {
+		return 1 if $uri =~ m{^http://(?:www\.)?$re};
 	}
-	$opts{unify} = $unify;
+	return 0;
+}
+
+sub unify
+{
+	my $self = shift;
+	my $uri = shift;
 
-	return $opts{pkg} => \%opts;
-	return ();
+	$self->compile() unless $self->{compiled};
+	return $uri if $self->{error};
+
+	my $func = $self->{unify};
+	return $uri unless $func;
+
+	return &$func( $uri );
 }
 
-sub eval_it
+sub start
 {
-	local $SIG{__DIE__};
-	delete $SIG{__DIE__};
-	return eval shift;
+	my $self = shift;
+	my @args = @_;
+
+	$self->compile() unless $self->{compiled};
+	return undef if $self->{error};
+
+	return RSGet::Get::new( $self, @args );
+}
+
+
+sub add
+{
+	my $type = shift;
+	local $_ = shift;
+	return 0 if /~$/;
+	return 0 if m{/\.[^/]*$};
+	( my $file = $_ ) =~ s#.*/##;
+	return 0 if exists $getters{ $type . "::" . $file };
+	my $plugin = new RSGet::Plugin( $type, $_ );
+	if ( $plugin ) {
+		my $pkg = $plugin->{pkg};
+		$getters{ $pkg } = $plugin;
+		new RSGet::Line( "INIT: ", "$pkg: added" )
+			if verbose( 1 );
+		return 1;
+	} else {
+		warn "${type}/$file: failed\n";
+		return 0;
+	}
+}
+
+
+
+sub from_uri
+{
+	my $uri = shift;
+	foreach my $getter ( values %getters ) {
+		return $getter if $getter->can_do( $uri );
+	}
+	return undef;
+}
+
+sub from_pkg
+{
+	my $pkg = shift;
+
<<diff output has been trimmed to 500 lines, 4 line(s) remained.>>

Added: toys/rsget.pl/RSGet/Processor.pm
==============================================================================
--- (empty file)
+++ toys/rsget.pl/RSGet/Processor.pm	Thu Oct 29 03:48:11 2009
@@ -0,0 +1,202 @@
+package RSGet::Processor;
+# This file is an integral part of rsget.pl downloader.
+#
+# 2009 (c) Przemysław Iskra <sparky at pld-linux.org>
+#		This program is free software,
+# you may distribute it under GPL v2 or newer.
+
+use strict;
+use warnings;
+use RSGet::Tools;
+set_rev qq$Id$;
+
+my $processed = "";
+sub pr(@)
+{
+	my $line = join "", @_;
+	$processed .= $line;
+	return length $line;
+}
+
+my $is_sub = 0;
+my $last_cmd = undef;
+sub p_sub
+{
+	my $sub = shift;
+	pr "sub $sub {\n";
+	pr "\tmy \$self = shift;\n";
+	foreach ( @_ ) {
+		pr "\t$_;\n";
+	}
+	$is_sub++;
+}
+sub p_subend
+{
+	return unless $is_sub;
+	$is_sub--;
+
+	my $error = 'unexpected end of script';
+	if ( $last_cmd and $last_cmd =~ /(?:click_)?download/ ) {
+		$error = 'download is a HTML page';
+	}
+	$last_cmd = undef;
+	pr "\treturn \${self}->error( '$error' );\n}\n";
+}
+
+my $space;
+sub p_ret
+{
+	my $ret = shift;
+	my @opts = @_;
+	pr $space . "return \${self}->${ret}( ";
+	pr join( ", ", @opts ) . ", " if @opts;
+}
+
+sub p_func
+{
+	my $f = shift;
+	pr $space . "\${self}->$f(";
+}
+
+sub p_line
+{
+	s/\$-{/\$self->{/g;
+	pr $_ . "\n";
+}
+
+
+sub compile
+{
+	my $opts = shift;
+	my $parts = shift;
+
+	$processed = "";
+	$space = "";
+	$last_cmd = undef;
+	$is_sub = 0;
+
+	my $unify_body = ( join "\n", @{ $parts->{unify} } ) || 's/#.*//; s{/$}{};';
+
+	pr "package $opts->{pkg};\n\n";
+	pr <<'EOF';
+	use strict;
+	use warnings;
+	use RSGet::Get;
+	use RSGet::Tools;
+	use URI::Escape;
+
+	BEGIN {
+		our @ISA;
+		@ISA = qw(RSGet::Get);
+	}
+
+	my $STDSIZE = qr/\d+(?:\.\d+)?\s*[kmg]?b/i;
+EOF
+
+	pr join "\n", @{$parts->{pre}}, "\n";
+
+	my $stage = 0;
+	p_sub( "stage0" );
+	my @machine = @{ $parts->{start} };
+	while ( $_ = shift @machine ) {
+		$space = "";
+		$space = $1 if s/^(\s+)//;
+
+		if ( s/^(GET|WAIT|CAPTCHA|(?:CLICK_)?DOWNLOAD|CLICK)\s*\(// ) {
+			my $cmd = lc $1;
+			my $next_stage = "stage" . ++$stage;
+			my @skip;
+			push @skip, $_;
+			until ( /;\s*$/ ) {
+				$_ = shift @machine;
+				push @skip, $_;
+			}
+			p_ret( $cmd, "\\&$next_stage" );
+			foreach ( @skip ) {
+				p_line();
+			}
+			p_subend();
+			$last_cmd = $cmd;
+			p_sub( $next_stage );
+		} elsif ( s/^(GET|WAIT|CAPTCHA|CLICK)_NEXT\s*\(\s*(.*?)\s*,// ) {
+			my $cmd = lc $1;
+			my $next_stage = $2;
+			p_ret( $cmd, "\\&$next_stage" );
+			p_line();
+		} elsif ( s/^GOTO\s+(stage_[a-z0-9_]+)// ) {
+			p_ret( $1 );
+			pr ')';
+			p_line();
+		} elsif ( s/^(stage_[a-z0-9_]+)\s*:\s*(.*)$// ) {
+			my $next_stage = $1;
+			my $left = $_;
+			p_ret( $next_stage );
+			pr ');';
+			p_subend();
+			p_sub( $next_stage );
+			$_ = $left;
+			redo if /\S/;
+		} elsif ( s/^(ERROR|RESTART|LINK|MULTI)\s*\(// ) {
+			p_ret( lc $1 );
+			p_line();
+		} elsif ( s/^INFO\s*\(// ) {
+			pr $space . 'return "info" if ${self}->info( ';
+			p_line();
+		} elsif ( s/^SEARCH\s*\(// ) {
+			pr $space . 'return if ${self}->search( ';
+			p_line();
+		} elsif ( s/^(PRINT|LOG|COOKIE|CAPTCHA_RESULT)\s*\(// ) {
+			p_func( lc $1 );
+			p_line();
+		} elsif ( s/^!\s+// ) {
+			my $line = quotemeta $_;
+			pr $space . 'return ${self}->problem( "'. $line .'" ) unless ';
+			p_line();
+		} else {
+			pr $space;
+			p_line();
+		}
+	}
+	p_subend();
+
+	pr @{$parts->{perl}};
+
+	pr "\npackage $opts->{pkg};\n";
+	pr "sub unify { local \$_ = shift; $unify_body;\nreturn \$_;\n};\n";
+	pr '\&unify;';
+
+	my $unify = eval_it( $processed );
+
+	if ( $@ ) {
+		p "Error(s): $@";
+		return undef unless verbose( 1 );
+		my $err = $@;
+		return undef unless $err =~ /line \d+/;
+		my @p = split /\n/, $processed;
+		for ( my $i = 0; $i < scalar @p; $i++ ) {
+			my $n = $i + 1;
+			p sprintf "%s%4d: %s\n",
+				($err =~ /line $n[^\d]/ ? "!" : " "),
+				$n,
+				$p[ $i ];
+		}
+		return undef;
+	}
+	if ( not $unify or not ref $unify or ref $unify ne "CODE" ) {
+		my $ru = ref $unify || "undef";
+		p "Error: invalid, unify returned '$ru'";
+		return undef;
+	}
+	return $unify;
+}
+
+sub eval_it
+{
+	local $SIG{__DIE__};
+	delete $SIG{__DIE__};
+	return eval shift;
+}
+
+1;
+
+# vim: ts=4:sw=4

Modified: toys/rsget.pl/RSGet/Tools.pm
==============================================================================
--- toys/rsget.pl/RSGet/Tools.pm	(original)
+++ toys/rsget.pl/RSGet/Tools.pm	Thu Oct 29 03:48:11 2009
@@ -16,10 +16,9 @@
 @ISA = qw(Exporter);
 @EXPORT = qw(set_rev s2string bignum de_ml hadd hprint p isotime require_prog
 	irand jstime def_settings setting verbose
-	data_file dump_to_file randomize %getters);
+	data_file dump_to_file randomize);
 @EXPORT_OK = qw();
 
-our %getters;
 our %revisions;
 
 sub set_rev($)


More information about the pld-cvs-commit mailing list