SVN: toys/fun/rsget.pl

sparky sparky at pld-linux.org
Sun Jun 7 17:58:05 CEST 2009


Author: sparky
Date: Sun Jun  7 17:58:05 2009
New Revision: 10374

Modified:
   toys/fun/rsget.pl
Log:
- handle cases where we get html page instead of the file
  to be downloaded
- added filefactory.com


Modified: toys/fun/rsget.pl
==============================================================================
--- toys/fun/rsget.pl	(original)
+++ toys/fun/rsget.pl	Sun Jun  7 17:58:05 2009
@@ -16,11 +16,12 @@
 
 =item Status:
 - RS: 2009-06-07 OK
-- NL: 2009-06-07 OK
+- NL: 2009-06-07 OK, captcha works
 - OS: not working, captcha not supported
 - MU: not working, new captcha not supported
 - UT: 2009-06-07 OK
 - HF: 2009-06-07 OK
+- FF: 2009-06-07 OK
 
 =item Wishlist:
 - handle multiple alternatives for same file
@@ -97,10 +98,10 @@
 use WWW::Curl::Multi;
 
 my $curl_headers = [
-	'User-Agent: Mozilla/5.0 (X11; U; Linux ppc; ca-AD; rv:1.8.1.17) Gecko/20080926 PLD/3.0 (Th) Iceape/1.1.12',
+	'User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10',
 	'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
 	'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7',
-	'Accept-Language: ca,en-us;q=0.7,en;q=0.3',
+	'Accept-Language: en-us,en;q=0.5',
 	];
 
 sub file_init
@@ -108,14 +109,24 @@
 	my $self = shift;
 	my $curl = $self->{curl};
 
+	$self->{time_start} = time;
+
+	{
+		my $mime = $curl->getinfo( 	CURLINFO_CONTENT_TYPE );
+		if ( $mime =~ m#^text/html# ) {
+			$self->{is_html} = 1;
+			$self->{size_total} = 0;
+			$self->{size_got} = 0;
+			return;
+		}
+	}
+
 	{
 		my $f_len = $curl->getinfo( CURLINFO_CONTENT_LENGTH_DOWNLOAD );
 		$self->{size_total} = $f_len || 0;
 		$self->{size_got} = 0;
 	}
 
-	$self->{time_start} = time;
-
 	if ( $self->{head} =~ /^Content-Disposition:\s*attachment;\s*filename\s*=\s*"?(.*?)"?\s*$/im ) {
 		$self->{file_name} = $1;
 	} else {
@@ -155,8 +166,12 @@
 	my $len = length $chunk;
 	$self->{size_got} += $len;
 
-	my $file = $self->{file};
-	print $file $chunk;
+	if ( $self->{file} ) {
+		my $file = $self->{file};
+		print $file $chunk;
+	} else {
+		$self->{body} .= $chunk;
+	}
 
 	return $len;
 }
@@ -222,6 +237,7 @@
 		$curl->setopt( CURLOPT_WRITEFUNCTION, \&body_file );
 		$curl->setopt( CURLOPT_WRITEDATA, $ecurl );
 	} else {
+		$ecurl->{is_html} = 1;
 		$curl->setopt( CURLOPT_WRITEFUNCTION, \&body_scalar );
 		$curl->setopt( CURLOPT_WRITEDATA, \$ecurl->{body} );
 	}
@@ -251,7 +267,7 @@
 
 	if ( $err ) {
 		my $error = $curl->errbuf;
-		$obj->print( "error: $err" );
+		$obj->print( "error($err): $error" );
 		$obj->problem();
 		return undef;
 	}
@@ -264,7 +280,7 @@
 			: $ecurl->{body};
 		my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
 		
-		&$func( $obj, $body, $eurl );
+		&$func( $obj, $body, $eurl, $ecurl->{is_html} );
 	}
 }
 
@@ -485,6 +501,7 @@
 {
 	my $self = shift;
 	$self->print("starting download");
+	$self->{file_html} = \&start unless defined $self->{file_html};
 	$self->curl( $self->{file_url}, \&finish, save => 1, @_ );
 }
 
@@ -505,10 +522,19 @@
 sub finish
 {
 	my $self = shift;
-	my $reason = shift;
+	my $body = shift;
+	my $url = shift;
+	my $is_html = shift;
 
-	my $url = $self->{url};
-	$gotlist{ $url } = $reason;
+	if ( $is_html ) {
+		if ( my $func = $self->{file_html} ) {
+			delete $self->{file_url};
+			delete $self->{file_html};
+			return &$func( $self, $body, $url );
+		}
+	}
+
+	$gotlist{ $self->{url} } = $body;
 
 	my $net = $self->{net};
 	my $id = $self->{id};
@@ -629,7 +655,6 @@
 
 	$body =~ /form name="dlf" action="(.*?)"/m;
 	$self->{file_url} = $1;
-	$self->{file_referer} = $url;
 
 	$self->wait( $wait, \&stage4, "starting download in" );
 }
@@ -663,7 +688,7 @@
 	my $url = shift;
 
 	++$nlcookie;
-	my $cookie = ".nl.$nlcookie.txt";
+	my $cookie = ".cookie.nl.$nlcookie.txt";
 	unlink $cookie if -e $cookie;
 
 	Get::makenew( "NL", $class, $url, cookies => $cookie );
@@ -981,7 +1006,7 @@
 	my $url = shift;
 
 	++$oscookie;
-	my $cookie = ".os.$oscookie.txt";
+	my $cookie = ".cookie.os.$oscookie.txt";
 	unlink $cookie if -e $cookie;
 
 	Get::makenew( "OS", $class, $url, slots => 16, cookies => $cookie );
@@ -1042,7 +1067,7 @@
 	my $url = shift;
 
 	++$mucookie;
-	my $cookie = ".mu.$nlcookie.txt";
+	my $cookie = ".cookie.mu.$nlcookie.txt";
 	unlink $cookie if -e $cookie;
 
 	Get::makenew( "MU", $class, $url, cookies => $cookie );
@@ -1364,6 +1389,93 @@
 };
 
 # }}}
+package Get::FileFactory; # {{{
+
+BEGIN {
+	our @ISA;
+	@ISA = qw(Get);
+}
+
+sub new
+{
+	my $proto = shift;
+	my $class = ref $proto || $proto;
+	my $url = shift;
+	Get::makenew( "FF", $class, $url );
+}
+
+sub stage1
+{
+	my $self = shift;
+	delete $self->{referer};
+
+	$self->print("starting...");
+	$self->curl( $self->{url}, \&stage2 );
+}
+
+sub stage2
+{
+	my ($self, $body, $url) = @_;
+	$self->print("starting......");
+	$self->{referer} = $url;
+
+	my $link;
+	if ( $body =~ /You are currently downloading/ ) {
+		return $self->error( "multi-download not allowed" );
+	} elsif ( $body =~ /starthtimer[\s\S]*timerend=d\.getTime\(\)\+(\d+);/m and $1 > 0 ) {
+		return $self->wait( 1 + int ( $1 / 1000 ), \&stage1, "free limit reached, waiting" );
+	} elsif ( $body =~ m#<form action="(.*)" method="post">\s*<input type="submit" value="Free#m ) {
+		$link = $1;
+	} else {
+		return $self->problem( "link", $body );
+	}
+
+	$self->curl( $link, \&stage3, post => "freeBtn=Free%20Download" );
+}
+
+sub stage3
+{
+	my ($self, $body, $url) = @_;
+	$self->{referer} = $url;
+	$self->print("starting.........");
+	if ( $body =~ m#<a href="(.*?)">Click here to begin your download</a># ) {
+		$self->{file_url} = $1;
+	} else {
+		return $self->problem( "file url", $body );
+	}
+	
+	$self->wait( 30, \&stage4, "starting download in" );
+}
+
+sub stage4
+{
+	my $self = shift;
+	$self->print("downloading");
+	$self->{file_html} = \&stage5;
+
+	$self->download();
+}
+
+sub stage5
+{
+	my ($self, $body, $url) = @_;
+	# file turned out to be html, which means we need to wait
+	if ( $body =~ /You are currently downloading too many files at once/ ) {
+		return $self->error( "multi-download not allowed" );
+	} elsif ( $body =~ /Please wait (\d+) minutes to download more files/ ) {
+		return $self->wait( $1 * 60 - 30, \&stage1, "free limit reached, waiting" );
+	} elsif ( $body =~ /Please wait (\d+) seconds to download more files/ ) {
+		return $self->wait( $1, \&stage1, "free limit reached, waiting" );
+	}
+	return $self->problem( undef, $body );
+}
+
+$getters{FF} = {	# register the FileFactory getter under the "FF" key
+	uri => qr{(www\.)?filefactory\.com/.*?},	# dot escaped so only a literal "www." prefix is accepted
+	add => sub { Get::FileFactory->new( @_ ) },	# builds a getter object for a matched url
+};
+
+# }}}
 package main; # {{{
 
 my $get_list = 'get.list';


More information about the pld-cvs-commit mailing list