SVN: toys/fun/rsget.pl
sparky
sparky at pld-linux.org
Sun Jun 7 17:58:05 CEST 2009
Author: sparky
Date: Sun Jun 7 17:58:05 2009
New Revision: 10374
Modified:
toys/fun/rsget.pl
Log:
- handle cases where we get an HTML page instead of the file
to be downloaded
- added filefactory.com
Modified: toys/fun/rsget.pl
==============================================================================
--- toys/fun/rsget.pl (original)
+++ toys/fun/rsget.pl Sun Jun 7 17:58:05 2009
@@ -16,11 +16,12 @@
=item Status:
- RS: 2009-06-07 OK
-- NL: 2009-06-07 OK
+- NL: 2009-06-07 OK, captcha works
- OS: not working, captcha not supported
- MU: not working, new captcha not supported
- UT: 2009-06-07 OK
- HF: 2009-06-07 OK
+- FF: 2009-06-07 OK
=item Wishlist:
- handle multiple alternatives for same file
@@ -97,10 +98,10 @@
use WWW::Curl::Multi;
my $curl_headers = [
- 'User-Agent: Mozilla/5.0 (X11; U; Linux ppc; ca-AD; rv:1.8.1.17) Gecko/20080926 PLD/3.0 (Th) Iceape/1.1.12',
+ 'User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10',
'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7',
- 'Accept-Language: ca,en-us;q=0.7,en;q=0.3',
+ 'Accept-Language: en-us,en;q=0.5',
];
sub file_init
@@ -108,14 +109,24 @@
my $self = shift;
my $curl = $self->{curl};
+ $self->{time_start} = time;
+
+ {
+ my $mime = $curl->getinfo( CURLINFO_CONTENT_TYPE );
+ if ( $mime =~ m#^text/html# ) {
+ $self->{is_html} = 1;
+ $self->{size_total} = 0;
+ $self->{size_got} = 0;
+ return;
+ }
+ }
+
{
my $f_len = $curl->getinfo( CURLINFO_CONTENT_LENGTH_DOWNLOAD );
$self->{size_total} = $f_len || 0;
$self->{size_got} = 0;
}
- $self->{time_start} = time;
-
if ( $self->{head} =~ /^Content-Disposition:\s*attachment;\s*filename\s*=\s*"?(.*?)"?\s*$/im ) {
$self->{file_name} = $1;
} else {
@@ -155,8 +166,12 @@
my $len = length $chunk;
$self->{size_got} += $len;
- my $file = $self->{file};
- print $file $chunk;
+ if ( $self->{file} ) {
+ my $file = $self->{file};
+ print $file $chunk;
+ } else {
+ $self->{body} .= $chunk;
+ }
return $len;
}
@@ -222,6 +237,7 @@
$curl->setopt( CURLOPT_WRITEFUNCTION, \&body_file );
$curl->setopt( CURLOPT_WRITEDATA, $ecurl );
} else {
+ $ecurl->{is_html} = 1;
$curl->setopt( CURLOPT_WRITEFUNCTION, \&body_scalar );
$curl->setopt( CURLOPT_WRITEDATA, \$ecurl->{body} );
}
@@ -251,7 +267,7 @@
if ( $err ) {
my $error = $curl->errbuf;
- $obj->print( "error: $err" );
+ $obj->print( "error($err): $error" );
$obj->problem();
return undef;
}
@@ -264,7 +280,7 @@
: $ecurl->{body};
my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
- &$func( $obj, $body, $eurl );
+ &$func( $obj, $body, $eurl, $ecurl->{is_html} );
}
}
@@ -485,6 +501,7 @@
{
my $self = shift;
$self->print("starting download");
+ $self->{file_html} = \&start unless defined $self->{file_html};
$self->curl( $self->{file_url}, \&finish, save => 1, @_ );
}
@@ -505,10 +522,19 @@
sub finish
{
my $self = shift;
- my $reason = shift;
+ my $body = shift;
+ my $url = shift;
+ my $is_html = shift;
- my $url = $self->{url};
- $gotlist{ $url } = $reason;
+ if ( $is_html ) {
+ if ( my $func = $self->{file_html} ) {
+ delete $self->{file_url};
+ delete $self->{file_html};
+ return &$func( $self, $body, $url );
+ }
+ }
+
+ $gotlist{ $self->{url} } = $body;
my $net = $self->{net};
my $id = $self->{id};
@@ -629,7 +655,6 @@
$body =~ /form name="dlf" action="(.*?)"/m;
$self->{file_url} = $1;
- $self->{file_referer} = $url;
$self->wait( $wait, \&stage4, "starting download in" );
}
@@ -663,7 +688,7 @@
my $url = shift;
++$nlcookie;
- my $cookie = ".nl.$nlcookie.txt";
+ my $cookie = ".cookie.nl.$nlcookie.txt";
unlink $cookie if -e $cookie;
Get::makenew( "NL", $class, $url, cookies => $cookie );
@@ -981,7 +1006,7 @@
my $url = shift;
++$oscookie;
- my $cookie = ".os.$oscookie.txt";
+ my $cookie = ".cookie.os.$oscookie.txt";
unlink $cookie if -e $cookie;
Get::makenew( "OS", $class, $url, slots => 16, cookies => $cookie );
@@ -1042,7 +1067,7 @@
my $url = shift;
++$mucookie;
- my $cookie = ".mu.$nlcookie.txt";
+ my $cookie = ".cookie.mu.$nlcookie.txt";
unlink $cookie if -e $cookie;
Get::makenew( "MU", $class, $url, cookies => $cookie );
@@ -1364,6 +1389,93 @@
};
# }}}
+package Get::FileFactory; # {{{
+
+BEGIN {
+ our @ISA;
+ @ISA = qw(Get);
+}
+
+sub new
+{
+ my $proto = shift;
+ my $class = ref $proto || $proto;
+ my $url = shift;
+ Get::makenew( "FF", $class, $url );
+}
+
+sub stage1
+{
+ my $self = shift;
+ delete $self->{referer};
+
+ $self->print("starting...");
+ $self->curl( $self->{url}, \&stage2 );
+}
+
+sub stage2
+{
+ my ($self, $body, $url) = @_;
+ $self->print("starting......");
+ $self->{referer} = $url;
+
+ my $link;
+ if ( $body =~ /You are currently downloading/ ) {
+ return $self->error( "multi-download not allowed" );
+ } elsif ( $body =~ /starthtimer[\s\S]*timerend=d\.getTime\(\)\+(\d+);/m and $1 > 0 ) {
+ return $self->wait( 1 + int ( $1 / 1000 ), \&stage1, "free limit reached, waiting" );
+ } elsif ( $body =~ m#<form action="(.*)" method="post">\s*<input type="submit" value="Free#m ) {
+ $link = $1;
+ } else {
+ return $self->problem( "link", $body );
+ }
+
+ $self->curl( $link, \&stage3, post => "freeBtn=Free%20Download" );
+}
+
+sub stage3
+{
+ my ($self, $body, $url) = @_;
+ $self->{referer} = $url;
+ $self->print("starting.........");
+ if ( $body =~ m#<a href="(.*?)">Click here to begin your download</a># ) {
+ $self->{file_url} = $1;
+ } else {
+ return $self->problem( "file url", $body );
+ }
+
+ $self->wait( 30, \&stage4, "starting download in" );
+}
+
+sub stage4
+{
+ my $self = shift;
+ $self->print("downloading");
+ $self->{file_html} = \&stage5;
+
+ $self->download();
+}
+
+sub stage5
+{
+ my ($self, $body, $url) = @_;
+ # file turned out to be html, means we need to wait
+ if ( $body =~ /You are currently downloading too many files at once/ ) {
+ return $self->error( "multi-download not allowed" );
+ } elsif ( $body =~ /Please wait (\d+) minutes to download more files/ ) {
+ return $self->wait( $1 * 60 - 30, \&stage1, "free limit reached, waiting" );
+ } elsif ( $body =~ /Please wait (\d+) seconds to download more files/ ) {
+ return $self->wait( $1, \&stage1, "free limit reached, waiting" );
+ }
+ return $self->problem( undef, $body );
+}
+
+$getters{FF} = {
+ uri => qr{(www.)?filefactory\.com/.*?},
+ add => sub { Get::FileFactory->new( @_ ) },
+};
+
+# }}}
package main; # {{{
my $get_list = 'get.list';
More information about the pld-cvs-commit
mailing list