SVN: toys/fun/rsget.pl
sparky
sparky at pld-linux.org
Mon Jun 8 03:45:44 CEST 2009
Author: sparky
Date: Mon Jun 8 03:45:43 2009
New Revision: 10382
Modified:
toys/fun/rsget.pl
Log:
- updated MegaUpload, including captcha (requires Image::Magick and db.png file)
Modified: toys/fun/rsget.pl
==============================================================================
--- toys/fun/rsget.pl (original)
+++ toys/fun/rsget.pl Mon Jun 8 03:45:43 2009
@@ -11,17 +11,17 @@
- check all the URIs just after finding them in the list
(catch non-existing files quickly)
- restart download if same URI has been added second time
-- MegaUpload: update, there is new page, and new captcha
- OdSiebie: there is a captcha now
=item Status:
- RS: 2009-06-07 OK
- NL: 2009-06-07 OK, captcha works
- OS: not working, captcha not supported
-- MU: not working, new captcha not supported
+- MU: 2009-06-08 OK, captcha works, requires db.png
- UT: 2009-06-07 OK
- HF: 2009-06-07 OK
- FF: 2009-06-07 OK
+- DF: 2009-06-07 OK
=item Wishlist:
- handle multiple alternatives for same file
@@ -32,6 +32,8 @@
use warnings;
use Time::HiRes;
+our $data_path = $ENV{PWD};
+
my $checklist = 1;
my %gotlist;
$SIG{CHLD} = "IGNORE";
@@ -96,6 +98,7 @@
package Curl; # {{{
use WWW::Curl::Easy;
use WWW::Curl::Multi;
+use URI::Escape;
my $curl_headers = [
'User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10',
@@ -132,7 +135,7 @@
} else {
my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
$eurl =~ s#^.*/##;
- $self->{file_name} = $eurl;
+ $self->{file_name} = uri_unescape( $eurl );
}
{
@@ -1088,18 +1091,15 @@
$self->print("starting......");
$self->{referer} = $url;
- if ( $body =~ /The file you are trying to access is temporarily unavailable/ ) {
- return $self->error( "file temporarily unavailable" );
- }
- if ( $body =~ /Unfortunately, the link you have clicked is not available./ ) {
+ if ( $body =~ /The file you are trying to access is temporarily unavailable/
+ or $body =~ /Unfortunately, the link you have clicked is not available/
+ or $body =~ /This file has expired due to inactivity/ ) {
return $self->error( "file not found" );
}
my %search = (
- captcha_img => qr#<img src="(/capgen\.php\?[0-9a-f]+)"#,
- action => qr#<form method="POST" action="(.*?)"#,
- s2id => qr#<input type="hidden" name="d" value="(.*?)"#,
- s2icode => qr#<input type="hidden" name="imagecode" value="(.*?)"#,
- s2mevagar => qr#<input type="hidden" name="megavar" value="(.*?)"#,
+ captcha_img => qr#<img src="(http://.*/gencap\.php\?[0-9a-f]+\.gif)"#,
+ s2icode => qr#<INPUT type="hidden" name="captchacode" value="(.*?)"#,
+ s2mevagar => qr#<INPUT type="hidden" name="megavar" value="(.*?)"#,
);
foreach my $name ( keys %search ) {
@@ -1119,13 +1119,13 @@
my ($self, $body, $url) = @_;
$self->print("reading captcha");
- my $captcha = Get::MegaUpload::Captcha::resolve( $body );
+ my $captcha = Get::MegaUpload::Captcha::resolve( \$body );
unless ( defined $captcha ) {
return $self->stage1();
}
- my $post = "d=$self->{s2id}&imagecode=$self->{s2icode}&megavar=$self->{s2mevagar}&imagestring=$captcha";
+ my $post = "captchacode=$self->{s2icode}&megavar=$self->{s2mevagar}&captcha=$captcha";
$self->curl( $self->{action}, \&stage4, post => $post );
}
@@ -1136,34 +1136,21 @@
$self->print("starting.........");
$self->{referer} = $url;
- my %search = (
- s4wait => qr#x[0-9]+=([0-9]+);#,
- s4g => qr#var . = String\.fromCharCode\(Math.abs\(-?([0-9]+)\)\);#,
- s4j1 => qr#var . = '(.)' \+ String\.fromCharCode\(Math\.sqrt\([0-9]+\)\);#,
- s4j2 => qr#var . = '.' \+ String\.fromCharCode\(Math\.sqrt\(([0-9]+)\)\);#,
- s4href => qr#document\.getElementById\("dlbutton"\)\.innerHTML = '<a href="(.*?)"#,
- );
-
- foreach my $name ( keys %search ) {
- my $search = $search{$name};
- if ( $body =~ m/$search/ ) {
- $self->{$name} = $1;
- } else {
- return $self->problem( $name, $body )
- }
+ if ( $body =~ /id="captchaform"/ ) {
+ return $self->stage1( @_ );
}
- my $furl = $self->{s4href};
-
- my $g = chr $self->{s4g};
- my $j2 = chr sqrt $self->{s4j2};
- my $jg = $self->{s4j1} . $j2 . $g;
-
- $furl =~ s/' \+ . \+ . \+ '/$jg/;
-
- $self->{file_url} = $furl;
+ my $wait;
+ if ( $body =~ /count=([0-9]+);/ ) {
+ $wait = $1;
+ }
+ if ( $body =~ /<a href="(.*?)".*IMG SRC=".*?but_dnld_regular.gif/ ) {
+ $self->{file_url} = $1;
+ } else {
+ return $self->problem( "link", $body )
+ }
- $self->wait( $self->{s4wait}, \&stage5, "starting in" );
+ $self->wait( $wait, \&stage5, "starting in" );
}
sub stage5
@@ -1188,60 +1175,93 @@
# }}}
package Get::MegaUpload::Captcha; # {{{
-sub resolve
+my %size = (
+ A => 28, B => 22, C => 21, D => 27, E => 16,
+ F => 16, G => 26, H => 26, K => 20, M => 38,
+ N => 28, P => 21, Q => 30, R => 22, S => 18,
+ T => 19, U => 26, V => 22, W => 40, X => 23,
+ Y => 18, Z => 18
+);
+
+my @db;
+
+sub read_db()
+{
+ my $dbf = new Image::Magick;
+ $dbf->Read( $main::data_path . "/MU-captcha/db.png" );
+ foreach my $pos ( 0..3 ) {
+ my @list = sort keys %size;
+ @list = (1..9) if $pos == 3;
+
+ my $height = 32;
+ my $width = 40;
+ my $left = $width * $pos;
+ $width = 22 if $pos == 3;
+ my $top = 0;
+
+ my %db;
+ foreach my $char ( @list ) {
+ my $db = $dbf->Clone();
+ $db->Crop( width => $width, height => $height, x => $left, y => $top );
+ $db{$char} = $db;
+ $top += 32;
+ }
+ push @db, \%db;
+ }
+}
+
+sub get_char
{
- my $capdata = shift;
- require GD;
+ my ($src, $db, $width, $x) = @_;
- my $img = GD::Image->new( $capdata );
+ my $img = $src->Clone();
+ $img->Crop( width => $width, height => 32, x => $x, y => 0 );
+ $img->Extent( width => $width, height => 32, x => 0, y => 0 );
- my @img;
- foreach ( 0..2 ) {
- my $img = GD::Image->newPalette( 70, 32 );
- my $w = $img->colorAllocate( 255, 255, 255 );
- my $b = $img->colorAllocate( 0, 0, 0 );
- push @img, { img => $img, w => $w, b => $b };
+ my $min = 1;
+ my $min_char = undef;
+ foreach my $n ( keys %$db ) {
+ my $x = $img->Compare( image => $db->{$n} );
+ my ($e, $em) = $img->Get( 'error', 'mean-error' );
+ if ( $em < $min ) {
+ $min = $em;
+ $min_char = $n;
+ }
}
+ return $min_char;
+}
- my $bg = $img->getPixel( 0, 0 );
+sub resolve
+{
+ my $data_ref = shift;
- foreach my $y ( 1..30 ) {
- foreach my $x ( 1..68 ) {
- my $ci = $img->getPixel( $x, $y );
- next if $ci == $bg;
- my ($r, $g, $b ) = $img->rgb( $ci );
+ require Image::Magick;
- next if $g < 0x60 or $g > 0x80 or $b < 0x60 or $g > 0x80;
- if ( $r > 110 and $r < 130 ) {
- $img[ 0 ]->{img}->setPixel( $x, $y, $img[ 0 ]->{b} );
- }
- if ( $r > 160 and $r < 180 ) {
- $img[ 1 ]->{img}->setPixel( $x, $y, $img[ 1 ]->{b} );
- }
- if ( $r > 210 and $r < 230 ) {
- $img[ 2 ]->{img}->setPixel( $x, $y, $img[ 2 ]->{b} );
- }
- }
- }
+ read_db() unless @db;
- my @l;
- require IPC::Open2;
- foreach ( 0..2 ) {
- my $img = $img[$_]->{img};
- IPC::Open2::open2( *READ, *WRITE, "pngtopnm | gocr -f ASCII -m 56 -C A-Z - 2>/dev/null" );
- print WRITE $img->png;
- close WRITE;
- my $out = <READ> || "";
- close READ;
+ open IMAGE, '>', '.captcha.gif';
+ print IMAGE $$data_ref;
+ close IMAGE;
- if ( $out =~ /^([A-Z])/ ) {
- push @l, $1;
- } else {
- return undef;
- }
- }
+ my $img = new Image::Magick;
+ my $x = $img->Read( '.captcha.gif' );
+ unlink '.captcha.gif';
+ return if length $x;
+
+ my ($width, $height) = $img->Get( 'columns', 'rows' );
+
+ my $bg = new Image::Magick;
+ $bg->Set( size => $width."x32" );
+ $bg->Read( "xc:white" );
+ $bg->Composite( image => $img );
+
+ my @cap;
+ push @cap, get_char( $bg, $db[0], 40, 0 );
+ push @cap, get_char( $bg, $db[1], 40, $size{$cap[0]} - 6 );
+ push @cap, get_char( $bg, $db[2], 40, $width - 56 );
+ push @cap, get_char( $bg, $db[3], 22, $width - 22 );
- return join "", @l;
+ return join "", @cap;
}
# }}}
More information about the pld-cvs-commit mailing list