SVN: toys/fun/rsget.pl

sparky sparky at pld-linux.org
Mon Jun 8 03:45:44 CEST 2009


Author: sparky
Date: Mon Jun  8 03:45:43 2009
New Revision: 10382

Modified:
   toys/fun/rsget.pl
Log:
- updated MegaUpload support, including the new captcha (requires Image::Magick and a db.png file at $PWD/MU-captcha/db.png)


Modified: toys/fun/rsget.pl
==============================================================================
--- toys/fun/rsget.pl	(original)
+++ toys/fun/rsget.pl	Mon Jun  8 03:45:43 2009
@@ -11,17 +11,17 @@
 - check all the URIs just after finding them in the list
   (catch non-existing files quickly)
 - restart download if same URI has been added second time
-- MegaUpload: update, there is new page, and new captcha
 - OdSiebie: there is a captcha now
 
 =item Status:
 - RS: 2009-06-07 OK
 - NL: 2009-06-07 OK, captcha works
 - OS: not working, captcha not supported
-- MU: not working, new captcha not supported
+- MU: 2009-06-08 OK, captcha works, requires db.png
 - UT: 2009-06-07 OK
 - HF: 2009-06-07 OK
 - FF: 2009-06-07 OK
+- DF: 2009-06-07 OK
 
 =item Wishlist:
 - handle multiple alternatives for same file
@@ -32,6 +32,8 @@
 use warnings;
 use Time::HiRes;
 
+our $data_path = $ENV{PWD};
+
 my $checklist = 1;
 my %gotlist;
 $SIG{CHLD} = "IGNORE";
@@ -96,6 +98,7 @@
 package Curl; # {{{
 use WWW::Curl::Easy;
 use WWW::Curl::Multi;
+use URI::Escape;
 
 my $curl_headers = [
 	'User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10',
@@ -132,7 +135,7 @@
 	} else {
 		my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
 		$eurl =~ s#^.*/##;
-		$self->{file_name} = $eurl;
+		$self->{file_name} = uri_unescape( $eurl );
 	}
 
 	{
@@ -1088,18 +1091,15 @@
 	$self->print("starting......");
 	$self->{referer} = $url;
 
-	if ( $body =~ /The file you are trying to access is temporarily unavailable/ ) {
-		return $self->error( "file temporarily unavailable" );
-	}
-	if ( $body =~ /Unfortunately, the link you have clicked is not available./ ) {
+	if ( $body =~ /The file you are trying to access is temporarily unavailable/
+			or $body =~ /Unfortunately, the link you have clicked is not available/
+			or $body =~ /This file has expired due to inactivity/ ) {
 		return $self->error( "file not found" );
 	}
 	my %search = (
-		captcha_img => qr#<img src="(/capgen\.php\?[0-9a-f]+)"#,
-		action => qr#<form method="POST" action="(.*?)"#,
-		s2id => qr#<input type="hidden" name="d" value="(.*?)"#,
-		s2icode => qr#<input type="hidden" name="imagecode" value="(.*?)"#,
-		s2mevagar => qr#<input type="hidden" name="megavar" value="(.*?)"#,
+		captcha_img => qr#<img src="(http://.*/gencap\.php\?[0-9a-f]+\.gif)"#,
+		s2icode => qr#<INPUT type="hidden" name="captchacode" value="(.*?)"#,
+		s2mevagar => qr#<INPUT type="hidden" name="megavar" value="(.*?)"#,
 	);
 
 	foreach my $name ( keys %search ) {
@@ -1119,13 +1119,13 @@
 	my ($self, $body, $url) = @_;
 	$self->print("reading captcha");
 
-	my $captcha = Get::MegaUpload::Captcha::resolve( $body );
+	my $captcha = Get::MegaUpload::Captcha::resolve( \$body );
 
 	unless ( defined $captcha ) {
 		return $self->stage1();
 	}
 
-	my $post = "d=$self->{s2id}&imagecode=$self->{s2icode}&megavar=$self->{s2mevagar}&imagestring=$captcha";
+	my $post = "captchacode=$self->{s2icode}&megavar=$self->{s2mevagar}&captcha=$captcha";
 
 	$self->curl( $self->{action}, \&stage4, post => $post );
 }
@@ -1136,34 +1136,21 @@
 	$self->print("starting.........");
 	$self->{referer} = $url;
 
-	my %search = (
-		s4wait => qr#x[0-9]+=([0-9]+);#,
-		s4g => qr#var . = String\.fromCharCode\(Math.abs\(-?([0-9]+)\)\);#,
-		s4j1 => qr#var . = '(.)' \+ String\.fromCharCode\(Math\.sqrt\([0-9]+\)\);#,
-		s4j2 => qr#var . = '.' \+ String\.fromCharCode\(Math\.sqrt\(([0-9]+)\)\);#,
-		s4href => qr#document\.getElementById\("dlbutton"\)\.innerHTML = '<a href="(.*?)"#,
-	);
-
-	foreach my $name ( keys %search ) {
-		my $search = $search{$name};
-		if ( $body =~ m/$search/ ) {
-			$self->{$name} = $1;
-		} else {
-			return $self->problem( $name, $body )
-		}
+	if ( $body =~ /id="captchaform"/ ) {
+		return $self->stage1( @_ );
 	}
 
-	my $furl = $self->{s4href};
-
-	my $g = chr $self->{s4g};
-	my $j2 = chr sqrt $self->{s4j2};
-	my $jg = $self->{s4j1} . $j2 . $g;
-
-	$furl =~ s/' \+ . \+ . \+ '/$jg/;
-
-	$self->{file_url} = $furl;
+	my $wait;
+	if ( $body =~ /count=([0-9]+);/ ) {
+		$wait = $1;
+	}
+	if ( $body =~ /<a href="(.*?)".*IMG SRC=".*?but_dnld_regular.gif/ ) {
+		$self->{file_url} = $1;
+	} else {
+		return $self->problem( "link", $body )
+	}
 
-	$self->wait( $self->{s4wait}, \&stage5, "starting in" );
+	$self->wait( $wait, \&stage5, "starting in" );
 }
 
 sub stage5
@@ -1188,60 +1175,93 @@
 # }}}
 package Get::MegaUpload::Captcha; # {{{
 
-sub resolve
+my %size = (
+	A => 28, B => 22, C => 21, D => 27, E => 16,
+	F => 16, G => 26, H => 26, K => 20, M => 38,
+	N => 28, P => 21, Q => 30, R => 22, S => 18,
+	T => 19, U => 26, V => 22, W => 40, X => 23,
+	Y => 18, Z => 18
+);
+
+my @db;
+
+sub read_db()
+{
+	my $dbf = new Image::Magick;
+	$dbf->Read( $main::data_path . "/MU-captcha/db.png" );
+	foreach my $pos ( 0..3 ) {
+		my @list = sort keys %size;
+		@list = (1..9) if $pos == 3;
+
+		my $height = 32;
+		my $width = 40;
+		my $left = $width * $pos;
+		$width = 22 if $pos == 3;
+		my $top = 0;
+	
+		my %db;
+		foreach my $char ( @list ) {
+			my $db = $dbf->Clone();
+			$db->Crop( width => $width, height => $height, x => $left, y => $top );
+			$db{$char} = $db;
+			$top += 32;
+		}
+		push @db, \%db;
+	}
+}
+
+sub get_char
 {
-	my $capdata = shift;
-	require GD;
+	my ($src, $db, $width, $x) = @_;
 
-	my $img = GD::Image->new( $capdata );
+	my $img = $src->Clone();
+	$img->Crop( width => $width, height => 32, x => $x, y => 0 );
+	$img->Extent( width => $width, height => 32, x => 0, y => 0 );
 
-	my @img;
-	foreach ( 0..2 ) {
-		my $img = GD::Image->newPalette( 70, 32 );
-		my $w = $img->colorAllocate( 255, 255, 255 );
-		my $b = $img->colorAllocate( 0, 0, 0 );
-		push @img, { img => $img, w => $w, b => $b };
+	my $min = 1;
+	my $min_char = undef;
+	foreach my $n ( keys %$db ) {
+		my $x = $img->Compare( image => $db->{$n} );
+		my ($e, $em) = $img->Get( 'error', 'mean-error' );
+		if ( $em < $min ) {
+			$min = $em;
+			$min_char = $n;
+		}
 	}
+	return $min_char;
+}
 
-	my $bg = $img->getPixel( 0, 0 );
+sub resolve
+{
+	my $data_ref = shift;
 
-	foreach my $y ( 1..30 ) {
-		foreach my $x ( 1..68 ) {
-			my $ci = $img->getPixel( $x, $y );
-			next if $ci == $bg;
-			my ($r, $g, $b ) = $img->rgb( $ci );
+	require Image::Magick;
 
-			next if $g < 0x60 or $g > 0x80 or $b < 0x60 or $g > 0x80;
-			if ( $r > 110 and $r < 130 ) {
-				$img[ 0 ]->{img}->setPixel( $x, $y, $img[ 0 ]->{b} );
-			}
-			if ( $r > 160 and $r < 180 ) {
-				$img[ 1 ]->{img}->setPixel( $x, $y, $img[ 1 ]->{b} );
-			}
-			if ( $r > 210 and $r < 230 ) {
-				$img[ 2 ]->{img}->setPixel( $x, $y, $img[ 2 ]->{b} );
-			}
-		}
-	}
+	read_db() unless @db;
 
-	my @l;
-	require IPC::Open2;
-	foreach ( 0..2 ) {
-		my $img = $img[$_]->{img};
-		IPC::Open2::open2( *READ, *WRITE, "pngtopnm | gocr -f ASCII -m 56 -C A-Z - 2>/dev/null" );
-		print WRITE $img->png;
-		close WRITE;
-		my $out = <READ> || "";
-		close READ;
+	open IMAGE, '>', '.captcha.gif';
+	print IMAGE $$data_ref;
+	close IMAGE;
 
-		if ( $out =~ /^([A-Z])/ ) {
-			push @l, $1;
-		} else {
-			return undef;
-		}
-	}
+	my $img = new Image::Magick;
+	my $x = $img->Read( '.captcha.gif' );
+	unlink '.captcha.gif';
+	return if length $x;
+
+	my ($width, $height) = $img->Get( 'columns', 'rows' );
+
+	my $bg = new Image::Magick;
+	$bg->Set( size => $width."x32" );
+	$bg->Read( "xc:white" );
+	$bg->Composite( image => $img );
+
+	my @cap;
+	push @cap, get_char( $bg, $db[0], 40, 0 );
+	push @cap, get_char( $bg, $db[1], 40, $size{$cap[0]} - 6 );
+	push @cap, get_char( $bg, $db[2], 40, $width - 56 );
+	push @cap, get_char( $bg, $db[3], 22, $width - 22 );
 
-	return join "", @l;
+	return join "", @cap;
 }
 
 # }}}


More information about the pld-cvs-commit mailing list