SVN: toys/rsget.pl/Get/EasyShare

sparky sparky at pld-linux.org
Wed Oct 21 22:14:42 CEST 2009


Author: sparky
Date: Wed Oct 21 22:14:42 2009
New Revision: 10801

Modified:
   toys/rsget.pl/Get/EasyShare
Log:
- updated to use newest tools
- added captcha processor


Modified: toys/rsget.pl/Get/EasyShare
==============================================================================
--- toys/rsget.pl/Get/EasyShare	(original)
+++ toys/rsget.pl/Get/EasyShare	Wed Oct 21 22:14:42 2009
@@ -10,7 +10,6 @@
 	COOKIE( [ (qw(.easy-share.com TRUE / FALSE), time + 80000, 'language', 'en' ) ] );
 
 	GET( $-{_uri} );
-	$-{page} = $-{_referer};
 
 	ERROR( "file not found" ) if /File not found/;
 	ERROR( "file removed" ) if /Requested file is deleted/;
@@ -22,7 +21,7 @@
 		if m#You have downloaded over.*?during last hour#;
 
 	unless ( m#<script type="text/javascript">\s*u='(.*?)';\s*w='(\d+)';# ) {
-		return $self->stage_captcha();
+		GOTO stage_captcha;
 	}
 	$-{uri} = $1;
 	my $wait = $2;
@@ -30,9 +29,12 @@
 	WAIT( $wait, "expecting captcha" );
 stage_cappage:
 
-	GET( $-{uri} );
+	GET( $-{uri}, keep_referer => 1 );
 stage_captcha:
 
+	RESTART( - ( 60 + int rand 300 ), "free limit reached" )
+		if m#You have downloaded over.*?during last hour#;
+
 	if ( m#<script type="text/javascript">\s*u='(.*?)';\s*w='(\d+)';# ) {
 		$-{uri} = $1;
 		WAIT_NEXT( stage_cappage, $2, "still expecting captcha" );
@@ -47,14 +49,70 @@
 	! m#<p class="pb0"><img src="(.*?)" width="220" height="50" alt="" border="1"></p>#;
 	my $img = $1;
 
-	GET( $img );
+	GET( $img, keep_referer => 1 );
+
+	CAPTCHA(
+		qr/[2-9abcdeghkmnpqsuvxyz]{5,6}/,
+		process => \&es_decaptcha
+	);
 
-	CAPTCHA( "image/jpeg" );
-	my $captcha = $_;
+	GOTO stage_cappage unless $_;
 
-	DOWNLOAD( $-{file_uri}, post => { captcha => $captcha, id => $-{id} } );
+	DOWNLOAD( $-{file_uri}, post => { captcha => $_, id => $-{id} } );
 
-	RESTART( 1, "invalid captcha" ) if /Invalid characters/;
+	if ( /Invalid characters/ ) {
+		CAPTCHA_RESULT( "FAIL" );
+		RESTART( 1, "invalid captcha" );
+	}
 	RESTART( 1, "to slow ?" ) if m#<script type="text/javascript">\s*u='(.*?)';\s*w='(\d+)';#;
 
+perl:
+
+sub es_color_get_luma  # weighted sum of the first three arguments -- presumably (R, G, B); confirm against color_filter
+{
+	# Approximation only: a real luma computation should convert the values to linear first.
+	return ($_[0] * 0.30 + $_[1] * 0.59 + $_[2] * 0.11);  # the three weights add up to 1.00
+}
+
+sub es_histogram_get_min_max  # returns ( $min, $max ): indices at which the running histogram totals first exceed the given sums
+{
+	my $h = shift;  # array reference, read at indices 0..255
+	my $min_sum = shift;  # threshold for the upward scan that yields $min
+	my $max_sum = shift;  # threshold for the downward scan that yields $max
+
+	my ( $max, $min, $sum );  # $max / $min stay undef when their threshold is never exceeded
+	$sum = 0;
+	for ( my $i = 255; $i >= 0; $i-- ) {  # accumulate from the highest index downwards
+		$sum += $h->[$i];
+		if ( $sum > $max_sum ) {  # strict '>': first index whose running total passes $max_sum
+			$max = $i;
+			last;
+		}
+	}
+	$sum = 0;  # restart the running total for the second scan
+	for ( my $i = 0; $i < 256; $i++ ) {  # accumulate from the lowest index upwards
+		$sum += $h->[$i];
+		if ( $sum > $min_sum ) {  # strict '>': first index whose running total passes $min_sum
+			$min = $i;
+			last;
+		}
+	}
+
+	return ( $min, $max )
+}
+
+sub es_decaptcha  # handed to CAPTCHA() as its 'process' callback; returns whatever $img->ocr() returns
+{
+	my $img = shift;  # image object; must provide color_filter, histogram, luma_emphasize and ocr
+	my $name = shift;  # second argument; not used anywhere in this sub
+
+	$img->color_filter( \&es_color_get_luma );  # presumably applies the luma helper to every pixel -- TODO confirm
+	my $h = $img->histogram();  # passed below as the array reference that es_histogram_get_min_max indexes 0..255
+	$img->luma_emphasize( es_histogram_get_min_max( $h, 50, 3000 ) );  # receives ( $min, $max ) computed with min_sum 50, max_sum 3000
+	return $img->ocr();
+	# NOTE(review): disabled option, presumably once meant for ocr() -- char_fix => { " " => "", G => 6, Z => 2, S => 8, B => 8, X => 'x', Q => 9, },
+}
+
+
+
 # vim:ts=4:sw=4


More information about the pld-cvs-commit mailing list