SVN: toys/rsget.pl/Get/EasyShare
sparky
sparky at pld-linux.org
Wed Oct 21 22:14:42 CEST 2009
Author: sparky
Date: Wed Oct 21 22:14:42 2009
New Revision: 10801
Modified:
toys/rsget.pl/Get/EasyShare
Log:
- updated to use newest tools
- added captcha processor
Modified: toys/rsget.pl/Get/EasyShare
==============================================================================
--- toys/rsget.pl/Get/EasyShare (original)
+++ toys/rsget.pl/Get/EasyShare Wed Oct 21 22:14:42 2009
@@ -10,7 +10,6 @@
COOKIE( [ (qw(.easy-share.com TRUE / FALSE), time + 80000, 'language', 'en' ) ] );
GET( $-{_uri} );
- $-{page} = $-{_referer};
ERROR( "file not found" ) if /File not found/;
ERROR( "file removed" ) if /Requested file is deleted/;
@@ -22,7 +21,7 @@
if m#You have downloaded over.*?during last hour#;
unless ( m#<script type="text/javascript">\s*u='(.*?)';\s*w='(\d+)';# ) {
- return $self->stage_captcha();
+ GOTO stage_captcha;
}
$-{uri} = $1;
my $wait = $2;
@@ -30,9 +29,12 @@
WAIT( $wait, "expecting captcha" );
stage_cappage:
- GET( $-{uri} );
+ GET( $-{uri}, keep_referer => 1 );
stage_captcha:
+ RESTART( - ( 60 + int rand 300 ), "free limit reached" )
+ if m#You have downloaded over.*?during last hour#;
+
if ( m#<script type="text/javascript">\s*u='(.*?)';\s*w='(\d+)';# ) {
$-{uri} = $1;
WAIT_NEXT( stage_cappage, $2, "still expecting captcha" );
@@ -47,14 +49,70 @@
! m#<p class="pb0"><img src="(.*?)" width="220" height="50" alt="" border="1"></p>#;
my $img = $1;
- GET( $img );
+ GET( $img, keep_referer => 1 );
+
+ CAPTCHA(
+ qr/[2-9abcdeghkmnpqsuvxyz]{5,6}/,
+ process => \&es_decaptcha
+ );
- CAPTCHA( "image/jpeg" );
- my $captcha = $_;
+ GOTO stage_cappage unless $_;
- DOWNLOAD( $-{file_uri}, post => { captcha => $captcha, id => $-{id} } );
+ DOWNLOAD( $-{file_uri}, post => { captcha => $_, id => $-{id} } );
- RESTART( 1, "invalid captcha" ) if /Invalid characters/;
+ if ( /Invalid characters/ ) {
+ CAPTCHA_RESULT( "FAIL" );
+ RESTART( 1, "invalid captcha" );
+ }
RESTART( 1, "to slow ?" ) if m#<script type="text/javascript">\s*u='(.*?)';\s*w='(\d+)';#;
+perl:
+
+sub es_color_get_luma # returns a weighted sum of its first three arguments
+{
+ # for real luma the values should be converted to linear first; arguments are presumably ( R, G, B ) -- TODO confirm against color_filter
+ return ($_[0] * 0.30 + $_[1] * 0.59 + $_[2] * 0.11);
+}
+
+sub es_histogram_get_min_max # finds the ( $min, $max ) histogram bins from cumulative counts
+{
+ my $h = shift; # array ref, read at indices 0..255
+ my $min_sum = shift; # running total that must be exceeded when scanning up from bin 0
+ my $max_sum = shift; # running total that must be exceeded when scanning down from bin 255
+
+ my ( $max, $min, $sum ); # $max / $min stay undef if their threshold is never exceeded
+ $sum = 0;
+ for ( my $i = 255; $i >= 0; $i-- ) { # downward scan, top bin first
+ $sum += $h->[$i];
+ if ( $sum > $max_sum ) {
+ $max = $i; # first bin from the top where the running total passes $max_sum
+ last;
+ }
+ }
+ $sum = 0; # restart the running total for the upward scan
+ for ( my $i = 0; $i < 256; $i++ ) { # upward scan, bottom bin first
+ $sum += $h->[$i];
+ if ( $sum > $min_sum ) {
+ $min = $i; # first bin from the bottom where the running total passes $min_sum
+ last;
+ }
+ }
+
+ return ( $min, $max ) # two-element list, $min first
+}
+
+sub es_decaptcha # image processor handed to CAPTCHA() via process => \&es_decaptcha
+{
+ my $img = shift; # image object; color_filter, histogram, luma_emphasize and ocr are called on it
+ my $name = shift; # second argument; not used in this sub
+
+ $img->color_filter( \&es_color_get_luma ); # hand the luma weighting callback to color_filter
+ my $h = $img->histogram();
+ $img->luma_emphasize( es_histogram_get_min_max( $h, 50, 3000 ) ); # passes ( $min, $max ) found with $min_sum = 50 and $max_sum = 3000
+ return $img->ocr(); # whatever ocr() returns is passed straight back to the caller
+ #char_fix => { " " => "", G => 6, Z => 2, S => 8, B => 8, X => 'x', Q => 9, },
+}
+
+
+
# vim:ts=4:sw=4
More information about the pld-cvs-commit
mailing list