SVN: toys/rsget.pl/Get/NetLoad
sparky
sparky at pld-linux.org
Thu Oct 22 02:22:39 CEST 2009
Author: sparky
Date: Thu Oct 22 02:22:38 2009
New Revision: 10808
Modified:
toys/rsget.pl/Get/NetLoad
Log:
- updated to use newest tools
- dedicated captcha solver replaced with captcha-processor code
Modified: toys/rsget.pl/Get/NetLoad
==============================================================================
--- toys/rsget.pl/Get/NetLoad (original)
+++ toys/rsget.pl/Get/NetLoad Thu Oct 22 02:22:38 2009
@@ -4,20 +4,7 @@
short: NL
uri: qr{netload\.in/datei}
cookie: nl
-status: OK 2009-08-24
-
-pre:
- use IPC::Open2;
- use GD;
-
- my @missing;
- foreach my $prog ( qw(ocrad gocr pngtopnm) ) {
- push @missing, $prog unless require_prog( $prog );
- }
- if ( @missing ) {
- my $m = join ", ", @missing;
- die "Missing applications: $m\n";
- }
+status: OK 2009-10-22
start:
GET( $-{_uri} );
@@ -32,34 +19,38 @@
if /We will prepare your download/;
! /href="(.*?captcha=1)"/;
- GET( de_ml( $1 ) );
- $-{dl_page} = $-{_referer};
+ CLICK( de_ml( $1 ) );
RESTART( 1, "Still on the same page ?" ) if /"(.*?captcha=1)"/;
- SEARCH(
- action => qr#<form method="post" action="(.*?)">#,
- captcha_img => qr#"(share/includes/captcha\.php\?t=[0-9]+)"#,
- file_id => qr#input name="file_id" .*value="(.*?)"#,
- s3wait => qr#please wait .*countdown\(([0-9]+),#,
- );
+ ! m#please wait .*countdown\(([0-9]+),#;
+ $-{s3wait} = $1;
+
+ ! $-{capform} = $self->form( match => { body => qr/Please enter the Securitycode/ } );
- GET( $-{captcha_img} );
+ ! m#"(share/includes/captcha\.php\?t=[0-9]+)"#;
+ $-{captcha_img} = $1;
+
+stage_getcaptcha:
+ GET( $-{captcha_img}, keep_referer => 1 );
+
+ CAPTCHA(
+ qr/[0-9]{4}/,
+ process => \&nl_decaptcha
+ );
- $-{captcha} = Get::NetLoad::Captcha::resolve( \$_ );
- RESTART( 1, "Can't read captcha" ) unless defined $-{captcha};
+ GOTO stage_getcaptcha unless defined $_;
+ $-{capform}->set( captcha_check => $_ );
WAIT( $-{s3wait} / 100, "checking" );
- $-{_referer} = $-{dl_page};
- GET( $-{action}, post => {
- file_id => $-{file_id},
- captcha_check => $-{captcha},
- start => ''
- } );
+ GET( $-{capform}->post() );
- RESTART( 1, "Wrong captcha" )
- if /You may forgot the security code or it might be wrong/;
+ if ( /You may forgot the security code or it might be wrong/ ) {
+ PRINT( "Captcha failed" );
+ CAPTCHA_RESULT( "FAIL" );
+ GOTO stage_first;
+ }
ERROR( "file not found" )
if /This file is currently unavailable/;
RESTART( $1 / 100, "free limit reached" )
@@ -68,170 +59,97 @@
! /<a class="Orange_Link" href="(.*?)"/;
$-{file_uri} = $1;
+ CAPTCHA_RESULT( "OK" );
+
! /please wait .*countdown\(([0-9]+),/;
WAIT( $1 / 100, "starting download" );
- DOWNLOAD( $-{file_uri} );
+ CLICK_DOWNLOAD( $-{file_uri} );
perl:
-package Get::NetLoad::Captcha;
-sub blankline
+sub nl_color_select_grey
{
- my $img = shift;
- my $x = shift;
- my $n = 0;
- my $white = $img->colorClosest( 255, 255, 255 );
- foreach my $y ( 0..28 ) {
- my $ci = $img->getPixel( $x, $y );
- next if $ci == $white;
- $n++;
- return 0 if $n > 1;
- }
- return 1;
+ my @s = sort { $a <=> $b } @_;
+ return ( $s[2] - $s[0] > 50 ) ? 0xff : $s[0];
}
-sub blanklinev
+sub nl_filter_spots # remove dark pixels
{
- my $img = shift;
- my $y = shift;
- my $y2 = $y + shift;
- my $xmin = shift;
- my $xmax = shift;
- my $n = 0;
- my $white = $img->colorClosest( 255, 255, 255 );
- foreach my $x ( $xmin..$xmax ) {
- my $ci = $img->getPixel( $x, $y );
- $n++ if $ci != $white;
- $ci = $img->getPixel( $x, $y2 );
- $n++ if $ci != $white;
- return 0 if $n > 2;
+ my $pix = shift;
+
+ my $lim = 250;
+ return if $pix->isAbove( $lim );
+
+ my $whites = 0;
+ my @sides = ( $pix->up, $pix->down, $pix->left, $pix->right );
+ foreach my $spix ( @sides ) {
+ return unless $spix; # borders are taken care of
+ $whites++ if $spix->isAbove( $lim );
+ }
+ return if $whites <= 2;
+ if ( $whites == 4 ) {
+ $pix->set( 0xff );
+ return;
+ }
+
+ # possible double spot
+ my $bpix;
+ foreach my $spix ( @sides ) {
+ unless ( $spix->isAbove( $lim ) ) {
+ $bpix = $spix;
+ last;
+ }
}
- return 1;
-}
-sub charat
-{
- my $img = shift;
- my $trimg = shift;
- my $sx = shift;
+ $whites = 0;
+ my $sides = 0;
+ @sides = ( $bpix->up, $bpix->down, $bpix->left, $bpix->right );
+ foreach my $spix ( @sides ) {
+ next unless $spix;
+ $sides++;
+ $whites++ if $spix->isAbove( $lim );
+ }
- my $xmin = $sx;
- until( blankline( $img, $xmin ) ) {
- $xmin--;
- }
- my $xmax = $sx+1;
- until( blankline( $img, $xmax ) ) {
- $xmax++;
- }
- my $ymin = 14;
- until( blanklinev( $img, $ymin, -1, $xmin, $xmax ) ) {
- $ymin--;
- }
- my $ymax = 15;
- until( blanklinev( $img, $ymax, +1, $xmin, $xmax ) ) {
- $ymax++;
- }
-
- my $w = $xmax - $xmin;
- my $h = $ymax - $ymin;
- my $nimg = new GD::Image( $w * 4 + 16, ($h > 12 ? $h : 12 ) + 4 );
- my $nw = $nimg->colorAllocate( 255, 255, 255);
- $nimg->copy( $trimg, 1, 1, $xmin, $ymin, $w, $h );
- $nimg->copy( $trimg, 3 + 1*$w, 1, $xmin, $ymin, $w, $h );
- $nimg->copy( $trimg, 13 + 2*$w, 1, $xmin, $ymin, $w, $h );
- $nimg->copy( $trimg, 15 + 3*$w, 1, $xmin, $ymin, $w, $h );
-
- IPC::Open2::open2( *READ, *WRITE, "pngtopnm | gocr -f ASCII -a 5 -m 56 -C 0123456789 - 2>/dev/null" );
- print WRITE $nimg->png;
- close WRITE;
- my $num = <READ> || "";
- close READ;
-
- my ($gocr) = ($num =~ /^([0-9])/);
-
- IPC::Open2::open2( *READ, *WRITE, "pngtopnm | ocrad --filter=numbers_only - 2>/dev/null" );
- print WRITE $nimg->png;
- close WRITE;
- $num = <READ> || "";
- close READ;
-
- my ($ocrad) = ($num =~ /^([0-9])/);
-
- #print "G: $gocr, O: $ocrad\n";
- if ( defined $gocr ) {
- return 7 if ( defined $ocrad and $ocrad == 7 and $gocr == 1 );
- return $gocr;
- } elsif ( defined $ocrad ) {
- return $ocrad;
+ if ( $whites >= $sides - 1 ) {
+ # it is a double spot
+ $pix->set( 0xff );
+ $bpix->set( 0xff );
}
- return undef;
}
-sub resolve
+sub nl_filter_snow # remove light pixels
{
- my $capdata = shift;
-
- my $img = GD::Image->new( $$capdata );
- my $white = $img->colorClosest( 255, 255, 255 );
-
- foreach my $y ( 0..28 ) {
- $img->setPixel( 0, $y, $white );
- $img->setPixel( 73, $y, $white );
- }
- foreach my $x ( 0..73 ) {
- $img->setPixel( $x, 0, $white );
- $img->setPixel( $x, 28, $white );
- }
-
- foreach my $y ( 1..27 ) {
- FORX: foreach my $x ( 1..72 ) {
- my $ci = $img->getPixel( $x, $y );
- next if $ci == $white;
- my @xy = ( [0, 1], [0, -1], [1, 0], [-1, 0] );
-
- my $wrong = 0;
- foreach my $xy ( @xy ) {
- my $c = $img->getPixel( $x + $xy->[0], $y + $xy->[1] );
- if ( $c != $white ) {
- $wrong++;
- next FORX if $wrong > 1;
- }
- }
-
- $img->setPixel( $x, $y, $white );
- }
- }
+ my $pix = shift;
+ my $lim = 10;
+ return if $pix->isBelow( $lim );
- my $trimg = GD::Image->newTrueColor( 74, 29 );
- my $trwhite = $trimg->colorAllocate( 255, 255, 255 );
- $trimg->fill( 0, 0, $trwhite );
- foreach my $y ( 0..28 ) {
- foreach my $x ( 0..73 ) {
- my $ci = $img->getPixel( $x, $y );
- my ($r, $g, $b ) = $img->rgb( $ci );
- $r = (256 - $r) / 256;
- $g = (256 - $g) / 256;
- $b = (256 - $b) / 256;
- my $c = 256 - 256 * (($r * $g * $b) ** (1/3));
-
- my $gray = $trimg->colorResolve( $c, $c, $c );
-
- $trimg->setPixel( $x, $y, $gray );
- }
- }
-
- my @n;
- push @n, charat( $img, $trimg, 9 );
- push @n, charat( $img, $trimg, 28 );
- push @n, charat( $img, $trimg, 42 );
- push @n, charat( $img, $trimg, 58 );
- foreach (@n) {
- return undef unless defined $_;
+ my $black = 0;
+ my @sides = ( $pix->up, $pix->down, $pix->left, $pix->right );
+ foreach my $i ( (0..3) ) {
+ my $pix = $sides[ $i ];
+ next unless $pix;
+ $black |= 1 << $i if $pix->isBelow( $lim );
+ }
+ if ( ($black & 0x03) == 0x03
+ or ($black & 0x0c) == 0x0c ) {
+ $pix->set( 0 );
}
+}
- return join "", @n;
+sub nl_decaptcha
+{
+ my $img = shift;
+ $img->color_filter( \&nl_color_select_grey );
+ $img->set_border( 0xff );
+ $img->luma_emphasize( 180, 256 );
+ $img->pix_filter( \&nl_filter_spots );
+ $img->pix_filter( \&nl_filter_snow );
+ # TODO: chop to pieces and scan each digit separately
+ local $_ = $img->doublesize->ocr();
+ s/\s+//;
+ return $_;
}
# vim:ts=4:sw=4
More information about the pld-cvs-commit
mailing list