SVN: toys/fun: . rsget.pl
sparky
sparky at pld-linux.org
Sun Dec 28 01:11:14 CET 2008
Author: sparky
Date: Sun Dec 28 01:11:14 2008
New Revision: 10041
Added:
toys/fun/
toys/fun/rsget.pl (contents, props changed)
Log:
- not related to PLD
Added: toys/fun/rsget.pl
==============================================================================
--- (empty file)
+++ toys/fun/rsget.pl Sun Dec 28 01:11:14 2008
@@ -0,0 +1,205 @@
#!/usr/bin/perl
#
# 2008 (c) Przemysław Iskra <sparky at pld-linux.org>
# Use/modify/distribute under GPL v2 or newer.
#
use strict;
use warnings;
use WWW::Curl::Easy;

# Unbuffer STDOUT so the "\r"-overwritten progress lines show up immediately.
$| = 1;

# Request headers sent with every transfer; they mimic a desktop browser
# (Iceape on Linux/PPC) so the downloads look like ordinary browsing.
my @browser_headers = (
	'User-Agent: Mozilla/5.0 (X11; U; Linux ppc; ca-AD; rv:1.8.1.17) Gecko/20080926 PLD/3.0 (Th) Iceape/1.1.12',
	'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
	'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept-Language: ca,en-us;q=0.7,en;q=0.3',
);
my $curl_headers = \@browser_headers;
+
# libcurl write callback used when a download is saved to disk.
# Writes each received chunk to $self->{file} and prints a one-line
# progress report (bytes, optional percentage, average KB/s) to STDOUT.
# Must return the chunk length, otherwise libcurl aborts the transfer.
sub body_file {
	my ($chunk, $self) = @_;

	# First chunk: ask curl for the expected size and note the start time.
	unless ( exists $self->{total} ) {
		my $info = $self->{curl}->getinfo(
			CURLINFO_CONTENT_LENGTH_DOWNLOAD
		);
		# libcurl reports 0 or -1 when the length is unknown;
		# normalize both to '?' (the original let -1 through and
		# '?' later triggered "isn't numeric" warnings).
		$self->{total} = ($info && $info > 0) ? $info : '?';
		$self->{start} = time;
	}
	my $len = length $chunk;
	$self->{got} += $len;

	my $file = $self->{file};
	print $file $chunk;

	my $size = "$self->{got} / $self->{total}";
	# A percentage only makes sense when the total size is known.
	if ( $self->{total} ne '?' ) {
		$size .= sprintf " [%.2f%%]", $self->{got} * 100 / $self->{total};
	}
	my $speed = "???";
	my $time = time;
	if ( $time > $self->{start} ) {
		$speed = sprintf "%.2f", $self->{got} / ($time - $self->{start}) / 1024;
	}

	print "\r$self->{fn}: $size; ${speed}KB/s    ";

	return $len;
}
+
# libcurl write callback used when a download is kept in memory:
# appends the incoming chunk to the scalar referenced by $buf_ref and
# reports how many bytes were consumed (libcurl's callback contract).
sub body_scalar {
	my ($chunk, $buf_ref) = @_;
	${$buf_ref} .= $chunk;
	return length $chunk;
}
+
# Sink for response headers we do not care about (lexical handle,
# checked open — the original used an unchecked bareword NULL handle).
open my $null_fh, '>', '/dev/null'
	or die "can't open /dev/null: $!\n";

# Perform one HTTP request with libcurl.
#
# Arguments: the URL, then options as key => value pairs:
#   referer => string - send this Referer header
#   post    => string - POST these urlencoded fields instead of GET
#   file    => path   - stream the body to this file, with progress output
#   net     => string - tag prepended to the file name in progress lines
#
# Returns the response body as a string or, when saving to a file, the
# content length reported by curl.  Returns undef on any error.
sub curl
{
	my $url = shift;
	my %opts = @_;

	my $curl = WWW::Curl::Easy->new;

	$curl->setopt(CURLOPT_WRITEHEADER, $null_fh);
	$curl->setopt(CURLOPT_MAXREDIRS, 10);
	$curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
	$curl->setopt(CURLOPT_HTTPHEADER, $curl_headers);
	$curl->setopt(CURLOPT_URL, $url);
	$curl->setopt(CURLOPT_REFERER, $opts{referer})
		if $opts{referer};
	$curl->setopt(CURLOPT_ENCODING, 'gzip,deflate');

	if ( $opts{post} ) {
		$curl->setopt(CURLOPT_POST, 1);
		$curl->setopt(CURLOPT_POSTFIELDS, $opts{post});
	}

	my $body;
	if ( $opts{file} ) {
		# Stream the body straight to disk; body_file handles the
		# progress display.  Fail early if the target can't be opened
		# (the original silently printed to a dead handle).
		open my $f_out, '>', $opts{file} or do {
			print "error: can't write $opts{file}: $!\n";
			return undef;
		};
		$curl->setopt(CURLOPT_WRITEFUNCTION, \&body_file);
		$curl->setopt(CURLOPT_FILE, {
			curl => $curl,
			got  => 0,
			file => $f_out,
			fn   => ($opts{net} || "") . $opts{file},
		});
	} else {
		# Collect the body into a scalar and return it.
		$curl->setopt(CURLOPT_WRITEFUNCTION, \&body_scalar);
		$curl->setopt(CURLOPT_FILE, \$body);
	}

	if ( $curl->perform != 0 ) {
		my $err = $curl->errbuf;
		print "error: $err\n";
		return undef;
	}

	if ( $opts{file} ) {
		return $curl->getinfo(
			CURLINFO_CONTENT_LENGTH_DOWNLOAD
		);
	}
	return $body;
}
+
# Download one rapidshare.com file as a free user.
#
# Walks the free-download form chain: fetch the landing page, submit the
# "Free" form, honour either the "download limit reached" waiting period
# or the mandatory countdown, then fetch the file itself.  Retries up to
# 10 times on transfer errors or unexpected pages (the original used a
# goto label for this and used $1 from possibly-failed matches, which
# could yield a stale link or `sleep undef`).
#
# Returns the local file name on success, undef when all tries fail.
# Dies if rapidshare refuses because another download is in progress.
sub rsget
{
	my $file = shift;
	(my $fn = $file) =~ s#^.*/##;	# local name: URL minus the path

	for my $try ( 1 .. 10 ) {
		print "\r[RS] $fn: (re)starting...   ";
		my $body = curl( $file );
		next unless $body;

		# The landing page contains the free/premium chooser form.
		next unless $body =~ /form id="ff" action="(.*?)"/m;
		my $link = $1;

		sleep 1 + rand 5;	# look a little less like a bot

		$body = curl( $link, post => 'dl.start=Free' );
		next unless $body;

		if ( $body =~ /Please wait until the download is completed/m ) {
			die "Multi-download not allowed\n";
		}
		if ( $body =~ /You have reached the download limit for free-users\./m ) {
			# The server says how long until the next free slot;
			# fall back to 1 minute if the number can't be found.
			my ($m) = $body =~ /Instant download access! Or try again in about ([0-9]+) minutes\./m;
			for ( my $i = $m || 1; $i > 0; $i-- ) {
				print "\r[RS] $fn: waiting $i minutes   ";
				sleep 60;
			}
			sleep 20;
			next;
		}

		# Mandatory free-user countdown before the link becomes valid.
		if ( $body =~ /var c=([0-9]+);/m ) {
			my $wait = $1;
			print "\r[RS] $fn: waiting $wait seconds   ";
			sleep $wait;
		}

		# The countdown page carries the real download form.
		next unless $body =~ /form name="dlf" action="(.*?)"/m;
		$link = $1;

		curl( $link, post => 'mirror=on', file => $fn, net => '[RS] ' );
		print "DONE!\n";

		return $fn;
	}
	return undef;
}
+
# --- main loop ----------------------------------------------------------
# The list file holds one rapidshare URL per line.  On every pass the
# list is rewritten (finished/invalid entries commented out), then the
# first pending URL is downloaded; with nothing pending we block until
# the list file's mtime changes.
my $get_list = shift @ARGV || 'get.list';
print "Using '$get_list' file list\n";
-r $get_list || die "Can't read the list\n";
my %gotlist;	# url => downloaded file name

for (;;) {
	my @rewritten;
	my $list_mtime = (stat $get_list)[9];
	my $pending_url;
	my $handler;

	open my $in, '<', $get_list;
	while ( my $line = <$in> ) {
		# Blank lines and comments always pass through; once a URL
		# has been picked, the rest of the file is copied untouched.
		if ( $handler or $line =~ /^\s*(#.*)?$/ ) {
			push @rewritten, $line;
			next;
		}
		if ( $line =~ m/^\s*(http:\/\/rapidshare\.com\/.*?)\s*$/ ) {
			my $url = $1;
			if ( exists $gotlist{$url} ) {
				# Fetched on an earlier pass: comment it out.
				push @rewritten, "# " . $line;
			} else {
				$pending_url = $url;
				$handler = \&rsget;
				push @rewritten, $url . "\n";
			}
			next;
		}
		push @rewritten, "# invalid url: $line";
		print "\rinvalid url: $line";
	}
	close $in;

	if ( -e ".${get_list}.swp" ) {
		# Somebody is editing the list in vim: leave the file alone.
		print "\rvim swap exists, not rewriting\n";
	} else {
		open my $out, '>', $get_list;
		print $out @rewritten;
		close $out;
	}

	if ( $pending_url and $handler ) {
		if ( my $saved = $handler->( $pending_url ) ) {
			$gotlist{$pending_url} = $saved;
		}
	} else {
		# Nothing to download: wait until the list file changes.
		print "\rwaiting for urls";
		sleep 5 while $list_mtime == (stat $get_list)[9];
	}
}
More information about the pld-cvs-commit
mailing list