SVN: toys/rsget.pl/RSGet/ListManager.pm

sparky sparky at pld-linux.org
Sun Oct 4 17:44:43 CEST 2009


Author: sparky
Date: Sun Oct  4 17:44:43 2009
New Revision: 10679

Modified:
   toys/rsget.pl/RSGet/ListManager.pm
Log:
- smarter uri extraction from text


Modified: toys/rsget.pl/RSGet/ListManager.pm
==============================================================================
--- toys/rsget.pl/RSGet/ListManager.pm	(original)
+++ toys/rsget.pl/RSGet/ListManager.pm	Sun Oct  4 17:44:43 2009
@@ -318,14 +318,24 @@
 		}
 	}
 
+	my $u = qr/[a-z0-9_-]+/;
+	my $tld = qr/[a-z]{2,8}/;
 	foreach ( split /\s+/s, $text ) {
-		next unless m{^(?:.*?([|#<>"'\(\)\{\}\[\]]))?(http://)?(.*?)$};
-		my $lim = $1;
+		next unless m{^(.*?)(https?://)?((?:$u\.)*$u\.$tld/.+)$};
+		my $pre = $1;
 		my $proto = $2 || "http://";
 		my $uri = $proto . $3;
-		if ( $lim ) {
-			$lim =~ tr/[](){}/][)(}{/;
-			$uri =~ s/\Q$lim\E.*//;
+		if ( $pre ) {
+			if ( $pre =~ /%([0-9A-F]{2})$/ ) {
+				my $l = chr hex $1;
+				$l =~ tr/[](){}<>/][)(}{></;
+				$l = sprintf "%.2X", ord $l;
+				$uri =~ s/%$l.*//i;
+			} elsif ( $pre =~ m{.*([^a-zA-Z0-9_/])$} ) {
+				my $l = $1;
+				$l =~ tr/[](){}<>/][)(}{></;
+				$uri =~ s/\Q$l\E.*//;
+			}
 		}
 		my $getter = RSGet::Dispatch::getter( $uri );
 		next unless $getter;


More information about the pld-cvs-commit mailing list