SVN: toys/rsget.pl/RSGet/ListManager.pm
sparky
sparky at pld-linux.org
Sun Oct 4 17:44:43 CEST 2009
Author: sparky
Date: Sun Oct 4 17:44:43 2009
New Revision: 10679
Modified:
toys/rsget.pl/RSGet/ListManager.pm
Log:
- smarter uri extraction from text
Modified: toys/rsget.pl/RSGet/ListManager.pm
==============================================================================
--- toys/rsget.pl/RSGet/ListManager.pm (original)
+++ toys/rsget.pl/RSGet/ListManager.pm Sun Oct 4 17:44:43 2009
@@ -318,14 +318,24 @@
}
}
+ my $u = qr/[a-z0-9_-]+/;
+ my $tld = qr/[a-z]{2,8}/;
foreach ( split /\s+/s, $text ) {
- next unless m{^(?:.*?([|#<>"'\(\)\{\}\[\]]))?(http://)?(.*?)$};
- my $lim = $1;
+ next unless m{^(.*?)(https?://)?((?:$u\.)*$u\.$tld/.+)$};
+ my $pre = $1;
my $proto = $2 || "http://";
my $uri = $proto . $3;
- if ( $lim ) {
- $lim =~ tr/[](){}/][)(}{/;
- $uri =~ s/\Q$lim\E.*//;
+ if ( $pre ) {
+ if ( $pre =~ /%([0-9A-F]{2})$/ ) {
+ my $l = chr hex $1;
+ $l =~ tr/[](){}<>/][)(}{></;
+ $l = sprintf "%.2X", ord $l;
+ $uri =~ s/%$l.*//i;
+ } elsif ( $pre =~ m{.*([^a-zA-Z0-9_/])$} ) {
+ my $l = $1;
+ $l =~ tr/[](){}<>/][)(}{></;
+ $uri =~ s/\Q$l\E.*//;
+ }
}
my $getter = RSGet::Dispatch::getter( $uri );
next unless $getter;
More information about the pld-cvs-commit
mailing list