Author: sparky
Date: Sun Oct  4 17:44:43 2009
New Revision: 10679

Modified:
   toys/rsget.pl/RSGet/ListManager.pm
Log:
- smarter uri extraction from text


Modified: toys/rsget.pl/RSGet/ListManager.pm
==============================================================================
--- toys/rsget.pl/RSGet/ListManager.pm  (original)
+++ toys/rsget.pl/RSGet/ListManager.pm  Sun Oct  4 17:44:43 2009
@@ -318,14 +318,24 @@
                }
        }
 
+       my $u = qr/[a-z0-9_-]+/;
+       my $tld = qr/[a-z]{2,8}/;
        foreach ( split /\s+/s, $text ) {
-               next unless m{^(?:.*?([|#<>"'\(\)\{\}\[\]]))?(http://)?(.*?)$};
-               my $lim = $1;
+               next unless m{^(.*?)(https?://)?((?:$u\.)*$u\.$tld/.+)$};
+               my $pre = $1;
                my $proto = $2 || "http://";;
                my $uri = $proto . $3;
-               if ( $lim ) {
-                       $lim =~ tr/[](){}/][)(}{/;
-                       $uri =~ s/\Q$lim\E.*//;
+               if ( $pre ) {
+                       if ( $pre =~ /%([0-9A-F]{2})$/ ) {
+                               my $l = chr hex $1;
+                               $l =~ tr/[](){}<>/][)(}{></;
+                               $l = sprintf "%.2X", ord $l;
+                               $uri =~ s/%$l.*//i;
+                       } elsif ( $pre =~ m{.*([^a-zA-Z0-9_/])$} ) {
+                               my $l = $1;
+                               $l =~ tr/[](){}<>/][)(}{></;
+                               $uri =~ s/\Q$l\E.*//;
+                       }
                }
                my $getter = RSGet::Dispatch::getter( $uri );
                next unless $getter;
_______________________________________________
pld-cvs-commit mailing list
[email protected]
http://lists.pld-linux.org/mailman/listinfo/pld-cvs-commit

Reply via email to