Author: adam-guest
Date: 2008-06-22 15:10:54 +0000 (Sun, 22 Jun 2008)
New Revision: 1519

Modified:
   trunk/debian/changelog
   trunk/scripts/uscan.pl
Log:
  + If the site being checked redirects to another with a directory
    structure that does not match the original site, check for download URLs
    that refer to the redirected site rather than the original. Based on a
    patch by Raphael Geissert (Closes: #487436)

Modified: trunk/debian/changelog
===================================================================
--- trunk/debian/changelog      2008-06-21 12:58:58 UTC (rev 1518)
+++ trunk/debian/changelog      2008-06-22 15:10:54 UTC (rev 1519)
@@ -15,8 +15,13 @@
     + Add .cxx and .hxx to the default list of file extensions to check
       (Closes: #487384)
     + Add _MTN (monotone) to the default exclusion expression
-  * uscan: Add an example of a newer form of watch file syntax for SourceForge
-    based projects to the manpage
+  * uscan:
+    + Add an example of a newer form of watch file syntax for SourceForge
+      based projects to the manpage
+    + If the site being checked redirects to another with a directory
+      structure that does not match the original site, check for download URLs
+      that refer to the redirected site rather than the original. Based on a
+      patch by Raphael Geissert (Closes: #487436)
   * debian/control
     + Suggest libnet-smtp-ssl-perl (>= 1.01-2) in order to allow bts to
       suport SMTPS. The versioned dependency is required in order to ensure

Modified: trunk/scripts/uscan.pl
===================================================================
--- trunk/scripts/uscan.pl      2008-06-21 12:58:58 UTC (rev 1518)
+++ trunk/scripts/uscan.pl      2008-06-22 15:10:54 UTC (rev 1519)
@@ -653,13 +653,18 @@
 
     my $origline = $line;
     my ($base, $site, $dir, $filepattern, $pattern, $lastversion, $action);
+    my (@patterns, @sites, $response_uri);
     my %options = ();
 
     my ($request, $response);
     my ($newfile, $newversion);
     my $style='new';
     my $urlbase;
+    my $headers = HTTP::Headers->new;
 
+    # Comma-separated list of features that sites being queried might
+    # want to be aware of
+    $headers->header('X-uscan-features' => 'enhanced-matching');
     %dehs_tags = ('package' => $pkg);
 
     if ($watch_version == 1) {
@@ -787,6 +792,9 @@
        return 1;
     }
 
+    push @patterns, $pattern;
+    push @sites, $site;
+
     # What is the most recent file, based on the filenames?
     # We first have to find the candidates, then we sort them using
     # Devscripts::Versort::versort
@@ -795,13 +803,31 @@
            die "$progname: you must have the libcrypt-ssleay-perl package 
installed\nto use https URLs\n";
        }
        print STDERR "$progname debug: requesting URL $base\n" if $debug;
-       $request = HTTP::Request->new('GET', $base);
+       $request = HTTP::Request->new('GET', $base, $headers);
        $response = $user_agent->request($request);
        if (! $response->is_success) {
            warn "$progname warning: In watchfile $watchfile, reading webpage\n 
 $base failed: " . $response->status_line . "\n";
            return 1;
        }
 
+       $response_uri = $response->base;
+       if (! defined($response_uri)) {
+           warn "$progname warning: In watchfile $watchfile, failed to get 
base URI: \n";
+       }
+       
+       print STDERR "$progname debug: base URI: $response_uri\n"
+           if $debug;
+
+       if (defined($response_uri)) {
+           my $base_dir = $response_uri;
+           
+           $base_dir =~ s%^\w+://[^/]+/%/%;
+           if ($response_uri =~ m%^(\w+://[^/]+)%) {
+               push @patterns, "(?:(?:$1)?" . quotemeta($base_dir) . 
")?$filepattern";
+               push @sites, $1;
+           }
+       }
+
        my $content = $response->content;
        print STDERR "$progname debug: received content:\n$content\[End of 
received content]\n"
            if $debug;
@@ -821,26 +847,28 @@
            ($urlbase = $base) =~ s%/[^/]*$%/%;
        }
 
-       print STDERR "$progname debug: matching pattern $pattern\n" if $debug;
+       print STDERR "$progname debug: matching pattern(s) @patterns\n" if 
$debug;
        my @hrefs;
        while ($content =~ m/<\s*a\s+[^>]*href\s*=\s*([\"\'])(.*?)\1/gi) {
            my $href = $2;
-           if ($href =~ m&^$pattern$&) {
-               if ($watch_version == 2) {
-                   # watch_version 2 only recognised one group; the code
-                   # below will break version 2 watchfiles with a construction
-                   # such as file-([\d\.]+(-\d+)?) (bug #327258)
-                   push @hrefs, [$1, $href];
-               } else {
-                   # need the map { ... } here to handle cases of (...)?
-                   # which may match but then return undef values
-                   my $mangled_version =
-                       join(".", map { $_ if defined($_) }
-                            $href =~ m&^$pattern$&);
-                   foreach my $pat (@{$options{'uversionmangle'}}) {
-                       eval "\$mangled_version =~ $pat;";
+           foreach my $_pattern (@patterns) {
+               if ($href =~ m&^$_pattern$&) {
+                   if ($watch_version == 2) {
+                       # watch_version 2 only recognised one group; the code
+                       # below will break version 2 watchfiles with a 
construction
+                       # such as file-([\d\.]+(-\d+)?) (bug #327258)
+                       push @hrefs, [$1, $href];
+                   } else {
+                       # need the map { ... } here to handle cases of (...)?
+                       # which may match but then return undef values
+                       my $mangled_version =
+                           join(".", map { $_ if defined($_) }
+                               $href =~ m&^$_pattern$&);
+                       foreach my $pat (@{$options{'uversionmangle'}}) {
+                           eval "\$mangled_version =~ $pat;";
+                       }
+                       push @hrefs, [$mangled_version, $href];
                    }
-                   push @hrefs, [$mangled_version, $href];
                }
            }
        }
@@ -1009,7 +1037,17 @@
        }
        # absolute filename?
        elsif ($newfile =~ m%^/%) {
-           $upstream_url = "$site$newfile";
+           # replace $site here with the one we were redirected to
+           foreach my $index (0 .. $#patterns) {
+               if ("$sites[$index]$newfile" =~ m&^$patterns[$index]$&) {
+                   $upstream_url = "$sites[$index]$newfile";
+                   last;
+               }
+           }
+           if (!defined($upstream_url)) {
+               warn "$progname warning: Unable to determine upstream url\n";
+               return 1;
+           }
        }
        # relative filename, we hope
        else {



-- 
To unsubscribe, send mail to [EMAIL PROTECTED]

Reply via email to