On Saturday 21 June 2008, Adam D. Barratt wrote:
> On Sat, 2008-06-21 at 13:26 -0500, Raphael Geissert wrote:
> > If the redirector redirects the request to let's say heanet, the regex
> > used to match the href is still:
> >
> > (?:(?:http://qa.debian.org)?\/watch\/sf\.php\/kcheckgmail\/)?kcheckgmail-(.*)\.tar\.gz
> >
> > But it would be nice if it also tried to match whatever $response->base
> > returns. So in case the redirector redirects the request to kent it still
> > works.
> >
> > This feature would be very very useful in case of the redirector and some
> > other sites that not only redirect to another site, but actually change
> > the directory structure.
> >
> > Attached is a patch adding this feature, together with an HTTP header
> > called X-uscan-features which is now set to 'enhaced-matching'.
>
> I assume you meant enhanced :-)

Err, right. :)

>
> Unfortunately the new functionality only appears to work for href
> matching, not downloading. For example:
[...]

Attached patch should do it. E.g.

[...]
uscan.pl debug: matching pattern(s) (?:(?:http://qa.debian.org)?\/watch\/sf\.php\/ffmpeg\-php\/)?ffmpeg-php-(.*)\.tbz2 (?:(?:http://www.mirrorservice.org)?\/sites\/download\.sourceforge\.net\/pub\/sourceforge\/f\/ff\/ffmpeg\-php\/)?ffmpeg-php-(.*)\.tbz2
-- Found the following matching hrefs:
[...]
     /sites/download.sourceforge.net/pub/sourceforge/f/ff/ffmpeg-php/ffmpeg-php-0.5.3.tbz2
Newest version on remote site is 0.5.3.1, local version is 0.5.2.1
 => Newer version available from
    http://www.mirrorservice.org/sites/download.sourceforge.net/pub/sourceforge/f/ff/ffmpeg-php/ffmpeg-php-0.5.3.1.tbz2
-- Scan finished

>
> Regards,
>
> Adam

Cheers,
-- 
Atomo64 - Raphael

Please avoid sending me Word, PowerPoint or Excel attachments.
See http://www.gnu.org/philosophy/no-word-attachments.html
Index: /home/raphael/Deb/devscripts/trunk/scripts/uscan.pl
===================================================================
--- /home/raphael/Deb/devscripts/trunk/scripts/uscan.pl	(revision 1518)
+++ /home/raphael/Deb/devscripts/trunk/scripts/uscan.pl	(working copy)
@@ -653,13 +653,17 @@
 
     my $origline = $line;
     my ($base, $site, $dir, $filepattern, $pattern, $lastversion, $action);
+    my (@patterns, $response_uri, @sites);
     my %options = ();
 
     my ($request, $response);
     my ($newfile, $newversion);
     my $style='new';
     my $urlbase;
+    my $headers = HTTP::Headers->new;
 
+    # Please separate the features with commas, only add them if needed
+    $headers->header('X-uscan-features' => 'enhanced-matching');
     %dehs_tags = ('package' => $pkg);
 
     if ($watch_version == 1) {
@@ -787,6 +791,9 @@
 	return 1;
     }
 
+    push @patterns, $pattern;
+    push @sites, $site;
+
     # What is the most recent file, based on the filenames?
     # We first have to find the candidates, then we sort them using
     # Devscripts::Versort::versort
@@ -795,13 +802,31 @@
 	    die "$progname: you must have the libcrypt-ssleay-perl package installed\nto use https URLs\n";
 	}
 	print STDERR "$progname debug: requesting URL $base\n" if $debug;
-	$request = HTTP::Request->new('GET', $base);
+	$request = HTTP::Request->new('GET', $base, $headers);
 	$response = $user_agent->request($request);
 	if (! $response->is_success) {
 	    warn "$progname warning: In watchfile $watchfile, reading webpage\n  $base failed: " . $response->status_line . "\n";
 	    return 1;
 	}
 
+	$response_uri = $response->base;
+	if (! defined($response_uri)) {
+		warn "$progname warning: In watchfile $watchfile, failed to get base URI \n";
+	}
+	
+	print STDERR "$progname debug: base URI: $response_uri\n"
+		if $debug;
+
+	if (defined($response_uri)) {
+		my $base_dir = $response_uri;
+		
+		$base_dir =~ s%^\w+://[^/]+/%/%;
+		if ($response_uri =~ m%^(\w+://[^/]+)%) {
+			push @patterns, "(?:(?:$1)?" . quotemeta($base_dir) . ")?$filepattern";
+			push @sites, $1;
+		}
+	}
+
 	my $content = $response->content;
 	print STDERR "$progname debug: received content:\n$content\[End of received content]\n"
 	    if $debug;
@@ -821,27 +846,29 @@
 	    ($urlbase = $base) =~ s%/[^/]*$%/%;
 	}
 
-	print STDERR "$progname debug: matching pattern $pattern\n" if $debug;
+	print STDERR "$progname debug: matching pattern(s) @patterns\n" if $debug;
 	my @hrefs;
 	while ($content =~ m/<\s*a\s+[^>]*href\s*=\s*([\"\'])(.*?)\1/gi) {
 	    my $href = $2;
-	    if ($href =~ m&^$pattern$&) {
-		if ($watch_version == 2) {
-		    # watch_version 2 only recognised one group; the code
-		    # below will break version 2 watchfiles with a construction
-		    # such as file-([\d\.]+(-\d+)?) (bug #327258)
-		    push @hrefs, [$1, $href];
-		} else {
-		    # need the map { ... } here to handle cases of (...)?
-		    # which may match but then return undef values
-		    my $mangled_version =
-			join(".", map { $_ if defined($_) }
-			     $href =~ m&^$pattern$&);
-		    foreach my $pat (@{$options{'uversionmangle'}}) {
-			eval "\$mangled_version =~ $pat;";
+	    foreach my $_pattern (@patterns) {
+		    if ($href =~ m&^$_pattern$&) {
+			if ($watch_version == 2) {
+			    # watch_version 2 only recognised one group; the code
+			    # below will break version 2 watchfiles with a construction
+			    # such as file-([\d\.]+(-\d+)?) (bug #327258)
+			    push @hrefs, [$1, $href];
+			} else {
+			    # need the map { ... } here to handle cases of (...)?
+			    # which may match but then return undef values
+			    my $mangled_version =
+				join(".", map { $_ if defined($_) }
+				     $href =~ m&^$_pattern$&);
+			    foreach my $pat (@{$options{'uversionmangle'}}) {
+				eval "\$mangled_version =~ $pat;";
+			    }
+			    push @hrefs, [$mangled_version, $href];
+			}
 		    }
-		    push @hrefs, [$mangled_version, $href];
-		}
 	    }
 	}
 	if (@hrefs) {
@@ -1009,7 +1036,18 @@
 	}
 	# absolute filename?
 	elsif ($newfile =~ m%^/%) {
-	    $upstream_url = "$site$newfile";
+	# replace $site here with the one where we were redirected to
+	    foreach my $_pattern (@patterns) {
+		foreach my $_site (@sites) {
+		    if ("$_site$newfile" =~ m&^$_pattern$&) {
+			$upstream_url = "$_site$newfile";
+		    }
+		}
+	    }
+	    if (!defined($upstream_url)) {
+		    warn "$progname warning: Unable to determine upstream url\n";
+		    return 1;
+	    }
 	}
 	# relative filename, we hope
 	else {

Attachment: signature.asc
Description: This is a digitally signed message part.

Reply via email to