On Saturday 21 June 2008, Adam D. Barratt wrote:
> On Sat, 2008-06-21 at 13:26 -0500, Raphael Geissert wrote:
> > If the redirector redirects the request to let's say heanet, the regex
> > used to match the href is still:
> >
> > (?:(?:http://qa.debian.org)?
> > \/watch\/sf\.php\/kcheckgmail\/)?kcheckgmail-(.*)\.tar\.gz
> >
> > But it would be nice if it also tried to match whatever $response->base
> > returns. So in case the redirector redirects the request to kent it still
> > works.
> >
> > This feature would be very very useful in case of the redirector and some
> > other sites that not only redirect to another site, but actually change
> > the directory structure.
> >
> > Attached is a patch adding this feature, together with a HTTP header
> > called X-uscan-features which is now set to 'enhaced-matching'.
>
> I assume you meant enhanced :-)

Err, right. :)

>
> Unfortunately the new functionality only appears to work for href
> matching, not downloading. For example:
[...]

Attached patch should do it. E.g.

[...]
uscan.pl debug: matching pattern(s) (?:(?:http://qa.debian.org)?
\/watch\/sf\.php\/ffmpeg\-php\/)?ffmpeg-php-(.*)\.tbz2 (?:
(?:http://www.mirrorservice.org)?
\/sites\/download\.sourceforge\.net\/pub\/sourceforge\/f\/ff\/ffmpeg\-php\/)?ffmpeg-php-(.*)\.tbz2
-- Found the following matching hrefs:
[...]
/sites/download.sourceforge.net/pub/sourceforge/f/ff/ffmpeg-php/ffmpeg-php-0.5.3.tbz2
Newest version on remote site is 0.5.3.1, local version is 0.5.2.1
 => Newer version available from
    http://www.mirrorservice.org/sites/download.sourceforge.net/pub/sourceforge/f/ff/ffmpeg-php/ffmpeg-php-0.5.3.1.tbz2
-- Scan finished

>
> Regards,
>
> Adam

Cheers,
-- 
Atomo64 - Raphael

Please avoid sending me Word, PowerPoint or Excel attachments.
See http://www.gnu.org/philosophy/no-word-attachments.html

Index: /home/raphael/Deb/devscripts/trunk/scripts/uscan.pl
===================================================================
--- /home/raphael/Deb/devscripts/trunk/scripts/uscan.pl (revision 1518)
+++ /home/raphael/Deb/devscripts/trunk/scripts/uscan.pl (working copy)
@@ -653,13 +653,17 @@
my $origline = $line;
my ($base, $site, $dir, $filepattern, $pattern, $lastversion, $action);
+ my (@patterns, $response_uri, @sites);
my %options = ();
my ($request, $response);
my ($newfile, $newversion);
my $style='new';
my $urlbase;
+ my $headers = HTTP::Headers->new;
+ # Please separate the features with commas, only add them if needed
+ $headers->header('X-uscan-features' => 'enhanced-matching');
%dehs_tags = ('package' => $pkg);
if ($watch_version == 1) {
@@ -787,6 +791,9 @@
return 1;
}
+ push @patterns, $pattern;
+ push @sites, $site;
+
# What is the most recent file, based on the filenames?
# We first have to find the candidates, then we sort them using
# Devscripts::Versort::versort
@@ -795,13 +802,31 @@
die "$progname: you must have the libcrypt-ssleay-perl package installed\nto use https URLs\n";
}
print STDERR "$progname debug: requesting URL $base\n" if $debug;
- $request = HTTP::Request->new('GET', $base);
+ $request = HTTP::Request->new('GET', $base, $headers);
$response = $user_agent->request($request);
if (! $response->is_success) {
warn "$progname warning: In watchfile $watchfile, reading webpage\n $base failed: " . $response->status_line . "\n";
return 1;
}
+ $response_uri = $response->base;
+ if (! defined($response_uri)) {
+ warn "$progname warning: In watchfile $watchfile, failed to get base URI \n";
+ }
+
+ print STDERR "$progname debug: base URI: $response_uri\n"
+ if $debug;
+
+ if (defined($response_uri)) {
+ my $base_dir = $response_uri;
+
+ $base_dir =~ s%^\w+://[^/]+/%/%;
+ if ($response_uri =~ m%^(\w+://[^/]+)%) {
+ push @patterns, "(?:(?:$1)?" . quotemeta($base_dir) . ")?$filepattern";
+ push @sites, $1;
+ }
+ }
+
my $content = $response->content;
print STDERR "$progname debug: received content:\n$content\[End of received content]\n"
if $debug;
@@ -821,27 +846,29 @@
($urlbase = $base) =~ s%/[^/]*$%/%;
}
- print STDERR "$progname debug: matching pattern $pattern\n" if $debug;
+ print STDERR "$progname debug: matching pattern(s) @patterns\n" if $debug;
my @hrefs;
while ($content =~ m/<\s*a\s+[^>]*href\s*=\s*([\"\'])(.*?)\1/gi) {
my $href = $2;
- if ($href =~ m&^$pattern$&) {
- if ($watch_version == 2) {
- # watch_version 2 only recognised one group; the code
- # below will break version 2 watchfiles with a construction
- # such as file-([\d\.]+(-\d+)?) (bug #327258)
- push @hrefs, [$1, $href];
- } else {
- # need the map { ... } here to handle cases of (...)?
- # which may match but then return undef values
- my $mangled_version =
- join(".", map { $_ if defined($_) }
- $href =~ m&^$pattern$&);
- foreach my $pat (@{$options{'uversionmangle'}}) {
- eval "\$mangled_version =~ $pat;";
+ foreach my $_pattern (@patterns) {
+ if ($href =~ m&^$_pattern$&) {
+ if ($watch_version == 2) {
+ # watch_version 2 only recognised one group; the code
+ # below will break version 2 watchfiles with a construction
+ # such as file-([\d\.]+(-\d+)?) (bug #327258)
+ push @hrefs, [$1, $href];
+ } else {
+ # need the map { ... } here to handle cases of (...)?
+ # which may match but then return undef values
+ my $mangled_version =
+ join(".", map { $_ if defined($_) }
+ $href =~ m&^$_pattern$&);
+ foreach my $pat (@{$options{'uversionmangle'}}) {
+ eval "\$mangled_version =~ $pat;";
+ }
+ push @hrefs, [$mangled_version, $href];
+ }
}
- push @hrefs, [$mangled_version, $href];
- }
}
}
if (@hrefs) {
@@ -1009,7 +1036,18 @@
}
# absolute filename?
elsif ($newfile =~ m%^/%) {
- $upstream_url = "$site$newfile";
+ # replace $site here with the one where we were redirected to
+ foreach my $_pattern (@patterns) {
+ foreach my $_site (@sites) {
+ if ("$_site$newfile" =~ m&^$_pattern$&) {
+ $upstream_url = "$_site$newfile";
+ }
+ }
+ }
+ if (!defined($upstream_url)) {
+ warn "$progname warning: Unable to determine upstream url\n";
+ return 1;
+ }
}
# relative filename, we hope
else {
signature.asc
Description: This is a digitally signed message part.
