This is an automated email from the ASF dual-hosted git repository.
sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git
The following commit(s) were added to refs/heads/master by this push:
new ac3854d Handle alternate mirror selector syntax
ac3854d is described below
commit ac3854d3255796662edd4f081f94bf9f636798a9
Author: Sebb <[email protected]>
AuthorDate: Mon Sep 13 22:45:27 2021 +0100
Handle alternate mirror selector syntax
e.g. for vcl.a.o
---
tools/download_check.rb | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/tools/download_check.rb b/tools/download_check.rb
index 30ae2bf..9d4f345 100755
--- a/tools/download_check.rb
+++ b/tools/download_check.rb
@@ -249,7 +249,7 @@ def check_hash_loc(h, tlp)
end
# get the https? links as Array of [href, text]
-def get_links(body, checkSpaces=false)
+def get_links(path, body, checkSpaces=false)
doc = Nokogiri::HTML(body)
nodeset = doc.css('a[href]') # Get anchors w href attribute via css
nodeset.map { |node|
@@ -258,6 +258,9 @@ def get_links(body, checkSpaces=false)
if checkSpaces && tmp != href
W "Spurious space(s) in '#{tmp}'"
end
+ if href =~ %r{^?Preferred=https?://}
+ href = path + URI.decode_www_form_component(href)
+ end
text = node.text.gsub(/[[:space:]]+/, ' ').strip
[href, text] unless href =~ %r{/httpcomponents.+/xdoc/downloads.xml} # breadcrumb link to source
}.select {|x, _y| x =~ %r{^(https?:)?//} }
@@ -388,7 +391,7 @@ def _checkDownloadPage(path, tlp, version)
deprecated = Time.parse('2018-01-01')
- links = get_links(body, true)
+ links = get_links(path, body, true)
if links.size < 6 # source+binary * archive+sig+hash
E "Page does not have enough links: #{links.size} < 6 -- perhaps it needs JavaScript?"
end
@@ -641,7 +644,7 @@ def _checkDownloadPage(path, tlp, version)
else
bdy = check_page(h, :E, false)
if bdy
- lks = get_links(bdy)
+ lks = get_links(path, bdy)
lks.each do |l, _t|
# Don't want to match archive server (closer.cgi defaults to it if file is not found
if l.end_with?(name) and l !~ %r{//archive\.apache\.org/}