This is an automated email from the ASF dual-hosted git repository.

sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git


The following commit(s) were added to refs/heads/master by this push:
     new ac3854d  Handle alternate mirror selector syntax
ac3854d is described below

commit ac3854d3255796662edd4f081f94bf9f636798a9
Author: Sebb <[email protected]>
AuthorDate: Mon Sep 13 22:45:27 2021 +0100

    Handle alternate mirror selector syntax
    
    e.g. for vcl.a.o
---
 tools/download_check.rb | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/download_check.rb b/tools/download_check.rb
index 30ae2bf..9d4f345 100755
--- a/tools/download_check.rb
+++ b/tools/download_check.rb
@@ -249,7 +249,7 @@ def check_hash_loc(h, tlp)
 end
 
 # get the https? links as Array of [href, text]
-def get_links(body, checkSpaces=false)
+def get_links(path, body, checkSpaces=false)
   doc = Nokogiri::HTML(body)
   nodeset = doc.css('a[href]')    # Get anchors w href attribute via css
   nodeset.map { |node|
@@ -258,6 +258,9 @@ def get_links(body, checkSpaces=false)
     if checkSpaces && tmp != href
       W "Spurious space(s) in '#{tmp}'"
     end
+    if href =~ %r{^?Preferred=https?://}
+      href = path + URI.decode_www_form_component(href)
+    end
     text = node.text.gsub(/[[:space:]]+/, ' ').strip
     [href, text] unless href =~ %r{/httpcomponents.+/xdoc/downloads.xml} # breadcrumb link to source
   }.select {|x, _y| x =~ %r{^(https?:)?//} }
@@ -388,7 +391,7 @@ def _checkDownloadPage(path, tlp, version)
 
   deprecated = Time.parse('2018-01-01')
 
-  links = get_links(body, true)
+  links = get_links(path, body, true)
   if links.size < 6 # source+binary * archive+sig+hash
     E "Page does not have enough links: #{links.size} < 6 -- perhaps it needs JavaScript?"
   end
@@ -641,7 +644,7 @@ def _checkDownloadPage(path, tlp, version)
         else
           bdy = check_page(h, :E, false)
           if bdy
-            lks = get_links(bdy)
+            lks = get_links(path, bdy)
             lks.each do |l, _t|
               # Don't want to match archive server (closer.cgi defaults to it if file is not found)
               if l.end_with?(name) and l !~ %r{//archive\.apache\.org/}

Reply via email to