This is an automated email from the ASF dual-hosted git repository.
sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git
The following commit(s) were added to refs/heads/master by this push:
new 99ab4cca Show host name counts
99ab4cca is described below
commit 99ab4cca5027594a70c4459477b73b3f3c47ad1d
Author: Sebb <[email protected]>
AuthorDate: Sat Apr 30 21:40:14 2022 +0100
Show host name counts
---
tools/asf-site-check.rb | 8 ++++++++
tools/site-scan.rb | 9 ++++-----
2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/tools/asf-site-check.rb b/tools/asf-site-check.rb
index ee1378cb..b619e9cc 100644
--- a/tools/asf-site-check.rb
+++ b/tools/asf-site-check.rb
@@ -66,6 +66,14 @@ module ASFDOMAIN
return true # a relative link
end
end
+ # Return the host of an absolute http(s) url, or nil if asfhost?(host) is truthy
+ # or the url does not match the http(s):// pattern below (case-insensitive)
+ def self.to_ext_host(url)
+ if url =~ %r{\Ahttps?://(.+?)(/|\z)}i # $1 = shortest non-empty text after '://' that is followed by '/' or end of string
+ return $1 unless asfhost?($1)
+ end
+ return nil
+ end
end
if __FILE__ == $0
diff --git a/tools/site-scan.rb b/tools/site-scan.rb
index b519c3a5..9859bd7c 100755
--- a/tools/site-scan.rb
+++ b/tools/site-scan.rb
@@ -148,11 +148,10 @@ def parse(id, site, name)
data[:image] = ASF::SiteImage.find(id)
# Check for resource loading from non-ASF domains
- js_urls = doc.xpath('//script/@src').map(&:content).reject {|x|
ASFDOMAIN.asfurl? x}
- css_urls = doc.xpath('//link/@href').map(&:content).reject {|x|
ASFDOMAIN.asfurl? x}
- img_urls = doc.xpath('//img/@src').map(&:content).reject {|x|
ASFDOMAIN.asfurl? x}
- resources = js_urls.size + css_urls.size + img_urls.size
- data[:resources] = "Found #{resources} external resources"
+ ext_urls = doc.xpath('//script/@src', '//link/@href', '//img/@src').
+ map(&:content).map {|x| ASFDOMAIN.to_ext_host x}.compact.tally
+ resources = ext_urls.values.sum
+ data[:resources] = "Found #{resources} external resources: #{ext_urls}"
# TODO: does not find js references such as:
# ga.src = ('https:' == document.location.protocol ? 'https://ssl' :
'http://www') + '.google-analytics.com/ga.js';