This is an automated email from the ASF dual-hosted git repository.
sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/whimsy.git
The following commit(s) were added to refs/heads/master by this push:
new 24cbf135 Option to save the parsed page
24cbf135 is described below
commit 24cbf1358eeb70d795fe0e9ebc46da7d0a4a28ee
Author: Sebb <[email protected]>
AuthorDate: Thu Mar 21 11:40:41 2024 +0000
Option to save the parsed page
---
tools/site-scan.rb | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/tools/site-scan.rb b/tools/site-scan.rb
index 7e928bf7..b22d830e 100755
--- a/tools/site-scan.rb
+++ b/tools/site-scan.rb
@@ -84,6 +84,11 @@ def parse(id, site, name)
return data
end
doc = Nokogiri::HTML(response)
+ if $saveparse
+ file = File.join('/tmp',"site-scan_#{$$}.txt")
+ File.write(file, doc.to_s)
+ $stderr.puts "Wrote parsed input to #{file}"
+ end
data[:uri] = uri.to_s
# FIRST: scan each link's a_href to see if we need to capture it
@@ -272,6 +277,7 @@ results = {}
podlings = {}
$cache = Cache.new(dir: 'site-scan')
$verbose = ARGV.delete '--verbose'
+$saveparse = ARGV.delete '--saveparse'
$skipresourcecheck = ARGV.delete '--noresource'
puts "Started: #{Time.now}" # must agree with site-scan monitor