This is an automated email from the ASF dual-hosted git repository. curcuru pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/whimsy.git
commit 908e1f5c4a7e3ebe36414621565737e12831bfe7 Author: Shane Curcuru <[email protected]> AuthorDate: Wed May 9 12:34:09 2018 -0400 Rearchitect to use new sitestandards module In preparation for making site.cgi and pods.cgi both use same core data/functions --- www/site.cgi | 468 +++++++++++++++++++++++++++-------------------------------- 1 file changed, 210 insertions(+), 258 deletions(-) diff --git a/www/site.cgi b/www/site.cgi index f2c43e1..6054c9d 100755 --- a/www/site.cgi +++ b/www/site.cgi @@ -1,4 +1,5 @@ #!/usr/bin/env ruby +PAGETITLE = "Apache Project Website Checks" # Wvisible:sites,brand # ensure that there is a path (even a slash will do) after the script name unless ENV['PATH_INFO'] and not ENV['PATH_INFO'].empty? @@ -12,86 +13,13 @@ $LOAD_PATH.unshift File.realpath(File.expand_path('../../lib', __FILE__)) require 'json' require 'net/http' require 'time' # for httpdate +require 'whimsy/sitestandards' -PAGETITLE = "Apache Project Website Checks" # Wvisible:sites,brand -SITE_PASS = 'label-success' -SITE_WARN = 'label-warning' -SITE_FAIL = 'label-danger' -cols = %w( uri events foundation image license sponsorship security thanks copyright trademarks ) -CHECKS = { - 'uri' => %r{https?://[^.]+\.apache\.org}, - 'copyright' => %r{[Cc]opyright [^.]+ Apache Software Foundation}, # Do we need '[Tt]he ASF'? - 'foundation' => %r{.}, - 'image' => %r{.}, - # TODO more checks needed here, e.g. ASF registered and 3rd party marks - 'trademarks' => %r{trademarks of [Tt]he Apache Software Foundation}, - 'events' => %r{^https?://.*apache.org/events/current-event}, - 'license' => %r{^https?://.*apache.org/licenses/$}, # should link to parent license page only - 'sponsorship' => %r{^https?://.*apache.org/foundation/sponsorship}, - 'security' => %r{^https?://.*apache.org/[Ss]ecurity}, - 'thanks' => %r{^https?://.*apache.org/foundation/thanks}, -} -DOCS = { - 'uri' => ['https://www.apache.org/foundation/marks/pmcs#websites', - 'The homepage for any ProjectName must be served from http://ProjectName.apache.org'], - 'copyright' => ['https://www.apache.org/legal/src-headers.html#headers', - 'All website content SHOULD include a copyright notice for the ASF.'], - 'foundation' => ['https://www.apache.org/foundation/marks/pmcs#navigation', - 'All projects must feature some prominent link back to the main ASF homepage at http://www.apache.org/'], - 'image' => ['https://www.apache.org/img/', - 'Projects SHOULD include a 212px wide copy of their logo in https://www.apache.org/img/ to be included in ASF homepage.'], - 'trademarks' => ['https://www.apache.org/foundation/marks/pmcs#attributions', - 'All project or product homepages must feature a prominent trademark attribution of all applicable Apache trademarks'], - 'events' => ['https://www.apache.org/events/README.txt', - 'Projects SHOULD include a link to any current ApacheCon event, as provided by VP, Conferences.'], - 'license' => ['https://www.apache.org/foundation/marks/pmcs#navigation', - '"License" should link to: http://www.apache.org/licenses/'], - 'sponsorship' => ['https://www.apache.org/foundation/marks/pmcs#navigation', - '"Sponsorship" or "Donate" should link to: http://www.apache.org/foundation/sponsorship.html'], - 'security' => ['https://www.apache.org/foundation/marks/pmcs#navigation', - '"Security" should link to either to a project-specific page [...], or to the main http://www.apache.org/security/ page'], - 'thanks' => ['https://www.apache.org/foundation/marks/pmcs#navigation', - '"Thanks" should link to: http://www.apache.org/foundation/thanks.html'], -} -DATAURI = 'https://whimsy.apache.org/public/' -SCAN_DATA_FILE = 'site-scan.json' - -def analyze(sites) - success = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) } - counts = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) } - CHECKS.each do |nam, pat| - success[nam] = sites.select{ |k, site| site[nam] =~ pat }.keys - counts[nam][SITE_PASS] = success[nam].count - counts[nam][SITE_WARN] = 0 # Reorder output - counts[nam][SITE_FAIL] = sites.select{ |k, site| site[nam].nil? }.count - counts[nam][SITE_WARN] = sites.size - counts[nam][SITE_PASS] - counts[nam][SITE_FAIL] - end - - [ - counts, { - SITE_PASS => '# Sites with links to primary ASF page', - SITE_WARN => '# Sites with link, but not an expected ASF one', - SITE_FAIL => '# Sites with no link for this topic' - }, success - ] -end - -def getsites(filename) - local_copy = File.expand_path("../public/#{filename}", __FILE__).untaint - if File.exist? local_copy - crawl_time = File.mtime(local_copy).httpdate # show time in same format as last-mod - sites = JSON.parse(File.read(local_copy)) - else - response = Net::HTTP.get_response(URI("#{DATAURI}#{filename}")) - crawl_time = response['last-modified'] - sites = JSON.parse(response.body) - end - return sites, crawl_time -end - -sites, crawl_time = getsites(SCAN_DATA_FILE) - -analysis = analyze(sites) +# Gather and analyze scans for TLP websites +cgi_for_tlps = true +sites, crawl_time = SiteStandards.get_sites(cgi_for_tlps) +checks_performed = SiteStandards.get_checks(cgi_for_tlps) +analysis = SiteStandards.analyze(sites, checks_performed) # Allow CLI testing, e.g. "PATH_INFO=/ ruby www/site.cgi >test.json" # SCRIPT_NAME will always be set for a CGI invocation @@ -108,25 +36,68 @@ require 'wunderbar' require 'wunderbar/bootstrap' require 'wunderbar/jquery/stupidtable' -# Determine the color of a given table cell, given: -# - overall analysis of the sites, in particular the third column -# which is a list projects that successfully matched the check -# - list of links for the project in question -# - the column in question (which indicates the check being reported on) -# - the name of the project -def label(analysis, links, col, name) - if not links[col] - SITE_FAIL - elsif analysis[2].include? col and not analysis[2][col].include? name - SITE_WARN - else - SITE_PASS +_html do + _head do + _style %{ + .table td {font-size: smaller;} + } + end + _body? do + _whimsy_body( + title: PAGETITLE, + subtitle: "Checking #{cgi_for_tlps ? 'Project' : 'Podling'} Websites For required content", + related: { + "/committers/tools" => "Whimsy Tool Listing", + "https://www.apache.org/foundation/marks/pmcs#navigation" => "Required PMC Links Policy", + "https://github.com/apache/whimsy/blob/master/www#{ENV['SCRIPT_NAME']}" => "See This Source Code", + "mailto:[email protected]?subject=[SITE] Website Checker Question" => "Questions? Email Whimsy PMC" + }, + helpblock: -> { + _p do + _ 'This script periodically crawls all Apache project and podling websites to check them for a few specific links or text blocks that all projects are expected to have.' + _ 'The checks include verifying that all ' + _a 'required links', href: 'https://www.apache.org/foundation/marks/pmcs#navigation' + _ ' appear on a project homepage, along with an "image" check if project logo files are in apache.org/img' + end + _p! do + _a 'View the crawler code', href: 'https://github.com/apache/whimsy/blob/master/tools/site-scan.rb' + _ ', ' + _a 'website display code', href: "https://github.com/apache/whimsy/blob/master/www#{ENV['SCRIPT_NAME']}" + _ ', and ' + _a 'raw JSON data', href: "#{SiteStandards.get_url()}#{SiteStandards.get_filename(cgi_for_tlps)}" + _ '.' + _br + _ "Last crawl time: #{crawl_time} over #{sites.size} websites." + end + } + ) do + # Encapsulate data display (same for projects and podlings) + display_application(path_info, sites, analysis, checks_performed, cgi_for_tlps) + end + + _script %{ + var table = $(".table").stupidtable(); + table.on("aftertablesort", function (event, data) { + var th = $(this).find("th"); + th.find(".arrow").remove(); + var dir = $.fn.stupidtable.dir; + var arrow = data.direction === dir.ASC ? "↑" : "↓"; + th.eq(data.column).append('<span class="arrow">' + arrow +'</span>'); + }); + } end end -def displayProject(project, links, cols, analysis) +# Encapsulate (most) display of website checks between projects|podlings +# Display data for a single project's checks +# @param project id of project +# @param links site data for that specific project +# @param columns list of check types to report on +# @param analysis complete scan data +# @param tlp true if project (default); podling otherwise +def display_project(project, links, analysis, checks, tlp = true) _whimsy_panel_table( - title: "Site Check For Project - #{links['display_name']}", + title: "Site Check For #{tlp ? 'Project' : 'Podling'} - #{links['display_name']}", helpblock: -> { _a href: '../', aria_label: 'Home to site checker' do _span.glyphicon.glyphicon_home :aria_hidden @@ -146,8 +117,8 @@ def displayProject(project, links, cols, analysis) _th! 'Check Description' end end - cols.each do |col| - cls = label(analysis, links, col, project) + checks.keys.each do |col| + cls = SiteStandards.label(analysis, links, col, project) _tr do _td do _a col.capitalize, href: "../check/#{col}" @@ -162,14 +133,17 @@ def displayProject(project, links, cols, analysis) end _td do - if cls != SITE_PASS - if CHECKS.include? col - _ 'Expected to match regular expression: ' - _code CHECKS[col].source - if DOCS.include? col - _ ' ' - _a DOCS[col][1], href: DOCS[col][0] + if cls != SiteStandards::SITE_PASS + if checks.keys.include? col + if checks[col][SiteStandards::CHECK_TYPE] + _ 'URL expected to match regular expression: ' + _code checks[col][SiteStandards::CHECK_VALIDATE].source + else + _ 'Text of a link expected to match regular expression: ' + _code checks[col][SiteStandards::CHECK_TEXT].source end + _br + _a checks[col][SiteStandards::CHECK_DOC], href: checks[col][SiteStandards::CHECK_POLICY] else _ '' end @@ -182,185 +156,163 @@ def displayProject(project, links, cols, analysis) end end -def displayError(path) +# Display data for a single check across all projects/podlings +# @param col id of check to display +# @param sites site data for all projects +# @param analysis complete scan data +# @param checks complete set of checks performed +# @param tlp true if project (default); podling otherwise +def display_check(col, sites, analysis, checks, tlp = true) _whimsy_panel_table( - title: "ERROR", + title: "Site Check Of Type - #{col.capitalize}", helpblock: -> { _a href: '../', aria_label: 'Home to site checker' do _span.glyphicon.glyphicon_home :aria_hidden end _span.glyphicon.glyphicon_menu_right - _span.text_danger "ERROR: The path #{path} is not a recognized command for this tool, sorry! " + if checks.keys.include? col + if checks[col][SiteStandards::CHECK_TYPE] + _ 'Check Results URL expected to match regular expression: ' + _code checks[col][SiteStandards::CHECK_VALIDATE].source + else + _ 'Check Results Text of a link expected to match regular expression: ' + _code checks[col][SiteStandards::CHECK_TEXT].source + end + if checks.include? col + _br + _a checks[col][SiteStandards::CHECK_DOC], href: checks[col][SiteStandards::CHECK_POLICY] + end + _li.small " Click column badges to sort" + else + _span.text_danger "WARNING: the site checker may not understand type: #{col}, results may not be complete/available." + end } ) do - _p.bold 'ERROR - please try again.' + _table.table.table_condensed.table_striped do + _thead do + _tr do + _th! "#{tlp ? 'Project' : 'Podling'}", data_sort: 'string-ins' + _th! data_sort: 'string' do + _ 'Check Results' + _br + analysis[0][col].each do |cls, val| + _ ' ' + _span.label val, class: cls + end + end + end + end + _tbody do + sites.each do |n, links| + _tr do + _td do + _a links['display_name'], href: "../project/#{n}" + end + + if links[col] =~ /^https?:/ + _td class: SiteStandards.label(analysis, links, col, n) do + _a links[col], href: links[col] + end + else + _td links[col], class: SiteStandards.label(analysis, links, col, n) + end + end + end + end + end end end -_html do - _head do - _style %{ - .table td {font-size: smaller;} - } - end - _body? do - _whimsy_body( - title: PAGETITLE, - subtitle: 'Checking Project/Podling Websites For required content', - related: { - "/committers/tools" => "Whimsy Tool Listing", - "https://www.apache.org/foundation/marks/pmcs#navigation" => "Required PMC Links Policy", - "https://github.com/apache/whimsy/blob/master/www#{ENV['SCRIPT_NAME']}" => "See This Source Code", - "mailto:[email protected]?subject=[SITE] Website Checker Question" => "Questions? Email Whimsy PMC" - }, +# Display an overview of all checks/sites +# @param sites site data for all projects +# @param analysis complete scan data +# @param checks complete set of checks performed +# @param tlp true if project (default); podling otherwise +def display_overview(sites, analysis, checks, tlp = true) + _whimsy_panel_table( + title: "Site Check - All #{tlp ? 'Project' : 'Podling'} Results", helpblock: -> { - _p do - _ 'This script periodically crawls all Apache project and podling websites to check them for a few specific links or text blocks that all projects are expected to have.' - _ 'The checks include verifying that all ' - _a 'required links', href: 'https://www.apache.org/foundation/marks/pmcs#navigation' - _ ' appear on a project homepage, along with an "image" check if project logo files are in apache.org/img' - end - _p! do - _a 'View the crawler code', href: 'https://github.com/apache/whimsy/blob/master/tools/site-scan.rb' - _ ', ' - _a 'website display code', href: "https://github.com/apache/whimsy/blob/master/www#{ENV['SCRIPT_NAME']}" - _ ', and ' - _a 'raw JSON data', href: "#{DATAURI}#{SCAN_DATA_FILE}" - _ '.' - _br - _ "Last crawl time: #{crawl_time} over #{sites.size} websites." + _ul.list_inline do + _li.small "Data key: " + analysis[1].each do |cls, desc| + _li.label desc, class: cls + end + _li.small " Click column badges to sort" end } - ) do - - if path_info =~ %r{/project/(.+)} - # details for an individual project - project = $1 - if sites[project] - displayProject(project, sites[project], cols, analysis) - else - displayError(path_info) - end - elsif path_info =~ %r{/check/(.+)} - # details for a single check - col = $1 - _whimsy_panel_table( - title: "Site Check Of Type - #{col.capitalize}", - helpblock: -> { - _a href: '../', aria_label: 'Home to site checker' do - _span.glyphicon.glyphicon_home :aria_hidden - end - _span.glyphicon.glyphicon_menu_right - if CHECKS.include? col - _ ' Check Results are expected to match the regular expression: ' - _code CHECKS[col].source - if DOCS.include? col + ) do + _table.table.table_condensed.table_striped do + _thead do + _tr do + _th! "#{tlp ? 'Project' : 'Podling'}", data_sort: 'string-ins' + checks.keys.each do |col| + _th! data_sort: 'string' do + _a col.capitalize, href: "check/#{col}" + _br + analysis[0][col].each do |cls, val| _ ' ' - _a DOCS[col][1], href: DOCS[col][0] - end - _li.small " Click column badges to sort" - else - _span.text_danger "WARNING: the site checker may not understand type: #{col}, results may not be complete/available." - end - } - ) do - _table.table.table_condensed.table_striped do - _thead do - _tr do - _th! 'Project', data_sort: 'string-ins' - _th! data_sort: 'string' do - _ 'Check Results' - _br - analysis[0][col].each do |cls, val| - _ ' ' - _span.label val, class: cls - end - end - end - end - _tbody do - sites.each do |n, links| - _tr do - _td do - _a links['display_name'], href: "../project/#{n}" - end - - if links[col] =~ /^https?:/ - _td class: label(analysis, links, col, n) do - _a links[col], href: links[col] - end - else - _td links[col], class: label(analysis, links, col, n) - end - end + _span.label val, class: cls end end end end - else - # overview - _whimsy_panel_table( - title: "Site Check - All Project/Podling Results", - helpblock: -> { - _ul.list_inline do - _li.small "Data key: " - analysis[1].each do |cls, desc| - _li.label desc, class: cls - end - _li.small " Click column badges to sort" - end - } - ) do - _table.table.table_condensed.table_striped do - _thead do - _tr do - _th! 'Project', data_sort: 'string-ins' - cols.each do |col| - _th! data_sort: 'string' do - _a col.capitalize, href: "check/#{col}" - _br - analysis[0][col].each do |cls, val| - _ ' ' - _span.label val, class: cls - end - end - end - end + end + + sort_order = { + SiteStandards::SITE_PASS => 1, + SiteStandards::SITE_WARN => 2, + SiteStandards::SITE_FAIL => 3 + } + + _tbody do + sites.each do |n, links| + _tr do + _td do + _a "#{links['display_name']}", href: "project/#{n}" end - - sort_order = { - SITE_PASS => 1, - SITE_WARN => 2, - SITE_FAIL => 3 - } - - _tbody do - sites.each do |n, links| - _tr do - _td do - _a "#{links['display_name']}", href: "project/#{n}" - end - cols.each do |c| - cls = label(analysis, links, c, n) - _td '', class: cls, data_sort_value: sort_order[cls] - end - end - end + checks.keys.each do |c| + cls = SiteStandards.label(analysis, links, c, n) + _td '', class: cls, data_sort_value: sort_order[cls] end end - end # of _whimsy_panel_table + end end end - - _script %{ - var table = $(".table").stupidtable(); - table.on("aftertablesort", function (event, data) { - var th = $(this).find("th"); - th.find(".arrow").remove(); - var dir = $.fn.stupidtable.dir; - var arrow = data.direction === dir.ASC ? "↑" : "↓"; - th.eq(data.column).append('<span class="arrow">' + arrow +'</span>'); - }); + end +end + +# Display an error page if a suburl we're given isn't supported +def display_error(path) + _whimsy_panel_table( + title: "ERROR - bad url provided", + helpblock: -> { + _a href: '../', aria_label: 'Home to site checker' do + _span.glyphicon.glyphicon_home :aria_hidden + end + _span.glyphicon.glyphicon_menu_right + _span.text_danger "ERROR: The path #{path} is not a recognized command for this tool, sorry! " } + ) do + _a.bold 'ERROR - please try again.', href: '../' end end + +# Display our application's data - handles / and project|check/id paths +def display_application(path, sites, analysis, checks, tlp = true) + if path =~ %r{/project/(.+)} # Display a single project + if sites[$1] + display_project($1, sites[$1], analysis, checks, tlp) + else + display_error(path) + end + elsif path =~ %r{/check/(.+)} # Display a single check + if checks[$1] + display_check($1, sites, analysis, checks, tlp) + else + display_error(path) + end + else + display_overview(sites, analysis, checks, tlp) + end +end + -- To stop receiving notification emails like this one, please contact [email protected].
