This is an automated email from the ASF dual-hosted git repository.
skm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 619bc3e Remove regression checks for website links (#12507)
619bc3e is described below
commit 619bc3ea3c9093b72634d16e91596b3a65f3f1fc
Author: Sandeep Krishnamurthy <[email protected]>
AuthorDate: Wed Sep 12 08:45:54 2018 -0700
Remove regression checks for website links (#12507)
* Remove regression checks for website links
* Add redirection ignore regex
---
.../broken_link_checker_test/JenkinsfileForBLC | 4 --
tests/nightly/broken_link_checker_test/README.md | 5 +--
.../broken_link_checker.sh | 3 --
.../broken_link_checker_test/check_regression.sh | 46 ----------------------
.../broken_link_checker_test/test_broken_links.py | 6 ++-
5 files changed, 5 insertions(+), 59 deletions(-)
diff --git a/tests/nightly/broken_link_checker_test/JenkinsfileForBLC
b/tests/nightly/broken_link_checker_test/JenkinsfileForBLC
index 782bf74..4c3f053 100755
--- a/tests/nightly/broken_link_checker_test/JenkinsfileForBLC
+++ b/tests/nightly/broken_link_checker_test/JenkinsfileForBLC
@@ -34,11 +34,7 @@ core_logic: {
timeout(time: 60, unit: 'MINUTES') {
try {
utils.init_git()
- sh 'aws s3 cp s3://mxnet-ci-prod-slave-data/url_list.txt
./tests/nightly/broken_link_checker_test/url_list.txt'
utils.docker_run('ubuntu_blc', 'broken_link_checker', false)
- } finally {
- sh "echo Storing the new url_list.txt to S3 bucket"
- sh 'aws s3 cp
./tests/nightly/broken_link_checker_test/url_list.txt
s3://mxnet-ci-prod-slave-data/url_list.txt'
}
}
}
diff --git a/tests/nightly/broken_link_checker_test/README.md
b/tests/nightly/broken_link_checker_test/README.md
index a925d1b..c39abd0 100755
--- a/tests/nightly/broken_link_checker_test/README.md
+++ b/tests/nightly/broken_link_checker_test/README.md
@@ -1,6 +1,6 @@
# Broken link checker test
-This folder contains the scripts that are required to run the nightly job of
checking the broken links. The job also checks whether the link that were
published before are still accessible.
+This folder contains the scripts that are required to run the nightly job of
checking the broken links.
## JenkinsfileForBLC
This is the configuration file for the Jenkins job.
@@ -8,6 +8,3 @@ This is configuration file for jenkins job.
## Details
The `broken_link_checker.sh` is a top-level script that invokes the
`test_broken_links.py` and `check_regression.sh` scripts.
The `test_broken_links.py` script invokes the broken link checker tool (blc) from Node.js
and reports the list of URLs that are not accessible.
-The `check_regression.sh` scripts downloads the file `url_list.txt` that
contains links that are publicly accessible from s3 bucket
-The scripts merges this list with the output of `test_broken_links.py` and
checks whether all those links are accessible using 'curl' command.
-The updated `url_list.txt` is uploaded to s3 bucket.
diff --git a/tests/nightly/broken_link_checker_test/broken_link_checker.sh
b/tests/nightly/broken_link_checker_test/broken_link_checker.sh
index 2107c96..450cd65 100755
--- a/tests/nightly/broken_link_checker_test/broken_link_checker.sh
+++ b/tests/nightly/broken_link_checker_test/broken_link_checker.sh
@@ -28,6 +28,3 @@ echo `pwd`
echo "Running test_broken_links.py"
python test_broken_links.py
-
-echo "Running check_regression.sh"
-./check_regression.sh
diff --git a/tests/nightly/broken_link_checker_test/check_regression.sh
b/tests/nightly/broken_link_checker_test/check_regression.sh
deleted file mode 100755
index c21577f..0000000
--- a/tests/nightly/broken_link_checker_test/check_regression.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#! /bin/sh
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-
-echo "Running the check_regression.sh script"
-cat blc_output.txt | uniq | grep -Eo "(http|https).* " | sort| uniq >
unique_current_urls.txt
-
-cat url_list.txt unique_current_urls.txt | sort | uniq > new_url_list.txt
-regression=false
-while IFS= read -r line
-do
- err=$(curl -Is $line | head -n 1 | grep 404)
- if [ "$err" ]; then
- if [ "$regression" = false ] ; then
- echo "FAIL: REGRESSION"
- regression=true
- fi
- echo "BROKEN $line $err"
- fi
- unset err
-done < new_url_list.txt
-mv new_url_list.txt url_list.txt
-rm -rf unique_current_urls.txt
-rm -rf blc_output.txt
-if [ $regression ]; then
- echo "FAIL: Found Regression in broken link checker"
- exit 1
-else
- echo "SUCCESS: No Regression found"
-fi
diff --git a/tests/nightly/broken_link_checker_test/test_broken_links.py
b/tests/nightly/broken_link_checker_test/test_broken_links.py
index 593e008..b1cbac7 100755
--- a/tests/nightly/broken_link_checker_test/test_broken_links.py
+++ b/tests/nightly/broken_link_checker_test/test_broken_links.py
@@ -31,6 +31,8 @@ def prepare_link_test_result(command_output):
# Whitelisted broken links patterns
HTTP_403_REGEX = "(HTTP_403)"
HTTP_401_REGEX = "(HTTP_401)"
+ HTTP_409_REGEX = "(HTTP_409)"
+ HTTP_3XX_REGEX = "(HTTP_3"
BLC_UNKNOWN_REGEX = "(BLC_UNKNOWN)"
HTTP_UNDEFINED = "HTTP_undefined"
FALSE_SCALA_API_DOC_LINK = "java$lang.html"
@@ -53,8 +55,8 @@ def prepare_link_test_result(command_output):
current_page_broken_links = ""
if line.find(BROKEN_PAGE_START_REGEX) != -1:
- # Skip (401, 403, unknown issues)
- if HTTP_403_REGEX not in line and HTTP_401_REGEX not in line and
BLC_UNKNOWN_REGEX not in line and HTTP_UNDEFINED not in line and
FALSE_SCALA_API_DOC_LINK not in line and FALSE_SCALA_API_DEPRECATED_LINK not in
line and FALSE_PAPER_LINK not in line:
+ # Skip (401, 403, 409, unknown issues)
+ if HTTP_403_REGEX not in line and HTTP_401_REGEX not in line and
HTTP_409_REGEX not in line and HTTP_3XX_REGEX not in line and BLC_UNKNOWN_REGEX
not in line and HTTP_UNDEFINED not in line and FALSE_SCALA_API_DOC_LINK not in
line and FALSE_SCALA_API_DEPRECATED_LINK not in line and FALSE_PAPER_LINK not
in line:
current_page_broken = True
current_page_broken_links +=
line.split(BROKEN_PAGE_START_REGEX)[1] + "\n"