aaltay commented on a change in pull request #11554:
URL: https://github.com/apache/beam/pull/11554#discussion_r422428122



##########
File path: website/www/check-links.sh
##########
@@ -0,0 +1,93 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -euo pipefail
+
+
+function redraw_progress_bar { # int barsize, int base, int current, int top
+    # Source: https://stackoverflow.com/a/20311674
+    local barsize=$1
+    local base=$2
+    local current=$3
+    local top=$4
+    local j=0
+    local progress=$(( (barsize * (current - base)) / (top - base ) ))
+    echo -n "["
+    for ((j=0; j < progress; j++)) ; do echo -n '='; done
+    echo -n '=>'
+    for ((j=progress; j < barsize ; j++)) ; do echo -n ' '; done
+    echo -n "] $current / $top " $'\r'
+}
+
+if ! command -v lynx; then
+    echo "This script requires lynx to work properly."
+    echo
+    echo "For more information, look at: http://lynx.browser.org/";
+    exit

Review comment:
       Should this exit with an error (non-zero status)?

##########
File path: website/www/check-links.sh
##########
@@ -0,0 +1,93 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -euo pipefail
+
+
+function redraw_progress_bar { # int barsize, int base, int current, int top
+    # Source: https://stackoverflow.com/a/20311674
+    local barsize=$1
+    local base=$2
+    local current=$3
+    local top=$4
+    local j=0
+    local progress=$(( (barsize * (current - base)) / (top - base ) ))
+    echo -n "["
+    for ((j=0; j < progress; j++)) ; do echo -n '='; done
+    echo -n '=>'
+    for ((j=progress; j < barsize ; j++)) ; do echo -n ' '; done
+    echo -n "] $current / $top " $'\r'
+}
+
+if ! command -v lynx; then
+    echo "This script requires lynx to work properly."
+    echo
+    echo "For more information, look at: http://lynx.browser.org/";
+    exit
+fi
+
+MY_DIR="$(cd "$(dirname "$0")" && pwd)"
+pushd "${MY_DIR}" &>/dev/null || exit 1
+
+echo "Working directory: ${MY_DIR}"
+
+DIST_DIR=${1:-"./dist"}
+echo "Dist directory: ${DIST_DIR}"
+
+echo ""
+
+if [[ ! -f "${DIST_DIR}/index.html" ]]; then
+   echo "You should build website first."
+   exit 1
+fi
+
+mkdir -pv "${DIST_DIR}"
+
+readarray -d '' pages < <(find "${DIST_DIR}" -name '*.html' -print0)
+echo "Found ${#pages[@]} HTML files."
+
+echo "Searching links."
+mapfile -t links < <(printf '%s\n' "${pages[@]}" | xargs -n 1 lynx -listonly 
-nonumbers -dump -display_charset=iso-8859-1 | grep -v " ")
+mapfile -t external_links < <(printf '%s\n' "${links[@]}" | grep "^https\?://" 
| grep -v "http://localhost" | grep -v "http://link/" | grep -v 
"http://docker.local" | grep -v 
"https://github.com/apache/beam/edit/master/website/www/site/content/" | sort | 
uniq)
+echo "Found ${#links[@]} links including ${#external_links[@]} unique external 
links."
+
+echo "Checking links."
+invalid_links=()
+i=1
+for external_link in "${external_links[@]}"
+do
+    redraw_progress_bar 50 1 $i ${#external_links[@]}
+
+    if ! curl -sSfL --max-time 60 --connect-timeout 30 --retry 3 -4 
"${external_link}" > /dev/null ; then
+        invalid_links+=("${external_link}")
+        echo "${external_link}"

Review comment:
       We will echo the same thing in L87. Maybe we can remove this duplication.

##########
File path: website/www/check-links.sh
##########
@@ -0,0 +1,93 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -euo pipefail
+
+
+function redraw_progress_bar { # int barsize, int base, int current, int top
+    # Source: https://stackoverflow.com/a/20311674
+    local barsize=$1
+    local base=$2
+    local current=$3
+    local top=$4
+    local j=0
+    local progress=$(( (barsize * (current - base)) / (top - base ) ))
+    echo -n "["
+    for ((j=0; j < progress; j++)) ; do echo -n '='; done
+    echo -n '=>'
+    for ((j=progress; j < barsize ; j++)) ; do echo -n ' '; done
+    echo -n "] $current / $top " $'\r'
+}
+
+if ! command -v lynx; then
+    echo "This script requires lynx to work properly."
+    echo
+    echo "For more information, look at: http://lynx.browser.org/";
+    exit
+fi
+
+MY_DIR="$(cd "$(dirname "$0")" && pwd)"
+pushd "${MY_DIR}" &>/dev/null || exit 1
+
+echo "Working directory: ${MY_DIR}"
+
+DIST_DIR=${1:-"./dist"}
+echo "Dist directory: ${DIST_DIR}"
+
+echo ""
+
+if [[ ! -f "${DIST_DIR}/index.html" ]]; then
+   echo "You should build website first."
+   exit 1
+fi
+
+mkdir -pv "${DIST_DIR}"
+
+readarray -d '' pages < <(find "${DIST_DIR}" -name '*.html' -print0)
+echo "Found ${#pages[@]} HTML files."
+
+echo "Searching links."
+mapfile -t links < <(printf '%s\n' "${pages[@]}" | xargs -n 1 lynx -listonly 
-nonumbers -dump -display_charset=iso-8859-1 | grep -v " ")
+mapfile -t external_links < <(printf '%s\n' "${links[@]}" | grep "^https\?://" 
| grep -v "http://localhost" | grep -v "http://link/" | grep -v 
"http://docker.local" | grep -v 
"https://github.com/apache/beam/edit/master/website/www/site/content/" | sort | 
uniq)
+echo "Found ${#links[@]} links including ${#external_links[@]} unique external 
links."
+
+echo "Checking links."
+invalid_links=()
+i=1
+for external_link in "${external_links[@]}"
+do
+    redraw_progress_bar 50 1 $i ${#external_links[@]}
+
+    if ! curl -sSfL --max-time 60 --connect-timeout 30 --retry 3 -4 
"${external_link}" > /dev/null ; then
+        invalid_links+=("${external_link}")
+        echo "${external_link}"
+    fi
+    i=$((i+1))
+done
+# Clear line - hide progress bar
+echo -n -e "\033[2K"
+
+
+if [[ ${#invalid_links[@]} -ne 0 ]]; then
+    echo "Found ${#invalid_links[@]} invalid links: "
+    printf '%s\n' "${invalid_links[@]}"
+else
+    echo "All links work"
+fi
+
+popd &>/dev/null || exit 1

Review comment:
       What does `popd &>/dev/null` do?

##########
File path: website/www/check-links.sh
##########
@@ -0,0 +1,93 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -euo pipefail
+
+
+function redraw_progress_bar { # int barsize, int base, int current, int top

Review comment:
       Do we really need this graphical progress bar? Most of these runs will be in 
Jenkins.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Reply via email to