This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch HDDS-9225-website-v2
in repository https://gitbox.apache.org/repos/asf/ozone-site.git


The following commit(s) were added to refs/heads/HDDS-9225-website-v2 by this 
push:
     new 3073baf7 HDDS-10353. Add GitHub Actions check of all generated URLs in 
the sitemap. (#77)
3073baf7 is described below

commit 3073baf7c0aca03cf17aa811eabfdd18f931ff7d
Author: Ethan Rose <[email protected]>
AuthorDate: Thu Feb 22 11:05:13 2024 -0800

    HDDS-10353. Add GitHub Actions check of all generated URLs in the sitemap. 
(#77)
---
 .github/scripts/url.sh           | 47 ++++++++++++++++++++++++++++++++++++++++
 .github/workflows/docusaurus.yml | 16 ++++++++++++++
 2 files changed, 63 insertions(+)

diff --git a/.github/scripts/url.sh b/.github/scripts/url.sh
new file mode 100755
index 00000000..ac8df495
--- /dev/null
+++ b/.github/scripts/url.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env sh
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Checks that all URLs contained in the sitemap.xml file from the build contain only allowed characters.
+# All URLs on the Ozone site should use kebab-case with lowercase letters.
+
+sitemap="$1"
+
+if [ -z "$sitemap" ]; then
+    echo "Requires path to sitemap.xml file as the first argument." 1>&2
+    exit 1
+fi
+
+# Update this to 'https://ozone\.apache\.org/' when the website is published.
+base_url_regex='https://ozone-site-v2\.staged\.apache\.org/'
+valid_url_regex="$base_url_regex[a-z0-9][a-z0-9\./-]*[a-z0-9/]"
+
+rc=0
+
+for url in $(yq --output-format=props '.urlset.url[].loc' "$sitemap"); do
+    if echo "$url" | grep -vx "$base_url_regex" | grep -Exvq 
"$valid_url_regex"; then
+        echo "Disallowed URL format: $url" 1>&2
+        rc=1
+    fi
+done
+
+if [ "$rc" != 0 ]; then
+    echo "Found URLs in the build that do not match the allowed URL regex: 
$valid_url_regex" 1>&2
+    echo "All URLs should use kebab case and lowercase letters." 1>&2
+fi
+
+exit "$rc"
diff --git a/.github/workflows/docusaurus.yml b/.github/workflows/docusaurus.yml
index 54257741..1b2bdf89 100644
--- a/.github/workflows/docusaurus.yml
+++ b/.github/workflows/docusaurus.yml
@@ -82,6 +82,22 @@ jobs:
         path: build
         if-no-files-found: error
         retention-days: 1
+  # Runs url.sh against the sitemap.xml found in the downloaded "build"
+  # artifact, once the build-website job has finished.
+  check-sitemap:
+    runs-on: ubuntu-latest
+    needs: [build-website]
+    steps:
+    # The checkout makes the repository's scripts available to later steps.
+    - uses: actions/checkout@v4
+      name: Get URL check script
+    # Fetch the artifact named "build" into ./build in the workspace.
+    - uses: actions/download-artifact@v4
+      name: Download website build artifact
+      with:
+        path: build
+        name: build
+    # NOTE(review): env.script_dir is not defined in this job; presumably it is
+    # set at the workflow level to the directory containing url.sh - confirm.
+    - name: Check sitemap.xml
+      run: |
+        ./url.sh "$GITHUB_WORKSPACE"/build/sitemap.xml
+      working-directory: ${{ env.script_dir }}
   run-website:
     needs:
     - build-website


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to