imbajin commented on code in PR #452:
URL:
https://github.com/apache/incubator-hugegraph-doc/pull/452#discussion_r2791628517
##########
dist/validate-links.sh:
##########
@@ -1,63 +1,249 @@
#!/bin/bash
-# Configuration
CONTENT_DIR="content"
EXIT_CODE=0
+VERBOSE="${VERBOSE:-0}"
+
+log_verbose() {
+ [[ "$VERBOSE" == "1" ]] && echo "Info: $*"
+}
+
+
+ASSET_EXTENSIONS_REGEX='png|jpg|jpeg|svg|gif|webp|avif|ico|xml|yaml|yml|json|css|js|pdf|zip|tar.gz|woff|woff2|ttf|eot|mp4|webm'
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || exit 1
+REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" || exit 1
+CONTENT_ROOT="$(cd "$REPO_ROOT/$CONTENT_DIR" && pwd)" || exit 1
+
+if [[ ! -d "$CONTENT_ROOT" ]]; then
+ echo "Error: content directory not found. Run from repository root."
+ exit 1
+fi
+
+normalize_link() {
+ local link="$1"
+
+ # Decode common URL-encoded characters explicitly
+ link="${link//%20/ }" # space
+ link="${link//%23/#}" # hash
+ link="${link//%2F/\/}" # forward slash
+
+ # Generic percent-decoding for remaining cases
+ link="${link//%/\\x}"
+ link="$(printf '%b' "$link")"
+
+ link="${link%%#*}"
+ link="${link%%\?*}"
+
+ if [[ "$link" != "/" ]]; then
+ link="${link%/}"
+ fi
+
+ printf "%s" "$link"
+}
+
+canonicalize_path() {
+ local path="$1"
+ local result=()
+ local part
+
+ IFS='/' read -r -a parts <<< "$path"
+
+ for part in "${parts[@]}"; do
+ if [[ -z "$part" || "$part" == "." ]]; then
+ continue
+ elif [[ "$part" == ".." ]]; then
+ if [[ ${#result[@]} -gt 0 ]]; then
+ unset 'result[-1]'
+ fi
+ else
+ result+=("$part")
+ fi
+ done
+
+ if [[ ${#result[@]} -eq 0 ]]; then
+ printf "/"
+ else
+ ( IFS='/'; printf "/%s" "${result[*]}" )
+ fi
+}
+
+resolve_real_path() {
+ local path="$1"
+
+ if command -v python3 >/dev/null 2>&1; then
+ # Use Python to compute realpath which resolves symlinks AND normalizes paths
+ # Python's os.path.realpath is tolerant of non-existent final targets
+ python3 - <<'PY' "$path"
+import os
+import sys
+p = sys.argv[1]
+print(os.path.realpath(p))
+PY
+ else
+ # Fallback: Normalize without symlink resolution if Python3 unavailable
+ # Note: This won't resolve symlinks, only normalize .. and . components
+ canonicalize_path "$path"
+ fi
+}
+
+check_internal_link() {
+ local link="$1"
+ local file="$2"
+ local line_no="$3"
+ local clean_link
+ local target_path
+ local location
+
+ clean_link="$(normalize_link "$link")"
+
+ [[ -z "$clean_link" || "$clean_link" == "#" ]] && return 0
+
+ if [[ "$clean_link" == "{{"* ]]; then
+ log_verbose "Skipping Hugo shortcode link: $link ($file:$line_no)"
+ return 0
+ fi
+
+ local clean_lower="${clean_link,,}"
+
+ if [[ "$clean_lower" == http://* || "$clean_lower" == https://* || "$clean_lower" == "//"* ]]; then
+ log_verbose "Skipping external link: $link ($file:$line_no)"
+ return 0
+ fi
+
+ case "$clean_lower" in
+ mailto:*|tel:*|javascript:*|data:*)
+ return 0
+ ;;
+ esac
+
+ if [[ "$clean_link" == /docs/* ]]; then
+ target_path="$CONTENT_ROOT/en${clean_link}"
+ elif [[ "$clean_link" == /cn/docs/* ]]; then
+ target_path="$CONTENT_ROOT${clean_link}"
+ elif [[ "$clean_link" == /* ]]; then
Review Comment:
⚠️ This branch skips validation for absolute internal paths outside
`/docs/*` and `/cn/docs/*`, printing only a warning and returning success.
That can hide broken internal links and make the check non-deterministic.
Consider failing on these unknown absolute paths instead of warning and
skipping them.
```suggestion
elif [[ "$clean_link" == /* ]]; then
location="$file"
[[ -n "$line_no" ]] && location="$file:$line_no"
echo "Error: Unsupported absolute internal path (cannot validate deterministically)"
echo " File: $location"
echo " Link: $link"
EXIT_CODE=1
return
```
##########
dist/validate-links.sh:
##########
@@ -1,63 +1,249 @@
#!/bin/bash
-# Configuration
CONTENT_DIR="content"
EXIT_CODE=0
+VERBOSE="${VERBOSE:-0}"
+
+log_verbose() {
+ [[ "$VERBOSE" == "1" ]] && echo "Info: $*"
+}
+
+
+ASSET_EXTENSIONS_REGEX='png|jpg|jpeg|svg|gif|webp|avif|ico|xml|yaml|yml|json|css|js|pdf|zip|tar.gz|woff|woff2|ttf|eot|mp4|webm'
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || exit 1
Review Comment:
‼️ The script currently continues after shell/runtime errors and may still
print `Link validation passed!`, which risks false positives. Please fail fast
so any parsing/runtime error immediately fails validation.
```suggestion
set -o errexit
set -o pipefail
```
##########
dist/validate-links.sh:
##########
@@ -1,63 +1,249 @@
#!/bin/bash
-# Configuration
CONTENT_DIR="content"
EXIT_CODE=0
+VERBOSE="${VERBOSE:-0}"
+
+log_verbose() {
+ [[ "$VERBOSE" == "1" ]] && echo "Info: $*"
+}
+
+
+ASSET_EXTENSIONS_REGEX='png|jpg|jpeg|svg|gif|webp|avif|ico|xml|yaml|yml|json|css|js|pdf|zip|tar.gz|woff|woff2|ttf|eot|mp4|webm'
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || exit 1
+REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" || exit 1
+CONTENT_ROOT="$(cd "$REPO_ROOT/$CONTENT_DIR" && pwd)" || exit 1
+
+if [[ ! -d "$CONTENT_ROOT" ]]; then
+ echo "Error: content directory not found. Run from repository root."
+ exit 1
+fi
+
+normalize_link() {
+ local link="$1"
+
+ # Decode common URL-encoded characters explicitly
+ link="${link//%20/ }" # space
+ link="${link//%23/#}" # hash
+ link="${link//%2F/\/}" # forward slash
+
+ # Generic percent-decoding for remaining cases
+ link="${link//%/\\x}"
+ link="$(printf '%b' "$link")"
+
+ link="${link%%#*}"
+ link="${link%%\?*}"
+
+ if [[ "$link" != "/" ]]; then
+ link="${link%/}"
+ fi
+
+ printf "%s" "$link"
+}
+
+canonicalize_path() {
+ local path="$1"
+ local result=()
+ local part
+
+ IFS='/' read -r -a parts <<< "$path"
+
+ for part in "${parts[@]}"; do
+ if [[ -z "$part" || "$part" == "." ]]; then
+ continue
+ elif [[ "$part" == ".." ]]; then
+ if [[ ${#result[@]} -gt 0 ]]; then
+ unset 'result[-1]'
+ fi
+ else
+ result+=("$part")
+ fi
+ done
+
+ if [[ ${#result[@]} -eq 0 ]]; then
+ printf "/"
+ else
+ ( IFS='/'; printf "/%s" "${result[*]}" )
+ fi
+}
+
+resolve_real_path() {
+ local path="$1"
+
+ if command -v python3 >/dev/null 2>&1; then
+ # Use Python to compute realpath which resolves symlinks AND normalizes paths
+ # Python's os.path.realpath is tolerant of non-existent final targets
+ python3 - <<'PY' "$path"
+import os
+import sys
+p = sys.argv[1]
+print(os.path.realpath(p))
+PY
+ else
+ # Fallback: Normalize without symlink resolution if Python3 unavailable
+ # Note: This won't resolve symlinks, only normalize .. and . components
+ canonicalize_path "$path"
+ fi
+}
+
+check_internal_link() {
+ local link="$1"
+ local file="$2"
+ local line_no="$3"
+ local clean_link
+ local target_path
+ local location
+
+ clean_link="$(normalize_link "$link")"
+
+ [[ -z "$clean_link" || "$clean_link" == "#" ]] && return 0
+
+ if [[ "$clean_link" == "{{"* ]]; then
+ log_verbose "Skipping Hugo shortcode link: $link ($file:$line_no)"
+ return 0
+ fi
+
+ local clean_lower="${clean_link,,}"
+
+ if [[ "$clean_lower" == http://* || "$clean_lower" == https://* || "$clean_lower" == "//"* ]]; then
+ log_verbose "Skipping external link: $link ($file:$line_no)"
+ return 0
+ fi
+
+ case "$clean_lower" in
+ mailto:*|tel:*|javascript:*|data:*)
+ return 0
+ ;;
+ esac
+
+ if [[ "$clean_link" == /docs/* ]]; then
+ target_path="$CONTENT_ROOT/en${clean_link}"
+ elif [[ "$clean_link" == /cn/docs/* ]]; then
+ target_path="$CONTENT_ROOT${clean_link}"
+ elif [[ "$clean_link" == /* ]]; then
+ # Skip validation for ambiguous absolute paths (Hugo runtime URLs)
+ location="$file"
+ [[ -n "$line_no" ]] && location="$file:$line_no"
+ echo "Warning: Skipping validation for ambiguous absolute path"
+ echo " File: $location"
+ echo " Link: $link"
+ echo " Reason: Hugo runtime URL (not directly mappable to filesystem)"
+ return 0
+ else
+ local file_dir
+ file_dir="$(cd "$(dirname "$file")" && pwd)"
+ target_path="$file_dir/$clean_link"
+ fi
+
+ target_path="$(canonicalize_path "$target_path")"
+ target_path="$(resolve_real_path "$target_path")"
+
+ case "$target_path" in
+ "$CONTENT_ROOT"/*) ;;
+ *)
+ location="$file"
+ [[ -n "$line_no" ]] && location="$file:$line_no"
+ echo "Error: Link resolves outside content directory"
+ echo " File: $location"
+ echo " Link: $link"
+ EXIT_CODE=1
+ return
+ ;;
+ esac
+
+ if [[ "$clean_lower" =~ \.(${ASSET_EXTENSIONS_REGEX})$ ]]; then
+ if [[ -f "$target_path" ]]; then
+ return 0
+ else
+ location="$file"
+ [[ -n "$line_no" ]] && location="$file:$line_no"
+ echo "Error: Broken link"
+ echo " File: $location"
+ echo " Link: $link"
+ echo " Target: $target_path"
+ EXIT_CODE=1
+ return
+ fi
+ fi
+
+ if [[ -f "$target_path" || -f "$target_path.md" || -f "$target_path/_index.md" || -f "$target_path/README.md" ]]; then
+ return 0
+ fi
+
+ location="$file"
+ [[ -n "$line_no" ]] && location="$file:$line_no"
+
+ echo "Error: Broken link"
+ echo " File: $location"
+ echo " Link: $link"
+ echo " Target: $target_path"
+ EXIT_CODE=1
+}
+
echo "Starting link validation..."
-# Find all markdown files and verify links
while read -r FILE; do
- # Extract internal links starting with /docs/ or /cn/docs/
- # We look for [text](url) pattern where url starts with /docs/ or /cn/docs/
- # Using grep to find all matching links in the file
- while read -r MATCH; do
- if [ -z "$MATCH" ]; then continue; fi
+ declare -A CODE_LINES
Review Comment:
‼️ Bash 4-only features are used in this script (for example `declare -A`
here and `${clean_link,,}` below). On macOS default Bash 3.2 this fails at
runtime and can still produce a false "passed" result. Please either enforce
Bash 4+ explicitly or rewrite these parts with Bash 3-compatible syntax.
```suggestion
if (( BASH_VERSINFO[0] < 4 )); then
echo "Error: dist/validate-links.sh requires Bash 4+ (found ${BASH_VERSION})"
exit 2
fi
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]