This is an automated email from the ASF dual-hosted git repository.
jeffreyh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-website.git
The following commit(s) were added to refs/heads/master by this push:
new 2d796a1519d Fixed the issue that the broken link detection did not
detect community documents (#2571)
2d796a1519d is described below
commit 2d796a1519dd569ca6ad349cb377e2696c469565
Author: yangon <[email protected]>
AuthorDate: Mon Jun 30 19:43:36 2025 +0800
Fixed the issue that the broken link detection did not detect community
documents (#2571)
---
.github/workflows/build-check.yml | 14 +++---
check_move_global.py | 37 +++++++++++-----
scripts/check_move.js | 90 +++++++++++++++++++++++++++++++++++++++
3 files changed, 126 insertions(+), 15 deletions(-)
diff --git a/.github/workflows/build-check.yml
b/.github/workflows/build-check.yml
index ec37fe1a1b8..399dbd5d073 100644
--- a/.github/workflows/build-check.yml
+++ b/.github/workflows/build-check.yml
@@ -41,6 +41,11 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: '3.9'
+
+ - name: Use Node.js
+ uses: actions/setup-node@v3
+ with:
+ node-version: 20
- name: Refactor PR commits and check move
run: |
@@ -49,6 +54,10 @@ jobs:
git reset --soft origin/master
git commit -m "All this PR changed(for test)"
python check_move.py `git rev-parse HEAD`
+
+ - name: Run Node.js dead link checker
+ run: |
+ node ./scripts/check_move.js `git rev-parse HEAD`
- name: Check exit code
run: |
@@ -56,11 +65,6 @@ jobs:
echo "Have detected not processed link changes, please fix
them."
exit 1
fi
-
- - name: Use Node.js
- uses: actions/setup-node@v3
- with:
- node-version: 20
- name: Build
run: |
diff --git a/check_move_global.py b/check_move_global.py
index 57be298346c..13ea19ea6d5 100644
--- a/check_move_global.py
+++ b/check_move_global.py
@@ -1,3 +1,17 @@
+# Detect global dead links
+#
+# Core logic:
+# Traverse all documents, extract the links in each one, and decide from the
link target whether each link is dead (its target file does not exist);
+# if it is a dead link, try to fix it. If the fix fails, it will print: ❌
xxxx/xxxx.md: Could not fix broken link ${target_link};
+# if the fix is successful, it will print: 🛠️ xxxx/xxxx.md: Fixed broken
link ${dead_link} -> ${link}
+#
+# Logic for repairing broken links:
+# Starting from the directory of the current broken link, walk up the
directory tree level by level and look for a document whose name matches the
document name in the broken link.
+# If a match is found, that directory is considered to be the correct
directory for the broken link.
+# This covers the case where the linked document was moved to another
directory. If the document was deleted, the repair fails.
+#
+# Links that are absolute paths or that start with http/https cannot be checked
+
import argparse
import subprocess
import re
@@ -9,7 +23,7 @@ from urllib.parse import urlparse
move_pairs = []
deletes = []
change_detected = False
-search_dirs = ["docs", "i18n", "versioned_docs"]
+search_dirs = ["docs", "i18n", "versioned_docs", "community"]
def is_same_file(path1, path2):
return os.path.normpath(path1) == os.path.normpath(path2)
@@ -21,10 +35,10 @@ def remove_suffix(text: str, suffix: str):
def find_nearest_file(file_base, start_dir):
"""
- 在 start_dir 向上查找最近的 file_base(.md/.mdx),否则全局搜索
+ Search upward from start_dir for the nearest file_base (.md/.mdx); otherwise
fall back to a global search
"""
cur_dir = start_dir
- # 向上搜索最多 10 层,避免卡死
+ # Search at most 10 levels upward to avoid getting stuck
for _ in range(10):
for ext in [".md", ".mdx"]:
candidate = os.path.join(cur_dir, file_base + ext)
@@ -35,7 +49,7 @@ def find_nearest_file(file_base, start_dir):
break
cur_dir = parent
- # 全局搜索
+ # Global Search
for base_dir in search_dirs:
for root, dirs, files in os.walk(base_dir):
for file in files:
@@ -59,7 +73,7 @@ def process_md_file(file_path):
if not full_path.endswith(".md") and not
full_path.endswith(".mdx"):
full_path += ".md"
- # 处理 rename 情况
+ # Handling rename situations
for [from_path, to_path] in move_pairs:
from_base, from_ext = os.path.splitext(from_path)
to_base, to_ext = os.path.splitext(to_path)
@@ -74,15 +88,15 @@ def process_md_file(file_path):
new_content = new_content.replace(f"({link})",
f"({relative_to_path})")
change_detected = True
- # 处理 delete 情况
+ # Handling delete cases
for deleted_path in deletes:
if is_same_file(full_path, deleted_path):
print(f"⚠️ {file_path}: Link to deleted file {link}")
change_detected = True
- # 处理死链修复
+ # Dealing with broken link repair
if not os.path.exists(full_path):
- # 说明当前 link 是坏的
+ # Indicates that the current link is broken
file_base = os.path.basename(link)
file_base = remove_suffix(file_base, ".md")
file_base = remove_suffix(file_base, ".mdx")
@@ -92,11 +106,14 @@ def process_md_file(file_path):
relative_to_path = os.path.relpath(found_path,
os.path.dirname(file_path))
relative_to_path = remove_suffix(relative_to_path, ".md")
relative_to_path = remove_suffix(relative_to_path, ".mdx")
- print(f"🛠️ {file_path}: Fixed broken link {link} ->
{relative_to_path}")
+ if "version-1.2" not in file_path and "version-2.0" not in
file_path:
+ print(f"🛠️ {file_path}: Fixed broken link {link} ->
{relative_to_path}")
+
new_content = new_content.replace(f"({link})",
f"({relative_to_path})")
change_detected = True
else:
- print(f"❌ {file_path}: Could not fix broken link {link}")
+ if "version-1.2" not in file_path and "version-2.0" not in
file_path:
+ print(f"❌ {file_path}: Could not fix broken link
{link}")
change_detected = True
if new_content != content:
diff --git a/scripts/check_move.js b/scripts/check_move.js
new file mode 100644
index 00000000000..0ad15df06b2
--- /dev/null
+++ b/scripts/check_move.js
@@ -0,0 +1,90 @@
+#!/usr/bin/env node
+
+const { execSync } = require("child_process");
+const fs = require("fs");
+const path = require("path");
+
+const commitHash = process.argv[2];
+
+if (!commitHash) {
+ console.error("❌ Please provide the commit hash, such as: node
check-dead-links.js <commit-hash>");
+ process.exit(1);
+}
+
+const linkRegex = /\[.*?\]\((.*?)\)/g;
+let hasBrokenLinks = false;
+
+// Get the modified or newly added .md/.mdx files in the commit
+function getModifiedMarkdownFiles(commit) {
+ const output = execSync(`git show --name-status ${commit}`, { encoding:
"utf-8" });
+ const lines = output.split("\n");
+ const files = [];
+
+ for (const line of lines) {
+ const parts = line.trim().split(/\s+/);
+ if (parts.length === 2) {
+ const [status, filePath] = parts;
+ if ((status === "A" || status === "M") && (filePath.endsWith(".md") ||
filePath.endsWith(".mdx"))) {
+ files.push(filePath);
+ }
+ }
+ }
+
+ return files;
+}
+
+// Checks whether the link is a relative local path (not http/https, mailto, an anchor, or an absolute path)
+function isLocalLink(link) {
+ return !link.startsWith("http://") &&
+ !link.startsWith("https://") &&
+ !link.startsWith("mailto:") &&
+ !link.startsWith("#") &&
+ !path.isAbsolute(link);
+}
+
+// Check links in files
+function checkFileLinks(filePath) {
+ const content = fs.readFileSync(filePath, "utf-8");
+ const dir = path.dirname(filePath);
+ const matches = [...content.matchAll(linkRegex)];
+
+ for (const match of matches) {
+ const rawLink = match[1].split("#")[0]; // Remove anchor point
+ if (!isLocalLink(rawLink)) continue;
+
+ let fullPath = path.resolve(dir, rawLink);
+ if (!fs.existsSync(fullPath)) {
+ // Try adding a .md/.mdx suffix and try again
+ if (fs.existsSync(fullPath + ".md")) continue;
+ if (fs.existsSync(fullPath + ".mdx")) continue;
+
+ console.error(`❌ ${filePath}: Broken link -> ${rawLink}`);
+ hasBrokenLinks = true;
+ }
+ }
+}
+
+// Main function
+function main() {
+ const files = getModifiedMarkdownFiles(commitHash);
+ if (files.length === 0) {
+ console.log("✅ Unmodified Markdown files");
+ return;
+ }
+
+ for (const file of files) {
+ if (fs.existsSync(file)) {
+ checkFileLinks(file);
+ }
+ }
+
+
+ if (hasBrokenLinks) {
+ console.error("❗ A broken link was detected. Please fix it and submit.");
+ process.exit(1);
+ } else {
+ console.log("✅ All links are OK");
+ }
+}
+
+main();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]