This is an automated email from the ASF dual-hosted git repository.

tballison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 45eb97cbb9 TIKA-4743 -- fix search result links (#2860)
45eb97cbb9 is described below

commit 45eb97cbb94f1246df31d071400201795a34584b
Author: Tim Allison <[email protected]>
AuthorDate: Mon Jun 1 09:51:17 2026 -0400

    TIKA-4743 -- fix search result links (#2860)
---
 docs/publish-docs.sh | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/docs/publish-docs.sh b/docs/publish-docs.sh
index e7e353506c..217fd793a1 100755
--- a/docs/publish-docs.sh
+++ b/docs/publish-docs.sh
@@ -61,6 +61,22 @@ if [[ ! -d target/site ]]; then
     exit 1
 fi
 
+# Run sed and replace $output atomically. The plain 'sed IN > OUT' form
+# truncates OUT before sed runs, so a missing input or sed failure leaves an
+# empty file behind; this writes to OUT.tmp first and only renames on success.
+# Important for PUBLISH_DIR/search-index.js, which persists across runs (a
+# corrupted one would stay corrupted until the next successful publish).
+sed_atomic() {
+    local script="$1" input="$2" output="$3"
+    if [[ ! -f "${input}" ]]; then
+        echo "${input} not found." >&2
+        echo "Re-run the docs build: cd .. && ./mvnw package -pl docs" >&2
+        exit 1
+    fi
+    sed "${script}" "${input}" > "${output}.tmp"
+    mv "${output}.tmp" "${output}"
+}
+
 mkdir -p "${DOCS_DIR}"
 
 # Strip the 'tika/' component dir prefix so URLs are /docs/X.Y.Z/...
@@ -77,12 +93,17 @@ fi
 rm -rf "${PUBLISH_DIR}/_"
 cp -r target/site/_ "${PUBLISH_DIR}/_"
 # Fix the root redirect and sitemap to match the flattened layout
-sed 's|tika/||g' target/site/index.html > "${DOCS_DIR}/index.html"
-sed 's|/docs/tika/|/docs/|g' target/site/sitemap.xml > "${DOCS_DIR}/sitemap.xml"
+sed_atomic 's|tika/||g' target/site/index.html "${DOCS_DIR}/index.html"
+sed_atomic 's|/docs/tika/|/docs/|g' target/site/sitemap.xml "${DOCS_DIR}/sitemap.xml"
 cp target/site/404.html "${DOCS_DIR}/"
 # Lunr index lives next to _/ (one level above docs/), since HTML uses ../../search-index.js.
 # Remove the stale copy from its old publish/docs/ location left by earlier runs.
 rm -f "${DOCS_DIR}/search-index.js"
-cp target/site/search-index.js "${PUBLISH_DIR}/"
+# Rewrite URLs in the search index from /tika/X.Y.Z/... (Antora's component-
+# prefixed publish path) to /docs/X.Y.Z/... (the deployed layout). The HTML
+# pages and sitemap.xml above are similarly flattened; without this rewrite,
+# clicking a search result lands on https://tika.apache.org/tika/... which
+# 404s. See TIKA-4743.
+sed_atomic 's|"url":"/tika/|"url":"/docs/|g' target/site/search-index.js "${PUBLISH_DIR}/search-index.js"
 
 echo "Published to: ${DOCS_DIR}/"

Reply via email to