This is an automated email from the ASF dual-hosted git repository.

moonming pushed a commit to branch feat/seo-prune-doc-versions
in repository https://gitbox.apache.org/repos/asf/apisix-website.git

commit 942f017eaa022855681c775c11822d3e19d8e257
Author: Ming Wen <[email protected]>
AuthorDate: Mon Jun 22 11:37:59 2026 +0800

    fix(seo): publish only the latest version of each sub-project's docs
    
    The docs sync pulled *every* release branch/tag for the non-apisix
    sub-projects (ingress-controller, helm-chart, docker, plugin runners), so the
    sitemap carried ~800 sub-project URLs including ancient versions (ingress
    0.4.0-2.0.0, docker apisix-2.10.x) and their thin /tags/ pages. apisix itself
    is already curated via config/apisix-versions.js; the sub-projects were not.
    
    - sync-docs.js: keep only the newest released version of each sub-project
      (SUBPROJECT_VERSIONS_TO_KEEP=1; bump for a wider window). The latest is
      served unversioned at /docs/<project>/ and indexed; 'next' stays
      robots-disallowed. Old versions remain in each project's source repo.
    - update-sitemap-loc.js: the version-exclusion regex only matched 2-part
      versions (apisix 3.14); it missed 3-part semver (ingress 2.0.0) and
      prefixed (docker apisix-2.10.0), which is why sub-project versioned docs
      leaked into the sitemap. Broaden it to cover all three forms.
---
 scripts/sync-docs.js          | 12 +++++++++++-
 scripts/update-sitemap-loc.js |  6 ++++--
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/scripts/sync-docs.js b/scripts/sync-docs.js
index b0863aa16b0..9974e876a7b 100644
--- a/scripts/sync-docs.js
+++ b/scripts/sync-docs.js
@@ -18,6 +18,14 @@ const websitePath = '../doc';
 const gitMap = {};
 const projectReleases = {};
 
+// SEO: only the newest N released versions of each non-apisix sub-project
+// (ingress-controller, helm-chart, docker, *-plugin-runner) are built and
+// published. Publishing every historical release bloated the sitemap with
+// hundreds of thin/duplicate pages (e.g. ingress 0.4.0–2.0.0, docker
+// apisix-2.10.x) and orphaned 403 landing dirs. apisix itself is curated
+// separately in config/apisix-versions.js. Increase this for a wider window.
+const SUBPROJECT_VERSIONS_TO_KEEP = 1;
+
 const tasks = new Listr([
   {
     title: 'Start documents sync',
@@ -92,7 +100,9 @@ const tasks = new Listr([
                 .map((release) => (isIngressController
                   ? release.replace('remotes/origin/v', '')
                   : release.replace('remotes/origin/release/', '')))
-                .sort((a, b) => semver.compare(semver.coerce(a).version, semver.coerce(b).version));
+                .sort((a, b) => semver.compare(semver.coerce(a).version, semver.coerce(b).version))
+                // SEO: keep only the newest N released versions (see constant above).
+                .slice(-SUBPROJECT_VERSIONS_TO_KEEP);
             }
           },
         }));
diff --git a/scripts/update-sitemap-loc.js b/scripts/update-sitemap-loc.js
index 92034f86b43..6650ff27c6a 100644
--- a/scripts/update-sitemap-loc.js
+++ b/scripts/update-sitemap-loc.js
@@ -29,8 +29,10 @@ const sitemapXMLs = [
  *   pages, also blocked by robots.txt.
  */
 const excludePatterns = [
-  // Versioned docs: /docs/<project>/<version>/ where version is digits.digits
-  /\/docs\/[\w-]+\/\d+\.\d+\//,
+  // Versioned docs: /docs/<project>/<version>/ — only the unversioned (latest)
+  // path should be indexed. Matches 2-part (apisix 3.14), 3-part semver
+  // (ingress 2.0.0), and prefixed (docker apisix-2.10.0) version segments.
+  /\/docs\/[\w-]+\/(?:[\w-]+-)?\d+\.\d+(?:\.\d+)?\//,
   // Development "next" docs
   /\/docs\/[\w-]+\/next\//,
   // Search pages (blocked by robots.txt)

Reply via email to