This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-site.git
The following commit(s) were added to refs/heads/main by this push:
new 46d6923686 Building framework to allow multiple types of back
referencing docs (#794)
46d6923686 is described below
commit 46d6923686ad4f54707c7a3de7f4427d7eec5f9b
Author: Amogh Desai <[email protected]>
AuthorDate: Sun Jun 11 17:50:20 2023 +0530
Building framework to allow multiple types of back referencing docs (#794)
---------
Co-authored-by: Amogh <[email protected]>
---
post-docs/README.md | 9 ++++
post-docs/add-back-references.py | 104 ++++++++++++++++++++++++++-------------
2 files changed, 79 insertions(+), 34 deletions(-)
diff --git a/post-docs/README.md b/post-docs/README.md
new file mode 100644
index 0000000000..12aa941fd6
--- /dev/null
+++ b/post-docs/README.md
@@ -0,0 +1,9 @@
+# Post Documentation Process
+
+In order to support backward compatibility of the documentation generation
process, this additional step is needed to add refresh HTMLs for older released
docs.
+The issue persists across helm, provider, and regular airflow docs.
+
+To generate these back references (refresh HTMLs), run the script in the
following manner:
+```commandline
+python add-back-references.py [airflow | providers | helm]
+```
diff --git a/post-docs/add-back-references.py b/post-docs/add-back-references.py
index d82a2f8fce..d89d0a2b58 100644
--- a/post-docs/add-back-references.py
+++ b/post-docs/add-back-references.py
@@ -14,16 +14,29 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+import enum
+import logging
import os
+import sys
+from pathlib import Path
from urllib.request import urlopen
import semver
-docs_link =
"https://raw.githubusercontent.com/apache/airflow/main/docs/apache-airflow/redirects.txt"
+airflow_redirects_link =
"https://raw.githubusercontent.com/apache/airflow/main/docs/apache-airflow/redirects.txt"
+helm_redirects_link =
"https://raw.githubusercontent.com/apache/airflow/main/docs/helm-chart/redirects.txt"
+providers_redirect_link = "populate-this"
+
docs_archive_path = "../docs-archive"
-apache_airflow_path = docs_archive_path + "/apache-airflow"
-stable_version_path = apache_airflow_path + "/stable.txt"
-new_docs_version = "2.5.1"
+airflow_docs_path = docs_archive_path + "/apache-airflow"
+helm_docs_path = docs_archive_path + "/helm-chart"
+providers_docs_path = docs_archive_path + "/apache-airflow-providers"
+
+
+# types of generations supported
+class GenerationType(enum.Enum):
+ airflow = 1
+ helm = 2
+ providers = 3
def download_file(url):
@@ -60,8 +73,8 @@ def construct_mapping():
return old_to_new_map
-def version_is_less_than(a):
- return semver.compare(a, new_docs_version) == -1
+def version_is_less_than(a, baseline):
+ return semver.compare(a, baseline) == -1
def get_redirect_content(url: str):
@@ -71,35 +84,58 @@ def get_redirect_content(url: str):
def create_back_reference_html(back_ref_url, path):
content = get_redirect_content(back_ref_url)
+ if Path(path).exists():
+ logging.error(f'skipping file:{path}, redirects already exist', path)
+ return
+
# Creating an HTML file
with open(path, "w") as f:
f.write(content)
-download_file(docs_link)
-old_to_new = construct_mapping()
-
-versions = [f.path.split("/")[-1] for f in os.scandir(apache_airflow_path) if
f.is_dir()]
-versions = [v for v in versions if version_is_less_than(v)]
-
-for version in versions:
- r = apache_airflow_path + "/" + version
-
- for p in old_to_new:
- old = p
- new = old_to_new[p]
-
- # only if old file exists, add the back reference
- if os.path.exists(r + "/" + p):
- d = old_to_new[p].split("/")
- file_name = old_to_new[p].split("/")[-1]
- dest_dir = r + "/" + "/".join(d[: len(d) - 1])
-
- # finds relative path of old file wrt new, handles case of
different file names too
- relative_path = os.path.relpath(old, new)
- # remove one directory level because file path was used above
- relative_path = relative_path.replace("../", "", 1)
-
- os.makedirs(dest_dir, exist_ok=True)
- dest_file_path = dest_dir + "/" + file_name
- create_back_reference_html(relative_path, dest_file_path)
+def generate_back_references(link, base_path):
+ download_file(link)
+ old_to_new = construct_mapping()
+
+ versions = [f.path.split("/")[-1] for f in os.scandir(base_path) if
f.is_dir()]
+
+ for version in versions:
+ r = base_path + "/" + version
+
+ for p in old_to_new:
+ old = p
+ new = old_to_new[p]
+
+ # only if old file exists, add the back reference
+ if os.path.exists(r + "/" + p):
+ d = old_to_new[p].split("/")
+ file_name = old_to_new[p].split("/")[-1]
+ dest_dir = r + "/" + "/".join(d[: len(d) - 1])
+
+ # finds relative path of old file wrt new, handles case of
different file names too
+ relative_path = os.path.relpath(old, new)
+ # remove one directory level because file path was used above
+ relative_path = relative_path.replace("../", "", 1)
+
+ os.makedirs(dest_dir, exist_ok=True)
+ dest_file_path = dest_dir + "/" + file_name
+ create_back_reference_html(relative_path, dest_file_path)
+
+
+# total arguments
+n = len(sys.argv)
+if n != 2:
+ logging.Logger.error("missing required arguments, syntax: python
add-back-references.py [airflow | providers | "
+ "helm]")
+
+gen_type = GenerationType[sys.argv[1]]
+if gen_type == GenerationType.airflow:
+ generate_back_references(airflow_redirects_link, airflow_docs_path)
+elif gen_type == GenerationType.helm:
+ generate_back_references(helm_redirects_link, helm_docs_path)
+elif gen_type == GenerationType.providers:
+ # solve this properly for different providers
+ generate_back_references(providers_redirect_link, providers_docs_path)
+else:
+ logging.Logger.error("invalid type of doc generation required. Pass one of
[airflow | providers | "
+ "helm]")