This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 85c247ae10 Add scripts that provide good links to example dags (#24348)
85c247ae10 is described below

commit 85c247ae10da5ee93f26352d369f794ff4f2e47c
Author: Jarek Potiuk <[email protected]>
AuthorDate: Thu Jun 9 19:33:11 2022 +0200

    Add scripts that provide good links to example dags (#24348)
    
    The documentation generated used "main" in the URL of the
    example DAGs.
    
    The generation of the links has been fixed in #24307, but this
    PR adds a tool that has been used to fix the existing links in the generated
    documentation, resulting in https://github.com/apache/airflow-site/pull/610
    
    Fixes: #24331
---
 dev/example_dags/README.md                    |  21 +++++
 dev/example_dags/update_example_dags_paths.py | 119 ++++++++++++++++++++++++++
 2 files changed, 140 insertions(+)

diff --git a/dev/example_dags/README.md b/dev/example_dags/README.md
new file mode 100644
index 0000000000..369cf460d6
--- /dev/null
+++ b/dev/example_dags/README.md
@@ -0,0 +1,21 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+ -->
+
+Run this script in the `docs-archive` folder of a checked-out `github.com:apache/airflow-site.git`
+repository to refresh the links to example DAGs so that they point to the right versions.
diff --git a/dev/example_dags/update_example_dags_paths.py b/dev/example_dags/update_example_dags_paths.py
new file mode 100755
index 0000000000..2f7b47aee7
--- /dev/null
+++ b/dev/example_dags/update_example_dags_paths.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import os
+import re
+from pathlib import Path
+from typing import Optional
+
+import requests
+from rich.console import Console
+from rich.progress import Progress
+
+# Refuse to be imported: this file only works as a command-line program.
+# "__mp_main__" is accepted as well so that the check does not fire when the main
+# script is re-imported under that name by a multiprocessing child process.
+if __name__ not in ("__main__", "__mp_main__"):
+    raise SystemExit(
+        "This file is intended to be executed as an executable program. You cannot use it as a module. "
+        f"To run this script, run the ./{__file__} command [FILE] ..."
+    )
+
+
+# Shared rich console used for all output of this script.
+console = Console(color_system="standard", width=200)
+
+# Root of the Airflow checkout. The path is resolved *before* walking up, so a relative
+# ``__file__`` still has enough ancestors. This file lives in ``dev/example_dags/``, which
+# makes the repository root the third ancestor (``parents[2]``).
+AIRFLOW_SOURCES_ROOT = Path(__file__).resolve().parents[2]
+
+
+# Matches a line containing a quoted GitHub link to a provider's ``example_dags`` folder.
+# Groups: 1 = text before the URL, 2 = the whole URL, 3 = branch/tag part of the URL,
+# 4 = path below ``airflow/providers/``, 5 = optional "/" plus the closing quote and the rest.
+EXAMPLE_DAGS_URL_MATCHER = re.compile(
+    r"^(.*)(https://github.com/apache/airflow/tree/(.*)/airflow/providers/(.*)/example_dags)(/?\".*)$"
+)
+
+# Same group layout for links into ``tests/system/providers``; here group 4 is the path
+# below ``tests/system/providers/``.
+SYSTEM_TESTS_URL_MATCHER = re.compile(
+    r"^(.*)(https://github.com/apache/airflow/tree/(.*)/tests/system/providers/(.*))(/?\".*)$"
+)
+
+
+def check_if_url_exists(url: str, timeout: float = 30.0) -> bool:
+    """
+    Check with an HTTP HEAD request whether ``url`` exists.
+
+    :param url: URL to probe
+    :param timeout: seconds to wait for the server before giving up
+    :return: True for a 200 response, False for a 404 response
+    :raises requests.HTTPError: for any other status code
+    """
+    # ``requests.head`` does not follow redirects unless asked to, and a request without
+    # a timeout can wait forever.
+    response = requests.head(url, allow_redirects=True, timeout=timeout)
+    if response.status_code == 200:
+        return True
+    if response.status_code == 404:
+        return False
+    console.print(f"[red]Unexpected error received: {response.status_code}[/]")
+    response.raise_for_status()
+    # raise_for_status() only raises for 4xx/5xx. Never fall through and return None:
+    # replace_match() would read it as "does not exist" and drop the link from the page.
+    raise requests.HTTPError(f"Unexpected status code {response.status_code} for {url}", response=response)
+
+
+def replace_match(file: str, line: str, provider: str, version: str) -> Optional[str]:
+    """
+    Point an example-dags or system-tests link found in ``line`` at the provider release tag.
+
+    The line is tried against EXAMPLE_DAGS_URL_MATCHER and then SYSTEM_TESTS_URL_MATCHER.
+    A link whose branch part already starts with ``providers-`` is left alone. Otherwise the
+    link is replaced by the system-tests URL under ``providers-{provider}/{version}`` if that
+    URL exists, else by the example-dags URL under the same tag if that one exists.
+
+    :param file: name of the file being processed, used only in console messages
+    :param line: a single line of the file, including its line ending
+    :param provider: provider name as used in the ``providers-{provider}/{version}`` tag
+    :param version: provider version as used in the ``providers-{provider}/{version}`` tag
+    :return: the (possibly rewritten) line, or None when neither target URL exists and the
+        line should be removed
+    """
+    for matcher in [EXAMPLE_DAGS_URL_MATCHER, SYSTEM_TESTS_URL_MATCHER]:
+        match = matcher.match(line)
+        if not match:
+            continue
+        branch = match.group(3)
+        if branch.startswith("providers-"):
+            console.print(f"[green]Already corrected[/]: {provider}:{version}")
+            continue
+        url_path_to_dir = match.group(4)
+        release_ref_url = f"https://github.com/apache/airflow/tree/providers-{provider}/{version}"
+        system_tests_url = f"{release_ref_url}/tests/system/providers/{url_path_to_dir}"
+        example_dags_url = f"{release_ref_url}/airflow/providers/{url_path_to_dir}/example_dags"
+        # System tests are preferred; example dags are only the fallback.
+        if check_if_url_exists(system_tests_url):
+            new_url = system_tests_url
+        elif check_if_url_exists(example_dags_url):
+            new_url = example_dags_url
+        else:
+            console.print(
+                f"[yellow] Neither example dags nor system tests folder"
+                f" exists for {provider}:{version} -> removing:[/]"
+            )
+            console.print(line)
+            return None
+        # Group 2 is the whole URL. Splicing it out directly keeps everything around it
+        # (including the line ending) and avoids treating the URL as a regex replacement
+        # template, where a backslash would be interpreted as an escape.
+        new_line = line[: match.start(2)] + new_url + line[match.end(2) :]
+        if line != new_line:
+            console.print(f'[yellow] Replacing in {file}[/]\n{line.strip()}\n{new_line.strip()}')
+            return new_line
+    return line
+
+
+def find_matches(_file: Path, provider: str, version: str) -> None:
+    """
+    Rewrite the example-dags / system-tests links of one generated documentation file.
+
+    Every line is passed through replace_match(); lines for which it returns None are
+    dropped. The file is written back only when its content actually changed.
+
+    :param _file: file to update in place
+    :param provider: provider name passed on to replace_match()
+    :param version: provider version passed on to replace_match()
+    """
+    # Read and write with an explicit encoding instead of the locale default, which may
+    # not be able to decode the file. NOTE(review): assumes the generated HTML is UTF-8.
+    original_text = _file.read_text(encoding="utf-8")
+    new_lines = []
+    for line in original_text.splitlines(keepends=True):
+        new_line = replace_match(str(_file), line, provider, version)
+        # None is the explicit "remove this line" signal.
+        if new_line is not None:
+            new_lines.append(new_line)
+    new_text = "".join(new_lines)
+    if new_text != original_text:
+        _file.write_text(new_text, encoding="utf-8")
+
+
+if __name__ == '__main__':
+    # Walk the sub-directories of the current working directory. Every one of them counts
+    # towards the progress bar, but only "apache-airflow-providers-*" directories are
+    # processed: each of their version sub-directories gets its index.html and
+    # example-dags.html (when present) updated.
+    providers_prefix = 'apache-airflow-providers-'
+    curdir = Path(os.curdir).resolve()
+    dirs = [entry for entry in curdir.iterdir() if os.path.isdir(entry)]
+    with Progress(console=console) as progress:
+        task = progress.add_task(f"Updating {len(dirs)}", total=len(dirs))
+        for directory in dirs:
+            if not directory.name.startswith(providers_prefix):
+                progress.advance(task)
+                continue
+            # The provider name is whatever follows the common prefix.
+            provider = directory.name[len(providers_prefix) :]
+            console.print(f"[bright_blue] Processing {directory}")
+            version_dirs = [entry for entry in directory.iterdir() if os.path.isdir(entry)]
+            for version_dir in version_dirs:
+                version = version_dir.name
+                console.print(version)
+                for file_name in ("index.html", "example-dags.html"):
+                    candidate_file = version_dir / file_name
+                    if candidate_file.exists():
+                        find_matches(candidate_file, provider, version)
+            progress.advance(task)

Reply via email to