vincbeck commented on code in PR #36333:
URL: https://github.com/apache/airflow/pull/36333#discussion_r1432938003


##########
docs/apache-airflow-providers-fab/img/diagram_fab_auth_manager_airflow_architecture.py:
##########
@@ -0,0 +1,73 @@
+# Licensed to the Apache Software Foundation (ASF) under one

Review Comment:
   You mixed the two diagrams. This one goes to Airflow doc and the other goes 
here in the fab provider (look at the images, they are not the same)



##########
CONTRIBUTING.rst:
##########
@@ -981,6 +981,58 @@ Documentation for ``apache-airflow`` package and other 
packages that are closely
 providers packages are in ``/docs/`` directory. For detailed information on 
documentation development,
 see: `docs/README.rst <docs/README.rst>`_
 
+Diagrams
+========
+
+We started to use (and gradually convert old diagrams to use it) `Diagrams 
<https://diagrams.mingrammer.com/>`_
+as our tool of choice to generate diagrams. The diagrams are generated from 
Python code and can be
+automatically updated when the code changes. The diagrams are generated using 
pre-commit hooks (See
+static checks below) but they can also be generated manually by running the 
corresponding Python code.
+
+To run the code you need to install the dependencies in the virtualenv you use 
to run it:
+* ``pip install diagrams rich``
+
+You need to have graphviz installed in your system (``brew install graphviz`` 
on macOS for example).
+
+The source code of the diagrams are next to the generated diagram, the 
difference is that the source
+code has ``.py`` extension and the generated diagram has ``.png`` extension. 
The pre-commit hook
+we have ``generate-airflow-diagrams`` will look for ``diagram_*.py`` files in 
the directories where it

Review Comment:
   ```suggestion
   ``generate-airflow-diagrams`` will look for ``diagram_*.py`` files in the 
directories where it
   ```



##########
scripts/ci/pre_commit/pre_commit_generate_airflow_diagrams.py:
##########
@@ -34,201 +30,28 @@
 AIRFLOW_SOURCES_ROOT = Path(__file__).parents[3]
 DOCS_IMAGES_DIR = AIRFLOW_SOURCES_ROOT / "docs" / "apache-airflow" / "img"
 FAB_PROVIDER_DOCS_IMAGES_DIR = AIRFLOW_SOURCES_ROOT / "docs" / 
"apache-airflow-providers-fab" / "img"
-PYTHON_MULTIPROCESS_LOGO = AIRFLOW_SOURCES_ROOT / "images" / "diagrams" / 
"python_multiprocess_logo.png"
 
-BASIC_ARCHITECTURE_IMAGE_NAME = "diagram_basic_airflow_architecture"
-DAG_PROCESSOR_AIRFLOW_ARCHITECTURE_IMAGE_NAME = 
"diagram_dag_processor_airflow_architecture"
-AUTH_MANAGER_AIRFLOW_ARCHITECTURE_IMAGE_NAME = 
"diagram_auth_manager_airflow_architecture"
-FAB_AUTH_MANAGER_AIRFLOW_ARCHITECTURE_IMAGE_NAME = 
"diagram_fab_auth_manager_airflow_architecture"
-DIAGRAM_HASH_FILE_NAME = "diagram_hash.txt"
 
-
-def generate_basic_airflow_diagram():
-    basic_architecture_image_file = (DOCS_IMAGES_DIR / 
BASIC_ARCHITECTURE_IMAGE_NAME).with_suffix(".png")
-    console.print(f"[bright_blue]Generating architecture image 
{basic_architecture_image_file}")
-    with Diagram(
-        name="", show=False, direction="LR", curvestyle="ortho", 
filename=BASIC_ARCHITECTURE_IMAGE_NAME
-    ):
-        with Cluster("Parsing & Scheduling"):
-            schedulers = Custom("Scheduler(s)", 
PYTHON_MULTIPROCESS_LOGO.as_posix())
-
-        metadata_db = PostgreSQL("Metadata DB")
-
-        dag_author = User("DAG Author")
-        dag_files = MultipleDocuments("DAG files")
-
-        dag_author >> Edge(color="black", style="dashed", reverse=False) >> 
dag_files
-
-        with Cluster("Execution"):
-            workers = Custom("Worker(s)", PYTHON_MULTIPROCESS_LOGO.as_posix())
-            triggerer = Custom("Triggerer(s)", 
PYTHON_MULTIPROCESS_LOGO.as_posix())
-
-        schedulers - Edge(color="blue", style="dashed", taillabel="Executor") 
- workers
-
-        schedulers >> Edge(color="red", style="dotted", reverse=True) >> 
metadata_db
-        workers >> Edge(color="red", style="dotted", reverse=True) >> 
metadata_db
-        triggerer >> Edge(color="red", style="dotted", reverse=True) >> 
metadata_db
-
-        operations_user = User("Operations User")
-        with Cluster("UI"):
-            webservers = Custom("Webserver(s)", 
PYTHON_MULTIPROCESS_LOGO.as_posix())
-
-        webservers >> Edge(color="black", style="dashed", reverse=True) >> 
operations_user
-
-        metadata_db >> Edge(color="red", style="dotted", reverse=True) >> 
webservers
-
-        dag_files >> Edge(color="brown", style="solid") >> workers
-        dag_files >> Edge(color="brown", style="solid") >> schedulers
-        dag_files >> Edge(color="brown", style="solid") >> triggerer
-    console.print(f"[green]Generating architecture image 
{basic_architecture_image_file}")
-
-
-def generate_dag_processor_airflow_diagram():
-    dag_processor_architecture_image_file = (
-        DOCS_IMAGES_DIR / DAG_PROCESSOR_AIRFLOW_ARCHITECTURE_IMAGE_NAME
-    ).with_suffix(".png")
-    console.print(f"[bright_blue]Generating architecture image 
{dag_processor_architecture_image_file}")
-    with Diagram(
-        name="",
-        show=False,
-        direction="LR",
-        curvestyle="ortho",
-        filename=DAG_PROCESSOR_AIRFLOW_ARCHITECTURE_IMAGE_NAME,
-    ):
-        operations_user = User("Operations User")
-        with Cluster("No DAG Python Code Execution", graph_attr={"bgcolor": 
"lightgrey"}):
-            with Cluster("Scheduling"):
-                schedulers = Custom("Scheduler(s)", 
PYTHON_MULTIPROCESS_LOGO.as_posix())
-
-            with Cluster("UI"):
-                webservers = Custom("Webserver(s)", 
PYTHON_MULTIPROCESS_LOGO.as_posix())
-
-        webservers >> Edge(color="black", style="dashed", reverse=True) >> 
operations_user
-
-        metadata_db = PostgreSQL("Metadata DB")
-
-        dag_author = User("DAG Author")
-        with Cluster("DAG Python Code Execution"):
-            with Cluster("Execution"):
-                workers = Custom("Worker(s)", 
PYTHON_MULTIPROCESS_LOGO.as_posix())
-                triggerer = Custom("Triggerer(s)", 
PYTHON_MULTIPROCESS_LOGO.as_posix())
-            with Cluster("Parsing"):
-                dag_processors = Custom("DAG\nProcessor(s)", 
PYTHON_MULTIPROCESS_LOGO.as_posix())
-            dag_files = MultipleDocuments("DAG files")
-
-        dag_author >> Edge(color="black", style="dashed", reverse=False) >> 
dag_files
-
-        workers - Edge(color="blue", style="dashed", headlabel="Executor") - 
schedulers
-
-        metadata_db >> Edge(color="red", style="dotted", reverse=True) >> 
webservers
-        metadata_db >> Edge(color="red", style="dotted", reverse=True) >> 
schedulers
-        dag_processors >> Edge(color="red", style="dotted", reverse=True) >> 
metadata_db
-        workers >> Edge(color="red", style="dotted", reverse=True) >> 
metadata_db
-        triggerer >> Edge(color="red", style="dotted", reverse=True) >> 
metadata_db
-
-        dag_files >> Edge(color="brown", style="solid") >> workers
-        dag_files >> Edge(color="brown", style="solid") >> dag_processors
-        dag_files >> Edge(color="brown", style="solid") >> triggerer
-    console.print(f"[green]Generating architecture image 
{dag_processor_architecture_image_file}")
-
-
-def generate_auth_manager_airflow_diagram():
-    auth_manager_architecture_image_file = (
-        DOCS_IMAGES_DIR / AUTH_MANAGER_AIRFLOW_ARCHITECTURE_IMAGE_NAME
-    ).with_suffix(".png")
-    console.print(f"[bright_blue]Generating architecture image 
{auth_manager_architecture_image_file}")
-    with Diagram(
-        name="",
-        show=False,
-        direction="LR",
-        curvestyle="ortho",
-        filename=AUTH_MANAGER_AIRFLOW_ARCHITECTURE_IMAGE_NAME,
-    ):
-        user = User("User")
-        with Cluster("Airflow environment"):
-            webserver = Custom("Webserver(s)", 
PYTHON_MULTIPROCESS_LOGO.as_posix())
-
-            with Cluster("Provider X"):
-                auth_manager = Custom("X auth manager", 
PYTHON_MULTIPROCESS_LOGO.as_posix())
-            with Cluster("Core Airflow"):
-                auth_manager_interface = Custom(
-                    "Auth manager\ninterface", 
PYTHON_MULTIPROCESS_LOGO.as_posix()
-                )
-
-        (user >> Edge(color="black", style="solid", reverse=True, 
label="Access to the console") >> webserver)
-
-        (
-            webserver
-            >> Edge(color="black", style="solid", reverse=True, label="Is user 
authorized?")
-            >> auth_manager
-        )
-
-        (
-            auth_manager
-            >> Edge(color="black", style="dotted", reverse=False, 
label="Inherit")
-            >> auth_manager_interface
-        )
-
-    console.print(f"[green]Generating architecture image 
{auth_manager_architecture_image_file}")
-
-
-def generate_fab_auth_manager_airflow_diagram():
-    auth_manager_architecture_image_file = (
-        FAB_PROVIDER_DOCS_IMAGES_DIR / 
FAB_AUTH_MANAGER_AIRFLOW_ARCHITECTURE_IMAGE_NAME
-    ).with_suffix(".png")
-    console.print(f"[bright_blue]Generating architecture image 
{auth_manager_architecture_image_file}")
-    with Diagram(
-        name="",
-        show=False,
-        direction="LR",
-        curvestyle="ortho",
-        filename=FAB_AUTH_MANAGER_AIRFLOW_ARCHITECTURE_IMAGE_NAME,
-    ):
-        user = User("User")
-        with Cluster("Airflow environment"):
-            webserver = Custom("Webserver(s)", 
PYTHON_MULTIPROCESS_LOGO.as_posix())
-
-            with Cluster("FAB provider"):
-                fab_auth_manager = Custom("FAB auth manager", 
PYTHON_MULTIPROCESS_LOGO.as_posix())
-            with Cluster("Core Airflow"):
-                auth_manager_interface = Custom(
-                    "Auth manager\ninterface", 
PYTHON_MULTIPROCESS_LOGO.as_posix()
-                )
-
-            db = PostgreSQL("Metadata DB")
-
-        user >> Edge(color="black", style="solid", reverse=True, label="Access 
to the console") >> webserver
-        (
-            webserver
-            >> Edge(color="black", style="solid", reverse=True, label="Is user 
authorized?")
-            >> fab_auth_manager
-        )
-        (fab_auth_manager >> Edge(color="black", style="solid", reverse=True) 
>> db)
-        (
-            fab_auth_manager
-            >> Edge(color="black", style="dotted", reverse=False, 
label="Inherit")
-            >> auth_manager_interface
-        )
-
-    console.print(f"[green]Generating architecture image 
{auth_manager_architecture_image_file}")
+def _get_file_hash(file_to_check: Path) -> str:
+    hash_md5 = hashlib.md5()
+    hash_md5.update(Path(file_to_check).resolve().read_bytes())
+    return hash_md5.hexdigest()
 
 
 def main():
-    hash_md5 = hashlib.md5()
-    hash_md5.update(Path(__file__).resolve().read_bytes())
-    my_file_hash = hash_md5.hexdigest()
-    hash_file = LOCAL_DIR / DIAGRAM_HASH_FILE_NAME
-    if not hash_file.exists() or not hash_file.read_text().strip() == 
str(my_file_hash).strip():
-        os.chdir(DOCS_IMAGES_DIR)
-        generate_basic_airflow_diagram()
-        generate_dag_processor_airflow_diagram()
-        generate_auth_manager_airflow_diagram()
-        os.chdir(FAB_PROVIDER_DOCS_IMAGES_DIR)
-        generate_fab_auth_manager_airflow_diagram()
-        os.chdir(DOCS_IMAGES_DIR)
-        hash_file.write_text(str(my_file_hash) + "\n")
-    else:
-        console.print("[bright_blue]No changes to generation script. Not 
regenerating the images.")
+    for directory in DOCS_IMAGES_DIR, FAB_PROVIDER_DOCS_IMAGES_DIR:

Review Comment:
   Would not it be better to set `pass_filenames` to `True` in 
`.pre-commit-config.yaml` and loop over these files (ignoring md5 files). This 
way, no need to hardcode the different directories



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to