vincbeck commented on code in PR #36333: URL: https://github.com/apache/airflow/pull/36333#discussion_r1432938003
########## docs/apache-airflow-providers-fab/img/diagram_fab_auth_manager_airflow_architecture.py: ########## @@ -0,0 +1,73 @@ +# Licensed to the Apache Software Foundation (ASF) under one Review Comment: You mixed the two diagrams. This one goes to Airflow doc and the other goes here in the fab provider (look at the images, they are not the same) ########## CONTRIBUTING.rst: ########## @@ -981,6 +981,58 @@ Documentation for ``apache-airflow`` package and other packages that are closely providers packages are in ``/docs/`` directory. For detailed information on documentation development, see: `docs/README.rst <docs/README.rst>`_ +Diagrams +======== + +We started to use (and gradually convert old diagrams to use it) `Diagrams <https://diagrams.mingrammer.com/>`_ +as our tool of choice to generate diagrams. The diagrams are generated from Python code and can be +automatically updated when the code changes. The diagrams are generated using pre-commit hooks (See +static checks below) but they can also be generated manually by running the corresponding Python code. + +To run the code you need to install the dependencies in the virtualenv you use to run it: +* ``pip install diagrams rich`` + +You need to have graphviz installed in your system (``brew install graphviz`` on macOS for example). + +The source code of the diagrams are next to the generated diagram, the difference is that the source +code has ``.py`` extension and the generated diagram has ``.png`` extension. The pre-commit hook +we have ``generate-airflow-diagrams`` will look for ``diagram_*.py`` files in the directories where it Review Comment: ```suggestion ``generate-airflow-diagrams`` will look for ``diagram_*.py`` files in the directories where it ``` ########## scripts/ci/pre_commit/pre_commit_generate_airflow_diagrams.py: ########## @@ -34,201 +30,28 @@ AIRFLOW_SOURCES_ROOT = Path(__file__).parents[3] DOCS_IMAGES_DIR = AIRFLOW_SOURCES_ROOT / "docs" / "apache-airflow" / "img" FAB_PROVIDER_DOCS_IMAGES_DIR = AIRFLOW_SOURCES_ROOT / "docs" / "apache-airflow-providers-fab" / "img" -PYTHON_MULTIPROCESS_LOGO = AIRFLOW_SOURCES_ROOT / "images" / "diagrams" / "python_multiprocess_logo.png" -BASIC_ARCHITECTURE_IMAGE_NAME = "diagram_basic_airflow_architecture" -DAG_PROCESSOR_AIRFLOW_ARCHITECTURE_IMAGE_NAME = "diagram_dag_processor_airflow_architecture" -AUTH_MANAGER_AIRFLOW_ARCHITECTURE_IMAGE_NAME = "diagram_auth_manager_airflow_architecture" -FAB_AUTH_MANAGER_AIRFLOW_ARCHITECTURE_IMAGE_NAME = "diagram_fab_auth_manager_airflow_architecture" -DIAGRAM_HASH_FILE_NAME = "diagram_hash.txt" - -def generate_basic_airflow_diagram(): - basic_architecture_image_file = (DOCS_IMAGES_DIR / BASIC_ARCHITECTURE_IMAGE_NAME).with_suffix(".png") - console.print(f"[bright_blue]Generating architecture image {basic_architecture_image_file}") - with Diagram( - name="", show=False, direction="LR", curvestyle="ortho", filename=BASIC_ARCHITECTURE_IMAGE_NAME - ): - with Cluster("Parsing & Scheduling"): - schedulers = Custom("Scheduler(s)", PYTHON_MULTIPROCESS_LOGO.as_posix()) - - metadata_db = PostgreSQL("Metadata DB") - - dag_author = User("DAG Author") - dag_files = MultipleDocuments("DAG files") - - dag_author >> Edge(color="black", style="dashed", reverse=False) >> dag_files - - with Cluster("Execution"): - workers = Custom("Worker(s)", PYTHON_MULTIPROCESS_LOGO.as_posix()) - triggerer = Custom("Triggerer(s)", PYTHON_MULTIPROCESS_LOGO.as_posix()) - - schedulers - Edge(color="blue", style="dashed", taillabel="Executor") - workers - - schedulers >> Edge(color="red", style="dotted", reverse=True) >> metadata_db - workers >> Edge(color="red", style="dotted", reverse=True) >> metadata_db - triggerer >> Edge(color="red", style="dotted", reverse=True) >> metadata_db - - operations_user = User("Operations User") - with Cluster("UI"): - webservers = Custom("Webserver(s)", PYTHON_MULTIPROCESS_LOGO.as_posix()) - - webservers >> Edge(color="black", style="dashed", reverse=True) >> operations_user - - metadata_db >> Edge(color="red", style="dotted", reverse=True) >> webservers - - dag_files >> Edge(color="brown", style="solid") >> workers - dag_files >> Edge(color="brown", style="solid") >> schedulers - dag_files >> Edge(color="brown", style="solid") >> triggerer - console.print(f"[green]Generating architecture image {basic_architecture_image_file}") - - -def generate_dag_processor_airflow_diagram(): - dag_processor_architecture_image_file = ( - DOCS_IMAGES_DIR / DAG_PROCESSOR_AIRFLOW_ARCHITECTURE_IMAGE_NAME - ).with_suffix(".png") - console.print(f"[bright_blue]Generating architecture image {dag_processor_architecture_image_file}") - with Diagram( - name="", - show=False, - direction="LR", - curvestyle="ortho", - filename=DAG_PROCESSOR_AIRFLOW_ARCHITECTURE_IMAGE_NAME, - ): - operations_user = User("Operations User") - with Cluster("No DAG Python Code Execution", graph_attr={"bgcolor": "lightgrey"}): - with Cluster("Scheduling"): - schedulers = Custom("Scheduler(s)", PYTHON_MULTIPROCESS_LOGO.as_posix()) - - with Cluster("UI"): - webservers = Custom("Webserver(s)", PYTHON_MULTIPROCESS_LOGO.as_posix()) - - webservers >> Edge(color="black", style="dashed", reverse=True) >> operations_user - - metadata_db = PostgreSQL("Metadata DB") - - dag_author = User("DAG Author") - with Cluster("DAG Python Code Execution"): - with Cluster("Execution"): - workers = Custom("Worker(s)", PYTHON_MULTIPROCESS_LOGO.as_posix()) - triggerer = Custom("Triggerer(s)", PYTHON_MULTIPROCESS_LOGO.as_posix()) - with Cluster("Parsing"): - dag_processors = Custom("DAG\nProcessor(s)", PYTHON_MULTIPROCESS_LOGO.as_posix()) - dag_files = MultipleDocuments("DAG files") - - dag_author >> Edge(color="black", style="dashed", reverse=False) >> dag_files - - workers - Edge(color="blue", style="dashed", headlabel="Executor") - schedulers - - metadata_db >> Edge(color="red", style="dotted", reverse=True) >> webservers - metadata_db >> Edge(color="red", style="dotted", reverse=True) >> schedulers - dag_processors >> Edge(color="red", style="dotted", reverse=True) >> metadata_db - workers >> Edge(color="red", style="dotted", reverse=True) >> metadata_db - triggerer >> Edge(color="red", style="dotted", reverse=True) >> metadata_db - - dag_files >> Edge(color="brown", style="solid") >> workers - dag_files >> Edge(color="brown", style="solid") >> dag_processors - dag_files >> Edge(color="brown", style="solid") >> triggerer - console.print(f"[green]Generating architecture image {dag_processor_architecture_image_file}") - - -def generate_auth_manager_airflow_diagram(): - auth_manager_architecture_image_file = ( - DOCS_IMAGES_DIR / AUTH_MANAGER_AIRFLOW_ARCHITECTURE_IMAGE_NAME - ).with_suffix(".png") - console.print(f"[bright_blue]Generating architecture image {auth_manager_architecture_image_file}") - with Diagram( - name="", - show=False, - direction="LR", - curvestyle="ortho", - filename=AUTH_MANAGER_AIRFLOW_ARCHITECTURE_IMAGE_NAME, - ): - user = User("User") - with Cluster("Airflow environment"): - webserver = Custom("Webserver(s)", PYTHON_MULTIPROCESS_LOGO.as_posix()) - - with Cluster("Provider X"): - auth_manager = Custom("X auth manager", PYTHON_MULTIPROCESS_LOGO.as_posix()) - with Cluster("Core Airflow"): - auth_manager_interface = Custom( - "Auth manager\ninterface", PYTHON_MULTIPROCESS_LOGO.as_posix() - ) - - (user >> Edge(color="black", style="solid", reverse=True, label="Access to the console") >> webserver) - - ( - webserver - >> Edge(color="black", style="solid", reverse=True, label="Is user authorized?") - >> auth_manager - ) - - ( - auth_manager - >> Edge(color="black", style="dotted", reverse=False, label="Inherit") - >> auth_manager_interface - ) - - console.print(f"[green]Generating architecture image {auth_manager_architecture_image_file}") - - -def generate_fab_auth_manager_airflow_diagram(): - auth_manager_architecture_image_file = ( - FAB_PROVIDER_DOCS_IMAGES_DIR / FAB_AUTH_MANAGER_AIRFLOW_ARCHITECTURE_IMAGE_NAME - ).with_suffix(".png") - console.print(f"[bright_blue]Generating architecture image {auth_manager_architecture_image_file}") - with Diagram( - name="", - show=False, - direction="LR", - curvestyle="ortho", - filename=FAB_AUTH_MANAGER_AIRFLOW_ARCHITECTURE_IMAGE_NAME, - ): - user = User("User") - with Cluster("Airflow environment"): - webserver = Custom("Webserver(s)", PYTHON_MULTIPROCESS_LOGO.as_posix()) - - with Cluster("FAB provider"): - fab_auth_manager = Custom("FAB auth manager", PYTHON_MULTIPROCESS_LOGO.as_posix()) - with Cluster("Core Airflow"): - auth_manager_interface = Custom( - "Auth manager\ninterface", PYTHON_MULTIPROCESS_LOGO.as_posix() - ) - - db = PostgreSQL("Metadata DB") - - user >> Edge(color="black", style="solid", reverse=True, label="Access to the console") >> webserver - ( - webserver - >> Edge(color="black", style="solid", reverse=True, label="Is user authorized?") - >> fab_auth_manager - ) - (fab_auth_manager >> Edge(color="black", style="solid", reverse=True) >> db) - ( - fab_auth_manager - >> Edge(color="black", style="dotted", reverse=False, label="Inherit") - >> auth_manager_interface - ) - - console.print(f"[green]Generating architecture image {auth_manager_architecture_image_file}") +def _get_file_hash(file_to_check: Path) -> str: + hash_md5 = hashlib.md5() + hash_md5.update(Path(file_to_check).resolve().read_bytes()) + return hash_md5.hexdigest() def main(): - hash_md5 = hashlib.md5() - hash_md5.update(Path(__file__).resolve().read_bytes()) - my_file_hash = hash_md5.hexdigest() - hash_file = LOCAL_DIR / DIAGRAM_HASH_FILE_NAME - if not hash_file.exists() or not hash_file.read_text().strip() == str(my_file_hash).strip(): - os.chdir(DOCS_IMAGES_DIR) - generate_basic_airflow_diagram() - generate_dag_processor_airflow_diagram() - generate_auth_manager_airflow_diagram() - os.chdir(FAB_PROVIDER_DOCS_IMAGES_DIR) - generate_fab_auth_manager_airflow_diagram() - os.chdir(DOCS_IMAGES_DIR) - hash_file.write_text(str(my_file_hash) + "\n") - else: - console.print("[bright_blue]No changes to generation script. Not regenerating the images.") + for directory in DOCS_IMAGES_DIR, FAB_PROVIDER_DOCS_IMAGES_DIR: Review Comment: Would not it be better to set `pass_filenames` to `True` in `.pre-commit-config.yaml` and loop over these files (ignoring md5 files). This way, no need to hardcode the different directories -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
