This is an automated email from the ASF dual-hosted git repository. kaxilnaik pushed a commit to branch v3-0-test in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/v3-0-test by this push: new 9afb41032b6 Docs setup for the Task SDK (#51153) (#52682) 9afb41032b6 is described below commit 9afb41032b6826ca5c8d2322778c12ee22616d44 Author: Ankit Chaurasia <8670962+sunank...@users.noreply.github.com> AuthorDate: Wed Jul 2 16:49:22 2025 +0545 Docs setup for the Task SDK (#51153) (#52682) This PR is a continuation of https://github.com/apache/airflow/pull/47357 To enhance the experience (i.e., rather than just being given an alphabetical list of classes/functions), I have opted not to use the AutoAPI extension's auto-generate feature, allowing me to precisely control the order and grouping of the classes. I have organized Task-SDK API into these sections: * **Defining DAGs**: `DAG`, `dag` * **Decorators**: `task`, `task_group`, `setup`, `teardown` * **Bases**: `BaseOperator`, `BaseSensorOperator`, `BaseNotifier`, `BaseOperatorLink`, `XComArg` * **Connections & Variables**: `Connection`, `Variable` * **Tasks & Operators**: `TaskGroup`, `get_current_context`, `get_parsing_context`, `Param`, `chain`, `chain_linear`, `cross_downstream` * **Assets**: `Asset`, `AssetAlias`, `AssetAll`, `AssetAny`, `AssetWatcher`, `Metadata` I also ensured that all documented references use the top-level SDK path (e.g., `airflow.sdk.DAG`) instead of exposing the underlying module paths (e.g., `airflow.sdk.definitions.dag.DAG`). I have incorporated a test that compares the items in the generated `objects.inv` with the elements I re-export from `airflow/sdk/__init__.py`. The `airflow/sdk/__init__.pyi` type stub is unfortunately necessary, as without it (and with the imports solely within the `if TYPE_CHECKING` block), the re-exported classes were not appearing where I wanted them. Specifically, I want users to import items directly from `airflow.sdk`, not the submodule. 
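For illustration, a hypothetical user DAG written against this documented surface imports everything from the top-level `airflow.sdk` package rather than from the underlying modules (the DAG below is only a sketch; `DAG` and `task` are taken from the re-export list above):

```python
# Hypothetical example DAG: every name comes from the top-level
# airflow.sdk package, not from paths like airflow.sdk.definitions.dag.
from airflow.sdk import DAG, task

with DAG(dag_id="sdk_import_style_example"):

    @task
    def hello() -> None:
        # Trivial task body, only here to show the decorator usage.
        print("hello from the Task SDK")

    hello()
```

The `__init__.pyi` stub can be regenerated with `stubgen`: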
stubgen -o task-sdk/src -m airflow.sdk Test this with breeze build-docs task-sdk or uv run --group docs build-docs task-sdk **Screenshots** <img width="1720" alt="Screenshot 2025-06-02 at 6 15 40 PM" src="https://github.com/user-attachments/assets/7ce06d9f-1af3-4156-acf7-642d5f37f907" /> <img width="1726" alt="Screenshot 2025-06-02 at 7 04 10 PM" src="https://github.com/user-attachments/assets/adfa99b3-1c40-4c37-b523-4d6ee27381b2" /> <img width="1707" alt="Screenshot 2025-06-02 at 6 44 01 PM" src="https://github.com/user-attachments/assets/d5ccbabc-dfbd-465d-ae32-3697acdce827" /> <img width="1728" alt="Screenshot 2025-06-11 at 1 06 26 AM" src="https://github.com/user-attachments/assets/c8a5792b-59ad-4855-8937-4877a31a9a55" /> closes: #43010 closes: #51283 (cherry picked from commit ebfc0de97ecb7bc550a2182def9a35cd12a8699e) --- airflow-core/docs/index.rst | 9 + .../airflow/example_dags/example_asset_alias.py | 2 + .../src/airflow/example_dags/example_assets.py | 4 +- .../airflow/example_dags/example_dag_decorator.py | 2 +- .../example_dags/example_dynamic_task_mapping.py | 3 + .../example_setup_teardown_taskflow.py | 2 + .../airflow/example_dags/example_simplest_dag.py | 3 + .../example_dags/example_task_group_decorator.py | 2 +- .../src/airflow/example_dags/example_xcomargs.py | 2 + dev/README_RELEASE_AIRFLOW.md | 4 +- .../src/sphinx_exts/docs_build/docs_builder.py | 6 + .../src/sphinx_exts/docs_build/spelling_checks.py | 8 +- devel-common/src/sphinx_exts/exampleinclude.py | 1 + docs/README.md | 239 +++++++++++++++++++++ docs/spelling_wordlist.txt | 5 + task-sdk/docs/.gitignore | 1 + task-sdk/docs/api.rst | 132 ++++++++++++ task-sdk/docs/conf.py | 86 ++++++++ task-sdk/docs/examples.rst | 86 ++++++++ task-sdk/docs/index.rst | 92 ++++++++ task-sdk/pyproject.toml | 3 + task-sdk/src/airflow/sdk/__init__.pyi | 95 ++++++++ task-sdk/src/airflow/sdk/definitions/context.py | 2 + task-sdk/src/airflow/sdk/definitions/dag.py | 4 +- task-sdk/src/airflow/sdk/io/path.py | 21 +- task-sdk/tests/test_docs_inventory.py | 82 +++++++ task-sdk/tests/test_public_api.py | 74 +++++++ 27 files changed, 950 insertions(+), 20 deletions(-) diff --git a/airflow-core/docs/index.rst b/airflow-core/docs/index.rst index a7a21e671c9..1927774703f 100644 --- a/airflow-core/docs/index.rst +++ b/airflow-core/docs/index.rst @@ -32,6 +32,15 @@ Airflow workflows are defined entirely in Python. This "workflows as code" appro - **Extensible**: The Airflow framework includes a wide range of built-in operators and can be extended to fit your needs. - **Flexible**: Airflow leverages the `Jinja <https://jinja.palletsprojects.com>`_ templating engine, allowing rich customizations. +.. 
_task-sdk-docs: + +Task SDK +======== + +For Airflow Task SDK, see the standalone reference & tutorial site: + + https://airflow.apache.org/docs/task-sdk/stable/ + Dags ----------------------------------------- diff --git a/airflow-core/src/airflow/example_dags/example_asset_alias.py b/airflow-core/src/airflow/example_dags/example_asset_alias.py index d0a6a20188a..5c4df1aa09c 100644 --- a/airflow-core/src/airflow/example_dags/example_asset_alias.py +++ b/airflow-core/src/airflow/example_dags/example_asset_alias.py @@ -32,6 +32,7 @@ thus update its schedule to the asset "s3://bucket/my-task" and will also be tri from __future__ import annotations +# [START example_asset_alias] import pendulum from airflow.sdk import DAG, Asset, AssetAlias, task @@ -94,3 +95,4 @@ with DAG( print(event) consume_asset_event_from_asset_alias() +# [END example_asset_alias] diff --git a/airflow-core/src/airflow/example_dags/example_assets.py b/airflow-core/src/airflow/example_dags/example_assets.py index 2bb3cffc527..3ab37211258 100644 --- a/airflow-core/src/airflow/example_dags/example_assets.py +++ b/airflow-core/src/airflow/example_dags/example_assets.py @@ -52,6 +52,7 @@ automatically as they depend on assets that do not get updated or are not produc from __future__ import annotations +# [START asset_def] import pendulum from airflow.providers.standard.operators.bash import BashOperator @@ -59,9 +60,7 @@ from airflow.sdk import DAG, Asset from airflow.timetables.assets import AssetOrTimeSchedule from airflow.timetables.trigger import CronTriggerTimetable -# [START asset_def] dag1_asset = Asset("s3://dag1/output_1.txt", extra={"hi": "bye"}) -# [END asset_def] dag2_asset = Asset("s3://dag2/output_1.txt", extra={"hi": "bye"}) dag3_asset = Asset("s3://dag3/output_3.txt", extra={"hi": "bye"}) @@ -189,3 +188,4 @@ with DAG( task_id="conditional_asset_and_time_based_timetable", bash_command="sleep 5", ) +# [END asset_def] diff --git a/airflow-core/src/airflow/example_dags/example_dag_decorator.py b/airflow-core/src/airflow/example_dags/example_dag_decorator.py index c1f5b392332..5d1312a888e 100644 --- a/airflow-core/src/airflow/example_dags/example_dag_decorator.py +++ b/airflow-core/src/airflow/example_dags/example_dag_decorator.py @@ -17,6 +17,7 @@ # under the License. 
from __future__ import annotations +# [START dag_decorator_usage] from typing import TYPE_CHECKING, Any import httpx @@ -43,7 +44,6 @@ class GetRequestOperator(BaseOperator): return httpx.get(self.url).json() -# [START dag_decorator_usage] @dag( schedule=None, start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), diff --git a/airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py b/airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py index 9f4f45511cf..750c3da1ec1 100644 --- a/airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py +++ b/airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py @@ -19,6 +19,7 @@ from __future__ import annotations +# [START example_dynamic_task_mapping] from datetime import datetime from airflow.sdk import DAG, task @@ -56,3 +57,5 @@ with DAG( _get_nums = get_nums() _times_2 = times_2.expand(num=_get_nums) add_10.expand(num=_times_2) + +# [END example_dynamic_task_mapping] diff --git a/airflow-core/src/airflow/example_dags/example_setup_teardown_taskflow.py b/airflow-core/src/airflow/example_dags/example_setup_teardown_taskflow.py index e554b4f9cae..8b68f85ef82 100644 --- a/airflow-core/src/airflow/example_dags/example_setup_teardown_taskflow.py +++ b/airflow-core/src/airflow/example_dags/example_setup_teardown_taskflow.py @@ -19,6 +19,7 @@ from __future__ import annotations +# [START example_setup_teardown_taskflow] import pendulum from airflow.sdk import DAG, setup, task, task_group, teardown @@ -104,3 +105,4 @@ with DAG( # and let's put section 1 inside the outer setup and teardown tasks section_1() +# [END example_setup_teardown_taskflow] diff --git a/airflow-core/src/airflow/example_dags/example_simplest_dag.py b/airflow-core/src/airflow/example_dags/example_simplest_dag.py index fad6f57950a..660f38c2e00 100644 --- a/airflow-core/src/airflow/example_dags/example_simplest_dag.py +++ b/airflow-core/src/airflow/example_dags/example_simplest_dag.py @@ -18,6 +18,7 @@ from __future__ import annotations +# [START simplest_dag] from airflow.sdk import dag, task @@ -30,4 +31,6 @@ def example_simplest_dag(): my_task() +# [END simplest_dag] + example_simplest_dag() diff --git a/airflow-core/src/airflow/example_dags/example_task_group_decorator.py b/airflow-core/src/airflow/example_dags/example_task_group_decorator.py index 580b8bca522..5ed2a59ae3b 100644 --- a/airflow-core/src/airflow/example_dags/example_task_group_decorator.py +++ b/airflow-core/src/airflow/example_dags/example_task_group_decorator.py @@ -19,12 +19,12 @@ from __future__ import annotations +# [START howto_task_group_decorator] import pendulum from airflow.sdk import DAG, task, task_group -# [START howto_task_group_decorator] # Creating Tasks @task def task_start(): diff --git a/airflow-core/src/airflow/example_dags/example_xcomargs.py b/airflow-core/src/airflow/example_dags/example_xcomargs.py index 6337cf482d9..a64beb513ba 100644 --- a/airflow-core/src/airflow/example_dags/example_xcomargs.py +++ b/airflow-core/src/airflow/example_dags/example_xcomargs.py @@ -19,6 +19,7 @@ from __future__ import annotations +# [START example_xcomargs] import logging import pendulum @@ -63,3 +64,4 @@ with DAG( xcom_args_b = print_value("second!") bash_op1 >> xcom_args_a >> xcom_args_b >> bash_op2 +# [END example_xcomargs] diff --git a/dev/README_RELEASE_AIRFLOW.md b/dev/README_RELEASE_AIRFLOW.md index 32cf8999290..a61ec4496a7 100644 --- a/dev/README_RELEASE_AIRFLOW.md +++ b/dev/README_RELEASE_AIRFLOW.md @@ -362,7 +362,7 @@ The command does the following: 3. 
Triggers S3 to GitHub Sync ```shell script - breeze workflow-run publish-docs --ref <tag> --site-env <staging/live/auto> + breeze workflow-run publish-docs --ref <tag> --site-env <staging/live/auto> apache-airflow docker-stack task-sdk ``` The `--ref` parameter should be the tag of the release candidate you are publishing. @@ -387,7 +387,7 @@ The release manager publishes the documentation using GitHub Actions workflow the tag you use - pre-release tags go to staging. But you can also override it and specify the destination manually to be `live` or `staging`. -You should specify 'apache-airflow docker-stack' passed as packages to be +You should specify 'apache-airflow docker-stack task-sdk' passed as packages to be built. After that step, the provider documentation should be available under https://airflow.stage.apache.org// diff --git a/devel-common/src/sphinx_exts/docs_build/docs_builder.py b/devel-common/src/sphinx_exts/docs_build/docs_builder.py index 44ba83e6f7a..4b4c39b96d0 100644 --- a/devel-common/src/sphinx_exts/docs_build/docs_builder.py +++ b/devel-common/src/sphinx_exts/docs_build/docs_builder.py @@ -117,6 +117,10 @@ class AirflowDocsBuilder: if self.package_name.startswith("apache-airflow-providers-"): package_paths = self.package_name[len("apache-airflow-providers-") :].split("-") return (AIRFLOW_CONTENT_ROOT_PATH / "providers").joinpath(*package_paths) / "docs" + if self.package_name == "apache-airflow-ctl": + return AIRFLOW_CONTENT_ROOT_PATH / "airflow-ctl" / "docs" + if self.package_name == "task-sdk": + return AIRFLOW_CONTENT_ROOT_PATH / "task-sdk" / "docs" console.print(f"[red]Unknown package name: {self.package_name}") sys.exit(1) @@ -330,6 +334,8 @@ def get_available_packages(include_suspended: bool = False, short_form: bool = F "apache-airflow", *provider_names, "apache-airflow-providers", + "apache-airflow-ctl", + "task-sdk", "helm-chart", "docker-stack", ] diff --git a/devel-common/src/sphinx_exts/docs_build/spelling_checks.py b/devel-common/src/sphinx_exts/docs_build/spelling_checks.py index 0b69b4fb17f..58197f13409 100644 --- a/devel-common/src/sphinx_exts/docs_build/spelling_checks.py +++ b/devel-common/src/sphinx_exts/docs_build/spelling_checks.py @@ -71,17 +71,17 @@ class SpellingError(NamedTuple): line_no_b: int = other.line_no or 0 context_line_a: str = self.context_line or "" context_line_b: str = other.context_line or "" - left: tuple[Path, int, int, str, str] = ( + left: tuple[Path, int, str, str, str] = ( file_path_a, line_no_a, context_line_a, self.spelling or "", self.message or "", ) - right: tuple[Path, int, int, str, str] = ( + right: tuple[Path, int, str, str, str] = ( file_path_b, - line_no_b or 0, - context_line_b or 0, + line_no_b, + context_line_b, other.spelling or "", other.message or "", ) diff --git a/devel-common/src/sphinx_exts/exampleinclude.py b/devel-common/src/sphinx_exts/exampleinclude.py index 2731f6522bf..7bb4b0b132e 100644 --- a/devel-common/src/sphinx_exts/exampleinclude.py +++ b/devel-common/src/sphinx_exts/exampleinclude.py @@ -78,6 +78,7 @@ class ExampleInclude(SphinxDirective): "emphasize-lines": directives.unchanged_required, "class": directives.class_option, "name": directives.unchanged, + "caption": directives.unchanged_required, "diff": directives.unchanged_required, } diff --git a/docs/README.md b/docs/README.md index eeba9dfbf68..e240161502e 100644 --- a/docs/README.md +++ b/docs/README.md @@ -21,6 +21,15 @@ **Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* - [Documentation 
configuration](#documentation-configuration) +- [Architecture of documentation for Airflow](#architecture-of-documentation-for-airflow) +- [Diagrams of the documentation architecture](#diagrams-of-the-documentation-architecture) +- [Staging site](#staging-site) +- [Typical workflows](#typical-workflows) + - [Publishing the documentation by the release manager](#publishing-the-documentation-by-the-release-manager) + - [Publishing changes to the website (including theme)](#publishing-changes-to-the-website-including-theme) +- [Fixing historical documentation](#fixing-historical-documentation) + - [Manually publishing documentation directly to S3](#manually-publishing-documentation-directly-to-s3) + - [Manually publishing documentation via `apache-airflow-site-archive` repo](#manually-publishing-documentation-via-apache-airflow-site-archive-repo) <!-- END doctoc generated TOC please keep comment here to allow auto update --> @@ -44,3 +53,233 @@ Documentation for general overview and summaries not connected with any specific * `docker-stack-docs` - documentation for Docker Stack * `providers-summary-docs` - documentation for provider summary page + +# Architecture of documentation for Airflow + +Building documentation for Airflow is optimized for speed and for the convenience of the release +managers and committers who publish and fix the documentation - that's why it's a little complex, as we have +multiple repositories and multiple sources of documentation involved. + +There are a few repositories under the `apache` organization which are used to build the documentation for Airflow: + +* `apache-airflow` - the repository with the code and the documentation sources for Airflow distributions, + provider distributions, the providers summary and the docker summary: [apache-airflow](https://github.com/apache/airflow). + From here we publish the documentation to the S3 bucket where the documentation is hosted. +* `airflow-site` - the repository with the website theme and content, where we keep the sources of the website + structure, navigation and theme: [airflow-site](https://github.com/apache/airflow-site). From here + we publish the website to the ASF servers so it is published as the [official website](https://airflow.apache.org). +* `airflow-site-archive` - here we keep the archived historical versions of the generated documentation + of all the documentation packages that we keep on S3. This repository is automatically synchronized from + the S3 buckets and is only used in case we need to perform a bulk update of historical documentation. Only + generated `html`, `css`, `js` and image files are kept here; no sources of the documentation are kept. + +We have two S3 buckets where we can publish the documentation generated from the `apache-airflow` repository: + +* `s3://live-docs-airflow-apache-org/docs/` - live, [official documentation](https://airflow.apache.org/docs/) +* `s3://staging-docs-airflow-apache-org/docs/` - [staging documentation](https://staging-airflow.apache.org/docs/) TODO: make it work + +# Diagrams of the documentation architecture + +This is the diagram of the live documentation architecture: + + + +# Staging site + +The staging documentation architecture is similar, but uses the staging bucket and the staging Apache website.
The main +differences are: + +* The staging bucket is `s3://staging-docs-airflow-apache-org/docs/` +* The staging website is `https://airflow.staged.apache.org/docs/` +* The staging site is deployed by merging a PR into, or pushing, the `staging` branch in the `airflow-site` repository + rather than `main`. The `staging` branch should be periodically rebased onto the `main` branch, but while + some changes are being developed in `staging` it can diverge from `main`. +* Merging into the `staging` branch of the `airflow-site` repository (or pushing to it) + will automatically trigger the build of the website and publish it to the `publish-staging` branch and + effectively to the staging site. + +Documentation of pre-release versions of Airflow distributions should be published to the staging S3 +bucket so that we can test the documentation before we publish it to the `live` bucket. + +# Typical workflows + +There are a few typical workflows that we support: + +## Publishing the documentation by the release manager + +The release manager publishes the documentation using the GitHub Actions workflow +[Publish Docs to S3](https://github.com/apache/airflow/actions/workflows/publish-docs-to-s3.yml). +The same workflow can be used to publish the Airflow, Helm chart and providers documentation. + +This workflow is used twice: + +* when pre-release distributions are prepared (alpha/beta/rc) - the documentation should be published to + the `staging` bucket and the `staging` site should be built and published. +* when final releases of distributions are prepared - the documentation should be published to the `live` + bucket and the `live` website should be built and published. + +When the release manager publishes the documentation, the `auto` destination is chosen by default - depending on the +tag they use, `staging` will be used to publish from a pre-release tag and `live` will be used to publish +from the release tag. + +You can also manually specify whether `live` or `staging` documentation should be published - overriding +the auto-detection. + +The person who triggers the build (the release manager) should specify the tag name of the docs to be published +and the list of documentation packages to be published. Usually it is: + +* Airflow: `apache-airflow docker-stack task-sdk` (later we will add `airflow-ctl`) +* Helm chart: `helm-chart` +* Providers: `provider_id1 provider_id2` or `all providers` if all providers should be published. + +Optionally - specifically if we run `all-providers` and the release manager wants to exclude some providers - +they can specify documentation packages to exclude. Leaving "no-docs-excluded" will publish all packages +specified to be published, without exclusions. + + +Example screenshot of the workflow triggered from the GitHub UI: + + + +Note that this just publishes the documentation but does not update the "site" with version numbers or +stable links to providers and airflow - if you release a new documentation version it will be available +with a direct URL (say https://airflow.apache.org/docs/apache-airflow/3.0.1/) but the main site will still +point to the previous version of the documentation as `stable` and the version drop-downs will not be updated. + +In order to do that, you need to run the [Build docs](https://github.com/apache/airflow-site/actions/workflows/build.yml) +workflow in the `airflow-site` repository. + +For the `live` site you should run the workflow in the `main` branch. For the `staging` site it should be the `staging` branch.
+This will build the website and publish it to the `publish` branch of the `airflow-site` repository (for the `live` +site) or the `publish-staging` branch (for the `staging` site). The workflow will also update the website, +including refreshing the version numbers in the drop-downs and the stable links. + +The `staging` documentation is produced automatically with a `staging` watermark added. + + + +This workflow also invalidates the Fastly cache that the Apache Software Foundation uses to serve the website, +so you should always run it after you modify the documentation for the website. Other than that, Fastly is +configured with a 3600-second TTL, which means that changes will propagate to the website in ~1 hour. + +Shortly after the workflow succeeds and documentation is published in the live bucket, the `airflow-site-archive` +repository is automatically synchronized with the live S3 bucket. TODO: IMPLEMENT THIS, FOR NOW IT HAS +TO BE MANUALLY SYNCHRONIZED VIA THE [Sync s3 to GitHub](https://github.com/apache/airflow-site-archive/actions/workflows/s3-to-github.yml) +workflow in the `airflow-site-archive` repository. The `airflow-site-archive` repository essentially keeps the history of +snapshots of the `live` documentation. + +## Publishing changes to the website (including theme) + +The workflows in `apache-airflow` only update the documentation for the packages (Airflow, Helm chart, +Providers, Docker Stack) that we publish from Airflow sources. If we want to publish changes to the website +itself or to the theme (css, javascript) we need to do it in the `airflow-site` repository. + +Publishing of `airflow-site` happens automatically when a PR in `airflow-site` is merged to `main` or when +the [Build docs](https://github.com/apache/airflow-site/actions/workflows/build.yml) workflow is triggered +manually in the `main` branch of the `airflow-site` repository. The workflow builds the website and publishes it to +the `publish` branch of the `airflow-site` repository, which in turn gets picked up by the ASF servers and is +published as the official website. This includes any changes to the `.htaccess` of the website. + +Such a main build also publishes the latest "sphinx-airflow-theme" package to GitHub so that the next build +of documentation can automatically pick it up from there. This means that if you want to make changes to +`javascript` or `css` that are part of the theme, you need to do it in the `airflow-site` repository and +merge it to the `main` branch in order to be able to run the documentation build in the `apache-airflow` repository +and pick up the latest version of the theme. + +The version of the sphinx theme is pinned in both repositories: + +* https://github.com/apache/airflow-site/blob/main/sphinx_airflow_theme/sphinx_airflow_theme/__init__.py#L21 +* https://github.com/apache/airflow/blob/main/devel-common/pyproject.toml#L77 in the "docs" section + +In case of bigger changes to the theme, we +can first iterate on the website and merge a new theme version, and only after that switch the documentation +build over to the new version of the theme. + + +# Fixing historical documentation + +Sometimes we need to update historical documentation (modify generated `html`) - for example when we find +bad links or when we change some of the structure in the documentation. This can be done via the +`airflow-site-archive` repository. The workflow is as follows: + +1. Get the latest version of the documentation from S3 to the `airflow-site-archive` repository using + the `Sync s3 to GitHub` workflow.
This will download the latest version of the documentation from S3 to + the `airflow-site-archive` repository (this should normally not be needed if automated synchronization works). +2. Make the changes to the documentation in the `airflow-site-archive` repository. This can be done using any + text editor, scripts, etc. Those files are generated `html` files and are not meant to be regenerated; + they should be modified in place as `html` files. +3. Commit the changes to the `airflow-site-archive` repository and push them to a branch of the repository. +4. Run the `Sync GitHub to S3` workflow in the `airflow-site-archive` repository. This will upload the modified + documentation to the S3 bucket. +5. You can choose whether to sync the changes to the `live` or `staging` bucket. The default is `live`. +6. By default the workflow will synchronize all documentation modified in the single (last) commit pushed to + the branch you specified. You can also specify "full_sync" to synchronize all files in the repository. +7. In case you specify "full_sync", you can also synchronize `all` docs or only selected documentation + packages (for example `apache-airflow` or `docker-stack` or `amazon` or `helm-chart`) - you can specify + more than one package separated by spaces. +8. After you synchronize the changes to S3, the `Sync S3 to GitHub` workflow will be triggered + automatically and the changes will be synchronized to the `main` branch of `airflow-site-archive` - so there + is no need to merge your changes to the `main` branch of the `airflow-site-archive` repository. You can safely + delete the branch you created in step 3. + + + + +## Manually publishing documentation directly to S3 + +The regular publishing workflows involve running GitHub Actions workflows and they cover the majority of cases; +however, sometimes manual updates and cherry-picks are needed when we discover problems with the +publishing and doc-building code - for example when we find that we need to fix extensions to sphinx. + +In such a case, the release manager or a committer can build and publish the documentation locally - provided that +they configure AWS credentials to be able to upload files to S3. You can ask in +the #internal-airflow-ci-cd channel on Airflow Slack to get your AWS credentials configured. + +You can check out locally the version of the airflow repo that you need and apply any cherry-picks you need before +running publishing. + +This is done using breeze. You also need to have the AWS CLI installed and credentials configured to be able +to upload files to S3. You can get credentials from one of the admins of Airflow's AWS account. The +region to set for AWS is `us-east-2`. + +Note that it is advised to add `--dry-run` if you just want to see what would happen. Also, you can use +the `s3://staging-docs-airflow-apache-org/docs/` bucket to test the publishing using the staging site.
+ +```bash +breeze build-docs "<package_id1>" "<package_id2>" --docs-only +mkdir /tmp/airflow-site +breeze release-management publish-docs --override-versioned --airflow-site-directory /tmp/airflow-site +breeze release-management publish-docs-to-s3 --source-dir-path /tmp/airflow-site/docs-archive \ + --destination-location s3://live-docs-airflow-apache-org/docs/ --stable-versions \ + --exclude-docs "<package_id1_to_exclude> <package_id2_to_exclude>" [--dry-run] +``` + +## Manually publishing documentation via `apache-airflow-site-archive` repo + +If you do not have S3 credentials and want to be careful about publishing the documentation you can also +use publishing via `apache-airflow-site-archive` repository. This is a little more complex, but it allows +you to publish documentation without having S3 credentials. + +The process is as follows: + +1. Run `Sync s3 to GitHub` workflow in `apache-airflow-site-archive` repository. This will download the + latest version of the documentation from S3 to `airflow-site-archive` repository (this should be normally + not needed, if automated synchronization works). +2. Checkout `apache-airflow-site-archive` repository and create a branch for your changes. +3. Build documentation locally in `apache-airflow` repo with any cherry-picks and modifications you need and + publish the docs to the checked out `airflow-site-archive` branch + +```bash +breeze build-docs "<package_id1>" "<package_id2>" --docs-only +breeze release-management publish-docs --override-versioned --airflow-site-directory <PATH_TO_THE_ARCHIVE_REPO> +``` + +4. Commit the changes to `apache-airflow-site-archive` repository and push them to `some` branch of the + repository. +5. Run `Sync GitHub to S3` workflow in `apache-airflow-site-archive` repository. This will upload the modified + documentation to S3 bucket. You can choose, whether to sync the changes to `live` or `staging` bucket. + The default is `live`. You can also specify which folders to sync - by default all modified folders are synced. +6. After you synchronize the changes to S3, the Sync `S3 to GitHub` workflow will be triggered + automatically and the changes will be synchronized to `airflow-site-archive` `main` branch - so there + is no need to merge your changes to `main` branch of `airflow-site-archive` repository. You can safely + delete the branch you created in step 2. diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 1be322ef0a7..a15434452f3 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -561,6 +561,7 @@ ds dsl Dsn dsn +dst dts dttm dtypes @@ -642,6 +643,7 @@ exc executables execvp exitcode +expanduser explicit exportingmultiple ext @@ -737,6 +739,7 @@ Gentner geq getattr getboolean +getcwd getfqdn getframe getint @@ -1046,6 +1049,7 @@ ListGenerator ListInfoTypesResponse ListModelsPager ListSecretsPager +LiteralValue Liveness liveness livy @@ -1787,6 +1791,7 @@ templatable templateable Templated templated +templater Templating templating templatize diff --git a/task-sdk/docs/.gitignore b/task-sdk/docs/.gitignore new file mode 100644 index 00000000000..69fa449dd96 --- /dev/null +++ b/task-sdk/docs/.gitignore @@ -0,0 +1 @@ +_build/ diff --git a/task-sdk/docs/api.rst b/task-sdk/docs/api.rst new file mode 100644 index 00000000000..8605b234792 --- /dev/null +++ b/task-sdk/docs/api.rst @@ -0,0 +1,132 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +airflow.sdk API Reference +========================= + +This page documents the full public API exposed in Airflow 3.0+ via the Task SDK python module. + +If something is not on this page it is best to assume that it is not part of the public API and use of it is entirely at your own risk +-- we won't go out of our way break usage of them, but we make no promises either. + +Defining DAGs +------------- +.. autoapiclass:: airflow.sdk.DAG + + +Decorators +---------- +.. autoapifunction:: airflow.sdk.dag +.. autoapifunction:: airflow.sdk.task + +.. autoapifunction:: airflow.sdk.task_group + +.. autoapifunction:: airflow.sdk.setup + +.. autoapifunction:: airflow.sdk.teardown + +.. autofunction:: airflow.sdk.task +.. autofunction:: airflow.sdk.setup +.. autofunction:: airflow.sdk.teardown +.. autofunction:: airflow.sdk.asset + + +Bases +----- +.. autoapiclass:: airflow.sdk.BaseOperator + +.. autoapiclass:: airflow.sdk.BaseSensorOperator + +.. autoapiclass:: airflow.sdk.BaseNotifier + +.. autoapiclass:: airflow.sdk.BaseOperatorLink + +.. autoapiclass:: airflow.sdk.BaseXCom + +.. autoapiclass:: airflow.sdk.XComArg + +.. autoapiclass:: airflow.sdk.PokeReturnValue + +Connections & Variables +----------------------- +.. autoapiclass:: airflow.sdk.Connection + +.. autoapiclass:: airflow.sdk.Variable + +Tasks & Operators +----------------- +.. autoapiclass:: airflow.sdk.TaskGroup + +.. autoapifunction:: airflow.sdk.get_current_context + +.. autoapifunction:: airflow.sdk.get_parsing_context + +.. autoapiclass:: airflow.sdk.Param + +Setting Dependencies +~~~~~~~~~~~~~~~~~~~~ +.. autoapifunction:: airflow.sdk.chain + +.. autoapifunction:: airflow.sdk.chain_linear + +.. autoapifunction:: airflow.sdk.cross_downstream + +.. autoapifunction:: airflow.sdk.literal + +Edges & Labels +~~~~~~~~~~~~~~ +.. autoapiclass:: airflow.sdk.EdgeModifier + +.. autoapiclass:: airflow.sdk.Label + +Assets +------ +.. autoapiclass:: airflow.sdk.Asset + +.. autoapiclass:: airflow.sdk.AssetAlias + +.. autoapiclass:: airflow.sdk.AssetAll + +.. autoapiclass:: airflow.sdk.AssetAny + +.. autoapiclass:: airflow.sdk.AssetWatcher + +.. autoapiclass:: airflow.sdk.Metadata + +I/O Helpers +----------- +.. autoapiclass:: airflow.sdk.ObjectStoragePath + +Execution Time Components +------------------------- +.. rubric:: Context +.. autoapiclass:: airflow.sdk.Context +.. autoapimodule:: airflow.sdk.execution_time.context + :members: + :undoc-members: + + +Everything else +--------------- + +.. 
autoapimodule:: airflow.sdk + :members: + :special-members: __version__ + :exclude-members: BaseOperator, DAG, dag, asset, Asset, AssetAlias, AssetAll, AssetAny, AssetWatcher, TaskGroup, XComArg, get_current_context, get_parsing_context + :undoc-members: + :imported-members: + :no-index: diff --git a/task-sdk/docs/conf.py b/task-sdk/docs/conf.py new file mode 100644 index 00000000000..e3fbacae2ce --- /dev/null +++ b/task-sdk/docs/conf.py @@ -0,0 +1,86 @@ +# Disable Flake8 because of all the sphinx imports +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import sys +from pathlib import Path + +CONF_DIR = Path(__file__).parent.absolute() +sys.path.insert(0, str(CONF_DIR.parent.parent.joinpath("devel-common", "src", "sphinx_exts").resolve())) + +project = "Apache Airflow Task SDK" + +language = "en" +locale_dirs: list[str] = [] + +extensions = [ + "sphinx.ext.autodoc", + "autoapi.extension", + "sphinx.ext.intersphinx", + "exampleinclude", + "sphinxcontrib.spelling", +] + +autoapi_dirs = [CONF_DIR.joinpath("..", "src").resolve()] +autoapi_root = "api" +autoapi_ignore = [ + "*/airflow/sdk/execution_time", + "*/airflow/sdk/api", + "*/_internal*", +] +autoapi_options = [ + "undoc-members", + "members", + "imported-members", +] +autoapi_add_toctree_entry = False +autoapi_generate_api_docs = False + +autodoc_typehints = "description" + +# Prefer pyi over py files if both are found +autoapi_file_patterns = ["*.pyi", "*.py"] + +html_theme = "sphinx_airflow_theme" + + +global_substitutions = { + "experimental": "This is an :ref:`experimental feature <experimental>`.", +} + +rst_epilog = "\n".join(f".. |{key}| replace:: {replace}" for key, replace in global_substitutions.items()) + + +intersphinx_resolve_self = "airflow" +intersphinx_mapping = { + "airflow": ("https://airflow.apache.org/docs/apache-airflow/stable/", None), +} +# Suppress known warnings +suppress_warnings = [ + "autoapi.python_import_resolution", + "autodoc", +] + +exampleinclude_sourceroot = str(CONF_DIR.joinpath("..").resolve()) +spelling_show_suggestions = False +spelling_word_list_filename = [ + str(CONF_DIR.parent.parent.joinpath("docs", "spelling_wordlist.txt").resolve()) +] +spelling_ignore_importable_modules = True +spelling_ignore_contributor_names = True diff --git a/task-sdk/docs/examples.rst b/task-sdk/docs/examples.rst new file mode 100644 index 00000000000..172ae425260 --- /dev/null +++ b/task-sdk/docs/examples.rst @@ -0,0 +1,86 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Examples +======== + +Basic Examples +-------------- + +Define a basic DAG and task in just a few lines of Python: + +.. exampleinclude:: ../../airflow-core/src/airflow/example_dags/example_simplest_dag.py + :language: python + :start-after: [START simplest_dag] + :end-before: [END simplest_dag] + :caption: Simplest DAG with :func:`@dag <airflow.sdk.dag>` and :func:`@task <airflow.sdk.task>` + +Key Concepts +------------ +Defining DAGs +~~~~~~~~~~~~~ + +.. exampleinclude:: ../../airflow-core/src/airflow/example_dags/example_dag_decorator.py + :language: python + :start-after: [START dag_decorator_usage] + :end-before: [END dag_decorator_usage] + :caption: Using the :func:`@dag <airflow.sdk.dag>` decorator with custom tasks and operators. + +Decorators +~~~~~~~~~~ + +.. exampleinclude:: ../../airflow-core/src/airflow/example_dags/example_task_group_decorator.py + :language: python + :start-after: [START howto_task_group_decorator] + :end-before: [END howto_task_group_decorator] + :caption: Group tasks using the :func:`@task_group <airflow.sdk.task_group>` decorator. + +.. exampleinclude:: ../../airflow-core/src/airflow/example_dags/example_setup_teardown_taskflow.py + :language: python + :start-after: [START example_setup_teardown_taskflow] + :end-before: [END example_setup_teardown_taskflow] + :caption: Define setup and teardown tasks with :func:`@setup <airflow.sdk.setup>` and :func:`@teardown <airflow.sdk.teardown>`. + +Tasks and Operators +~~~~~~~~~~~~~~~~~~~ + +.. exampleinclude:: ../../airflow-core/src/airflow/example_dags/example_dynamic_task_mapping.py + :language: python + :start-after: [START example_dynamic_task_mapping] + :end-before: [END example_dynamic_task_mapping] + :caption: Dynamic task mapping with ``expand()`` + +.. exampleinclude:: ../../airflow-core/src/airflow/example_dags/example_xcomargs.py + :language: python + :start-after: [START example_xcomargs] + :end-before: [END example_xcomargs] + :caption: Using ``XComArg`` to chain tasks based on return values. + +Assets +~~~~~~ + +.. exampleinclude:: ../../airflow-core/src/airflow/example_dags/example_assets.py + :language: python + :start-after: [START asset_def] + :end-before: [END asset_def] + :caption: Defining an :func:`@asset <airflow.sdk.asset>` + +.. exampleinclude:: ../../airflow-core/src/airflow/example_dags/example_asset_alias.py + :language: python + :start-after: [START example_asset_alias] + :end-before: [END example_asset_alias] + :caption: Defining asset aliases with :class:`AssetAlias <airflow.sdk.AssetAlias>`. diff --git a/task-sdk/docs/index.rst b/task-sdk/docs/index.rst new file mode 100644 index 00000000000..695f002d355 --- /dev/null +++ b/task-sdk/docs/index.rst @@ -0,0 +1,92 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Apache Airflow Task SDK +================================= + +:any:`DAG` is where to start. :any:`dag` + +The Apache Airflow Task SDK provides python-native interfaces for defining DAGs, +executing tasks in isolated subprocesses and interacting with Airflow resources +(e.g., Connections, Variables, XComs, Metrics, Logs, and OpenLineage events) at runtime. +It also includes core execution-time components to manage communication between the worker +and the Airflow scheduler/backend. + +This approach reduces boilerplate and keeps your DAG definitions concise and readable. + + +Installation +------------ +To install the Task SDK, run: + +.. code-block:: bash + + pip install apache-airflow-task-sdk + +Getting Started +--------------- +Define a basic DAG and task in just a few lines of Python: + +.. exampleinclude:: ../../airflow-core/src/airflow/example_dags/example_simplest_dag.py + :language: python + :start-after: [START simplest_dag] + :end-before: [END simplest_dag] + :caption: Simplest DAG with :func:`@dag <airflow.sdk.dag>` and :func:`@task <airflow.sdk.task>` + +Examples +-------- + +For more examples DAGs and patterns, see the :doc:`examples` page. + +Key Concepts +------------ +Defining DAGs +~~~~~~~~~~~~~ +Use ``@dag`` to convert a function into an Airflow DAG. All nested ``@task`` calls +become part of the workflow. + +Decorators +~~~~~~~~~~ +Simplify task definitions using decorators: + +- :func:`@task <airflow.sdk.task>` : define tasks. +- :func:`@task_group <airflow.sdk.task_group>`: group related tasks into logical units. +- :func:`@setup <airflow.sdk.setup>` and :func:`@teardown <airflow.sdk.teardown>`: define setup and teardown tasks for DAGs and TaskGroups. + +Tasks and Operators +~~~~~~~~~~~~~~~~~~~ +Wrap Python callables with :func:`@task <airflow.sdk.task>` to create tasks, leverage dynamic task mapping with +``.expand()``, and pass data via ``XComArg``. You can also create traditional Operators +(e.g., sensors) via classes imported from the SDK: + + - **BaseOperator**, **Sensor**, **OperatorLink**, **Notifier**, **XComArg**, etc. + (see the **api reference** section for details) + +Assets +~~~~~~ +Model data as assets and emit them to downstream tasks with the SDK's asset library under +``airflow.sdk.definitions.asset``. You can use: + +- :func:`@asset <airflow.sdk.asset>`, :class:`~airflow.sdk.AssetAlias`, etc. (see the **api reference** section below) + +Refer to :doc:`api` for the complete reference of all decorators and classes. + +.. 
toctree:: + :hidden: + + examples + api diff --git a/task-sdk/pyproject.toml b/task-sdk/pyproject.toml index beb1cdce950..ea7fd02111f 100644 --- a/task-sdk/pyproject.toml +++ b/task-sdk/pyproject.toml @@ -143,6 +143,9 @@ dev = [ "apache-airflow-providers-standard", "apache-airflow-devel-common", ] +docs = [ + "apache-airflow-devel-common[docs]", +] [tool.uv.sources] # These names must match the names as defined in the pyproject.toml of the workspace items, # *not* the workspace folder paths diff --git a/task-sdk/src/airflow/sdk/__init__.pyi b/task-sdk/src/airflow/sdk/__init__.pyi new file mode 100644 index 00000000000..985e616af1c --- /dev/null +++ b/task-sdk/src/airflow/sdk/__init__.pyi @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from airflow.sdk.bases.notifier import BaseNotifier as BaseNotifier +from airflow.sdk.bases.operator import ( + BaseOperator as BaseOperator, + chain as chain, + chain_linear as chain_linear, + cross_downstream as cross_downstream, +) +from airflow.sdk.bases.operatorlink import BaseOperatorLink as BaseOperatorLink +from airflow.sdk.bases.sensor import ( + BaseSensorOperator as BaseSensorOperator, + PokeReturnValue as PokeReturnValue, +) +from airflow.sdk.definitions.asset import ( + Asset as Asset, + AssetAlias as AssetAlias, + AssetAll as AssetAll, + AssetAny as AssetAny, + AssetWatcher as AssetWatcher, +) +from airflow.sdk.definitions.asset.decorators import asset as asset +from airflow.sdk.definitions.asset.metadata import Metadata as Metadata +from airflow.sdk.definitions.connection import Connection as Connection +from airflow.sdk.definitions.context import ( + Context as Context, + get_current_context as get_current_context, + get_parsing_context as get_parsing_context, +) +from airflow.sdk.definitions.dag import DAG as DAG, dag as dag +from airflow.sdk.definitions.decorators import setup as setup, task as task, teardown as teardown +from airflow.sdk.definitions.decorators.task_group import task_group as task_group +from airflow.sdk.definitions.edges import EdgeModifier as EdgeModifier, Label as Label +from airflow.sdk.definitions.param import Param as Param +from airflow.sdk.definitions.taskgroup import TaskGroup as TaskGroup +from airflow.sdk.definitions.template import literal as literal +from airflow.sdk.definitions.variable import Variable as Variable +from airflow.sdk.definitions.xcom_arg import XComArg as XComArg +from airflow.sdk.execution_time.cache import SecretCache as SecretCache +from airflow.sdk.io.path import ObjectStoragePath as ObjectStoragePath + +__all__ = [ + "__version__", + "Asset", + "AssetAlias", + "AssetAll", + "AssetAny", + "AssetWatcher", + "BaseNotifier", + "BaseOperator", + "BaseOperatorLink", + "BaseSensorOperator", + "Connection", + "Context", + "DAG", + "EdgeModifier", 
+ "Label", + "Metadata", + "ObjectStoragePath", + "Param", + "PokeReturnValue", + "SecretCache", + "TaskGroup", + "Variable", + "XComArg", + "asset", + "chain", + "chain_linear", + "cross_downstream", + "dag", + "get_current_context", + "get_parsing_context", + "literal", + "setup", + "task", + "task_group", + "teardown", +] + +__version__: str diff --git a/task-sdk/src/airflow/sdk/definitions/context.py b/task-sdk/src/airflow/sdk/definitions/context.py index 6580b8bcf5e..082ad36202e 100644 --- a/task-sdk/src/airflow/sdk/definitions/context.py +++ b/task-sdk/src/airflow/sdk/definitions/context.py @@ -118,6 +118,8 @@ class AirflowParsingContext(NamedTuple): If these values are not None, they will contain the specific DAG and Task ID that Airflow is requesting to execute. You can use these for optimizing dynamically generated DAG files. + + You can obtain the current values via :py:func:`.get_parsing_context`. """ dag_id: str | None diff --git a/task-sdk/src/airflow/sdk/definitions/dag.py b/task-sdk/src/airflow/sdk/definitions/dag.py index 877d56e3308..ac331220941 100644 --- a/task-sdk/src/airflow/sdk/definitions/dag.py +++ b/task-sdk/src/airflow/sdk/definitions/dag.py @@ -278,7 +278,7 @@ class DAG: :param schedule: If provided, this defines the rules according to which DAG runs are scheduled. Possible values include a cron expression string, timedelta object, Timetable, or list of Asset objects. - See also :doc:`/howto/timetable`. + See also :external:doc:`howto/timetable`. :param start_date: The timestamp from which the scheduler will attempt to backfill. If this is not provided, backfilling must be done manually with an explicit time range. @@ -352,7 +352,7 @@ class DAG: :param tags: List of tags to help filtering DAGs in the UI. :param owner_links: Dict of owners and their links, that will be clickable on the DAGs view UI. Can be used as an HTTP link (for example the link to your Slack channel), or a mailto link. - e.g: {"dag_owner": "https://airflow.apache.org/"} + e.g: ``{"dag_owner": "https://airflow.apache.org/"}`` :param auto_register: Automatically register this DAG when it is used in a ``with`` block :param fail_fast: Fails currently running tasks when task in DAG fails. **Warning**: A fail stop dag can only have tasks with the default trigger rule ("all_success"). diff --git a/task-sdk/src/airflow/sdk/io/path.py b/task-sdk/src/airflow/sdk/io/path.py index 5a3517f6527..5b66e09d110 100644 --- a/task-sdk/src/airflow/sdk/io/path.py +++ b/task-sdk/src/airflow/sdk/io/path.py @@ -237,14 +237,19 @@ class ObjectStoragePath(CloudPath): Examples -------- - >>> read_block(0, 13) - b'Alice, 100\\nBo' - >>> read_block(0, 13, delimiter=b"\\n") - b'Alice, 100\\nBob, 200\\n' - - Use ``length=None`` to read to the end of the file. - >>> read_block(0, None, delimiter=b"\\n") - b'Alice, 100\\nBob, 200\\nCharlie, 300' + .. 
code-block:: pycon + + # Read the first 13 bytes (no delimiter) + >>> read_block(0, 13) + b'Alice, 100\nBo' + + # Read first 13 bytes, but force newline boundaries + >>> read_block(0, 13, delimiter=b"\n") + b'Alice, 100\nBob, 200\n' + + # Read until EOF, but only stop at newline + >>> read_block(0, None, delimiter=b"\n") + b'Alice, 100\nBob, 200\nCharlie, 300' See Also -------- diff --git a/task-sdk/tests/test_docs_inventory.py b/task-sdk/tests/test_docs_inventory.py new file mode 100644 index 00000000000..a1308031d52 --- /dev/null +++ b/task-sdk/tests/test_docs_inventory.py @@ -0,0 +1,82 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import sys +from pathlib import Path + +# Add the SDK src directory to sys.path so that importlib loads our airflow.sdk module +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) +import importlib +import shutil +import subprocess +import zlib +from pathlib import Path + +import pytest + + +def read_inventory(inv_path: Path): + """ + Read a Sphinx objects.inv inventory file and return a mapping of documented full names to their entries. + """ + inv: dict[str, tuple[str, str, str]] = {} + with inv_path.open("rb") as f: + f.readline() + f.readline() + f.readline() + f.readline() + data = zlib.decompress(f.read()).decode("utf-8").splitlines() + for line in data: + if not line.strip(): + continue + parts = line.split(None, 4) + if len(parts) != 5: + continue + name, domain_role, prio, location, dispname = parts + inv[name] = (domain_role, location, dispname) + return inv + + +@pytest.mark.skipif( + shutil.which("sphinx-build") is None, reason="sphinx-build not available, skipping docs inventory test" +) +def test_docs_inventory_matches_public_api(tmp_path): + """ + Build the HTML docs and compare the generated Sphinx inventory with the public API re-exports. 
+ """ + docs_dir = Path(__file__).parent.parent / "docs" + build_dir = tmp_path / "build" + sphinx = shutil.which("sphinx-build") + subprocess.run([sphinx, "-b", "html", "-q", str(docs_dir), str(build_dir)], check=True) + inv_path = build_dir / "objects.inv" + assert inv_path.exists(), "objects.inv not found after docs build" + + inv = read_inventory(inv_path) + documented = { + name.rsplit(".", 1)[-1] + for name in inv.keys() + if name.startswith("airflow.sdk.") and name.count(".") == 2 + } + sdk = importlib.import_module("airflow.sdk") + public = set(getattr(sdk, "__all__", [])) - {"__version__"} + + extras = {"AirflowParsingContext"} + missing = public - documented + assert not missing, f"Public API items missing in docs: {missing}" + unexpected = (documented - public) - extras + assert not unexpected, f"Unexpected documented items: {unexpected}" diff --git a/task-sdk/tests/test_public_api.py b/task-sdk/tests/test_public_api.py new file mode 100644 index 00000000000..dd2bf54980c --- /dev/null +++ b/task-sdk/tests/test_public_api.py @@ -0,0 +1,74 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import importlib + + +def test_airflow_sdk_exports_exist(): + """ + Ensure that all names declared in airflow.sdk.__all__ are present as attributes on the module. + """ + sdk = importlib.import_module("airflow.sdk") + # Provide literal attribute for testing since it's declared in __all__ + template_mod = importlib.import_module("airflow.sdk.definitions.template") + setattr(sdk, "literal", getattr(template_mod, "literal")) + public_names = getattr(sdk, "__all__", []) + missing = [name for name in public_names if not hasattr(sdk, name)] + assert not missing, f"Missing exports in airflow.sdk: {missing}" + + +def test_airflow_sdk_no_unexpected_exports(): + """ + Ensure that no unexpected public attributes are present in airflow.sdk besides those in __all__. + """ + sdk = importlib.import_module("airflow.sdk") + public = set(getattr(sdk, "__all__", [])) + actual = {name for name in dir(sdk) if not name.startswith("_")} + ignore = { + "__getattr__", + "__lazy_imports", + "SecretCache", + "TYPE_CHECKING", + "annotations", + "api", + "bases", + "definitions", + "execution_time", + "io", + "log", + "exceptions", + } + unexpected = actual - public - ignore + assert not unexpected, f"Unexpected exports in airflow.sdk: {sorted(unexpected)}" + + +def test_lazy_imports_match_public_api(): + """ + Ensure that the dynamic lazy-imports mapping matches the public names in __all__, + except for the version string. 
+ """ + import airflow.sdk as sdk + + lazy = getattr(sdk, "__lazy_imports", {}) + expected = set(getattr(sdk, "__all__", [])) - {"__version__", "literal"} + ignore = {"SecretCache"} + actual = set(lazy.keys()) + missing = expected - actual + extra = actual - expected - ignore + assert not missing, f"__lazy_imports missing entries for: {sorted(missing)}" + assert not extra, f"__lazy_imports has unexpected entries: {sorted(extra)}"
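For context on what `test_lazy_imports_match_public_api` exercises: `airflow/sdk/__init__.py` (not shown in this diff) exposes its public names lazily through a module-level `__getattr__` driven by a `__lazy_imports` mapping. A minimal sketch of that PEP 562 pattern, with purely illustrative entries rather than the real mapping, looks like this:

```python
# Minimal sketch of a lazy-import __init__.py (hypothetical entries, not the
# actual airflow.sdk implementation): names resolve on first attribute access.
from importlib import import_module

__all__ = ["DAG", "task"]

__lazy_imports: dict[str, str] = {
    # public name -> module that really defines it
    "DAG": "airflow.sdk.definitions.dag",
    "task": "airflow.sdk.definitions.decorators",
}


def __getattr__(name: str):
    if module_path := __lazy_imports.get(name):
        value = getattr(import_module(module_path), name)
        globals()[name] = value  # cache so later lookups bypass __getattr__
        return value
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```

The tests above then only need to assert that the keys of such a mapping stay in sync with `__all__`, modulo the documented exceptions (`__version__`, `literal`, `SecretCache`), which is what the final assertions check.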