This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch v2-2-test
in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 7d37b0e3ed9a62480e56a21b137b3b78e5fcc259
Author: Josh Fell <[email protected]>
AuthorDate: Thu Nov 25 06:47:23 2021 -0500

    Clean up ``default_args`` usage in docs (#19803)

    This PR aligns `default_args` usage in the docs with the updates that have
    been made to the example DAGs across the board. The main types of updates
    include:

    - Removing `start_date` from being declared in `default_args`.
    - Removing the pattern of declaring `default_args` separately from the `DAG()` object.
    - Updating `default_args` values to more relevant examples.
    - Replacing `DummyOperator` with another operator to make some other `default_args` updates relevant and applicable.

    (cherry picked from commit 744d11bdb2acd52794a959572695943df8729a37)
---
 airflow/example_dags/example_subdag_operator.py    | 14 +++---
 airflow/example_dags/tutorial.py                   | 51 ++++++++++------------
 airflow/example_dags/tutorial_etl_dag.py           | 14 +++---
 .../google/cloud/example_dags/example_functions.py |  2 +-
 docs/apache-airflow/best-practices.rst             |  2 +-
 docs/apache-airflow/concepts/dags.rst              | 39 +++++++++++------
 docs/apache-airflow/dag-run.rst                    | 16 +++----
 docs/apache-airflow/faq.rst                        |  3 +-
 docs/apache-airflow/lineage.rst                    |  4 +-
 docs/apache-airflow/timezone.rst                   | 14 +++---
 docs/apache-airflow/tutorial.rst                   |  1 +
 11 files changed, 78 insertions(+), 82 deletions(-)

diff --git a/airflow/example_dags/example_subdag_operator.py b/airflow/example_dags/example_subdag_operator.py
index f27aec7..424dc7f 100644
--- a/airflow/example_dags/example_subdag_operator.py
+++ b/airflow/example_dags/example_subdag_operator.py
@@ -27,12 +27,12 @@ from airflow.utils.dates import days_ago

 DAG_NAME = 'example_subdag_operator'

-args = {
-    'owner': 'airflow',
-}
-
 with DAG(
-    dag_id=DAG_NAME, default_args=args, start_date=days_ago(2), schedule_interval="@once", tags=['example']
+    dag_id=DAG_NAME,
+    default_args={"retries": 2},
+    start_date=days_ago(2),
+    schedule_interval="@once",
+    tags=['example'],
 ) as dag:

     start = DummyOperator(
@@ -41,7 +41,7 @@ with DAG(

     section_1 = SubDagOperator(
         task_id='section-1',
-        subdag=subdag(DAG_NAME, 'section-1', args),
+        subdag=subdag(DAG_NAME, 'section-1', dag.default_args),
     )

     some_other_task = DummyOperator(
@@ -50,7 +50,7 @@ with DAG(

     section_2 = SubDagOperator(
         task_id='section-2',
-        subdag=subdag(DAG_NAME, 'section-2', args),
+        subdag=subdag(DAG_NAME, 'section-2', dag.default_args),
     )

     end = DummyOperator(
diff --git a/airflow/example_dags/tutorial.py b/airflow/example_dags/tutorial.py
index 38d4cbe..1049772 100644
--- a/airflow/example_dags/tutorial.py
+++ b/airflow/example_dags/tutorial.py
@@ -34,37 +34,34 @@ from airflow.operators.bash import BashOperator

 # [END import_module]

-# [START default_args]
-# These args will get passed on to each operator
-# You can override them on a per-task basis during operator initialization
-default_args = {
-    'owner': 'airflow',
-    'depends_on_past': False,
-    'email': ['[email protected]'],
-    'email_on_failure': False,
-    'email_on_retry': False,
-    'retries': 1,
-    'retry_delay': timedelta(minutes=5),
-    # 'queue': 'bash_queue',
-    # 'pool': 'backfill',
-    # 'priority_weight': 10,
-    # 'end_date': datetime(2016, 1, 1),
-    # 'wait_for_downstream': False,
-    # 'dag': dag,
-    # 'sla': timedelta(hours=2),
-    # 'execution_timeout': timedelta(seconds=300),
-    # 'on_failure_callback': some_function,
-    # 'on_success_callback': some_other_function,
-    # 'on_retry_callback': another_function,
-    # 'sla_miss_callback': yet_another_function,
-    # 'trigger_rule': 'all_success'
-}
-# [END default_args]
 # [START instantiate_dag]
 with DAG(
     'tutorial',
-    default_args=default_args,
+    # [START default_args]
+    # These args will get passed on to each operator
+    # You can override them on a per-task basis during operator initialization
+    default_args={
+        'depends_on_past': False,
+        'email': ['[email protected]'],
+        'email_on_failure': False,
+        'email_on_retry': False,
+        'retries': 1,
+        'retry_delay': timedelta(minutes=5),
+        # 'queue': 'bash_queue',
+        # 'pool': 'backfill',
+        # 'priority_weight': 10,
+        # 'end_date': datetime(2016, 1, 1),
+        # 'wait_for_downstream': False,
+        # 'sla': timedelta(hours=2),
+        # 'execution_timeout': timedelta(seconds=300),
+        # 'on_failure_callback': some_function,
+        # 'on_success_callback': some_other_function,
+        # 'on_retry_callback': another_function,
+        # 'sla_miss_callback': yet_another_function,
+        # 'trigger_rule': 'all_success'
+    },
+    # [END default_args]
     description='A simple tutorial DAG',
     schedule_interval=timedelta(days=1),
     start_date=datetime(2021, 1, 1),
diff --git a/airflow/example_dags/tutorial_etl_dag.py b/airflow/example_dags/tutorial_etl_dag.py
index d284452..8dd0ea4 100644
--- a/airflow/example_dags/tutorial_etl_dag.py
+++ b/airflow/example_dags/tutorial_etl_dag.py
@@ -37,18 +37,14 @@ from airflow.operators.python import PythonOperator

 # [END import_module]

-# [START default_args]
-# These args will get passed on to each operator
-# You can override them on a per-task basis during operator initialization
-default_args = {
-    'owner': 'airflow',
-}
-# [END default_args]
-
 # [START instantiate_dag]
 with DAG(
     'tutorial_etl_dag',
-    default_args=default_args,
+    # [START default_args]
+    # These args will get passed on to each operator
+    # You can override them on a per-task basis during operator initialization
+    default_args={'retries': 2},
+    # [END default_args]
     description='ETL DAG tutorial',
     schedule_interval=None,
     start_date=datetime(2021, 1, 1),
diff --git a/airflow/providers/google/cloud/example_dags/example_functions.py b/airflow/providers/google/cloud/example_dags/example_functions.py
index 03749ba..b32d718 100644
--- a/airflow/providers/google/cloud/example_dags/example_functions.py
+++ b/airflow/providers/google/cloud/example_dags/example_functions.py
@@ -75,7 +75,7 @@ body = {"name": FUNCTION_NAME, "entryPoint": GCF_ENTRYPOINT, "runtime": GCF_RUNT
 # [END howto_operator_gcf_deploy_body]

 # [START howto_operator_gcf_default_args]
-default_args = {'owner': 'airflow'}
+default_args = {'retries': '3'}
 # [END howto_operator_gcf_default_args]

 # [START howto_operator_gcf_deploy_variants]
diff --git a/docs/apache-airflow/best-practices.rst b/docs/apache-airflow/best-practices.rst
index 5ebed3b..951e6b4 100644
--- a/docs/apache-airflow/best-practices.rst
+++ b/docs/apache-airflow/best-practices.rst
@@ -504,7 +504,7 @@ This is an example test want to verify the structure of a code-generated DAG aga
         with DAG(
             dag_id=TEST_DAG_ID,
             schedule_interval="@daily",
-            default_args={"start_date": DATA_INTERVAL_START},
+            start_date=DATA_INTERVAL_START,
         ) as dag:
             MyCustomOperator(
                 task_id=TEST_TASK_ID,
diff --git a/docs/apache-airflow/concepts/dags.rst b/docs/apache-airflow/concepts/dags.rst
index 563264e..8aa4955 100644
--- a/docs/apache-airflow/concepts/dags.rst
+++ b/docs/apache-airflow/concepts/dags.rst
@@ -195,16 +195,19 @@ Otherwise, you must pass it into each Operator with ``dag=``.
 Default Arguments
 -----------------

-Often, many Operators inside a DAG need the same set of default arguments (such as their ``start_date``). Rather than having to specify this individually for every Operator, you can instead pass ``default_args`` to the DAG when you create it, and it will auto-apply them to any operator tied to it::
+Often, many Operators inside a DAG need the same set of default arguments (such as their ``retries``). Rather than having to specify this individually for every Operator, you can instead pass ``default_args`` to the DAG when you create it, and it will auto-apply them to any operator tied to it::

-    default_args = {
-        'start_date': datetime(2016, 1, 1),
-        'owner': 'airflow'
-    }
-
-    with DAG('my_dag', default_args=default_args) as dag:
-        op = DummyOperator(task_id='dummy')
-        print(op.owner)  # "airflow"
+
+    with DAG(
+        dag_id='my_dag',
+        start_date=datetime(2016, 1, 1),
+        schedule_interval='@daily',
+        catchup=False,
+        default_args={'retries': 2},
+    ) as dag:
+        op = BashOperator(task_id='dummy', bash_command='Hello World!')
+        print(op.retries)  # 2

 .. _concepts:dag-decorator:
@@ -464,12 +467,18 @@ Dependency relationships can be applied across all tasks in a TaskGroup with the

 TaskGroup also supports ``default_args`` like DAG, it will overwrite the ``default_args`` in DAG level::

-    with DAG(dag_id='dag1', default_args={'start_date': datetime(2016, 1, 1), 'owner': 'dag'}):
-        with TaskGroup('group1', default_args={'owner': 'group'}):
+    with DAG(
+        dag_id='dag1',
+        start_date=datetime(2016, 1, 1),
+        schedule_interval="@daily",
+        catchup=False,
+        default_args={'retries': 1},
+    ):
+        with TaskGroup('group1', default_args={'retries': 3}):
             task1 = DummyOperator(task_id='task1')
-            task2 = DummyOperator(task_id='task2', owner='task2')
-            print(task1.owner)  # "group"
-            print(task2.owner)  # "task2"
+            task2 = BashOperator(task_id='task2', bash_command='echo Hello World!', retries=2)
+            print(task1.retries)  # 3
+            print(task2.retries)  # 2

 If you want to see a more advanced use of TaskGroup, you can look at the ``example_task_group.py`` example DAG that comes with Airflow.
@@ -539,7 +548,9 @@ This is especially useful if your tasks are built dynamically from configuration
     ### My great DAG
     """

-    dag = DAG("my_dag", default_args=default_args)
+    dag = DAG(
+        "my_dag", start_date=datetime(2021, 1, 1), schedule_interval="@daily", catchup=False
+    )
     dag.doc_md = __doc__

     t = BashOperator("foo", dag=dag)
diff --git a/docs/apache-airflow/dag-run.rst b/docs/apache-airflow/dag-run.rst
index 39bd9d2..90bb404 100644
--- a/docs/apache-airflow/dag-run.rst
+++ b/docs/apache-airflow/dag-run.rst
@@ -114,19 +114,13 @@ in the configuration file. When turned off, the scheduler creates a DAG run only

     from datetime import datetime, timedelta

-    default_args = {
-        "owner": "airflow",
-        "depends_on_past": False,
-        "email": ["[email protected]"],
-        "email_on_failure": False,
-        "email_on_retry": False,
-        "retries": 1,
-        "retry_delay": timedelta(minutes=5),
-    }
-
     dag = DAG(
         "tutorial",
-        default_args=default_args,
+        default_args={
+            "depends_on_past": True,
+            "retries": 1,
+            "retry_delay": timedelta(minutes=3),
+        },
         start_date=datetime(2015, 12, 1),
         description="A simple tutorial DAG",
         schedule_interval="@daily",
diff --git a/docs/apache-airflow/faq.rst b/docs/apache-airflow/faq.rst
index 599a1f6..857e685 100644
--- a/docs/apache-airflow/faq.rst
+++ b/docs/apache-airflow/faq.rst
@@ -173,7 +173,8 @@ What's the deal with ``start_date``?

 ``start_date`` is partly legacy from the pre-DagRun era, but it is still
 relevant in many ways. When creating a new DAG, you probably want to set
-a global ``start_date`` for your tasks using ``default_args``. The first
+a global ``start_date`` for your tasks. This can be done by declaring your
+``start_date`` directly in the ``DAG()`` object. The first
 DagRun to be created will be based on the ``min(start_date)`` for all your
 tasks. From that point on, the scheduler creates new DagRuns based on
 your ``schedule_interval`` and the corresponding task instances run as your
diff --git a/docs/apache-airflow/lineage.rst b/docs/apache-airflow/lineage.rst
index f0b79aa..9b8bb71 100644
--- a/docs/apache-airflow/lineage.rst
+++ b/docs/apache-airflow/lineage.rst
@@ -32,11 +32,11 @@ works.

     from datetime import datetime, timedelta

-    from airflow.operators.bash import BashOperator
-    from airflow.operators.dummy import DummyOperator
     from airflow.lineage import AUTO
     from airflow.lineage.entities import File
     from airflow.models import DAG
+    from airflow.operators.bash import BashOperator
+    from airflow.operators.dummy import DummyOperator

     FILE_CATEGORIES = ["CAT1", "CAT2", "CAT3"]
diff --git a/docs/apache-airflow/timezone.rst b/docs/apache-airflow/timezone.rst
index f11a750..32e5223 100644
--- a/docs/apache-airflow/timezone.rst
+++ b/docs/apache-airflow/timezone.rst
@@ -86,15 +86,13 @@ and ``end_dates`` in your DAG definitions. This is mostly in order to preserve b
 case a naive ``start_date`` or ``end_date`` is encountered the default time zone is applied. It is
 applied in such a way that it is assumed that the naive date time is already in the default time
 zone. In other words if you have a default time zone setting of ``Europe/Amsterdam`` and create a naive datetime ``start_date`` of
-``datetime(2017,1,1)`` it is assumed to be a ``start_date`` of Jan 1, 2017 Amsterdam time.
+``datetime(2017, 1, 1)`` it is assumed to be a ``start_date`` of Jan 1, 2017 Amsterdam time.

 .. code-block:: python

-    default_args = dict(start_date=datetime(2016, 1, 1), owner="airflow")
-
-    dag = DAG("my_dag", default_args=default_args)
-    op = DummyOperator(task_id="dummy", dag=dag)
-    print(op.owner)  # Airflow
+    dag = DAG("my_dag", start_date=datetime(2017, 1, 1), default_args={"retries": 3})
+    op = BashOperator(task_id="dummy", bash_command="Hello World!", dag=dag)
+    print(op.retries)  # 3

 Unfortunately, during DST transitions, some datetimes don’t exist or are ambiguous.
 In such situations, pendulum raises an exception. That’s why you should always create aware
@@ -134,9 +132,7 @@ using ``pendulum``.

     local_tz = pendulum.timezone("Europe/Amsterdam")

-    default_args = dict(start_date=datetime(2016, 1, 1, tzinfo=local_tz), owner="airflow")
-
-    dag = DAG("my_tz_dag", default_args=default_args)
+    dag = DAG("my_tz_dag", start_date=datetime(2016, 1, 1, tzinfo=local_tz))
     op = DummyOperator(task_id="dummy", dag=dag)

     print(dag.timezone)  # <Timezone [Europe/Amsterdam]>
diff --git a/docs/apache-airflow/tutorial.rst b/docs/apache-airflow/tutorial.rst
index 7e27d54..babb8d6 100644
--- a/docs/apache-airflow/tutorial.rst
+++ b/docs/apache-airflow/tutorial.rst
@@ -77,6 +77,7 @@ of default parameters that we can use when creating tasks.

 .. exampleinclude:: /../../airflow/example_dags/tutorial.py
     :language: python
+    :dedent: 4
     :start-after: [START default_args]
     :end-before: [END default_args]
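
Taken together, these changes converge on one pattern: ``start_date`` and other
scheduling settings are passed directly to ``DAG()``, while ``default_args`` is
declared inline and reserved for values that should fan out to every task, with
task-level arguments still taking precedence. A minimal runnable sketch of that
pattern (the DAG id and task ids below are made up for illustration and do not
appear in the diff):

    import datetime

    from airflow import DAG
    from airflow.operators.bash import BashOperator

    with DAG(
        dag_id="defaults_demo",  # hypothetical name, for illustration only
        start_date=datetime.datetime(2021, 1, 1),  # declared on the DAG, not in default_args
        schedule_interval="@daily",
        catchup=False,
        default_args={"retries": 2},  # inherited by every task in this DAG
    ) as dag:
        uses_default = BashOperator(task_id="uses_default", bash_command="echo hello")
        overridden = BashOperator(task_id="overridden", bash_command="echo hello", retries=5)

    print(uses_default.retries)  # 2 -- taken from the DAG's default_args
    print(overridden.retries)  # 5 -- the explicit task-level argument wins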

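The ``concepts/dags.rst`` hunk above also documents the precedence chain when a
``TaskGroup`` declares its own ``default_args``: an explicit operator argument
beats the group's ``default_args``, which in turn beats the DAG's. A sketch of
that behavior, again with hypothetical DAG and task ids:

    import datetime

    from airflow import DAG
    from airflow.operators.dummy import DummyOperator
    from airflow.utils.task_group import TaskGroup

    with DAG(
        dag_id="group_defaults_demo",  # hypothetical name, for illustration only
        start_date=datetime.datetime(2021, 1, 1),
        schedule_interval="@daily",
        catchup=False,
        default_args={"retries": 1},  # DAG-wide default
    ):
        with TaskGroup("group1", default_args={"retries": 3}):  # shadows the DAG-level value
            task1 = DummyOperator(task_id="task1")
            task2 = DummyOperator(task_id="task2", retries=2)  # explicit argument beats both

    print(task1.retries)  # 3 -- from the TaskGroup's default_args
    print(task2.retries)  # 2 -- from the operator call itself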