This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch v2-2-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit 7d37b0e3ed9a62480e56a21b137b3b78e5fcc259
Author: Josh Fell <[email protected]>
AuthorDate: Thu Nov 25 06:47:23 2021 -0500

    Clean up ``default_args`` usage in docs (#19803)
    
    This PR aligns `default_args` usage within docs to updates that have been
    made to example DAGs across the board. The main types of updates include:
    - Removing `start_date` from being declared in `default_args`.
    - Removing the pattern of declaring `default_args` separately from the
      `DAG()` object.
    - Updating `default_args` values to more relevant examples.
    - Replacing `DummyOperator` with another operator to make some other
      `default_args` updates relevant and applicable.
    
    (cherry picked from commit 744d11bdb2acd52794a959572695943df8729a37)
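    
    As a quick illustration of the pattern these updates converge on, here is a
    minimal sketch (the DAG id, dates, and operator below are illustrative, not
    taken verbatim from this diff):
    
        from datetime import datetime, timedelta
    
        from airflow import DAG
        from airflow.operators.bash import BashOperator
    
        with DAG(
            'pattern_example',                # hypothetical DAG id
            # start_date is declared on the DAG itself, not in default_args
            start_date=datetime(2021, 1, 1),
            schedule_interval='@daily',
            catchup=False,
            # default_args is passed inline to DAG() rather than declared
            # separately; values apply to every task unless a task overrides them
            default_args={
                'retries': 2,
                'retry_delay': timedelta(minutes=5),
            },
        ) as dag:
            # tasks inherit retries/retry_delay, and can still override per task
            hello = BashOperator(task_id='hello', bash_command='echo hello', retries=3)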
---
 airflow/example_dags/example_subdag_operator.py    | 14 +++---
 airflow/example_dags/tutorial.py                   | 51 ++++++++++------------
 airflow/example_dags/tutorial_etl_dag.py           | 14 +++---
 .../google/cloud/example_dags/example_functions.py |  2 +-
 docs/apache-airflow/best-practices.rst             |  2 +-
 docs/apache-airflow/concepts/dags.rst              | 39 +++++++++++------
 docs/apache-airflow/dag-run.rst                    | 16 +++----
 docs/apache-airflow/faq.rst                        |  3 +-
 docs/apache-airflow/lineage.rst                    |  4 +-
 docs/apache-airflow/timezone.rst                   | 14 +++---
 docs/apache-airflow/tutorial.rst                   |  1 +
 11 files changed, 78 insertions(+), 82 deletions(-)

diff --git a/airflow/example_dags/example_subdag_operator.py b/airflow/example_dags/example_subdag_operator.py
index f27aec7..424dc7f 100644
--- a/airflow/example_dags/example_subdag_operator.py
+++ b/airflow/example_dags/example_subdag_operator.py
@@ -27,12 +27,12 @@ from airflow.utils.dates import days_ago
 
 DAG_NAME = 'example_subdag_operator'
 
-args = {
-    'owner': 'airflow',
-}
-
 with DAG(
-    dag_id=DAG_NAME, default_args=args, start_date=days_ago(2), schedule_interval="@once", tags=['example']
+    dag_id=DAG_NAME,
+    default_args={"retries": 2},
+    start_date=days_ago(2),
+    schedule_interval="@once",
+    tags=['example'],
 ) as dag:
 
     start = DummyOperator(
@@ -41,7 +41,7 @@ with DAG(
 
     section_1 = SubDagOperator(
         task_id='section-1',
-        subdag=subdag(DAG_NAME, 'section-1', args),
+        subdag=subdag(DAG_NAME, 'section-1', dag.default_args),
     )
 
     some_other_task = DummyOperator(
@@ -50,7 +50,7 @@ with DAG(
 
     section_2 = SubDagOperator(
         task_id='section-2',
-        subdag=subdag(DAG_NAME, 'section-2', args),
+        subdag=subdag(DAG_NAME, 'section-2', dag.default_args),
     )
 
     end = DummyOperator(
diff --git a/airflow/example_dags/tutorial.py b/airflow/example_dags/tutorial.py
index 38d4cbe..1049772 100644
--- a/airflow/example_dags/tutorial.py
+++ b/airflow/example_dags/tutorial.py
@@ -34,37 +34,34 @@ from airflow.operators.bash import BashOperator
 
 # [END import_module]
 
-# [START default_args]
-# These args will get passed on to each operator
-# You can override them on a per-task basis during operator initialization
-default_args = {
-    'owner': 'airflow',
-    'depends_on_past': False,
-    'email': ['[email protected]'],
-    'email_on_failure': False,
-    'email_on_retry': False,
-    'retries': 1,
-    'retry_delay': timedelta(minutes=5),
-    # 'queue': 'bash_queue',
-    # 'pool': 'backfill',
-    # 'priority_weight': 10,
-    # 'end_date': datetime(2016, 1, 1),
-    # 'wait_for_downstream': False,
-    # 'dag': dag,
-    # 'sla': timedelta(hours=2),
-    # 'execution_timeout': timedelta(seconds=300),
-    # 'on_failure_callback': some_function,
-    # 'on_success_callback': some_other_function,
-    # 'on_retry_callback': another_function,
-    # 'sla_miss_callback': yet_another_function,
-    # 'trigger_rule': 'all_success'
-}
-# [END default_args]
 
 # [START instantiate_dag]
 with DAG(
     'tutorial',
-    default_args=default_args,
+    # [START default_args]
+    # These args will get passed on to each operator
+    # You can override them on a per-task basis during operator initialization
+    default_args={
+        'depends_on_past': False,
+        'email': ['[email protected]'],
+        'email_on_failure': False,
+        'email_on_retry': False,
+        'retries': 1,
+        'retry_delay': timedelta(minutes=5),
+        # 'queue': 'bash_queue',
+        # 'pool': 'backfill',
+        # 'priority_weight': 10,
+        # 'end_date': datetime(2016, 1, 1),
+        # 'wait_for_downstream': False,
+        # 'sla': timedelta(hours=2),
+        # 'execution_timeout': timedelta(seconds=300),
+        # 'on_failure_callback': some_function,
+        # 'on_success_callback': some_other_function,
+        # 'on_retry_callback': another_function,
+        # 'sla_miss_callback': yet_another_function,
+        # 'trigger_rule': 'all_success'
+    },
+    # [END default_args]
     description='A simple tutorial DAG',
     schedule_interval=timedelta(days=1),
     start_date=datetime(2021, 1, 1),
diff --git a/airflow/example_dags/tutorial_etl_dag.py b/airflow/example_dags/tutorial_etl_dag.py
index d284452..8dd0ea4 100644
--- a/airflow/example_dags/tutorial_etl_dag.py
+++ b/airflow/example_dags/tutorial_etl_dag.py
@@ -37,18 +37,14 @@ from airflow.operators.python import PythonOperator
 
 # [END import_module]
 
-# [START default_args]
-# These args will get passed on to each operator
-# You can override them on a per-task basis during operator initialization
-default_args = {
-    'owner': 'airflow',
-}
-# [END default_args]
-
 # [START instantiate_dag]
 with DAG(
     'tutorial_etl_dag',
-    default_args=default_args,
+    # [START default_args]
+    # These args will get passed on to each operator
+    # You can override them on a per-task basis during operator initialization
+    default_args={'retries': 2},
+    # [END default_args]
     description='ETL DAG tutorial',
     schedule_interval=None,
     start_date=datetime(2021, 1, 1),
diff --git a/airflow/providers/google/cloud/example_dags/example_functions.py b/airflow/providers/google/cloud/example_dags/example_functions.py
index 03749ba..b32d718 100644
--- a/airflow/providers/google/cloud/example_dags/example_functions.py
+++ b/airflow/providers/google/cloud/example_dags/example_functions.py
@@ -75,7 +75,7 @@ body = {"name": FUNCTION_NAME, "entryPoint": GCF_ENTRYPOINT, "runtime": GCF_RUNT
 # [END howto_operator_gcf_deploy_body]
 
 # [START howto_operator_gcf_default_args]
-default_args = {'owner': 'airflow'}
+default_args = {'retries': 3}
 # [END howto_operator_gcf_default_args]
 
 # [START howto_operator_gcf_deploy_variants]
diff --git a/docs/apache-airflow/best-practices.rst b/docs/apache-airflow/best-practices.rst
index 5ebed3b..951e6b4 100644
--- a/docs/apache-airflow/best-practices.rst
+++ b/docs/apache-airflow/best-practices.rst
@@ -504,7 +504,7 @@ This is an example test want to verify the structure of a code-generated DAG aga
         with DAG(
             dag_id=TEST_DAG_ID,
             schedule_interval="@daily",
-            default_args={"start_date": DATA_INTERVAL_START},
+            start_date=DATA_INTERVAL_START,
         ) as dag:
             MyCustomOperator(
                 task_id=TEST_TASK_ID,
diff --git a/docs/apache-airflow/concepts/dags.rst b/docs/apache-airflow/concepts/dags.rst
index 563264e..8aa4955 100644
--- a/docs/apache-airflow/concepts/dags.rst
+++ b/docs/apache-airflow/concepts/dags.rst
@@ -195,16 +195,19 @@ Otherwise, you must pass it into each Operator with ``dag=``.
 Default Arguments
 -----------------
 
-Often, many Operators inside a DAG need the same set of default arguments (such as their ``start_date``). Rather than having to specify this individually for every Operator, you can instead pass ``default_args`` to the DAG when you create it, and it will auto-apply them to any operator tied to it::
+Often, many Operators inside a DAG need the same set of default arguments (such as their ``retries``). Rather than having to specify this individually for every Operator, you can instead pass ``default_args`` to the DAG when you create it, and it will auto-apply them to any operator tied to it::
 
-    default_args = {
-        'start_date': datetime(2016, 1, 1),
-        'owner': 'airflow'
-    }
 
-    with DAG('my_dag', default_args=default_args) as dag:
-        op = DummyOperator(task_id='dummy')
-        print(op.owner)  # "airflow"
+
+    with DAG(
+        dag_id='my_dag',
+        start_date=datetime(2016, 1, 1),
+        schedule_interval='@daily',
+        catchup=False,
+        default_args={'retries': 2},
+    ) as dag:
+        op = BashOperator(task_id='dummy', bash_command='echo Hello World!')
+        print(op.retries)  # 2
 
 
 .. _concepts:dag-decorator:
@@ -464,12 +467,18 @@ Dependency relationships can be applied across all tasks in a TaskGroup with the
 
 TaskGroup also supports ``default_args`` like DAG, it will overwrite the ``default_args`` in DAG level::
 
-    with DAG(dag_id='dag1', default_args={'start_date': datetime(2016, 1, 1), 'owner': 'dag'}):
-        with TaskGroup('group1', default_args={'owner': 'group'}):
+    with DAG(
+        dag_id='dag1',
+        start_date=datetime(2016, 1, 1),
+        schedule_interval="@daily",
+        catchup=False,
+        default_args={'retries': 1},
+    ):
+        with TaskGroup('group1', default_args={'retries': 3}):
             task1 = DummyOperator(task_id='task1')
-            task2 = DummyOperator(task_id='task2', owner='task2')
-            print(task1.owner) # "group"
-            print(task2.owner) # "task2"
+            task2 = BashOperator(task_id='task2', bash_command='echo Hello World!', retries=2)
+            print(task1.retries) # 3
+            print(task2.retries) # 2
 
 If you want to see a more advanced use of TaskGroup, you can look at the ``example_task_group.py`` example DAG that comes with Airflow.
 
@@ -539,7 +548,9 @@ This is especially useful if your tasks are built dynamically from configuration
     ### My great DAG
     """
 
-    dag = DAG("my_dag", default_args=default_args)
+    dag = DAG(
+        "my_dag", start_date=datetime(2021, 1, 1), schedule_interval="@daily", 
catchup=False
+    )
     dag.doc_md = __doc__
 
     t = BashOperator("foo", dag=dag)
diff --git a/docs/apache-airflow/dag-run.rst b/docs/apache-airflow/dag-run.rst
index 39bd9d2..90bb404 100644
--- a/docs/apache-airflow/dag-run.rst
+++ b/docs/apache-airflow/dag-run.rst
@@ -114,19 +114,13 @@ in the configuration file. When turned off, the scheduler creates a DAG run only
     from datetime import datetime, timedelta
 
 
-    default_args = {
-        "owner": "airflow",
-        "depends_on_past": False,
-        "email": ["[email protected]"],
-        "email_on_failure": False,
-        "email_on_retry": False,
-        "retries": 1,
-        "retry_delay": timedelta(minutes=5),
-    }
-
     dag = DAG(
         "tutorial",
-        default_args=default_args,
+        default_args={
+            "depends_on_past": True,
+            "retries": 1,
+            "retry_delay": timedelta(minutes=3),
+        },
         start_date=datetime(2015, 12, 1),
         description="A simple tutorial DAG",
         schedule_interval="@daily",
diff --git a/docs/apache-airflow/faq.rst b/docs/apache-airflow/faq.rst
index 599a1f6..857e685 100644
--- a/docs/apache-airflow/faq.rst
+++ b/docs/apache-airflow/faq.rst
@@ -173,7 +173,8 @@ What's the deal with ``start_date``?
 
 ``start_date`` is partly legacy from the pre-DagRun era, but it is still
 relevant in many ways. When creating a new DAG, you probably want to set
-a global ``start_date`` for your tasks using ``default_args``. The first
+a global ``start_date`` for your tasks. This can be done by declaring your
+``start_date`` directly in the ``DAG()`` object. The first
 DagRun to be created will be based on the ``min(start_date)`` for all your
 tasks. From that point on, the scheduler creates new DagRuns based on
 your ``schedule_interval`` and the corresponding task instances run as your
diff --git a/docs/apache-airflow/lineage.rst b/docs/apache-airflow/lineage.rst
index f0b79aa..9b8bb71 100644
--- a/docs/apache-airflow/lineage.rst
+++ b/docs/apache-airflow/lineage.rst
@@ -32,11 +32,11 @@ works.
 
     from datetime import datetime, timedelta
 
-    from airflow.operators.bash import BashOperator
-    from airflow.operators.dummy import DummyOperator
     from airflow.lineage import AUTO
     from airflow.lineage.entities import File
     from airflow.models import DAG
+    from airflow.operators.bash import BashOperator
+    from airflow.operators.dummy import DummyOperator
 
     FILE_CATEGORIES = ["CAT1", "CAT2", "CAT3"]
 
diff --git a/docs/apache-airflow/timezone.rst b/docs/apache-airflow/timezone.rst
index f11a750..32e5223 100644
--- a/docs/apache-airflow/timezone.rst
+++ b/docs/apache-airflow/timezone.rst
@@ -86,15 +86,13 @@ and ``end_dates`` in your DAG definitions. This is mostly in order to preserve b
 case a naive ``start_date`` or ``end_date`` is encountered the default time zone is applied. It is applied
 in such a way that it is assumed that the naive date time is already in the default time zone. In other
 words if you have a default time zone setting of ``Europe/Amsterdam`` and create a naive datetime ``start_date`` of
-``datetime(2017,1,1)`` it is assumed to be a ``start_date`` of Jan 1, 2017 Amsterdam time.
+``datetime(2017, 1, 1)`` it is assumed to be a ``start_date`` of Jan 1, 2017 Amsterdam time.
 
 .. code-block:: python
 
-    default_args = dict(start_date=datetime(2016, 1, 1), owner="airflow")
-
-    dag = DAG("my_dag", default_args=default_args)
-    op = DummyOperator(task_id="dummy", dag=dag)
-    print(op.owner)  # Airflow
+    dag = DAG("my_dag", start_date=datetime(2017, 1, 1), 
default_args={"retries": 3})
+    op = BashOperator(task_id="dummy", bash_command="Hello World!", dag=dag)
+    print(op.retries)  # 3
 
 Unfortunately, during DST transitions, some datetimes don’t exist or are ambiguous.
 In such situations, pendulum raises an exception. That’s why you should always create aware
@@ -134,9 +132,7 @@ using ``pendulum``.
 
     local_tz = pendulum.timezone("Europe/Amsterdam")
 
-    default_args = dict(start_date=datetime(2016, 1, 1, tzinfo=local_tz), owner="airflow")
-
-    dag = DAG("my_tz_dag", default_args=default_args)
+    dag = DAG("my_tz_dag", start_date=datetime(2016, 1, 1, tzinfo=local_tz))
     op = DummyOperator(task_id="dummy", dag=dag)
     print(dag.timezone)  # <Timezone [Europe/Amsterdam]>
 
diff --git a/docs/apache-airflow/tutorial.rst b/docs/apache-airflow/tutorial.rst
index 7e27d54..babb8d6 100644
--- a/docs/apache-airflow/tutorial.rst
+++ b/docs/apache-airflow/tutorial.rst
@@ -77,6 +77,7 @@ of default parameters that we can use when creating tasks.
 
 .. exampleinclude:: /../../airflow/example_dags/tutorial.py
     :language: python
+    :dedent: 4
     :start-after: [START default_args]
     :end-before: [END default_args]
 
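For reference, the TaskGroup override behavior documented in the
docs/apache-airflow/concepts/dags.rst hunk above resolves as in the following
sketch, assuming Airflow 2.2-era imports (the ids and values here are
illustrative, not part of the commit):

    from datetime import datetime

    from airflow import DAG
    from airflow.operators.dummy import DummyOperator
    from airflow.utils.task_group import TaskGroup

    with DAG(
        dag_id='precedence_demo',        # hypothetical id
        start_date=datetime(2016, 1, 1),
        schedule_interval='@daily',
        catchup=False,
        default_args={'retries': 1},     # DAG-level default
    ):
        with TaskGroup('group1', default_args={'retries': 3}):
            t1 = DummyOperator(task_id='t1')             # group default wins: retries == 3
            t2 = DummyOperator(task_id='t2', retries=2)  # explicit task kwarg wins: retries == 2
        t3 = DummyOperator(task_id='t3')                 # outside the group: retries == 1

In short, the precedence is: an explicit task argument, then the enclosing
TaskGroup's ``default_args``, then the DAG's ``default_args``.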
