This is an automated email from the ASF dual-hosted git repository.
bolke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new f0ba2dced9 Move `duckdb` & `pandas` import in tutorial DAG into task
(#35964)
f0ba2dced9 is described below
commit f0ba2dced92c767367aaf0fa3147942b4a576f92
Author: Ephraim Anierobi <[email protected]>
AuthorDate: Thu Nov 30 07:46:35 2023 +0100
Move `duckdb` & `pandas` import in tutorial DAG into task (#35964)
This improves the code as per best practices and avoids an import
error if duckdb is not installed
---
airflow/example_dags/tutorial_objectstorage.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/airflow/example_dags/tutorial_objectstorage.py
b/airflow/example_dags/tutorial_objectstorage.py
index 47db595c24..11d817400d 100644
--- a/airflow/example_dags/tutorial_objectstorage.py
+++ b/airflow/example_dags/tutorial_objectstorage.py
@@ -47,7 +47,6 @@ base = ObjectStoragePath("s3://airflow-tutorial-data/",
conn_id="aws_default")
# [END create_object_storage_path]
-# [START instantiate_dag]
@dag(
schedule=None,
start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
@@ -62,9 +61,6 @@ def tutorial_objectstorage():
located
[here](https://airflow.apache.org/docs/apache-airflow/stable/tutorial/objectstorage.html)
"""
- # [END instantiate_dag]
- import duckdb
- import pandas as pd
# [START get_air_quality_data]
@task
@@ -74,6 +70,8 @@ def tutorial_objectstorage():
This task gets air quality data from the Finnish Meteorological
Institute's
open data API. The data is saved as parquet.
"""
+ import pandas as pd
+
execution_date = kwargs["logical_date"]
start_time = kwargs["data_interval_start"]
@@ -113,6 +111,8 @@ def tutorial_objectstorage():
#### Analyze
This task analyzes the air quality data, prints the results
"""
+ import duckdb
+
conn = duckdb.connect(database=":memory:")
conn.register_filesystem(path.fs)
conn.execute(f"CREATE OR REPLACE TABLE airquality_urban AS SELECT *
FROM read_parquet('{path}')")