This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new c6ca8a276d1 test: migrate `get_pandas_df` to `get_df` in `provider` test (#49339)
c6ca8a276d1 is described below
commit c6ca8a276d14c542420d0e990cef740443aedeb6
Author: Guan Ming(Wesley) Chiu <[email protected]>
AuthorDate: Mon Apr 28 19:03:06 2025 +0800
test: migrate `get_pandas_df` to `get_df` in `provider` test (#49339)
* test: migrate get_pandas_df to get_df in provider test
Co-authored-by: Elad Kalif <[email protected]>
Co-authored-by: Jarek Potiuk <[email protected]>
* chore: add polars in deps
* fix: install polars in ci
---------
Co-authored-by: Elad Kalif <[email protected]>
Co-authored-by: Jarek Potiuk <[email protected]>
Co-authored-by: Wesley Chiu <[email protected]>
---
airflow-core/docs/extra-packages-ref.rst | 2 ++
airflow-core/pyproject.toml | 4 ++--
providers/apache/drill/README.rst | 2 +-
providers/apache/drill/pyproject.toml | 4 ++--
.../tests/unit/apache/drill/hooks/test_drill.py | 20 ++++++++++++++++++--
providers/apache/druid/README.rst | 2 +-
providers/apache/druid/pyproject.toml | 3 ++-
.../tests/unit/apache/druid/hooks/test_druid.py | 18 ++++++++++++++++--
providers/apache/impala/README.rst | 2 +-
providers/apache/impala/pyproject.toml | 4 ++--
.../tests/unit/apache/impala/hooks/test_impala.py | 19 +++++++++++++++++--
providers/apache/pinot/README.rst | 2 +-
providers/apache/pinot/pyproject.toml | 4 ++--
.../tests/unit/apache/pinot/hooks/test_pinot.py | 17 +++++++++++++++--
providers/elasticsearch/README.rst | 2 +-
providers/elasticsearch/pyproject.toml | 4 ++--
.../unit/elasticsearch/hooks/test_elasticsearch.py | 4 ++--
providers/sqlite/README.rst | 2 +-
providers/sqlite/pyproject.toml | 4 ++--
.../sqlite/tests/unit/sqlite/hooks/test_sqlite.py | 20 ++++++++++++++++++--
providers/vertica/README.rst | 2 +-
providers/vertica/pyproject.toml | 4 ++--
.../vertica/tests/unit/vertica/hooks/test_vertica.py | 20 ++++++++++++++++++--
pyproject.toml | 12 ++++++------
.../in_container/install_airflow_and_providers.py | 10 +++++++++-
25 files changed, 144 insertions(+), 43 deletions(-)
diff --git a/airflow-core/docs/extra-packages-ref.rst
b/airflow-core/docs/extra-packages-ref.rst
index 31d15dd3249..50ddc38465a 100644
--- a/airflow-core/docs/extra-packages-ref.rst
+++ b/airflow-core/docs/extra-packages-ref.rst
@@ -106,6 +106,8 @@ python dependencies for the provided package.
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+
| pandas | ``pip install 'apache-airflow[pandas]'`` |
Install Pandas library compatible with Airflow |
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+
+| polars | ``pip install 'apache-airflow[polars]'`` |
Polars hooks and operators |
++---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+
| rabbitmq | ``pip install 'apache-airflow[rabbitmq]'`` |
RabbitMQ support as a Celery backend |
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+
| sentry | ``pip install 'apache-airflow[sentry]'`` |
Sentry service for application logging and monitoring |
diff --git a/airflow-core/pyproject.toml b/airflow-core/pyproject.toml
index 9cf394be4ab..ae31dad94c9 100644
--- a/airflow-core/pyproject.toml
+++ b/airflow-core/pyproject.toml
@@ -139,7 +139,7 @@ dependencies = [
# pre-installed providers
"apache-airflow-providers-common-compat>=1.6.0",
"apache-airflow-providers-common-io>=1.5.3",
- "apache-airflow-providers-common-sql>=1.25.0",
+ "apache-airflow-providers-common-sql>=1.26.0",
"apache-airflow-providers-smtp>=2.0.2",
"apache-airflow-providers-standard>=0.4.0",
]
@@ -238,7 +238,7 @@ dev = [
"apache-airflow-task-sdk",
# TODO(potiuk): eventually we do not want any providers nor apache-airflow
extras to be needed for
# airflow-core tests
- "apache-airflow[pandas]",
+ "apache-airflow[pandas,polars]",
"apache-airflow-providers-amazon",
"apache-airflow-providers-celery",
"apache-airflow-providers-cncf-kubernetes",
diff --git a/providers/apache/drill/README.rst
b/providers/apache/drill/README.rst
index 52c3a328046..0b4521fb374 100644
--- a/providers/apache/drill/README.rst
+++ b/providers/apache/drill/README.rst
@@ -54,7 +54,7 @@ Requirements
PIP package Version required
======================================= ===========================
``apache-airflow`` ``>=2.10.0``
-``apache-airflow-providers-common-sql`` ``>=1.20.0``
+``apache-airflow-providers-common-sql`` ``>=1.26.0``
``sqlalchemy-drill`` ``>=1.1.0,!=1.1.6,!=1.1.7``
======================================= ===========================
diff --git a/providers/apache/drill/pyproject.toml
b/providers/apache/drill/pyproject.toml
index ce6baba5827..97a81f5cf4c 100644
--- a/providers/apache/drill/pyproject.toml
+++ b/providers/apache/drill/pyproject.toml
@@ -58,7 +58,7 @@ requires-python = "~=3.9"
# After you modify the dependencies, and rebuild your Breeze CI image with
``breeze ci-image build``
dependencies = [
"apache-airflow>=2.10.0",
- "apache-airflow-providers-common-sql>=1.20.0",
+ "apache-airflow-providers-common-sql>=1.26.0",
# Workaround until we get
https://github.com/JohnOmernik/sqlalchemy-drill/issues/94 fixed.
"sqlalchemy-drill>=1.1.0,!=1.1.6,!=1.1.7",
]
@@ -70,7 +70,7 @@ dev = [
"apache-airflow-devel-common",
"apache-airflow-providers-common-sql",
# Additional devel dependencies (do not remove this line and add extra
development dependencies)
- "apache-airflow-providers-common-sql[pandas]",
+ "apache-airflow-providers-common-sql[pandas,polars]",
]
# To build docs:
diff --git a/providers/apache/drill/tests/unit/apache/drill/hooks/test_drill.py
b/providers/apache/drill/tests/unit/apache/drill/hooks/test_drill.py
index 0e7f9c69941..920be37a963 100644
--- a/providers/apache/drill/tests/unit/apache/drill/hooks/test_drill.py
+++ b/providers/apache/drill/tests/unit/apache/drill/hooks/test_drill.py
@@ -90,13 +90,13 @@ class TestDrillHook:
assert self.cur.close.call_count == 1
self.cur.execute.assert_called_once_with(statement)
- def test_get_pandas_df(self):
+ def test_get_df_pandas(self):
statement = "SQL"
column = "col"
result_sets = [("row1",), ("row2",)]
self.cur.description = [(column,)]
self.cur.fetchall.return_value = result_sets
- df = self.db_hook().get_pandas_df(statement)
+ df = self.db_hook().get_df(statement, df_type="pandas")
assert column == df.columns[0]
for i, item in enumerate(result_sets):
@@ -104,3 +104,19 @@ class TestDrillHook:
assert self.conn.close.call_count == 1
assert self.cur.close.call_count == 1
self.cur.execute.assert_called_once_with(statement)
+
+ def test_get_df_polars(self):
+ statement = "SQL"
+ column = "col"
+ result_sets = [("row1",), ("row2",)]
+ mock_execute = MagicMock()
+    mock_execute.description = [(column, None, None, None, None, None, None)]
+ mock_execute.fetchall.return_value = result_sets
+ self.cur.execute.return_value = mock_execute
+ df = self.db_hook().get_df(statement, df_type="polars")
+
+ self.cur.execute.assert_called_once_with(statement)
+ mock_execute.fetchall.assert_called_once_with()
+ assert column == df.columns[0]
+ assert result_sets[0][0] == df.row(0)[0]
+ assert result_sets[1][0] == df.row(1)[0]
diff --git a/providers/apache/druid/README.rst
b/providers/apache/druid/README.rst
index 697b24ee8fe..d78dee2a909 100644
--- a/providers/apache/druid/README.rst
+++ b/providers/apache/druid/README.rst
@@ -54,7 +54,7 @@ Requirements
PIP package Version required
======================================= ==================
``apache-airflow`` ``>=2.10.0``
-``apache-airflow-providers-common-sql`` ``>=1.20.0``
+``apache-airflow-providers-common-sql`` ``>=1.26.0``
``pydruid`` ``>=0.4.1``
======================================= ==================
diff --git a/providers/apache/druid/pyproject.toml
b/providers/apache/druid/pyproject.toml
index 00649ee86d0..57ae28e3e7a 100644
--- a/providers/apache/druid/pyproject.toml
+++ b/providers/apache/druid/pyproject.toml
@@ -58,7 +58,7 @@ requires-python = "~=3.9"
# After you modify the dependencies, and rebuild your Breeze CI image with
``breeze ci-image build``
dependencies = [
"apache-airflow>=2.10.0",
- "apache-airflow-providers-common-sql>=1.20.0",
+ "apache-airflow-providers-common-sql>=1.26.0",
"pydruid>=0.4.1",
]
@@ -77,6 +77,7 @@ dev = [
"apache-airflow-providers-apache-hive",
"apache-airflow-providers-common-sql",
# Additional devel dependencies (do not remove this line and add extra
development dependencies)
+ "apache-airflow-providers-common-sql[polars]",
]
# To build docs:
diff --git a/providers/apache/druid/tests/unit/apache/druid/hooks/test_druid.py
b/providers/apache/druid/tests/unit/apache/druid/hooks/test_druid.py
index b035aac3d76..b95a04d004d 100644
--- a/providers/apache/druid/tests/unit/apache/druid/hooks/test_druid.py
+++ b/providers/apache/druid/tests/unit/apache/druid/hooks/test_druid.py
@@ -452,13 +452,13 @@ class TestDruidDbApiHook:
assert self.cur.close.call_count == 1
self.cur.execute.assert_called_once_with(statement)
- def test_get_pandas_df(self):
+ def test_get_df_pandas(self):
statement = "SQL"
column = "col"
result_sets = [("row1",), ("row2",)]
self.cur.description = [(column,)]
self.cur.fetchall.return_value = result_sets
- df = self.db_hook().get_pandas_df(statement)
+ df = self.db_hook().get_df(statement, df_type="pandas")
assert column == df.columns[0]
for i, item in enumerate(result_sets):
@@ -466,3 +466,17 @@ class TestDruidDbApiHook:
assert self.conn.close.call_count == 1
assert self.cur.close.call_count == 1
self.cur.execute.assert_called_once_with(statement)
+
+ def test_get_df_polars(self):
+ statement = "SQL"
+ column = "col"
+ result_sets = [("row1",), ("row2",)]
+ mock_execute = MagicMock()
+    mock_execute.description = [(column, None, None, None, None, None, None)]
+ mock_execute.fetchall.return_value = result_sets
+ self.cur.execute.return_value = mock_execute
+
+ df = self.db_hook().get_df(statement, df_type="polars")
+ assert column == df.columns[0]
+ assert result_sets[0][0] == df.row(0)[0]
+ assert result_sets[1][0] == df.row(1)[0]
diff --git a/providers/apache/impala/README.rst
b/providers/apache/impala/README.rst
index f06b505cadf..29b32a4a294 100644
--- a/providers/apache/impala/README.rst
+++ b/providers/apache/impala/README.rst
@@ -54,7 +54,7 @@ Requirements
PIP package Version required
======================================= ==================
``impyla`` ``>=0.18.0,<1.0``
-``apache-airflow-providers-common-sql`` ``>=1.20.0``
+``apache-airflow-providers-common-sql`` ``>=1.26.0``
``apache-airflow`` ``>=2.10.0``
======================================= ==================
diff --git a/providers/apache/impala/pyproject.toml
b/providers/apache/impala/pyproject.toml
index ed4e3e4b21c..0d0cadf2083 100644
--- a/providers/apache/impala/pyproject.toml
+++ b/providers/apache/impala/pyproject.toml
@@ -58,7 +58,7 @@ requires-python = "~=3.9"
# After you modify the dependencies, and rebuild your Breeze CI image with
``breeze ci-image build``
dependencies = [
"impyla>=0.18.0,<1.0",
- "apache-airflow-providers-common-sql>=1.20.0",
+ "apache-airflow-providers-common-sql>=1.26.0",
"apache-airflow>=2.10.0",
]
@@ -77,7 +77,7 @@ dev = [
"apache-airflow-providers-common-sql",
# Additional devel dependencies (do not remove this line and add extra
development dependencies)
"kerberos>=1.3.0",
- "apache-airflow-providers-common-sql[pandas]"
+ "apache-airflow-providers-common-sql[pandas,polars]"
]
# To build docs:
diff --git
a/providers/apache/impala/tests/unit/apache/impala/hooks/test_impala.py
b/providers/apache/impala/tests/unit/apache/impala/hooks/test_impala.py
index 01a360c8771..3283e309234 100644
--- a/providers/apache/impala/tests/unit/apache/impala/hooks/test_impala.py
+++ b/providers/apache/impala/tests/unit/apache/impala/hooks/test_impala.py
@@ -107,13 +107,13 @@ def test_get_records(impala_hook_fixture):
impala_hook_fixture.get_conn.return_value.cursor.return_value.execute.assert_called_once_with(statement)
-def test_get_pandas_df(impala_hook_fixture):
+def test_get_df(impala_hook_fixture):
statement = "SQL"
column = "col"
result_sets = [("row1",), ("row2",)]
    impala_hook_fixture.get_conn.return_value.cursor.return_value.description = [(column,)]
    impala_hook_fixture.get_conn.return_value.cursor.return_value.fetchall.return_value = result_sets
- df = impala_hook_fixture.get_pandas_df(statement)
+ df = impala_hook_fixture.get_df(statement, df_type="pandas")
assert column == df.columns[0]
@@ -121,3 +121,18 @@ def test_get_pandas_df(impala_hook_fixture):
assert result_sets[1][0] == df.values.tolist()[1][0]
impala_hook_fixture.get_conn.return_value.cursor.return_value.execute.assert_called_once_with(statement)
+
+
+def test_get_df_polars(impala_hook_fixture):
+ statement = "SQL"
+ column = "col"
+ result_sets = [("row1",), ("row2",)]
+ mock_execute = MagicMock()
+ mock_execute.description = [(column, None, None, None, None, None, None)]
+ mock_execute.fetchall.return_value = result_sets
+    impala_hook_fixture.get_conn.return_value.cursor.return_value.execute.return_value = mock_execute
+
+ df = impala_hook_fixture.get_df(statement, df_type="polars")
+ assert column == df.columns[0]
+ assert result_sets[0][0] == df.row(0)[0]
+ assert result_sets[1][0] == df.row(1)[0]
diff --git a/providers/apache/pinot/README.rst
b/providers/apache/pinot/README.rst
index e696711ac0e..31d7d733142 100644
--- a/providers/apache/pinot/README.rst
+++ b/providers/apache/pinot/README.rst
@@ -54,7 +54,7 @@ Requirements
PIP package Version required
======================================= ==================
``apache-airflow`` ``>=2.10.0``
-``apache-airflow-providers-common-sql`` ``>=1.20.0``
+``apache-airflow-providers-common-sql`` ``>=1.26.0``
``pinotdb`` ``>=5.1.0``
======================================= ==================
diff --git a/providers/apache/pinot/pyproject.toml
b/providers/apache/pinot/pyproject.toml
index b7594282d98..1a794d53198 100644
--- a/providers/apache/pinot/pyproject.toml
+++ b/providers/apache/pinot/pyproject.toml
@@ -58,7 +58,7 @@ requires-python = "~=3.9"
# After you modify the dependencies, and rebuild your Breeze CI image with
``breeze ci-image build``
dependencies = [
"apache-airflow>=2.10.0",
- "apache-airflow-providers-common-sql>=1.20.0",
+ "apache-airflow-providers-common-sql>=1.26.0",
"pinotdb>=5.1.0",
]
@@ -69,7 +69,7 @@ dev = [
"apache-airflow-devel-common",
"apache-airflow-providers-common-sql",
# Additional devel dependencies (do not remove this line and add extra
development dependencies)
- "apache-airflow-providers-common-sql[pandas]"
+ "apache-airflow-providers-common-sql[pandas,polars]"
]
# To build docs:
diff --git a/providers/apache/pinot/tests/unit/apache/pinot/hooks/test_pinot.py
b/providers/apache/pinot/tests/unit/apache/pinot/hooks/test_pinot.py
index 8a433eace11..151379734da 100644
--- a/providers/apache/pinot/tests/unit/apache/pinot/hooks/test_pinot.py
+++ b/providers/apache/pinot/tests/unit/apache/pinot/hooks/test_pinot.py
@@ -266,17 +266,30 @@ class TestPinotDbApiHook:
self.cur.fetchone.return_value = result_sets[0]
assert result_sets[0] == self.db_hook().get_first(statement)
- def test_get_pandas_df(self):
+ def test_get_df_pandas(self):
statement = "SQL"
column = "col"
result_sets = [("row1",), ("row2",)]
self.cur.description = [(column,)]
self.cur.fetchall.return_value = result_sets
- df = self.db_hook().get_pandas_df(statement)
+ df = self.db_hook().get_df(statement, df_type="pandas")
assert column == df.columns[0]
for i, item in enumerate(result_sets):
assert item[0] == df.values.tolist()[i][0]
+ def test_get_df_polars(self):
+ statement = "SQL"
+ column = "col"
+ result_sets = [("row1",), ("row2",)]
+ mock_execute = mock.MagicMock()
+    mock_execute.description = [(column, None, None, None, None, None, None)]
+ mock_execute.fetchall.return_value = result_sets
+ self.cur.execute.return_value = mock_execute
+ df = self.db_hook().get_df(statement, df_type="polars")
+ assert column == df.columns[0]
+ assert result_sets[0][0] == df.row(0)[0]
+ assert result_sets[1][0] == df.row(1)[0]
+
class TestPinotAdminHookWithAuth:
def setup_method(self):
diff --git a/providers/elasticsearch/README.rst
b/providers/elasticsearch/README.rst
index cd150ca7388..2d82dd50831 100644
--- a/providers/elasticsearch/README.rst
+++ b/providers/elasticsearch/README.rst
@@ -54,7 +54,7 @@ Requirements
PIP package Version required
======================================= ==================
``apache-airflow`` ``>=2.10.0``
-``apache-airflow-providers-common-sql`` ``>=1.20.0``
+``apache-airflow-providers-common-sql`` ``>=1.26.0``
``elasticsearch`` ``>=8.10,<9``
======================================= ==================
diff --git a/providers/elasticsearch/pyproject.toml
b/providers/elasticsearch/pyproject.toml
index 7b2560831f8..e2ed7865bdf 100644
--- a/providers/elasticsearch/pyproject.toml
+++ b/providers/elasticsearch/pyproject.toml
@@ -58,7 +58,7 @@ requires-python = "~=3.9"
# After you modify the dependencies, and rebuild your Breeze CI image with
``breeze ci-image build``
dependencies = [
"apache-airflow>=2.10.0",
- "apache-airflow-providers-common-sql>=1.20.0",
+ "apache-airflow-providers-common-sql>=1.26.0",
"elasticsearch>=8.10,<9",
]
@@ -69,7 +69,7 @@ dev = [
"apache-airflow-devel-common",
"apache-airflow-providers-common-sql",
# Additional devel dependencies (do not remove this line and add extra
development dependencies)
- "apache-airflow-providers-common-sql[pandas]",
+ "apache-airflow-providers-common-sql[pandas,polars]",
]
# To build docs:
diff --git
a/providers/elasticsearch/tests/unit/elasticsearch/hooks/test_elasticsearch.py
b/providers/elasticsearch/tests/unit/elasticsearch/hooks/test_elasticsearch.py
index b8f8fe25bb6..9848f6ae8d7 100644
---
a/providers/elasticsearch/tests/unit/elasticsearch/hooks/test_elasticsearch.py
+++
b/providers/elasticsearch/tests/unit/elasticsearch/hooks/test_elasticsearch.py
@@ -166,9 +166,9 @@ class TestElasticsearchSQLHook:
self.spy_agency.assert_spy_called(self.cur.close)
self.spy_agency.assert_spy_called(self.cur.execute)
- def test_get_pandas_df(self):
+ def test_get_df_pandas(self):
statement = "SELECT * FROM hollywood.actors"
- df = self.db_hook.get_pandas_df(statement)
+ df = self.db_hook.get_df(statement, df_type="pandas")
assert list(df.columns) == ["index", "name", "firstname", "age"]
assert df.values.tolist() == ROWS
diff --git a/providers/sqlite/README.rst b/providers/sqlite/README.rst
index 4d21dc82f1d..3dbbd411ae9 100644
--- a/providers/sqlite/README.rst
+++ b/providers/sqlite/README.rst
@@ -54,7 +54,7 @@ Requirements
PIP package Version required
======================================= ==================
``apache-airflow`` ``>=2.10.0``
-``apache-airflow-providers-common-sql`` ``>=1.20.0``
+``apache-airflow-providers-common-sql`` ``>=1.26.0``
======================================= ==================
Cross provider package dependencies
diff --git a/providers/sqlite/pyproject.toml b/providers/sqlite/pyproject.toml
index 09c584d1d8a..00040211dd1 100644
--- a/providers/sqlite/pyproject.toml
+++ b/providers/sqlite/pyproject.toml
@@ -58,7 +58,7 @@ requires-python = "~=3.9"
# After you modify the dependencies, and rebuild your Breeze CI image with
``breeze ci-image build``
dependencies = [
"apache-airflow>=2.10.0",
- "apache-airflow-providers-common-sql>=1.20.0",
+ "apache-airflow-providers-common-sql>=1.26.0",
]
[dependency-groups]
@@ -68,7 +68,7 @@ dev = [
"apache-airflow-devel-common",
"apache-airflow-providers-common-sql",
# Additional devel dependencies (do not remove this line and add extra
development dependencies)
- "apache-airflow-providers-common-sql[pandas]",
+ "apache-airflow-providers-common-sql[pandas,polars]",
]
# To build docs:
diff --git a/providers/sqlite/tests/unit/sqlite/hooks/test_sqlite.py
b/providers/sqlite/tests/unit/sqlite/hooks/test_sqlite.py
index 90238822015..5587bb5f4fd 100644
--- a/providers/sqlite/tests/unit/sqlite/hooks/test_sqlite.py
+++ b/providers/sqlite/tests/unit/sqlite/hooks/test_sqlite.py
@@ -99,13 +99,13 @@ class TestSqliteHook:
self.cur.close.assert_called_once_with()
self.cur.execute.assert_called_once_with(statement)
- def test_get_pandas_df(self):
+ def test_get_df_pandas(self):
statement = "SQL"
column = "col"
result_sets = [("row1",), ("row2",)]
self.cur.description = [(column,)]
self.cur.fetchall.return_value = result_sets
- df = self.db_hook.get_pandas_df(statement)
+ df = self.db_hook.get_df(statement, df_type="pandas")
assert column == df.columns[0]
@@ -114,6 +114,22 @@ class TestSqliteHook:
self.cur.execute.assert_called_once_with(statement)
+ def test_get_df_polars(self):
+ statement = "SQL"
+ column = "col"
+ result_sets = [("row1",), ("row2",)]
+ mock_execute = mock.MagicMock()
+    mock_execute.description = [(column, None, None, None, None, None, None)]
+ mock_execute.fetchall.return_value = result_sets
+ self.cur.execute.return_value = mock_execute
+ df = self.db_hook.get_df(statement, df_type="polars")
+
+ self.cur.execute.assert_called_once_with(statement)
+ mock_execute.fetchall.assert_called_once_with()
+ assert column == df.columns[0]
+ assert result_sets[0][0] == df.row(0)[0]
+ assert result_sets[1][0] == df.row(1)[0]
+
def test_run_log(self):
statement = "SQL"
self.db_hook.run(statement)
diff --git a/providers/vertica/README.rst b/providers/vertica/README.rst
index 7b0fb2f62b3..909966b0b8d 100644
--- a/providers/vertica/README.rst
+++ b/providers/vertica/README.rst
@@ -54,7 +54,7 @@ Requirements
PIP package Version required
======================================= ==================
``apache-airflow`` ``>=2.10.0``
-``apache-airflow-providers-common-sql`` ``>=1.20.0``
+``apache-airflow-providers-common-sql`` ``>=1.26.0``
``vertica-python`` ``>=0.6.0``
======================================= ==================
diff --git a/providers/vertica/pyproject.toml b/providers/vertica/pyproject.toml
index 1912b7ed907..df100c11ced 100644
--- a/providers/vertica/pyproject.toml
+++ b/providers/vertica/pyproject.toml
@@ -58,7 +58,7 @@ requires-python = "~=3.9"
# After you modify the dependencies, and rebuild your Breeze CI image with
``breeze ci-image build``
dependencies = [
"apache-airflow>=2.10.0",
- "apache-airflow-providers-common-sql>=1.20.0",
+ "apache-airflow-providers-common-sql>=1.26.0",
"vertica-python>=0.6.0",
]
@@ -69,7 +69,7 @@ dev = [
"apache-airflow-devel-common",
"apache-airflow-providers-common-sql",
# Additional devel dependencies (do not remove this line and add extra
development dependencies)
- "apache-airflow-providers-common-sql[pandas]",
+ "apache-airflow-providers-common-sql[pandas,polars]",
]
# To build docs:
diff --git a/providers/vertica/tests/unit/vertica/hooks/test_vertica.py
b/providers/vertica/tests/unit/vertica/hooks/test_vertica.py
index e5ff2538ebe..0063bf30534 100644
--- a/providers/vertica/tests/unit/vertica/hooks/test_vertica.py
+++ b/providers/vertica/tests/unit/vertica/hooks/test_vertica.py
@@ -169,15 +169,31 @@ class TestVerticaHook:
self.cur.close.assert_called_once_with()
self.cur.execute.assert_called_once_with(statement)
- def test_get_pandas_df(self):
+ def test_get_df_pandas(self):
statement = "SQL"
column = "col"
result_sets = [("row1",), ("row2",)]
self.cur.description = [(column,)]
self.cur.fetchall.return_value = result_sets
- df = self.db_hook.get_pandas_df(statement)
+ df = self.db_hook.get_df(statement, df_type="pandas")
assert column == df.columns[0]
assert result_sets[0][0] == df.values.tolist()[0][0]
assert result_sets[1][0] == df.values.tolist()[1][0]
+
+ def test_get_df_polars(self):
+ statement = "SQL"
+ column = "col"
+ result_sets = [("row1",), ("row2",)]
+ mock_execute = mock.MagicMock()
+    mock_execute.description = [(column, None, None, None, None, None, None)]
+ mock_execute.fetchall.return_value = result_sets
+ self.cur.execute.return_value = mock_execute
+ df = self.db_hook.get_df(statement, df_type="polars")
+
+ self.cur.execute.assert_called_once_with(statement)
+ mock_execute.fetchall.assert_called_once_with()
+ assert column == df.columns[0]
+ assert result_sets[0][0] == df.row(0)[0]
+ assert result_sets[1][0] == df.row(1)[0]
diff --git a/pyproject.toml b/pyproject.toml
index a787b4e527b..74a0e5361f7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -380,7 +380,7 @@ packages = []
"apache-airflow-providers-zendesk>=4.9.0"
]
"all" = [
-
"apache-airflow[aiobotocore,apache-atlas,apache-webhdfs,async,cloudpickle,github-enterprise,google-auth,graphviz,kerberos,ldap,otel,pandas,rabbitmq,s3fs,sentry,statsd,uv]",
+
"apache-airflow[aiobotocore,apache-atlas,apache-webhdfs,async,cloudpickle,github-enterprise,google-auth,graphviz,kerberos,ldap,otel,pandas,polars,rabbitmq,s3fs,sentry,statsd,uv]",
"apache-airflow-core[all]",
"apache-airflow-providers-airbyte>=5.0.0",
"apache-airflow-providers-alibaba>=3.0.0",
@@ -508,10 +508,10 @@ packages = []
"python-ldap>=3.4.4",
]
"pandas" = [
- # In pandas 2.2 minimal version of the sqlalchemy is 2.0
-    # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies
-    # However Airflow not fully supports it yet: https://github.com/apache/airflow/issues/28723
- "pandas>=2.1.2,<2.3",
+ "apache-airflow-providers-common-sql[pandas]",
+]
+"polars" = [
+ "apache-airflow-providers-common-sql[polars]",
]
"rabbitmq" = [
"amqp>=5.2.0",
@@ -785,7 +785,7 @@ testing = ["dev", "providers.tests", "tests_common", "tests", "system", "unit",
ban-relative-imports = "all"
# Ban certain modules from being imported at module level, instead requiring
# that they're imported lazily (e.g., within a function definition).
-banned-module-level-imports = ["numpy", "pandas"]
+banned-module-level-imports = ["numpy", "pandas", "polars"]
[tool.ruff.lint.flake8-tidy-imports.banned-api]
# Direct import from the airflow package modules and constraints
diff --git a/scripts/in_container/install_airflow_and_providers.py
b/scripts/in_container/install_airflow_and_providers.py
index ec1d6fbddb6..6a843647a9d 100755
--- a/scripts/in_container/install_airflow_and_providers.py
+++ b/scripts/in_container/install_airflow_and_providers.py
@@ -100,7 +100,15 @@ def find_provider_distributions(extension: str, selected_providers: list[str]) -
for candidate in sorted(candidates):
console.print(f" {candidate.as_posix()}")
console.print()
- return [candidate.as_posix() for candidate in candidates]
+ result = []
+ for candidate in candidates:
+ # https://github.com/apache/airflow/pull/49339
+ path_str = candidate.as_posix()
+ if "apache_airflow_providers_common_sql" in path_str:
+            console.print(f"[bright_blue]Adding [polars] extra to common.sql provider: {path_str}")
+ path_str += "[polars]"
+ result.append(path_str)
+ return result
def calculate_constraints_location(