This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new c0bce9b2219 Fix Google provider to handle False boolean values in
connection extras (#58348)
c0bce9b2219 is described below
commit c0bce9b221991cb45731e2e720e463fcf928cfa9
Author: Ashir Alam <[email protected]>
AuthorDate: Sun Nov 16 17:12:15 2025 -0500
Fix Google provider to handle False boolean values in connection extras
(#58348)
* Fix Google provider to handle False boolean values in connection extras
Fixed a bug where boolean fields set to False in Google provider
connection extras were incorrectly returning default values instead.
The issue occurred in two places:
1. get_field() function used 'or None' which treated False as falsy
2. _get_field() method used compound boolean logic that returned
the default value when the actual value was False
This particularly affected BigQueryHook where use_legacy_sql=False
in connection extras would incorrectly return True.
Changes:
- Modified get_field() to only treat empty strings as None
- Modified _get_field() to explicitly check 'is not None'
- Added tests for False/falsy value handling
Fixes #47053
* Fix test import paths for Airflow 3.x compatibility
Update mock paths from 'airflow.hooks.base.BaseHook' to
'airflow.providers.common.compat.sdk.BaseHook' to match
the new import structure in Airflow 3.x.
* Fix Google provider to respect False boolean values in connection extras
Use sentinel pattern to distinguish unset parameters from explicitly False
values, allowing both connection extras and parameter overrides to work
correctly.
* Fix mypy no-redef errors by removing duplicate type annotations
* Fix import order for static checks
* Trigger CI re-run
---
.../providers/google/cloud/hooks/bigquery.py | 47 +++++++++++++++++-----
.../providers/google/common/hooks/base_google.py | 26 ++++++++++--
.../tests/unit/google/cloud/hooks/test_bigquery.py | 16 ++++++++
.../unit/google/common/hooks/test_base_google.py | 27 +++++++++++++
4 files changed, 103 insertions(+), 13 deletions(-)
diff --git
a/providers/google/src/airflow/providers/google/cloud/hooks/bigquery.py
b/providers/google/src/airflow/providers/google/cloud/hooks/bigquery.py
index ebdbbd47146..b8d2451bc8f 100644
--- a/providers/google/src/airflow/providers/google/cloud/hooks/bigquery.py
+++ b/providers/google/src/airflow/providers/google/cloud/hooks/bigquery.py
@@ -71,6 +71,7 @@ from
airflow.providers.google.cloud.utils.credentials_provider import _get_scope
from airflow.providers.google.common.consts import CLIENT_INFO
from airflow.providers.google.common.deprecated import deprecated
from airflow.providers.google.common.hooks.base_google import (
+ _UNSET,
PROVIDE_PROJECT_ID,
GoogleBaseAsyncHook,
GoogleBaseHook,
@@ -160,21 +161,47 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
def __init__(
self,
- use_legacy_sql: bool = True,
- location: str | None = None,
- priority: str = "INTERACTIVE",
- api_resource_configs: dict | None = None,
+ use_legacy_sql: bool | object = _UNSET,
+ location: str | None | object = _UNSET,
+ priority: str | object = _UNSET,
+ api_resource_configs: dict | None | object = _UNSET,
impersonation_scopes: str | Sequence[str] | None = None,
- labels: dict | None = None,
+ labels: dict | None | object = _UNSET,
**kwargs,
) -> None:
super().__init__(**kwargs)
- self.use_legacy_sql: bool = self._get_field("use_legacy_sql",
use_legacy_sql)
- self.location: str | None = self._get_field("location", location)
- self.priority: str = self._get_field("priority", priority)
+ # Use sentinel pattern to distinguish "not provided" from "explicitly
provided"
+ if use_legacy_sql is _UNSET:
+ value = self._get_field("use_legacy_sql", _UNSET)
+ self.use_legacy_sql: bool = value if value is not None else True
+ else:
+ self.use_legacy_sql = use_legacy_sql # type: ignore[assignment]
+
+ if location is _UNSET:
+ self.location: str | None = self._get_field("location", _UNSET)
+ else:
+ self.location = location # type: ignore[assignment]
+
+ if priority is _UNSET:
+ value = self._get_field("priority", _UNSET)
+ self.priority: str = value if value is not None else "INTERACTIVE"
+ else:
+ self.priority = priority # type: ignore[assignment]
+
self.running_job_id: str | None = None
- self.api_resource_configs: dict =
self._get_field("api_resource_configs", api_resource_configs or {})
- self.labels = self._get_field("labels", labels or {})
+
+ if api_resource_configs is _UNSET:
+ value = self._get_field("api_resource_configs", _UNSET)
+ self.api_resource_configs: dict = value if value is not None else
{}
+ else:
+ self.api_resource_configs = api_resource_configs or {} # type:
ignore[assignment]
+
+ if labels is _UNSET:
+ value = self._get_field("labels", _UNSET)
+ self.labels = value if value is not None else {}
+ else:
+ self.labels = labels or {} # type: ignore[assignment]
+
self.impersonation_scopes: str | Sequence[str] | None =
impersonation_scopes
def get_conn(self) -> BigQueryConnection:
diff --git
a/providers/google/src/airflow/providers/google/common/hooks/base_google.py
b/providers/google/src/airflow/providers/google/common/hooks/base_google.py
index a3abecb848b..0a4e9017de1 100644
--- a/providers/google/src/airflow/providers/google/common/hooks/base_google.py
+++ b/providers/google/src/airflow/providers/google/common/hooks/base_google.py
@@ -154,6 +154,9 @@ PROVIDE_PROJECT_ID: str = cast("str", None)
T = TypeVar("T", bound=Callable)
RT = TypeVar("RT")
+# Sentinel value to distinguish "parameter not provided" from "parameter
explicitly set to a value"
+_UNSET = object()
+
def get_field(extras: dict, field_name: str) -> str | None:
"""Get field from extra, first checking short name, then for backcompat we
check for prefixed name."""
@@ -163,9 +166,11 @@ def get_field(extras: dict, field_name: str) -> str | None:
"when using this method."
)
if field_name in extras:
- return extras[field_name] or None
+ value = extras[field_name]
+ return None if value == "" else value
prefixed_name = f"extra__google_cloud_platform__{field_name}"
- return extras.get(prefixed_name) or None
+ value = extras.get(prefixed_name)
+ return None if value == "" else value
class GoogleBaseHook(BaseHook):
@@ -405,7 +410,22 @@ class GoogleBaseHook(BaseHook):
custom UI elements to the hook page, which allow admins to specify
service_account, key_path, etc. They get formatted as shown below.
"""
- return hasattr(self, "extras") and get_field(self.extras, f) or default
+ # New behavior: If default is _UNSET, parameter was not provided
+ # Check connection extras first, return None if not found (caller
handles default)
+ if default is _UNSET:
+ if hasattr(self, "extras"):
+ value = get_field(self.extras, f)
+ if value is not None:
+ return value
+ return None
+
+ # Old behavior (for backward compatibility):
+ # Check connection extras, but properly handle False values
+ if hasattr(self, "extras"):
+ value = get_field(self.extras, f)
+ if value is not None:
+ return value
+ return default
@property
def project_id(self) -> str:
diff --git a/providers/google/tests/unit/google/cloud/hooks/test_bigquery.py
b/providers/google/tests/unit/google/cloud/hooks/test_bigquery.py
index 307ac13d40a..2ad9c025044 100644
--- a/providers/google/tests/unit/google/cloud/hooks/test_bigquery.py
+++ b/providers/google/tests/unit/google/cloud/hooks/test_bigquery.py
@@ -1420,6 +1420,22 @@ class TestBigQueryHookLegacySql(_BigQueryBaseTestClass):
_, kwargs = mock_insert.call_args
assert kwargs["configuration"]["query"]["useLegacySql"] is False
+ @mock.patch("airflow.providers.common.compat.sdk.BaseHook.get_connection")
+ @mock.patch(
+
"airflow.providers.google.common.hooks.base_google.GoogleBaseHook.get_credentials_and_project_id",
+ return_value=(CREDENTIALS, PROJECT_ID),
+ )
+ def test_use_legacy_sql_from_connection_extra_false(
+ self, mock_get_creds_and_proj_id, mock_get_connection
+ ):
+ """Test that use_legacy_sql=False in connection extras is respected."""
+ mock_connection = mock.MagicMock()
+ mock_connection.extra_dejson = {"use_legacy_sql": False}
+ mock_get_connection.return_value = mock_connection
+
+ bq_hook = BigQueryHook(gcp_conn_id="test_conn")
+ assert bq_hook.use_legacy_sql is False
+
@pytest.mark.db_test
class TestBigQueryWithKMS(_BigQueryBaseTestClass):
diff --git
a/providers/google/tests/unit/google/common/hooks/test_base_google.py
b/providers/google/tests/unit/google/common/hooks/test_base_google.py
index 7a0a4083320..72fa3992c3d 100644
--- a/providers/google/tests/unit/google/common/hooks/test_base_google.py
+++ b/providers/google/tests/unit/google/common/hooks/test_base_google.py
@@ -1037,3 +1037,30 @@ class TestGoogleBaseAsyncHook:
token = await instance.get_token()
assert await token.get_project() == "CONN_PROJECT_ID"
assert await token.get() == "IMPERSONATED_ACCESS_TOKEN"
+
+
+class TestGetFieldWithFalseValues:
+ """Test get_field function and _get_field method handle False and other
falsy values correctly."""
+
+ def test_get_field_returns_false_not_none(self):
+ """Test that get_field correctly returns False instead of None."""
+ extras = {"use_legacy_sql": False}
+ result = hook.get_field(extras, "use_legacy_sql")
+ assert result is False
+
+ def test_get_field_returns_false_with_prefixed_name(self):
+ """Test that get_field correctly returns False with prefixed field
name."""
+ extras = {"extra__google_cloud_platform__use_legacy_sql": False}
+ result = hook.get_field(extras, "use_legacy_sql")
+ assert result is False
+
+ @mock.patch("airflow.providers.common.compat.sdk.BaseHook.get_connection")
+ def test_hook_get_field_returns_false_not_default(self,
mock_get_connection):
+ """Test that _get_field correctly returns False instead of default
value."""
+ mock_connection = mock.MagicMock()
+ mock_connection.extra_dejson = {"use_legacy_sql": False}
+ mock_get_connection.return_value = mock_connection
+
+ hook_instance = hook.GoogleBaseHook(gcp_conn_id="test_conn")
+ result = hook_instance._get_field("use_legacy_sql", default=True)
+ assert result is False