shahar1 commented on code in PR #67218:
URL: https://github.com/apache/airflow/pull/67218#discussion_r3425831614


##########
providers/google/src/airflow/providers/google/cloud/hooks/bigquery.py:
##########
@@ -45,7 +47,12 @@
     SchemaField,
     UnknownJob,
 )
-from google.cloud.bigquery.dataset import AccessEntry, Dataset, 
DatasetListItem, DatasetReference
+from google.cloud.bigquery.dataset import (
+    AccessEntry,
+    Dataset,
+    DatasetListItem,
+    DatasetReference,
+)

Review Comment:
   Please revert unrelated formatting or refactoring, it makes it hard to review



##########
providers/google/src/airflow/providers/google/cloud/hooks/bigquery.py:
##########
@@ -229,7 +246,7 @@ def get_conn(self) -> BigQueryConnection:
             "v2",
             http=http_authorized,
             cache_discovery=False,
-            client_options=self.get_client_options(),
+            client_options=getattr(self, "get_client_options", lambda: None)(),

Review Comment:
   What is this for?



##########
providers/google/tests/unit/google/cloud/hooks/test_bigquery.py:
##########
@@ -2313,3 +2313,262 @@ def test_insert_job_hook_lineage(self, mock_client, 
mock_query_job, mock_send_li
         )
 
         mock_send_lineage.assert_called_once_with(context=self.hook, 
job=mock_job_instance)
+
+
[email protected]_test
+class TestBigQueryHookProxy:
+    """Tests that HTTP/HTTPS proxy settings are propagated through _authorize, 
get_client, and _get_pandas_df."""
+
+    def _make_hook(self, http_proxy=None, https_proxy=None):
+        class MockedBigQueryHook(BigQueryHook):
+            def get_credentials_and_project_id(self):
+                return CREDENTIALS, PROJECT_ID
+
+            def get_credentials(self):
+                return mock.MagicMock(name="credentials")
+
+        return MockedBigQueryHook(http_proxy=http_proxy, 
https_proxy=https_proxy)
+
+    # --- __init__ ---
+
+    def test_http_proxy_stored_from_constructor(self):
+        hook = self._make_hook(http_proxy="http://proxy.example.com:3128";)
+        assert hook.http_proxy == "http://proxy.example.com:3128";
+        assert hook.https_proxy is None
+
+    def test_https_proxy_stored_from_constructor(self):
+        hook = self._make_hook(https_proxy="https://proxy.example.com:3129";)
+        assert hook.http_proxy is None
+        assert hook.https_proxy == "https://proxy.example.com:3129";
+
+    def test_both_proxies_stored_from_constructor(self):
+        hook = self._make_hook(
+            http_proxy="http://proxy.example.com:3128";,
+            https_proxy="https://proxy.example.com:3129";,
+        )
+        assert hook.http_proxy == "http://proxy.example.com:3128";
+        assert hook.https_proxy == "https://proxy.example.com:3129";
+
+    def test_no_proxy_defaults_to_none(self):
+        hook = self._make_hook()
+        assert hook.http_proxy is None
+        assert hook.https_proxy is None
+
+    # --- _authorize ---
+
+    
@mock.patch("airflow.providers.google.common.hooks.base_google.GoogleBaseHook._authorize")
+    def test_authorize_without_proxy_delegates_to_base(self, 
mock_base_authorize):
+        hook = self._make_hook()
+        result = hook._authorize()
+        mock_base_authorize.assert_called_once()
+        assert result == mock_base_authorize.return_value
+
+    @mock.patch("httplib2.socks")
+    
@mock.patch("airflow.providers.google.cloud.hooks.bigquery.google_auth_httplib2.AuthorizedHttp")
+    @mock.patch("googleapiclient.http.set_user_agent")
+    @mock.patch("httplib2.Http")
+    @mock.patch("httplib2.ProxyInfo")
+    def test_authorize_with_http_proxy_creates_proxy_info(
+        self, mock_proxy_info, mock_http, _mock_set_user_agent, 
mock_authorized_http, _mock_socks
+    ):
+        hook = self._make_hook(http_proxy="http://proxy.example.com:3128";)
+        result = hook._authorize()
+
+        mock_proxy_info.assert_called_once_with(
+            proxy_type=mock.ANY,
+            proxy_host="proxy.example.com",
+            proxy_port=3128,
+            proxy_user=None,
+            proxy_pass=None,
+        )
+        
mock_http.assert_called_once_with(proxy_info=mock_proxy_info.return_value)
+        mock_authorized_http.assert_called_once()
+        assert result == mock_authorized_http.return_value
+
+    @mock.patch("httplib2.socks")
+    
@mock.patch("airflow.providers.google.cloud.hooks.bigquery.google_auth_httplib2.AuthorizedHttp")
+    @mock.patch("googleapiclient.http.set_user_agent")
+    @mock.patch("httplib2.Http")
+    @mock.patch("httplib2.ProxyInfo")
+    def test_authorize_with_https_proxy_creates_proxy_info(

Review Comment:
   There are way too many unit tests - please reduce the amount of tests to the 
minimal necessary to reach 100% coverage (see 
https://github.com/apache/airflow/pull/68502 - if you merge from `master`, your 
AI agent will have the instructions to handle it).



##########
providers/google/src/airflow/providers/google/cloud/hooks/bigquery.py:
##########
@@ -345,6 +399,9 @@ def _get_pandas_df(
         if dialect is None:
             dialect = "legacy" if self.use_legacy_sql else "standard"
 
+        if self.http_proxy or self.https_proxy:
+            return self.get_client().query(sql, 
timeout=10).to_dataframe(create_bqstorage_client=False)

Review Comment:
   This path drops `dialect` and `**kwargs` params, as well as hardcodes 
`timeout`.
   Why? 



##########
providers/google/src/airflow/providers/google/cloud/hooks/bigquery.py:
##########
@@ -240,20 +257,57 @@ def get_conn(self) -> BigQueryConnection:
             hook=self,
         )
 
+    def _authorize(self) -> google_auth_httplib2.AuthorizedHttp:
+        """Return an authorized HTTP object, optionally configured with a 
proxy."""
+        proxy_url = self.http_proxy or self.https_proxy
+        if not proxy_url:
+            return super()._authorize()
+
+        import httplib2
+        from googleapiclient.http import set_user_agent
+
+        from airflow import version

Review Comment:
   Imports should be extracted out of the method, unless there's a good reason 
for that (same goes for similar imports in other methods)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to