SameerMesiah97 commented on code in PR #61949:
URL: https://github.com/apache/airflow/pull/61949#discussion_r2809742430


##########
providers/google/tests/unit/google/cloud/hooks/test_gcs.py:
##########
@@ -1843,3 +1846,88 @@ def get_logs_string(call_args_list):
         assert "GCS object size (15) and local file size (9) differ." in 
logs_string
         assert f"Downloading dag_03.py to {sync_local_dir}/dag_03.py" in 
logs_string
         self.gcs_hook.download.assert_called_once()
+
+
+class TestGCSDownloadDiskCheck:
+    @mock.patch(GCS_STRING.format("GoogleBaseHook.__init__"), new=mock_base_gcp_hook_default_project_id)
+    @mock.patch(GCS_STRING.format("GCSHook.get_conn"))
+    @mock.patch("shutil.disk_usage")
+    def test_download_raises_error_if_not_enough_space(self, mock_disk_usage, mock_get_conn):
+        hook = gcs.GCSHook(gcp_conn_id="test")
+
+        # Mock GCS connection and blob
+        mock_client = mock.Mock()
+        mock_get_conn.return_value = mock_client
+        mock_bucket = mock.Mock()
+        mock_client.bucket.return_value = mock_bucket
+        mock_blob = mock.Mock()
+        mock_bucket.blob.return_value = mock_blob
+
+        # Blob size 1000 bytes
+        mock_blob.size = 1000
+
+        # Free space 500 bytes
+        # shutil.disk_usage returns a named tuple (total, used, free)
+        mock_disk_usage.return_value = mock.Mock(total=2000, used=1500, free=500)
+
+        with pytest.raises(AirflowException, match="Insufficient disk space"):
+            hook.download(bucket_name="bucket", object_name="object", filename="/tmp/file")
+
+        # Verify blob.reload() was called to get the size
+        mock_blob.reload.assert_called_once()

Review Comment:
   I would add assertions for `mock_disk_usage` and `download_to_filename` as well. Like this:
   
   `mock_disk_usage.assert_called_once()`
   `mock_blob.download_to_filename.assert_not_called()`
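   
   For illustration, a minimal sketch of how the tail of the test could read with those assertions added (every name here comes from the test in the hunk above):
   
   ```python
   with pytest.raises(AirflowException, match="Insufficient disk space"):
       hook.download(bucket_name="bucket", object_name="object", filename="/tmp/file")

   # The failure should short-circuit before any bytes are transferred:
   # the size check consulted the blob metadata and the disk, but no download ran.
   mock_blob.reload.assert_called_once()
   mock_disk_usage.assert_called_once()
   mock_blob.download_to_filename.assert_not_called()
   ```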



##########
providers/google/src/airflow/providers/google/cloud/hooks/gcs.py:
##########
@@ -347,6 +344,16 @@ def download(
                 blob = bucket.blob(blob_name=object_name, chunk_size=chunk_size)
 
                 if filename:
+                    blob.reload(timeout=timeout)
+                    if blob.size:

Review Comment:
   What about situations where blob.size is 0? Since 0 is falsy, won't this result in the disk space check being skipped? I understand that this will not have a functional impact, but we can be more explicit here. I think this would be more precise:
   
   `if blob.size is not None:`
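   
   For concreteness, a sketch of how the guard could read. The hunk cuts off after the `if`, so everything under the guard below is an assumption reconstructed from the test's expected "Insufficient disk space" message, and it presumes `shutil` and `os` are imported in the module:
   
   ```python
   blob.reload(timeout=timeout)
   if blob.size is not None:
       # shutil.disk_usage reports (total, used, free) for the filesystem
       # holding the destination file.
       free = shutil.disk_usage(os.path.dirname(filename) or ".").free
       if blob.size > free:
           raise AirflowException(
               f"Insufficient disk space to download gs://{bucket_name}/{object_name}: "
               f"object is {blob.size} bytes, but only {free} bytes are free."
           )
   ```
   
   With a strict `>` comparison, a zero-byte object and an object exactly the size of the remaining free space both pass the check.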



##########
providers/google/tests/unit/google/cloud/hooks/test_gcs.py:
##########
@@ -1843,3 +1846,88 @@ def get_logs_string(call_args_list):
         assert "GCS object size (15) and local file size (9) differ." in 
logs_string
         assert f"Downloading dag_03.py to {sync_local_dir}/dag_03.py" in 
logs_string
         self.gcs_hook.download.assert_called_once()
+
+
+class TestGCSDownloadDiskCheck:
+    @mock.patch(GCS_STRING.format("GoogleBaseHook.__init__"), new=mock_base_gcp_hook_default_project_id)
+    @mock.patch(GCS_STRING.format("GCSHook.get_conn"))
+    @mock.patch("shutil.disk_usage")
+    def test_download_raises_error_if_not_enough_space(self, mock_disk_usage, mock_get_conn):
+        hook = gcs.GCSHook(gcp_conn_id="test")
+
+        # Mock GCS connection and blob
+        mock_client = mock.Mock()
+        mock_get_conn.return_value = mock_client
+        mock_bucket = mock.Mock()
+        mock_client.bucket.return_value = mock_bucket
+        mock_blob = mock.Mock()
+        mock_bucket.blob.return_value = mock_blob
+
+        # Blob size 1000 bytes
+        mock_blob.size = 1000
+
+        # Free space 500 bytes
+        # shutil.disk_usage returns a named tuple (total, used, free)
+        mock_disk_usage.return_value = mock.Mock(total=2000, used=1500, free=500)
+
+        with pytest.raises(AirflowException, match="Insufficient disk space"):
+            hook.download(bucket_name="bucket", object_name="object", filename="/tmp/file")
+
+        # Verify blob.reload() was called to get the size
+        mock_blob.reload.assert_called_once()
+
+    @mock.patch(GCS_STRING.format("GoogleBaseHook.__init__"), new=mock_base_gcp_hook_default_project_id)
+    @mock.patch(GCS_STRING.format("GCSHook.get_conn"))
+    @mock.patch("shutil.disk_usage")
+    def test_download_works_if_enough_space(self, mock_disk_usage, mock_get_conn):

Review Comment:
   I think this test should also cover boundary conditions, e.g. `blob.size == free_space`, and optionally 0. You could parameterize it by setting `mock_blob.size` to 1000, 2000, and 0.
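   
   A rough sketch of that parameterization, reusing the mock wiring from the hunk; it assumes the hook raises only when the object is strictly larger than the free space, so the `blob.size == free` boundary passes:
   
   ```python
   @pytest.mark.parametrize("blob_size", [0, 1000, 2000])
   @mock.patch(GCS_STRING.format("GoogleBaseHook.__init__"), new=mock_base_gcp_hook_default_project_id)
   @mock.patch(GCS_STRING.format("GCSHook.get_conn"))
   @mock.patch("shutil.disk_usage")
   def test_download_works_if_enough_space(self, mock_disk_usage, mock_get_conn, blob_size):
       hook = gcs.GCSHook(gcp_conn_id="test")

       mock_client = mock.Mock()
       mock_get_conn.return_value = mock_client
       mock_bucket = mock.Mock()
       mock_client.bucket.return_value = mock_bucket
       mock_blob = mock.Mock()
       mock_bucket.blob.return_value = mock_blob

       mock_blob.size = blob_size

       # free == 2000, so 0, 1000 and the exact boundary 2000 should all succeed
       mock_disk_usage.return_value = mock.Mock(total=4000, used=2000, free=2000)

       filename = "/tmp/file"
       hook.download(bucket_name="bucket", object_name="object", filename=filename)

       mock_blob.download_to_filename.assert_called_once_with(filename, timeout=mock.ANY)
   ```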



##########
providers/google/tests/unit/google/cloud/hooks/test_gcs.py:
##########
@@ -1843,3 +1846,88 @@ def get_logs_string(call_args_list):
         assert "GCS object size (15) and local file size (9) differ." in 
logs_string
         assert f"Downloading dag_03.py to {sync_local_dir}/dag_03.py" in 
logs_string
         self.gcs_hook.download.assert_called_once()
+
+
+class TestGCSDownloadDiskCheck:
+    @mock.patch(GCS_STRING.format("GoogleBaseHook.__init__"), new=mock_base_gcp_hook_default_project_id)
+    @mock.patch(GCS_STRING.format("GCSHook.get_conn"))
+    @mock.patch("shutil.disk_usage")
+    def test_download_raises_error_if_not_enough_space(self, mock_disk_usage, mock_get_conn):
+        hook = gcs.GCSHook(gcp_conn_id="test")
+
+        # Mock GCS connection and blob
+        mock_client = mock.Mock()
+        mock_get_conn.return_value = mock_client
+        mock_bucket = mock.Mock()
+        mock_client.bucket.return_value = mock_bucket
+        mock_blob = mock.Mock()
+        mock_bucket.blob.return_value = mock_blob
+
+        # Blob size 1000 bytes
+        mock_blob.size = 1000
+
+        # Free space 500 bytes
+        # shutil.disk_usage returns a named tuple (total, used, free)
+        mock_disk_usage.return_value = mock.Mock(total=2000, used=1500, free=500)
+
+        with pytest.raises(AirflowException, match="Insufficient disk space"):
+            hook.download(bucket_name="bucket", object_name="object", filename="/tmp/file")
+
+        # Verify blob.reload() was called to get the size
+        mock_blob.reload.assert_called_once()
+
+    @mock.patch(GCS_STRING.format("GoogleBaseHook.__init__"), new=mock_base_gcp_hook_default_project_id)
+    @mock.patch(GCS_STRING.format("GCSHook.get_conn"))
+    @mock.patch("shutil.disk_usage")
+    def test_download_works_if_enough_space(self, mock_disk_usage, mock_get_conn):
+        hook = gcs.GCSHook(gcp_conn_id="test")
+
+        # Mock GCS connection and blob
+        mock_client = mock.Mock()
+        mock_get_conn.return_value = mock_client
+        mock_bucket = mock.Mock()
+        mock_client.bucket.return_value = mock_bucket
+        mock_blob = mock.Mock()
+        mock_bucket.blob.return_value = mock_blob
+
+        # Blob size 1000 bytes
+        mock_blob.size = 1000
+        mock_blob.name = "test_blob"
+        mock_bucket.name = "test_bucket"
+
+        # Free space 2000 bytes
+        mock_disk_usage.return_value = mock.Mock(total=4000, used=2000, free=2000)
+
+        filename = "/tmp/file"
+        hook.download(bucket_name="bucket", object_name="object", filename=filename)
+
+        # Verify blob.reload() was called
+        mock_blob.reload.assert_called_once()
+        # Verify download was called
+        mock_blob.download_to_filename.assert_called_once_with(filename, timeout=mock.ANY)

Review Comment:
   Same here. See comment above.


