This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 7f9f923d64 [Fix #41763]: Redundant forward slash in SFTPToGCSOperator 
when destination_path is not specified or has its default value (#41928)
7f9f923d64 is described below

commit 7f9f923d6469760d298a2a0525a1cb098eb1cd99
Author: Mayuresh Kedari <[email protected]>
AuthorDate: Mon Sep 2 19:46:54 2024 +0530

    [Fix #41763]: Redundant forward slash in SFTPToGCSOperator when 
destination_path is not specified or has its default value (#41928)
    
    * fix the redundant forward slash issue if the path has one
    
    * Add a test for fix
    
    * respect airflow standards linting
    
    * update destination path to destination_path in comment for clarity
    
    ---------
    
    Co-authored-by: Mayuresh Kedari <[email protected]>
---
 .../google/cloud/transfers/sftp_to_gcs.py          |  6 +++
 .../google/cloud/transfers/test_sftp_to_gcs.py     | 49 ++++++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/airflow/providers/google/cloud/transfers/sftp_to_gcs.py 
b/airflow/providers/google/cloud/transfers/sftp_to_gcs.py
index 12d9f056a0..57af1fdebe 100644
--- a/airflow/providers/google/cloud/transfers/sftp_to_gcs.py
+++ b/airflow/providers/google/cloud/transfers/sftp_to_gcs.py
@@ -133,6 +133,12 @@ class SFTPToGCSOperator(BaseOperator):
 
             for file in files:
                 destination_path = file.replace(base_path, 
self.destination_path, 1)
+                # See issue: https://github.com/apache/airflow/issues/41763
+                # If destination_path is not specified, it defaults to an 
empty string. In that case,
+                # replacing base_path with an empty string leaves behind the 
path separator that followed
+                # base_path, so destination_path would start with a redundant "/".
+                if not self.destination_path:
+                    destination_path = destination_path.lstrip("/")
                 self._copy_single_object(gcs_hook, sftp_hook, file, 
destination_path)
 
         else:
diff --git a/tests/providers/google/cloud/transfers/test_sftp_to_gcs.py 
b/tests/providers/google/cloud/transfers/test_sftp_to_gcs.py
index b6790c70f9..7755ef0f02 100644
--- a/tests/providers/google/cloud/transfers/test_sftp_to_gcs.py
+++ b/tests/providers/google/cloud/transfers/test_sftp_to_gcs.py
@@ -35,6 +35,7 @@ DEFAULT_MIME_TYPE = "application/octet-stream"
 
 TEST_BUCKET = "test-bucket"
 SOURCE_OBJECT_WILDCARD_FILENAME = "main_dir/test_object*.json"
+SOURCE_OBJECT_WILDCARD_TXT_FILENAME = "main_dir/test_object*.txt"
 SOURCE_OBJECT_NO_WILDCARD = "main_dir/test_object3.json"
 SOURCE_OBJECT_MULTIPLE_WILDCARDS = "main_dir/csv/*/test_*.csv"
 
@@ -252,3 +253,51 @@ class TestSFTPToGCSOperator:
 
         err = ctx.value
         assert "Only one wildcard '*' is allowed in source_path parameter" in 
str(err)
+
+    @mock.patch("airflow.providers.google.cloud.transfers.sftp_to_gcs.GCSHook")
+    
@mock.patch("airflow.providers.google.cloud.transfers.sftp_to_gcs.SFTPHook")
+    def test_execute_copy_with_wildcard_and_default_destination_path(self, 
sftp_hook, gcs_hook):
+        sftp_hook.return_value.get_tree_map.return_value = [
+            ["main_dir/test_object1.txt", "main_dir/test_object2.txt"],
+            [],
+            [],
+        ]
+
+        task = SFTPToGCSOperator(
+            task_id=TASK_ID,
+            source_path=SOURCE_OBJECT_WILDCARD_TXT_FILENAME,
+            destination_bucket=TEST_BUCKET,
+            gcp_conn_id=GCP_CONN_ID,
+            sftp_conn_id=SFTP_CONN_ID,
+        )
+        task.execute(None)
+
+        sftp_hook.return_value.get_tree_map.assert_called_with(
+            "main_dir", prefix="main_dir/test_object", delimiter=".txt"
+        )
+
+        sftp_hook.return_value.retrieve_file.assert_has_calls(
+            [
+                mock.call("main_dir/test_object1.txt", mock.ANY, 
prefetch=True),
+                mock.call("main_dir/test_object2.txt", mock.ANY, 
prefetch=True),
+            ]
+        )
+
+        gcs_hook.return_value.upload.assert_has_calls(
+            [
+                mock.call(
+                    bucket_name=TEST_BUCKET,
+                    object_name="test_object1.txt",
+                    mime_type=DEFAULT_MIME_TYPE,
+                    filename=mock.ANY,
+                    gzip=False,
+                ),
+                mock.call(
+                    bucket_name=TEST_BUCKET,
+                    object_name="test_object2.txt",
+                    mime_type=DEFAULT_MIME_TYPE,
+                    filename=mock.ANY,
+                    gzip=False,
+                ),
+            ]
+        )

Reply via email to