This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 7f9f923d64 [Fix #41763]: Redundant forward slash in SFTPToGCSOperator
when destination_path is not specified or have default value (#41928)
7f9f923d64 is described below
commit 7f9f923d6469760d298a2a0525a1cb098eb1cd99
Author: Mayuresh Kedari <[email protected]>
AuthorDate: Mon Sep 2 19:46:54 2024 +0530
[Fix #41763]: Redundant forward slash in SFTPToGCSOperator when
destination_path is not specified or have default value (#41928)
* fix the redundant forward slash issue if the path has one
* Add a test for the fix
* respect airflow standards linting
* update destination path to destination_path in comment for clarity
---------
Co-authored-by: Mayuresh Kedari <[email protected]>
---
.../google/cloud/transfers/sftp_to_gcs.py | 6 +++
.../google/cloud/transfers/test_sftp_to_gcs.py | 49 ++++++++++++++++++++++
2 files changed, 55 insertions(+)
diff --git a/airflow/providers/google/cloud/transfers/sftp_to_gcs.py
b/airflow/providers/google/cloud/transfers/sftp_to_gcs.py
index 12d9f056a0..57af1fdebe 100644
--- a/airflow/providers/google/cloud/transfers/sftp_to_gcs.py
+++ b/airflow/providers/google/cloud/transfers/sftp_to_gcs.py
@@ -133,6 +133,12 @@ class SFTPToGCSOperator(BaseOperator):
for file in files:
destination_path = file.replace(base_path,
self.destination_path, 1)
+ # See issue: https://github.com/apache/airflow/issues/41763
+ # If the destination_path is not specified, it defaults to an empty string. As a result,
+ # replacing base_path with an empty string is ineffective, causing the destination_path to
+ # retain the "/" prefix, if it has.
+ if not self.destination_path:
+ destination_path = destination_path.lstrip("/")
self._copy_single_object(gcs_hook, sftp_hook, file,
destination_path)
else:
diff --git a/tests/providers/google/cloud/transfers/test_sftp_to_gcs.py
b/tests/providers/google/cloud/transfers/test_sftp_to_gcs.py
index b6790c70f9..7755ef0f02 100644
--- a/tests/providers/google/cloud/transfers/test_sftp_to_gcs.py
+++ b/tests/providers/google/cloud/transfers/test_sftp_to_gcs.py
@@ -35,6 +35,7 @@ DEFAULT_MIME_TYPE = "application/octet-stream"
TEST_BUCKET = "test-bucket"
SOURCE_OBJECT_WILDCARD_FILENAME = "main_dir/test_object*.json"
+SOURCE_OBJECT_WILDCARD_TXT_FILENAME = "main_dir/test_object*.txt"
SOURCE_OBJECT_NO_WILDCARD = "main_dir/test_object3.json"
SOURCE_OBJECT_MULTIPLE_WILDCARDS = "main_dir/csv/*/test_*.csv"
@@ -252,3 +253,51 @@ class TestSFTPToGCSOperator:
err = ctx.value
assert "Only one wildcard '*' is allowed in source_path parameter" in
str(err)
+
+ @mock.patch("airflow.providers.google.cloud.transfers.sftp_to_gcs.GCSHook")
+ @mock.patch("airflow.providers.google.cloud.transfers.sftp_to_gcs.SFTPHook")
+ def test_execute_copy_with_wildcard_and_default_destination_path(self, sftp_hook, gcs_hook):
+ sftp_hook.return_value.get_tree_map.return_value = [
+ ["main_dir/test_object1.txt", "main_dir/test_object2.txt"],
+ [],
+ [],
+ ]
+
+ task = SFTPToGCSOperator(
+ task_id=TASK_ID,
+ source_path=SOURCE_OBJECT_WILDCARD_TXT_FILENAME,
+ destination_bucket=TEST_BUCKET,
+ gcp_conn_id=GCP_CONN_ID,
+ sftp_conn_id=SFTP_CONN_ID,
+ )
+ task.execute(None)
+
+ sftp_hook.return_value.get_tree_map.assert_called_with(
+ "main_dir", prefix="main_dir/test_object", delimiter=".txt"
+ )
+
+ sftp_hook.return_value.retrieve_file.assert_has_calls(
+ [
+ mock.call("main_dir/test_object1.txt", mock.ANY, prefetch=True),
+ mock.call("main_dir/test_object2.txt", mock.ANY, prefetch=True),
+ ]
+ )
+
+ gcs_hook.return_value.upload.assert_has_calls(
+ [
+ mock.call(
+ bucket_name=TEST_BUCKET,
+ object_name="test_object1.txt",
+ mime_type=DEFAULT_MIME_TYPE,
+ filename=mock.ANY,
+ gzip=False,
+ ),
+ mock.call(
+ bucket_name=TEST_BUCKET,
+ object_name="test_object2.txt",
+ mime_type=DEFAULT_MIME_TYPE,
+ filename=mock.ANY,
+ gzip=False,
+ ),
+ ]
+ )