This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new e625e7fb642 feat: retrieve sftp file attrs onces instead multiple time (#44625)
e625e7fb642 is described below

commit e625e7fb6422cd7762d4e76d012606275a45f7cc
Author: Sebastian Daum <[email protected]>
AuthorDate: Fri Dec 6 08:17:13 2024 +0100

    feat: retrieve sftp file attrs onces instead multiple time (#44625)
---
 providers/src/airflow/providers/sftp/hooks/sftp.py | 22 +++++++++++++++-------
 providers/tests/sftp/hooks/test_sftp.py            |  8 +++++++-
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/providers/src/airflow/providers/sftp/hooks/sftp.py b/providers/src/airflow/providers/sftp/hooks/sftp.py
index 1a826cd645c..f4eb0fedace 100644
--- a/providers/src/airflow/providers/sftp/hooks/sftp.py
+++ b/providers/src/airflow/providers/sftp/hooks/sftp.py
@@ -168,6 +168,15 @@ class SFTPHook(SSHHook):
         files = sorted(conn.listdir(path))
         return files
 
+    def list_directory_with_attr(self, path: str) -> list[paramiko.SFTPAttributes]:
+        """
+        List files in a directory on the remote system including their SFTPAttributes.
+
+        :param path: full path to the remote directory to list
+        """
+        conn = self.get_conn()
+        return [file for file in conn.listdir_attr(path)]
+
     def mkdir(self, path: str, mode: int = 0o777) -> None:
         """
         Create a directory on the remote system.
@@ -344,10 +353,9 @@ class SFTPHook(SSHHook):
             (form: ``func(str)``)
         :param bool recurse: *Default: True* - should it recurse
         """
-        conn = self.get_conn()
-        for entry in self.list_directory(path):
-            pathname = os.path.join(path, entry)
-            mode = conn.stat(pathname).st_mode
+        for entry in self.list_directory_with_attr(path):
+            pathname = os.path.join(path, entry.filename)
+            mode = entry.st_mode
             if stat.S_ISDIR(mode):  # type: ignore
                 # It's a directory, call the dcallback function
                 dcallback(pathname)
@@ -423,9 +431,9 @@ class SFTPHook(SSHHook):
         :return: list of string containing the found files, or an empty list if none matched
         """
         matched_files = []
-        for file in self.list_directory(path):
-            if fnmatch(file, fnmatch_pattern):
-                matched_files.append(file)
+        for file in self.list_directory_with_attr(path):
+            if fnmatch(file.filename, fnmatch_pattern):
+                matched_files.append(file.filename)
 
         return matched_files
 
diff --git a/providers/tests/sftp/hooks/test_sftp.py b/providers/tests/sftp/hooks/test_sftp.py
index 5f2c34a8cc0..b9c1c33683c 100644
--- a/providers/tests/sftp/hooks/test_sftp.py
+++ b/providers/tests/sftp/hooks/test_sftp.py
@@ -117,6 +117,12 @@ class TestSFTPHook:
         output = self.hook.list_directory(path=os.path.join(self.temp_dir, TMP_DIR_FOR_TESTS))
         assert output == [SUB_DIR, FIFO_FOR_TESTS]
 
+    def test_list_directory_with_attr(self):
+        output = self.hook.list_directory_with_attr(path=os.path.join(self.temp_dir, TMP_DIR_FOR_TESTS))
+        file_names = [f.filename for f in output]
+        assert all(isinstance(f, paramiko.SFTPAttributes) for f in output)
+        assert sorted(file_names) == [SUB_DIR, FIFO_FOR_TESTS]
+
     def test_mkdir(self):
         new_dir_name = "mk_dir"
         self.hook.mkdir(os.path.join(self.temp_dir, TMP_DIR_FOR_TESTS, new_dir_name))
@@ -457,7 +463,7 @@ class TestSFTPHook:
 
     def test_get_all_matched_files(self):
         output = self.hook.get_files_by_pattern(self.temp_dir, "test_*.txt")
-        assert output == [TMP_FILE_FOR_TESTS, ANOTHER_FILE_FOR_TESTS]
+        assert sorted(output) == [TMP_FILE_FOR_TESTS, ANOTHER_FILE_FOR_TESTS]
 
     def test_get_matched_files_with_different_pattern(self):
         output = self.hook.get_files_by_pattern(self.temp_dir, "*_file_*.txt")

Reply via email to