This is an automated email from the ASF dual-hosted git repository.
eladkal pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 6c5b0776c0 Adding a parameter for exclusion of trashed files in
GoogleDriveHook (#25675)
6c5b0776c0 is described below
commit 6c5b0776c07c6317e6e2eea67964a07cdadf2394
Author: Alex Kruchkov <[email protected]>
AuthorDate: Sun Aug 14 10:22:30 2022 +0300
Adding a parameter for exclusion of trashed files in GoogleDriveHook
(#25675)
* Adding a parameter for exclusion of trashed files to GoogleDriveHook
---
airflow/providers/google/suite/hooks/drive.py | 22 +++++++++++++++++++---
tests/providers/google/suite/hooks/test_drive.py | 23 +++++++++++++++++++++--
2 files changed, 40 insertions(+), 5 deletions(-)
diff --git a/airflow/providers/google/suite/hooks/drive.py b/airflow/providers/google/suite/hooks/drive.py
index 94390503aa..3d1fb3cc2a 100644
--- a/airflow/providers/google/suite/hooks/drive.py
+++ b/airflow/providers/google/suite/hooks/drive.py
@@ -128,31 +128,47 @@ class GoogleDriveHook(GoogleBaseHook):
request = service.files().get_media(fileId=file_id)
return request
- def exists(self, folder_id: str, file_name: str, drive_id: Optional[str] = None):
+ def exists(
+ self, folder_id: str, file_name: str, drive_id: Optional[str] = None, *, include_trashed: bool = True
+ ):
"""
Checks to see if a file exists within a Google Drive folder
:param folder_id: The id of the Google Drive folder in which the file
resides
:param file_name: The name of a file in Google Drive
:param drive_id: Optional. The id of the shared Google Drive in which
the file resides.
+ :param include_trashed: Whether to include objects in trash or not, default True as in Google API.
+
:return: True if the file exists, False otherwise
:rtype: bool
"""
- return bool(self.get_file_id(folder_id=folder_id, file_name=file_name, drive_id=drive_id))
+ return bool(
+ self.get_file_id(
+ folder_id=folder_id, file_name=file_name, include_trashed=include_trashed, drive_id=drive_id
+ )
+ )
- def get_file_id(self, folder_id: str, file_name: str, drive_id: Optional[str] = None):
+ def get_file_id(
+ self, folder_id: str, file_name: str, drive_id: Optional[str] = None, *, include_trashed: bool = True
+ ):
"""
Returns the file id of a Google Drive file
:param folder_id: The id of the Google Drive folder in which the file
resides
:param file_name: The name of a file in Google Drive
:param drive_id: Optional. The id of the shared Google Drive in which
the file resides.
+ :param include_trashed: Whether to include objects in trash or not, default True as in Google API.
+
:return: Google Drive file id if the file exists, otherwise None
:rtype: str if file exists else None
"""
query = f"name = '{file_name}'"
if folder_id:
query += f" and parents in '{folder_id}'"
+
+ if not include_trashed:
+ query += " and trashed=false"
+
service = self.get_conn()
if drive_id:
files = (
diff --git a/tests/providers/google/suite/hooks/test_drive.py b/tests/providers/google/suite/hooks/test_drive.py
index a0ab2eb983..14e2cdb3ce 100644
--- a/tests/providers/google/suite/hooks/test_drive.py
+++ b/tests/providers/google/suite/hooks/test_drive.py
@@ -167,7 +167,9 @@ class TestGoogleDriveHook(unittest.TestCase):
file_name = "abc123.csv"
result_value = self.gdrive_hook.exists(folder_id=folder_id,
file_name=file_name, drive_id=drive_id)
- mock_method.assert_called_once_with(folder_id=folder_id, file_name=file_name, drive_id=drive_id)
+ mock_method.assert_called_once_with(
+ folder_id=folder_id, file_name=file_name, drive_id=drive_id, include_trashed=True
+ )
self.assertEqual(True, result_value)
@mock.patch("airflow.providers.google.suite.hooks.drive.GoogleDriveHook.get_file_id")
@@ -178,7 +180,24 @@ class TestGoogleDriveHook(unittest.TestCase):
file_name = "abc123.csv"
self.gdrive_hook.exists(folder_id=folder_id, file_name=file_name,
drive_id=drive_id)
- mock_method.assert_called_once_with(folder_id=folder_id, file_name=file_name, drive_id=drive_id)
+ mock_method.assert_called_once_with(
+ folder_id=folder_id, file_name=file_name, drive_id=drive_id, include_trashed=True
+ )
+
+ @mock.patch("airflow.providers.google.suite.hooks.drive.GoogleDriveHook.get_file_id")
+ @mock.patch("airflow.providers.google.suite.hooks.drive.GoogleDriveHook.get_conn")
+ def test_exists_when_trashed_is_false(self, mock_get_conn, mock_method):
+ folder_id = "abxy1z"
+ drive_id = "abc123"
+ file_name = "abc123.csv"
+ include_trashed = False
+
+ self.gdrive_hook.exists(
+ folder_id=folder_id, file_name=file_name, drive_id=drive_id, include_trashed=include_trashed
+ )
+ mock_method.assert_called_once_with(
+ folder_id=folder_id, file_name=file_name, drive_id=drive_id, include_trashed=False
+ )
@mock.patch("airflow.providers.google.suite.hooks.drive.GoogleDriveHook.get_conn")
def test_get_media_request(self, mock_get_conn):