jghoman closed pull request #4070: [AIRFLOW-3235] Add list function in
AzureDataLakeHook
URL: https://github.com/apache/incubator-airflow/pull/4070
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/airflow/contrib/hooks/azure_data_lake_hook.py b/airflow/contrib/hooks/azure_data_lake_hook.py
index 74ae194809..2178738220 100644
--- a/airflow/contrib/hooks/azure_data_lake_hook.py
+++ b/airflow/contrib/hooks/azure_data_lake_hook.py
@@ -139,3 +139,15 @@ def download_file(self, local_path, remote_path, nthreads=64, overwrite=True,
overwrite=overwrite,
buffersize=buffersize,
blocksize=blocksize)
+
+ def list(self, path):
+ """
+ List files in Azure Data Lake Storage
+
+ :param path: full path/globstring to use to list files in ADLS
+ :type path: str
+ """
+ if "*" in path:
+ return self.connection.glob(path)
+ else:
+ return self.connection.walk(path)
diff --git a/tests/contrib/hooks/test_azure_data_lake_hook.py b/tests/contrib/hooks/test_azure_data_lake_hook.py
index d99581b829..af26f85d99 100644
--- a/tests/contrib/hooks/test_azure_data_lake_hook.py
+++ b/tests/contrib/hooks/test_azure_data_lake_hook.py
@@ -100,6 +100,24 @@ def test_download_file(self, mock_lib, mock_downloader):
nthreads=64, overwrite=True,
buffersize=4194304,
blocksize=4194304)
+ @mock.patch('airflow.contrib.hooks.azure_data_lake_hook.core.AzureDLFileSystem',
+ autospec=True)
+ @mock.patch('airflow.contrib.hooks.azure_data_lake_hook.lib', autospec=True)
+ def test_list_glob(self, mock_lib, mock_fs):
+ from airflow.contrib.hooks.azure_data_lake_hook import AzureDataLakeHook
+ hook = AzureDataLakeHook(azure_data_lake_conn_id='adl_test_key')
+ hook.list('file_path/*')
+ mock_fs.return_value.glob.assert_called_with('file_path/*')
+
+ @mock.patch('airflow.contrib.hooks.azure_data_lake_hook.core.AzureDLFileSystem',
+ autospec=True)
+ @mock.patch('airflow.contrib.hooks.azure_data_lake_hook.lib', autospec=True)
+ def test_list_walk(self, mock_lib, mock_fs):
+ from airflow.contrib.hooks.azure_data_lake_hook import AzureDataLakeHook
+ hook = AzureDataLakeHook(azure_data_lake_conn_id='adl_test_key')
+ hook.list('file_path/some_folder/')
+ mock_fs.return_value.walk.assert_called_with('file_path/some_folder/')
+
if __name__ == '__main__':
unittest.main()
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services