Repository: incubator-airflow Updated Branches: refs/heads/master 91dd36866 -> a47b2776f
[AIRFLOW-2557] Fix pagination for s3 The paginated S3 hook tests take over 120 seconds. The boto paginator supports setting the page size, so list_prefixes and list_keys now accept page_size and max_items and pass them through as PaginationConfig; the tests use page_size=1 with two objects instead of 5000, which reduces the time spent on tests. Closes #3455 from bolkedebruin/AIRFLOW-2557 Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/a47b2776 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/a47b2776 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/a47b2776 Branch: refs/heads/master Commit: a47b2776f159f8c439de89e68545e90b81a4397a Parents: 91dd368 Author: Bolke de Bruin <[email protected]> Authored: Sun Jun 3 22:29:26 2018 +0200 Committer: Fokko Driesprong <[email protected]> Committed: Sun Jun 3 22:29:26 2018 +0200 ---------------------------------------------------------------------- airflow/hooks/S3_hook.py | 30 ++++++++++++++++++++++++++---- tests/hooks/test_s3_hook.py | 18 +++++++++++------- 2 files changed, 37 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/a47b2776/airflow/hooks/S3_hook.py ---------------------------------------------------------------------- diff --git a/airflow/hooks/S3_hook.py b/airflow/hooks/S3_hook.py index 3d72275..b4f3ac3 100644 --- a/airflow/hooks/S3_hook.py +++ b/airflow/hooks/S3_hook.py @@ -77,7 +77,8 @@ class S3Hook(AwsHook): plist = self.list_prefixes(bucket_name, previous_level, delimiter) return False if plist is None else prefix in plist - def list_prefixes(self, bucket_name, prefix='', delimiter=''): + def list_prefixes(self, bucket_name, prefix='', delimiter='', + page_size=None, max_items=None): """ Lists prefixes in a bucket under prefix @@ -87,11 +88,21 @@ class S3Hook(AwsHook): :type prefix: str :param delimiter: the delimiter marks key hierarchy. 
:type delimiter: str + :param page_size: pagination size + :type page_size: int + :param max_items: maximum items to return + :type max_items: int """ + config = { + 'PageSize': page_size, + 'MaxItems': max_items, + } + paginator = self.get_conn().get_paginator('list_objects_v2') response = paginator.paginate(Bucket=bucket_name, Prefix=prefix, - Delimiter=delimiter) + Delimiter=delimiter, + PaginationConfig=config) has_results = False prefixes = [] @@ -104,7 +115,8 @@ class S3Hook(AwsHook): if has_results: return prefixes - def list_keys(self, bucket_name, prefix='', delimiter=''): + def list_keys(self, bucket_name, prefix='', delimiter='', + page_size=None, max_items=None): """ Lists keys in a bucket under prefix and not containing delimiter @@ -114,11 +126,21 @@ class S3Hook(AwsHook): :type prefix: str :param delimiter: the delimiter marks key hierarchy. :type delimiter: str + :param page_size: pagination size + :type page_size: int + :param max_items: maximum items to return + :type max_items: int """ + config = { + 'PageSize': page_size, + 'MaxItems': max_items, + } + paginator = self.get_conn().get_paginator('list_objects_v2') response = paginator.paginate(Bucket=bucket_name, Prefix=prefix, - Delimiter=delimiter) + Delimiter=delimiter, + PaginationConfig=config) has_results = False keys = [] http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/a47b2776/tests/hooks/test_s3_hook.py ---------------------------------------------------------------------- diff --git a/tests/hooks/test_s3_hook.py b/tests/hooks/test_s3_hook.py index 94d0e36..baac203 100644 --- a/tests/hooks/test_s3_hook.py +++ b/tests/hooks/test_s3_hook.py @@ -7,9 +7,9 @@ # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. 
You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -96,13 +96,16 @@ class TestS3Hook(unittest.TestCase): b = hook.get_bucket('bucket') b.create() - keys = ["%s/b" % i for i in range(5000)] - dirs = ["%s/" % i for i in range(5000)] + # we dont need to test the paginator + # that's covered by boto tests + keys = ["%s/b" % i for i in range(2)] + dirs = ["%s/" % i for i in range(2)] for key in keys: b.put_object(Key=key, Body=b'a') self.assertListEqual(sorted(dirs), - sorted(hook.list_prefixes('bucket', delimiter='/'))) + sorted(hook.list_prefixes('bucket', delimiter='/', + page_size=1))) @mock_s3 def test_list_keys(self): @@ -123,12 +126,13 @@ class TestS3Hook(unittest.TestCase): b = hook.get_bucket('bucket') b.create() - keys = [str(i) for i in range(5000)] + keys = [str(i) for i in range(2)] for key in keys: b.put_object(Key=key, Body=b'a') self.assertListEqual(sorted(keys), - sorted(hook.list_keys('bucket', delimiter='/'))) + sorted(hook.list_keys('bucket', delimiter='/', + page_size=1))) @mock_s3 def test_check_for_key(self):
