Repository: incubator-airflow
Updated Branches:
  refs/heads/master 91dd36866 -> a47b2776f


[AIRFLOW-2557] Fix pagination for s3

Paged tests for S3 are taking over 120 seconds. There is
functionality to set the page size. This reduces the time
spent on tests.

Closes #3455 from bolkedebruin/AIRFLOW-2557


Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/a47b2776
Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/a47b2776
Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/a47b2776

Branch: refs/heads/master
Commit: a47b2776f159f8c439de89e68545e90b81a4397a
Parents: 91dd368
Author: Bolke de Bruin <[email protected]>
Authored: Sun Jun 3 22:29:26 2018 +0200
Committer: Fokko Driesprong <[email protected]>
Committed: Sun Jun 3 22:29:26 2018 +0200

----------------------------------------------------------------------
 airflow/hooks/S3_hook.py    | 30 ++++++++++++++++++++++++++----
 tests/hooks/test_s3_hook.py | 18 +++++++++++-------
 2 files changed, 37 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/a47b2776/airflow/hooks/S3_hook.py
----------------------------------------------------------------------
diff --git a/airflow/hooks/S3_hook.py b/airflow/hooks/S3_hook.py
index 3d72275..b4f3ac3 100644
--- a/airflow/hooks/S3_hook.py
+++ b/airflow/hooks/S3_hook.py
@@ -77,7 +77,8 @@ class S3Hook(AwsHook):
         plist = self.list_prefixes(bucket_name, previous_level, delimiter)
         return False if plist is None else prefix in plist
 
-    def list_prefixes(self, bucket_name, prefix='', delimiter=''):
+    def list_prefixes(self, bucket_name, prefix='', delimiter='',
+                      page_size=None, max_items=None):
         """
         Lists prefixes in a bucket under prefix
 
@@ -87,11 +88,21 @@ class S3Hook(AwsHook):
         :type prefix: str
         :param delimiter: the delimiter marks key hierarchy.
         :type delimiter: str
+        :param page_size: pagination size
+        :type page_size: int
+        :param max_items: maximum items to return
+        :type max_items: int
         """
+        config = {
+            'PageSize': page_size,
+            'MaxItems': max_items,
+        }
+
         paginator = self.get_conn().get_paginator('list_objects_v2')
         response = paginator.paginate(Bucket=bucket_name,
                                       Prefix=prefix,
-                                      Delimiter=delimiter)
+                                      Delimiter=delimiter,
+                                      PaginationConfig=config)
 
         has_results = False
         prefixes = []
@@ -104,7 +115,8 @@ class S3Hook(AwsHook):
         if has_results:
             return prefixes
 
-    def list_keys(self, bucket_name, prefix='', delimiter=''):
+    def list_keys(self, bucket_name, prefix='', delimiter='',
+                  page_size=None, max_items=None):
         """
         Lists keys in a bucket under prefix and not containing delimiter
 
@@ -114,11 +126,21 @@ class S3Hook(AwsHook):
         :type prefix: str
         :param delimiter: the delimiter marks key hierarchy.
         :type delimiter: str
+        :param page_size: pagination size
+        :type page_size: int
+        :param max_items: maximum items to return
+        :type max_items: int
         """
+        config = {
+            'PageSize': page_size,
+            'MaxItems': max_items,
+        }
+
         paginator = self.get_conn().get_paginator('list_objects_v2')
         response = paginator.paginate(Bucket=bucket_name,
                                       Prefix=prefix,
-                                      Delimiter=delimiter)
+                                      Delimiter=delimiter,
+                                      PaginationConfig=config)
 
         has_results = False
         keys = []

http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/a47b2776/tests/hooks/test_s3_hook.py
----------------------------------------------------------------------
diff --git a/tests/hooks/test_s3_hook.py b/tests/hooks/test_s3_hook.py
index 94d0e36..baac203 100644
--- a/tests/hooks/test_s3_hook.py
+++ b/tests/hooks/test_s3_hook.py
@@ -7,9 +7,9 @@
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
-# 
+#
 #   http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -96,13 +96,16 @@ class TestS3Hook(unittest.TestCase):
         b = hook.get_bucket('bucket')
         b.create()
 
-        keys = ["%s/b" % i for i in range(5000)]
-        dirs = ["%s/" % i for i in range(5000)]
+        # we dont need to test the paginator
+        # that's covered by boto tests
+        keys = ["%s/b" % i for i in range(2)]
+        dirs = ["%s/" % i for i in range(2)]
         for key in keys:
             b.put_object(Key=key, Body=b'a')
 
         self.assertListEqual(sorted(dirs),
-                             sorted(hook.list_prefixes('bucket', delimiter='/')))
+                             sorted(hook.list_prefixes('bucket', delimiter='/',
+                                                       page_size=1)))
 
     @mock_s3
     def test_list_keys(self):
@@ -123,12 +126,13 @@ class TestS3Hook(unittest.TestCase):
         b = hook.get_bucket('bucket')
         b.create()
 
-        keys = [str(i) for i in range(5000)]
+        keys = [str(i) for i in range(2)]
         for key in keys:
             b.put_object(Key=key, Body=b'a')
 
         self.assertListEqual(sorted(keys),
-                             sorted(hook.list_keys('bucket', delimiter='/')))
+                             sorted(hook.list_keys('bucket', delimiter='/',
+                                                   page_size=1)))
 
     @mock_s3
     def test_check_for_key(self):

Reply via email to