Repository: beam
Updated Branches:
  refs/heads/master 774d8972e -> 4ec3366d6


[BEAM-2338] Fix the limit counter in gcsio reads


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/2e86f4fa
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/2e86f4fa
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/2e86f4fa

Branch: refs/heads/master
Commit: 2e86f4faf9cba1fc15969f3c1f3c3463a332f7ca
Parents: 774d897
Author: Sourabh Bajaj <[email protected]>
Authored: Tue May 23 13:24:27 2017 -0700
Committer: [email protected] <[email protected]>
Committed: Tue May 23 15:15:02 2017 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/io/gcp/gcsio.py      |  2 +-
 sdks/python/apache_beam/io/gcp/gcsio_test.py | 22 +++++++++++++++++++++-
 2 files changed, 22 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/2e86f4fa/sdks/python/apache_beam/io/gcp/gcsio.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/gcsio.py b/sdks/python/apache_beam/io/gcp/gcsio.py
index 7e21586..d43c8ba 100644
--- a/sdks/python/apache_beam/io/gcp/gcsio.py
+++ b/sdks/python/apache_beam/io/gcp/gcsio.py
@@ -392,7 +392,7 @@ class GcsIO(object):
         if fnmatch.fnmatch(item.name, name_pattern):
           file_name = 'gs://%s/%s' % (item.bucket, item.name)
           file_sizes[file_name] = item.size
-        counter += 1
+          counter += 1
         if limit is not None and counter >= limit:
           break
         if counter % 10000 == 0:

http://git-wip-us.apache.org/repos/asf/beam/blob/2e86f4fa/sdks/python/apache_beam/io/gcp/gcsio_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/gcsio_test.py b/sdks/python/apache_beam/io/gcp/gcsio_test.py
index 73d2213..06a8227 100644
--- a/sdks/python/apache_beam/io/gcp/gcsio_test.py
+++ b/sdks/python/apache_beam/io/gcp/gcsio_test.py
@@ -641,6 +641,7 @@ class TestGCSIO(unittest.TestCase):
         'apple/fish/cat',
         'apple/fish/cart',
         'apple/fish/carl',
+        'apple/fish/handle',
         'apple/dish/bat',
         'apple/dish/cat',
         'apple/dish/carl',
@@ -661,6 +662,7 @@ class TestGCSIO(unittest.TestCase):
             'apple/fish/cat',
             'apple/fish/cart',
             'apple/fish/carl',
+            'apple/fish/handle',
             'apple/dish/bat',
             'apple/dish/cat',
             'apple/dish/carl',
@@ -691,6 +693,12 @@ class TestGCSIO(unittest.TestCase):
             'apple/fish/bambi',
             'apple/fish/balloon',
         ]),
+        ('gs://gcsio-test/apple/f*/b*', [
+            'apple/fish/blubber',
+            'apple/fish/blowfish',
+            'apple/fish/bambi',
+            'apple/fish/balloon',
+        ]),
         ('gs://gcsio-test/apple/dish/[cb]at', [
             'apple/dish/bat',
             'apple/dish/cat',
@@ -726,6 +734,7 @@ class TestGCSIO(unittest.TestCase):
         ('apple/dish/bat', 13),
         ('apple/dish/cat', 14),
         ('apple/dish/carl', 15),
+        ('apple/fish/handle', 16),
     ]
     for (object_name, size) in object_names:
       file_name = 'gs://%s/%s' % (bucket_name, object_name)
@@ -739,7 +748,11 @@ class TestGCSIO(unittest.TestCase):
         ('gs://gcsio-test/apple/fish/car?', [
             ('apple/fish/cart', 11),
             ('apple/fish/carl', 12),
-        ])
+        ]),
+        ('gs://gcsio-test/*/f*/car?', [
+            ('apple/fish/cart', 11),
+            ('apple/fish/carl', 12),
+        ]),
     ]
     for file_pattern, expected_object_names in test_cases:
       expected_file_sizes = {'gs://%s/%s' % (bucket_name, o): s
@@ -747,6 +760,13 @@ class TestGCSIO(unittest.TestCase):
       self.assertEqual(
           self.gcs.size_of_files_in_glob(file_pattern), expected_file_sizes)
 
+    # Check if limits are followed correctly
+    limit = 1
+    for file_pattern, expected_object_names in test_cases:
+      expected_num_items = min(len(expected_object_names), limit)
+      self.assertEqual(
+          len(self.gcs.glob(file_pattern, limit)), expected_num_items)
+
   def test_size_of_files_in_glob_limited(self):
     bucket_name = 'gcsio-test'
     object_names = [

Reply via email to