Repository: beam Updated Branches: refs/heads/master 774d8972e -> 4ec3366d6
[BEAM-2338] Fix the limit counter in gcsio reads Project: http://git-wip-us.apache.org/repos/asf/beam/repo Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/2e86f4fa Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/2e86f4fa Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/2e86f4fa Branch: refs/heads/master Commit: 2e86f4faf9cba1fc15969f3c1f3c3463a332f7ca Parents: 774d897 Author: Sourabh Bajaj <[email protected]> Authored: Tue May 23 13:24:27 2017 -0700 Committer: [email protected] <[email protected]> Committed: Tue May 23 15:15:02 2017 -0700 ---------------------------------------------------------------------- sdks/python/apache_beam/io/gcp/gcsio.py | 2 +- sdks/python/apache_beam/io/gcp/gcsio_test.py | 22 +++++++++++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/beam/blob/2e86f4fa/sdks/python/apache_beam/io/gcp/gcsio.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/io/gcp/gcsio.py b/sdks/python/apache_beam/io/gcp/gcsio.py index 7e21586..d43c8ba 100644 --- a/sdks/python/apache_beam/io/gcp/gcsio.py +++ b/sdks/python/apache_beam/io/gcp/gcsio.py @@ -392,7 +392,7 @@ class GcsIO(object): if fnmatch.fnmatch(item.name, name_pattern): file_name = 'gs://%s/%s' % (item.bucket, item.name) file_sizes[file_name] = item.size - counter += 1 + counter += 1 if limit is not None and counter >= limit: break if counter % 10000 == 0: http://git-wip-us.apache.org/repos/asf/beam/blob/2e86f4fa/sdks/python/apache_beam/io/gcp/gcsio_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/io/gcp/gcsio_test.py b/sdks/python/apache_beam/io/gcp/gcsio_test.py index 73d2213..06a8227 100644 --- a/sdks/python/apache_beam/io/gcp/gcsio_test.py +++ b/sdks/python/apache_beam/io/gcp/gcsio_test.py @@ -641,6 +641,7 @@ class TestGCSIO(unittest.TestCase): 'apple/fish/cat', 'apple/fish/cart', 'apple/fish/carl', + 'apple/fish/handle', 'apple/dish/bat', 'apple/dish/cat', 'apple/dish/carl', @@ -661,6 +662,7 @@ class TestGCSIO(unittest.TestCase): 'apple/fish/cat', 'apple/fish/cart', 'apple/fish/carl', + 'apple/fish/handle', 'apple/dish/bat', 'apple/dish/cat', 'apple/dish/carl', @@ -691,6 +693,12 @@ class TestGCSIO(unittest.TestCase): 'apple/fish/bambi', 'apple/fish/balloon', ]), + ('gs://gcsio-test/apple/f*/b*', [ + 'apple/fish/blubber', + 'apple/fish/blowfish', + 'apple/fish/bambi', + 'apple/fish/balloon', + ]), ('gs://gcsio-test/apple/dish/[cb]at', [ 'apple/dish/bat', 'apple/dish/cat', @@ -726,6 +734,7 @@ class TestGCSIO(unittest.TestCase): ('apple/dish/bat', 13), ('apple/dish/cat', 14), ('apple/dish/carl', 15), + ('apple/fish/handle', 16), ] for (object_name, size) in object_names: file_name = 'gs://%s/%s' % (bucket_name, object_name) @@ -739,7 +748,11 @@ class TestGCSIO(unittest.TestCase): ('gs://gcsio-test/apple/fish/car?', [ ('apple/fish/cart', 11), ('apple/fish/carl', 12), - ]) + ]), + ('gs://gcsio-test/*/f*/car?', [ + ('apple/fish/cart', 11), + ('apple/fish/carl', 12), + ]), ] for file_pattern, expected_object_names in test_cases: expected_file_sizes = {'gs://%s/%s' % (bucket_name, o): s @@ -747,6 +760,13 @@ class TestGCSIO(unittest.TestCase): self.assertEqual( self.gcs.size_of_files_in_glob(file_pattern), expected_file_sizes) + # Check if limits are followed correctly + limit = 1 + for file_pattern, expected_object_names in test_cases: + expected_num_items = min(len(expected_object_names), limit) + self.assertEqual( + len(self.gcs.glob(file_pattern, limit)), expected_num_items) + def test_size_of_files_in_glob_limited(self): bucket_name = 'gcsio-test' object_names = [
