Repository: incubator-beam Updated Branches: refs/heads/python-sdk 4c1ad11d1 -> 7d0758b64
Making sure that GcsBufferedReader implements the iterator protocol. Adding raise StopIteration to GcsBufferedReader.next. Adding unit tests, and __next__ for Python 3. Fixing some lint issues in unit test. Improving test readability. Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/07c7aafb Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/07c7aafb Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/07c7aafb Branch: refs/heads/python-sdk Commit: 07c7aafbab1f4e700f30467dbd84cea3ca19d76e Parents: 4c1ad11 Author: polecito...@gmail.com <polecito...@gmail.com> Authored: Thu Sep 29 15:41:25 2016 -0700 Committer: Robert Bradshaw <rober...@gmail.com> Committed: Fri Oct 7 16:40:23 2016 -0700 ---------------------------------------------------------------------- sdks/python/apache_beam/io/gcsio.py | 16 ++++++++++++++++ sdks/python/apache_beam/io/gcsio_test.py | 21 +++++++++++++++++++++ 2 files changed, 37 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/07c7aafb/sdks/python/apache_beam/io/gcsio.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/io/gcsio.py b/sdks/python/apache_beam/io/gcsio.py index 5a83004..9fcce5b 100644 --- a/sdks/python/apache_beam/io/gcsio.py +++ b/sdks/python/apache_beam/io/gcsio.py @@ -307,6 +307,22 @@ class GcsBufferedReader(object): def _get_object_metadata(self, get_request): return self.client.objects.Get(get_request) + def __iter__(self): + return self + + def __next__(self): + """Read one line delimited by '\\n' from the file. + """ + return self.next() + + def next(self): + """Read one line delimited by '\\n' from the file. + """ + line = self.readline() + if not line: + raise StopIteration + return line + def read(self, size=-1): """Read data from a GCS file. 
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/07c7aafb/sdks/python/apache_beam/io/gcsio_test.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/io/gcsio_test.py b/sdks/python/apache_beam/io/gcsio_test.py index 919e9d2..2e9945a 100644 --- a/sdks/python/apache_beam/io/gcsio_test.py +++ b/sdks/python/apache_beam/io/gcsio_test.py @@ -341,6 +341,27 @@ class TestGCSIO(unittest.TestCase): f.read(end - start + 1), random_file.contents[start:end + 1]) self.assertEqual(f.tell(), end + 1) + def test_file_iterator(self): + file_name = 'gs://gcsio-test/iterating_file' + lines = [] + line_count = 10 + for _ in range(line_count): + line_length = random.randint(100, 500) + line = os.urandom(line_length).replace('\n', ' ') + '\n' + lines.append(line) + + contents = ''.join(lines) + bucket, name = gcsio.parse_gcs_path(file_name) + self.client.objects.add_file(FakeFile(bucket, name, contents, 1)) + + f = self.gcs.open(file_name) + + read_lines = 0 + for line in f: + read_lines += 1 + + self.assertEqual(read_lines, line_count) + def test_file_read_line(self): file_name = 'gs://gcsio-test/read_line_file' lines = []