Repository: incubator-beam Updated Branches: refs/heads/python-sdk 67a769a9a -> e8c39c798
Add type hints to bigshuffle to avoid pickle overhead.


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/f858ea93
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/f858ea93
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/f858ea93

Branch: refs/heads/python-sdk
Commit: f858ea9335b38c67778f47de63e1d1d16dc79fee
Parents: 67a769a
Author: Robert Bradshaw <rober...@google.com>
Authored: Tue Jul 12 13:05:46 2016 -0700
Committer: Robert Bradshaw <rober...@google.com>
Committed: Thu Jul 14 10:17:09 2016 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/examples/cookbook/bigshuffle.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/f858ea93/sdks/python/apache_beam/examples/cookbook/bigshuffle.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/examples/cookbook/bigshuffle.py b/sdks/python/apache_beam/examples/cookbook/bigshuffle.py
index 692bd52..0b5da02 100644
--- a/sdks/python/apache_beam/examples/cookbook/bigshuffle.py
+++ b/sdks/python/apache_beam/examples/cookbook/bigshuffle.py
@@ -48,11 +48,14 @@ def run(argv=None):
   p = beam.Pipeline(argv=pipeline_args)
 
   # Read the text file[pattern] into a PCollection.
-  lines = p | beam.io.Read('read', beam.io.TextFileSource(known_args.input))
+  lines = p | beam.io.Read(
+      'read', beam.io.TextFileSource(known_args.input,
+                                     coder=beam.coders.BytesCoder()))
 
   # Count the occurrences of each word.
   output = (lines
-            | beam.Map('split', lambda x: (x[:10], x[10:99]))
+            | beam.Map('split', lambda x: (x[:10], x[10:99])
+                      ).with_output_types(beam.typehints.KV[str, str])
             | beam.GroupByKey('group')
             | beam.FlatMap(
                 'format',