Repository: beam Updated Branches: refs/heads/master b44e99b09 -> 8ecc45085
Remove templates from wordcount example Project: http://git-wip-us.apache.org/repos/asf/beam/repo Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/fd93ff65 Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/fd93ff65 Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/fd93ff65 Branch: refs/heads/master Commit: fd93ff658b2da36c4f7c7c5289484fb6493b609b Parents: b44e99b Author: Maria Garcia Herrero <[email protected]> Authored: Tue May 9 13:58:19 2017 -0700 Committer: Ahmet Altay <[email protected]> Committed: Tue May 9 15:44:51 2017 -0700 ---------------------------------------------------------------------- sdks/python/apache_beam/examples/wordcount.py | 31 ++++++++++------------ 1 file changed, 14 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/beam/blob/fd93ff65/sdks/python/apache_beam/examples/wordcount.py ---------------------------------------------------------------------- diff --git a/sdks/python/apache_beam/examples/wordcount.py b/sdks/python/apache_beam/examples/wordcount.py index 8c6a485..e7e542a 100644 --- a/sdks/python/apache_beam/examples/wordcount.py +++ b/sdks/python/apache_beam/examples/wordcount.py @@ -19,6 +19,7 @@ from __future__ import absolute_import +import argparse import logging import re @@ -66,29 +67,25 @@ class WordExtractingDoFn(beam.DoFn): def run(argv=None): """Main entry point; defines and runs the wordcount pipeline.""" - class WordcountOptions(PipelineOptions): - @classmethod - def _add_argparse_args(cls, parser): - parser.add_value_provider_argument( - '--input', - dest='input', - default='gs://dataflow-samples/shakespeare/kinglear.txt', - help='Input file to process.') - parser.add_value_provider_argument( - '--output', - dest='output', - required=True, - help='Output file to write results to.') - pipeline_options = PipelineOptions(argv) - wordcount_options = pipeline_options.view_as(WordcountOptions) + parser = argparse.ArgumentParser() + parser.add_argument('--input', + dest='input', + default='gs://dataflow-samples/shakespeare/kinglear.txt', + help='Input file to process.') + parser.add_argument('--output', + dest='output', + required=True, + help='Output file to write results to.') + known_args, pipeline_args = parser.parse_known_args(argv) # We use the save_main_session option because one or more DoFn's in this # workflow rely on global context (e.g., a module imported at module level). + pipeline_options = PipelineOptions(pipeline_args) pipeline_options.view_as(SetupOptions).save_main_session = True p = beam.Pipeline(options=pipeline_options) # Read the text file[pattern] into a PCollection. - lines = p | 'read' >> ReadFromText(wordcount_options.input) + lines = p | 'read' >> ReadFromText(known_args.input) # Count the occurrences of each word. counts = (lines @@ -103,7 +100,7 @@ def run(argv=None): # Write the output using a "Write" transform that has side effects. # pylint: disable=expression-not-assigned - output | 'write' >> WriteToText(wordcount_options.output) + output | 'write' >> WriteToText(known_args.output) # Actually run the pipeline (all operations above are deferred). result = p.run()
