Repository: incubator-beam Updated Branches: refs/heads/master 061e6b5d8 -> e5bca60de
DebuggingWordCount now takes filter as an option. Previously it was hard-coded as "Flourish|stomach". Now it is a PipelineOption with that as the default. This allows "breaking" the pipeline by mis-specifying the pattern without changing the code. Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/abb24cff Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/abb24cff Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/abb24cff Branch: refs/heads/master Commit: abb24cff479b714fec8a61d85af18bdea0a6aa16 Parents: 061e6b5 Author: bchambers <bchamb...@google.com> Authored: Thu Mar 31 15:18:09 2016 -0700 Committer: bchambers <bchamb...@google.com> Committed: Thu Mar 31 17:55:07 2016 -0700 ---------------------------------------------------------------------- .../dataflow/examples/DebuggingWordCount.java | 20 +++++++++++++++++-- .../src/main/java/DebuggingWordCount.java | 21 ++++++++++++++++++-- 2 files changed, 37 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/abb24cff/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java ---------------------------------------------------------------------- diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java index 1f76181..331d7c6 100644 --- a/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java +++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java @@ -17,9 +17,10 @@ */ package com.google.cloud.dataflow.examples; -import com.google.cloud.dataflow.examples.WordCount.WordCountOptions; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; import com.google.cloud.dataflow.sdk.transforms.Aggregator; @@ -151,6 +152,21 @@ public class DebuggingWordCount { } } + /** + * Options supported by {@link DebuggingWordCount}. + * + * <p>Inherits standard configuration options and all options defined in + * {@link WordCount.WordCountOptions}. + */ + public static interface WordCountOptions extends WordCount.WordCountOptions { + + @Description("Regex filter pattern to use in DebuggingWordCount. " + + "Only words matching this pattern will be counted.") + @Default.String("Flourish|stomach") + String getFilterPattern(); + void setFilterPattern(String value); + } + public static void main(String[] args) { WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() .as(WordCountOptions.class); @@ -159,7 +175,7 @@ public class DebuggingWordCount { PCollection<KV<String, Long>> filteredWords = p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile())) .apply(new WordCount.CountWords()) - .apply(ParDo.of(new FilterTextFn("Flourish|stomach"))); + .apply(ParDo.of(new FilterTextFn(options.getFilterPattern()))); /** * Concept #4: DataflowAssert is a set of convenient PTransforms in the style of http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/abb24cff/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java ---------------------------------------------------------------------- diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java index 905670f..32fca4e 100644 --- 
a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java +++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java @@ -17,9 +17,11 @@ */ package ${package}; -import ${package}.WordCount.WordCountOptions; +import ${package}.WordCount; import com.google.cloud.dataflow.sdk.Pipeline; import com.google.cloud.dataflow.sdk.io.TextIO; +import com.google.cloud.dataflow.sdk.options.Default; +import com.google.cloud.dataflow.sdk.options.Description; import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory; import com.google.cloud.dataflow.sdk.testing.DataflowAssert; import com.google.cloud.dataflow.sdk.transforms.Aggregator; @@ -150,6 +152,21 @@ public class DebuggingWordCount { } } } + + /** + * Options supported by {@link DebuggingWordCount}. + * + * <p>Inherits standard configuration options and all options defined in + * {@link WordCount.WordCountOptions}. + */ + public static interface WordCountOptions extends WordCount.WordCountOptions { + + @Description("Regex filter pattern to use in DebuggingWordCount. " + + "Only words matching this pattern will be counted.") + @Default.String("Flourish|stomach") + String getFilterPattern(); + void setFilterPattern(String value); + } public static void main(String[] args) { WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation() @@ -159,7 +176,7 @@ public class DebuggingWordCount { PCollection<KV<String, Long>> filteredWords = p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile())) .apply(new WordCount.CountWords()) - .apply(ParDo.of(new FilterTextFn("Flourish|stomach"))); + .apply(ParDo.of(new FilterTextFn(options.getFilterPattern()))); /** * Concept #4: DataflowAssert is a set of convenient PTransforms in the style of