Repository: incubator-beam
Updated Branches:
  refs/heads/master 061e6b5d8 -> e5bca60de


DebuggingWordCount now takes filter as an option

Previously it was hard-coded as "Flourish|stomach".
Now it is a PipelineOption with that as the default.

This allows "breaking" the pipeline by mis-specifying the pattern
without changing the code.


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/abb24cff
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/abb24cff
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/abb24cff

Branch: refs/heads/master
Commit: abb24cff479b714fec8a61d85af18bdea0a6aa16
Parents: 061e6b5
Author: bchambers <bchamb...@google.com>
Authored: Thu Mar 31 15:18:09 2016 -0700
Committer: bchambers <bchamb...@google.com>
Committed: Thu Mar 31 17:55:07 2016 -0700

----------------------------------------------------------------------
 .../dataflow/examples/DebuggingWordCount.java   | 20 +++++++++++++++++--
 .../src/main/java/DebuggingWordCount.java       | 21 ++++++++++++++++++--
 2 files changed, 37 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/abb24cff/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java
index 1f76181..331d7c6 100644
--- a/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java
@@ -17,9 +17,10 @@
  */
 package com.google.cloud.dataflow.examples;
 
-import com.google.cloud.dataflow.examples.WordCount.WordCountOptions;
 import com.google.cloud.dataflow.sdk.Pipeline;
 import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
 import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
 import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
 import com.google.cloud.dataflow.sdk.transforms.Aggregator;
@@ -151,6 +152,21 @@ public class DebuggingWordCount {
     }
   }
 
+  /**
+   * Options supported by {@link DebuggingWordCount}.
+   *
+   * <p>Inherits standard configuration options and all options defined in
+   * {@link WordCount.WordCountOptions}.
+   */
+  public static interface WordCountOptions extends WordCount.WordCountOptions {
+
+    @Description("Regex filter pattern to use in DebuggingWordCount. "
+        + "Only words matching this pattern will be counted.")
+    @Default.String("Flourish|stomach")
+    String getFilterPattern();
+    void setFilterPattern(String value);
+  }
+
   public static void main(String[] args) {
     WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
       .as(WordCountOptions.class);
@@ -159,7 +175,7 @@ public class DebuggingWordCount {
     PCollection<KV<String, Long>> filteredWords =
         p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile()))
          .apply(new WordCount.CountWords())
-         .apply(ParDo.of(new FilterTextFn("Flourish|stomach")));
+         .apply(ParDo.of(new FilterTextFn(options.getFilterPattern())));
 
     /**
      * Concept #4: DataflowAssert is a set of convenient PTransforms in the style of

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/abb24cff/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java
index 905670f..32fca4e 100644
--- a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java
@@ -17,9 +17,11 @@
  */
 package ${package};
 
-import ${package}.WordCount.WordCountOptions;
+import ${package}.WordCount;
 import com.google.cloud.dataflow.sdk.Pipeline;
 import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
 import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
 import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
 import com.google.cloud.dataflow.sdk.transforms.Aggregator;
@@ -150,6 +152,21 @@ public class DebuggingWordCount {
       }
     }
   }
+  
+  /**
+   * Options supported by {@link DebuggingWordCount}.
+   *
+   * <p>Inherits standard configuration options and all options defined in
+   * {@link WordCount.WordCountOptions}.
+   */
+  public static interface WordCountOptions extends WordCount.WordCountOptions {
+
+    @Description("Regex filter pattern to use in DebuggingWordCount. "
+        + "Only words matching this pattern will be counted.")
+    @Default.String("Flourish|stomach")
+    String getFilterPattern();
+    void setFilterPattern(String value);
+  }
 
   public static void main(String[] args) {
     WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
@@ -159,7 +176,7 @@ public class DebuggingWordCount {
     PCollection<KV<String, Long>> filteredWords =
         p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile()))
          .apply(new WordCount.CountWords())
-         .apply(ParDo.of(new FilterTextFn("Flourish|stomach")));
+         .apply(ParDo.of(new FilterTextFn(options.getFilterPattern())));
 
     /**
      * Concept #4: DataflowAssert is a set of convenient PTransforms in the style of

Reply via email to