[ 
https://issues.apache.org/jira/browse/BEAM-3534?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16339227#comment-16339227
 ] 

ASF GitHub Bot commented on BEAM-3534:
--------------------------------------

iemejia closed pull request #4361: [BEAM-3534] Add a spark validates runner 
test for metrics sink in streaming mode
URL: https://github.com/apache/beam/pull/4361
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/runners/spark/src/test/java/org/apache/beam/runners/spark/aggregators/metrics/sink/SparkMetricsSinkTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/aggregators/metrics/sink/SparkMetricsSinkTest.java
index fff95cbcaf1..0f15c8cbf3e 100644
--- a/runners/spark/src/test/java/org/apache/beam/runners/spark/aggregators/metrics/sink/SparkMetricsSinkTest.java
+++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/aggregators/metrics/sink/SparkMetricsSinkTest.java
@@ -26,20 +26,31 @@
 import java.util.Arrays;
 import java.util.List;
 import java.util.Set;
+import org.apache.beam.runners.spark.ReuseSparkContextRule;
+import org.apache.beam.runners.spark.SparkPipelineOptions;
+import org.apache.beam.runners.spark.StreamingTest;
 import org.apache.beam.runners.spark.examples.WordCount;
+import org.apache.beam.runners.spark.io.CreateStream;
 import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.testing.PAssert;
 import org.apache.beam.sdk.testing.TestPipeline;
 import org.apache.beam.sdk.transforms.Create;
 import org.apache.beam.sdk.transforms.MapElements;
+import org.apache.beam.sdk.transforms.windowing.FixedWindows;
+import org.apache.beam.sdk.transforms.windowing.Window;
 import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.TimestampedValue;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
 import org.junit.Rule;
 import org.junit.Test;
+import org.junit.experimental.categories.Category;
 import org.junit.rules.ExternalResource;
 
 
 /**
- * A test that verifies Beam metrics are reported to Spark's metrics sink.
+ * A test that verifies Beam metrics are reported to Spark's metrics sink in both batch
+ * and streaming modes.
  */
 public class SparkMetricsSinkTest {
 
@@ -49,30 +60,60 @@
   @Rule
   public final TestPipeline pipeline = TestPipeline.create();
 
-  private void runPipeline() {
-    final List<String> words =
-        Arrays.asList("hi there", "hi", "hi sue bob", "hi sue", "", "bob hi");
+  @Rule
+  public final transient ReuseSparkContextRule noContextResue = 
ReuseSparkContextRule.no();
+
+  private static final List<String> WORDS = Arrays
+      .asList("hi there", "hi", "hi sue bob", "hi sue", "", "bob hi");
+  private static final Set<String> EXPECTED_COUNTS = ImmutableSet
+      .of("hi: 5", "there: 1", "sue: 2", "bob: 2");
 
-    final Set<String> expectedCounts =
-        ImmutableSet.of("hi: 5", "there: 1", "sue: 2", "bob: 2");
+  @Test
+  public void testInBatchMode() throws Exception {
+    assertThat(InMemoryMetrics.valueOf("emptyLines"), is(nullValue()));
 
     final PCollection<String> output =
         pipeline
-        .apply(Create.of(words).withCoder(StringUtf8Coder.of()))
+        .apply(Create.of(WORDS).withCoder(StringUtf8Coder.of()))
         .apply(new WordCount.CountWords())
         .apply(MapElements.via(new WordCount.FormatAsTextFn()));
-
-    PAssert.that(output).containsInAnyOrder(expectedCounts);
-
+    PAssert.that(output).containsInAnyOrder(EXPECTED_COUNTS);
     pipeline.run();
+
+    assertThat(InMemoryMetrics.<Double>valueOf("emptyLines"), is(1d));
   }
 
+  @Category(StreamingTest.class)
   @Test
-  public void testNamedMetric() throws Exception {
+  public void testInStreamingMode() throws Exception {
     assertThat(InMemoryMetrics.valueOf("emptyLines"), is(nullValue()));
 
-    runPipeline();
+    Instant instant = new Instant(0);
+    CreateStream<String> source =
+        CreateStream.of(StringUtf8Coder.of(), Duration.millis(
+            
(pipeline.getOptions().as(SparkPipelineOptions.class)).getBatchIntervalMillis()))
+            .emptyBatch()
+            .advanceWatermarkForNextBatch(instant)
+            .nextBatch(
+                TimestampedValue.of(WORDS.get(0), instant),
+                TimestampedValue.of(WORDS.get(1), instant),
+                TimestampedValue.of(WORDS.get(2), instant))
+            
.advanceWatermarkForNextBatch(instant.plus(Duration.standardSeconds(2L)))
+            .nextBatch(
+                TimestampedValue.of(WORDS.get(3), 
instant.plus(Duration.standardSeconds(1L))),
+                TimestampedValue.of(WORDS.get(4), 
instant.plus(Duration.standardSeconds(1L))),
+                TimestampedValue.of(WORDS.get(5), 
instant.plus(Duration.standardSeconds(1L))))
+            .advanceNextBatchWatermarkToInfinity();
+    PCollection<String> output = pipeline
+        .apply(source)
+        
.apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(3L)))
+            .withAllowedLateness(Duration.ZERO))
+        .apply(new WordCount.CountWords())
+        .apply(MapElements.via(new WordCount.FormatAsTextFn()));
+    PAssert.that(output).containsInAnyOrder(EXPECTED_COUNTS);
+    pipeline.run();
 
     assertThat(InMemoryMetrics.<Double>valueOf("emptyLines"), is(1d));
   }
+
 }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


> Add a spark validates runner test for metrics sink in streaming mode
> --------------------------------------------------------------------
>
>                 Key: BEAM-3534
>                 URL: https://issues.apache.org/jira/browse/BEAM-3534
>             Project: Beam
>          Issue Type: Sub-task
>          Components: runner-spark
>            Reporter: Etienne Chauchot
>            Assignee: Etienne Chauchot
>            Priority: Major
>
> There is also a metrics sink test in batch mode.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to