[GitHub] flink pull request #2593: [FLINK-4329] Fix Streaming File Source Timestamps/...

mxm Wed, 05 Oct 2016 03:59:10 -0700

Github user mxm commented on a diff in the pull request:

    https://github.com/apache/flink/pull/2593#discussion_r81944268
  
    --- Diff: 
flink-fs-tests/src/test/java/org/apache/flink/hdfstests/ContinuousFileMonitoringTest.java
 ---
    @@ -106,6 +107,155 @@ public static void destroyHDFS() {
        //                                              TESTS
     
        @Test
    +   public void testFileReadingOperatorWithIngestionTime() throws Exception 
{
    +           Set<org.apache.hadoop.fs.Path> filesCreated = new HashSet<>();
    +           Map<Integer, String> expectedFileContents = new HashMap<>();
    +
    +           for(int i = 0; i < NO_OF_FILES; i++) {
    +                   Tuple2<org.apache.hadoop.fs.Path, String> file = 
fillWithData(hdfsURI, "file", i, "This is test line.");
    +                   filesCreated.add(file.f0);
    +                   expectedFileContents.put(i, file.f1);
    +           }
    +
    +           TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
    +           TypeInformation<String> typeInfo = 
TypeExtractor.getInputFormatTypes(format);
    +
    +           final long watermarkInterval = 10;
    +           ExecutionConfig executionConfig = new ExecutionConfig();
    +           executionConfig.setAutoWatermarkInterval(watermarkInterval);
    +
    +           ContinuousFileReaderOperator<String, ?> reader = new 
ContinuousFileReaderOperator<>(format);
    +           reader.setOutputType(typeInfo, executionConfig);
    +
    +           final TestTimeServiceProvider timeServiceProvider = new 
TestTimeServiceProvider();
    +           final OneInputStreamOperatorTestHarness<FileInputSplit, String> 
tester =
    +                   new OneInputStreamOperatorTestHarness<>(reader, 
executionConfig, timeServiceProvider);
    +           tester.setTimeCharacteristic(TimeCharacteristic.IngestionTime);
    +           tester.open();
    +
    +           Assert.assertEquals(TimeCharacteristic.IngestionTime, 
tester.getTimeCharacteristic());
    +
    +           // test that watermarks are correctly emitted
    +
    +           timeServiceProvider.setCurrentTime(201);
    +           timeServiceProvider.setCurrentTime(301);
    +           timeServiceProvider.setCurrentTime(401);
    +           timeServiceProvider.setCurrentTime(501);
    +
    +           int i = 0;
    +           for(Object line: tester.getOutput()) {
    +                   if (!(line instanceof Watermark)) {
    +                           Assert.fail("Only watermarks are expected here 
");
    +                   }
    +                   Watermark w = (Watermark) line;
    +                   Assert.assertEquals(200 + (i * 100), w.getTimestamp());
    +                   i++;
    +           }
    +
    +           // clear the output to get the elements only and the final 
watermark
    +           tester.getOutput().clear();
    +           Assert.assertEquals(0, tester.getOutput().size());
    +
    +           // create the necessary splits for the test
    +           FileInputSplit[] splits = format.createInputSplits(
    +                   
reader.getRuntimeContext().getNumberOfParallelSubtasks());
    +
    +           // and feed them to the operator
    +           Map<Integer, List<String>> actualFileContents = new HashMap<>();
    +
    +           long lastSeenWatermark = Long.MIN_VALUE;
    +           int lineCounter = 0;    // counter for the lines read from the 
splits
    +           int watermarkCounter = 0;
    +
    +           for(FileInputSplit split: splits) {
    +
    +                   // set the next "current processing time".
    +                   long nextTimestamp = 
timeServiceProvider.getCurrentProcessingTime() + watermarkInterval;
    +                   timeServiceProvider.setCurrentTime(nextTimestamp);
    +
    +                   // send the next split to be read and wait until it is 
fully read.
    +                   tester.processElement(new StreamRecord<>(split));
    +                   synchronized (tester.getCheckpointLock()) {
    +                           while (tester.getOutput().isEmpty() || 
tester.getOutput().size() != (LINES_PER_FILE + 1)) {
    +                                   tester.getCheckpointLock().wait(10);
    +                           }
    +                   }
    +
    +                   // verify that the results are the expected
    +                   for(Object line: tester.getOutput()) {
    +                           if (line instanceof StreamRecord) {
    +                                   StreamRecord<String> element = 
(StreamRecord<String>) line;
    +                                   lineCounter++;
    +
    +                                   Assert.assertEquals(nextTimestamp, 
element.getTimestamp());
    +
    +                                   int fileIdx = 
Character.getNumericValue(element.getValue().charAt(0));
    +                                   List<String> content = 
actualFileContents.get(fileIdx);
    +                                   if (content == null) {
    +                                           content = new ArrayList<>();
    +                                           actualFileContents.put(fileIdx, 
content);
    +                                   }
    +                                   content.add(element.getValue() + "\n");
    +                           } else if (line instanceof Watermark) {
    +                                   long watermark = ((Watermark) 
line).getTimestamp();
    +
    +                                   Assert.assertEquals(nextTimestamp - 
(nextTimestamp % watermarkInterval), watermark);
    +                                   Assert.assertTrue(watermark > 
lastSeenWatermark);
    +                                   watermarkCounter++;
    +
    +                                   lastSeenWatermark = watermark;
    +                           } else {
    +                                   Assert.fail("Unknown element in the 
list.");
    +                           }
    +                   }
    +
    +                   // clean the output to be ready for the next split
    +                   tester.getOutput().clear();
    +           }
    +
    +           // now we are processing one split after the other,
    +           // so all the elements must be here by now.
    +           Assert.assertEquals(NO_OF_FILES * LINES_PER_FILE, lineCounter);
    +
    +           // because we expect one watermark per split.
    +           Assert.assertEquals(NO_OF_FILES, watermarkCounter);
    --- End diff --
    
    This should be `Assert.assertEquals(splits.length, watermarkCounter);`.



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

[GitHub] flink pull request #2593: [FLINK-4329] Fix Streaming File Source Timestamps/...

Reply via email to