[ 
https://issues.apache.org/jira/browse/FLINK-8997?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16683414#comment-16683414
 ] 

ASF GitHub Bot commented on FLINK-8997:
---------------------------------------

kl0u commented on a change in pull request #7039: [FLINK-8997] Added sliding 
window aggregation to datastream test job
URL: https://github.com/apache/flink/pull/7039#discussion_r232569338
 
 

 ##########
 File path: 
flink-end-to-end-tests/flink-datastream-allround-test/src/main/java/org/apache/flink/streaming/tests/SlidingWindowCheckMapper.java
 ##########
 @@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.streaming.tests;
+
+import org.apache.flink.api.common.functions.RichFlatMapFunction;
+import org.apache.flink.api.common.state.ValueState;
+import org.apache.flink.api.common.state.ValueStateDescriptor;
+import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.api.java.typeutils.ListTypeInfo;
+import org.apache.flink.api.java.typeutils.TupleTypeInfo;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.util.Collector;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * This mapper validates sliding event time window. It checks each event 
belongs to appropriate number of consecutive
+ * windows.
+ */
+public class SlidingWindowCheckMapper extends 
RichFlatMapFunction<Tuple2<Integer, List<Event>>, String> {
+
+       private static final long serialVersionUID = -744070793650644485L;
+
+       /** This value state tracks previously seen events with the number of 
windows they appeared in. */
+       private transient ValueState<List<Tuple2<Event, Integer>>> 
eventsSeenSoFar;
+
+       private transient ValueState<Long> lastSequenceNumber;
+
+       private final int slideFactor;
+
+       SlidingWindowCheckMapper(int slideFactor) {
+               this.slideFactor = slideFactor;
+       }
+
+       @Override
+       public void open(Configuration parameters) {
+               ValueStateDescriptor<List<Tuple2<Event, Integer>>> 
previousWindowDescriptor =
+                       new ValueStateDescriptor<>("eventsSeenSoFar",
+                               new ListTypeInfo<>(new 
TupleTypeInfo<>(TypeInformation.of(Event.class), BasicTypeInfo.INT_TYPE_INFO)));
+
+               eventsSeenSoFar = 
getRuntimeContext().getState(previousWindowDescriptor);
+
+               ValueStateDescriptor<Long> lastSequenceNumberDescriptor =
+                       new ValueStateDescriptor<>("lastSequenceNumber", 
BasicTypeInfo.LONG_TYPE_INFO);
+
+               lastSequenceNumber = 
getRuntimeContext().getState(lastSequenceNumberDescriptor);
+       }
+
+       @Override
+       public void flatMap(Tuple2<Integer, List<Event>> value, 
Collector<String> out) throws Exception {
+               List<Tuple2<Event, Integer>> previousWindowValues = 
Optional.ofNullable(eventsSeenSoFar.value()).orElseGet(
+                       Collections::emptyList);
+
+               List<Event> newValues = value.f1;
+               Optional<Event> lastEventInWindow = 
verifyWindowContiguity(newValues, out);
+
+               Long lastSequenceNumberSeenSoFar = lastSequenceNumber.value();
+               List<Tuple2<Event, Integer>> newWindows =
+                       verifyPreviousOccurences(previousWindowValues, 
newValues, out, lastSequenceNumberSeenSoFar);
+
+               if (lastEventInWindow.isPresent()) {
+                       updateLastSeenSequenceNumber(lastEventInWindow.get(), 
lastSequenceNumberSeenSoFar);
+               }
+
+               eventsSeenSoFar.update(newWindows);
+       }
+
+       private void updateLastSeenSequenceNumber(
+                       Event lastEventInWindow,
+                       Long lastSequenceNumberSeenSoFar) throws IOException {
+               long lastSequenceNumberInWindow = 
lastEventInWindow.getSequenceNumber();
+               if (lastSequenceNumberSeenSoFar == null || 
lastSequenceNumberInWindow > lastSequenceNumberSeenSoFar) {
+                       lastSequenceNumber.update(lastSequenceNumberInWindow);
+               }
+       }
+
+       /**
+        * Verifies if all values from previous windows appear in the new one. 
Returns union of all events seen so far that
+        * were not seen <b>slideFactor</b> number of times yet.
+        */
+       private List<Tuple2<Event, Integer>> verifyPreviousOccurences(
+                       List<Tuple2<Event, Integer>> previousWindowValues,
+                       List<Event> newValues,
+                       Collector<String> out, Long 
lastSequenceNumberSeenSoFar) {
+               List<Tuple2<Event, Integer>> newEventsSeenSoFar = new 
ArrayList<>();
+               List<Event> seenWindows = new ArrayList<>();
+
+               for (Tuple2<Event, Integer> windowValue : previousWindowValues) 
{
+                       if (!newValues.contains(windowValue.f0)) {
+                               printEventNotSeenAlertMessage(windowValue, 
newValues, out);
+                       } else {
+                               seenWindows.add(windowValue.f0);
+                               
preserveOrDiscardIfSeenSlideFactorTimes(newEventsSeenSoFar, windowValue);
+                       }
+               }
+
+               addNotSeenValues(newEventsSeenSoFar, newValues, seenWindows, 
lastSequenceNumberSeenSoFar, out);
+
+               return newEventsSeenSoFar;
+       }
+
+       private void addNotSeenValues(
+                       List<Tuple2<Event, Integer>> newEventsSeenSoFar,
+                       List<Event> newValues,
+                       List<Event> seenWindows,
+                       Long lastSequenceNumberSeenSoFar,
+                       Collector<String> out) {
+               newValues.stream()
+                       .filter(e -> !seenWindows.contains(e))
+                       .forEach(e -> {
+                               if (lastSequenceNumberSeenSoFar == null || 
e.getSequenceNumber() > lastSequenceNumberSeenSoFar) {
 
 Review comment:
   The
   ```
   if (lastSequenceNumberSeenSoFar == null || e.getSequenceNumber() > 
lastSequenceNumberSeenSoFar) {
                                        newEventsSeenSoFar.add(Tuple2.of(e, 1));
                                } 
   ```
   should go in the method that verifies the sequence numbers, which is 
`verifyWindowContiguity`. It seems more closely related to that check, right?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Add sliding window aggregation to the job
> -----------------------------------------
>
>                 Key: FLINK-8997
>                 URL: https://issues.apache.org/jira/browse/FLINK-8997
>             Project: Flink
>          Issue Type: Sub-task
>          Components: Tests
>    Affects Versions: 1.5.0
>            Reporter: Stefan Richter
>            Assignee: Dawid Wysakowicz
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 1.7.0
>
>
> The test job should also test windowing. Sliding windows are probably the 
> most demanding form, so this would be a good pick for the test.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to