mjsax commented on a change in pull request #11252: URL: https://github.com/apache/kafka/pull/11252#discussion_r708758100
########## File path: streams/src/main/java/org/apache/kafka/streams/kstream/internals/KStreamKStreamJoin.java ########## @@ -209,37 +210,39 @@ private void emitNonJoinedOuterRecords(final WindowStore<KeyAndJoinSide<K>, Left // reset to MAX_VALUE in case the store is empty sharedTimeTracker.minTime = Long.MAX_VALUE; - try (final KeyValueIterator<Windowed<KeyAndJoinSide<K>>, LeftOrRightValue> it = store.all()) { + try (final KeyValueIterator<TimestampedKeyAndJoinSide<K>, LeftOrRightValue<V1, V2>> it = store.all()) { while (it.hasNext()) { - final KeyValue<Windowed<KeyAndJoinSide<K>>, LeftOrRightValue> record = it.next(); + final KeyValue<TimestampedKeyAndJoinSide<K>, LeftOrRightValue<V1, V2>> record = it.next(); - final Windowed<KeyAndJoinSide<K>> windowedKey = record.key; - final LeftOrRightValue value = record.value; - sharedTimeTracker.minTime = windowedKey.window().start(); + final TimestampedKeyAndJoinSide<K> timestampedKeyAndJoinSide = record.key; + final LeftOrRightValue<V1, V2> value = record.value; + final K key = timestampedKeyAndJoinSide.getKey(); + final long timestamp = timestampedKeyAndJoinSide.getTimestamp(); + sharedTimeTracker.minTime = timestamp; // Skip next records if window has not closed - if (windowedKey.window().start() + joinAfterMs + joinGraceMs >= sharedTimeTracker.streamTime) { + if (timestamp + joinAfterMs + joinGraceMs >= sharedTimeTracker.streamTime) { break; } - final K key = windowedKey.key().getKey(); - final long time = windowedKey.window().start(); - final R nullJoinedValue; if (isLeftSide) { nullJoinedValue = joiner.apply(key, - (V1) value.getLeftValue(), - (V2) value.getRightValue()); + value.getLeftValue(), + value.getRightValue()); } else { nullJoinedValue = joiner.apply(key, - (V1) value.getRightValue(), - (V2) value.getLeftValue()); + (V1) value.getRightValue(), + (V2) value.getLeftValue()); } - context().forward(key, nullJoinedValue, To.all().withTimestamp(time)); + context().forward(key, nullJoinedValue, 
To.all().withTimestamp(timestamp)); - // Delete the key from the outer window store now it is emitted - store.put(record.key.key(), null, record.key.window().start()); + // blind-delete the key from the outer window store now it is emitted; + // we may delete some values of the same key which has not been iterated yet, + // but since the iterator would still return that key this is fine. + // we do not use the delete() call since that would incur an extra get + store.put(timestampedKeyAndJoinSide, null); Review comment: If we do the blind delete of the full list, is there not a potential data-loss scenario? Assume you have a list of 2+ left-join candidates. We process the first one and do the delete, and then we crash before processing the others in the list. After restart, we might never emit the left-join result for those records? Thus, would we need to do the delete only after we have exhausted the list, i.e., when we move from the current key-timestamp to a different key or timestamp? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org