boyuanzz commented on a change in pull request #11922:
URL: https://github.com/apache/beam/pull/11922#discussion_r436072548
##########
File path:
sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java
##########
@@ -296,6 +299,12 @@ public void accept(WindowedValue input) throws Exception {
/** Only valid during {@code processElement...} methods, null otherwise. */
private WindowedValue<InputT> currentElement;
+ /**
+ * Only valid during {@link #processElementForSizedElementAndRestriction}
and {@link
+ * #processElementForSizedElementAndRestriction}.
Review comment:
Duplicated {@link #processElementForSizedElementAndRestriction}?
##########
File path:
sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java
##########
@@ -577,26 +586,83 @@ public Instant timestamp(DoFn<InputT, OutputT> doFn) {
switch (pTransform.getSpec().getUrn()) {
case PTransformTranslation.SPLITTABLE_PROCESS_ELEMENTS_URN:
this.convertSplitResultToWindowedSplitResult =
- (splitResult, watermarkEstimatorState) ->
- WindowedSplitResult.forRoots(
- WindowedValue.of(
- KV.of(
- currentElement.getValue(),
- KV.of(splitResult.getPrimary(),
currentWatermarkEstimatorState)),
- currentElement.getTimestamp(),
- currentWindow,
- currentElement.getPane()),
- WindowedValue.of(
- KV.of(
- currentElement.getValue(),
- KV.of(splitResult.getResidual(),
watermarkEstimatorState)),
- currentElement.getTimestamp(),
- currentWindow,
- currentElement.getPane()));
+ (splitResult, watermarkEstimatorState) -> {
+ List<BoundedWindow> primaryFullyProcessedWindows =
+ ImmutableList.copyOf(
+ Iterables.limit(
+ currentElement.getWindows(),
currentWindowIterator.previousIndex()));
+ // Advances the iterator consuming the remaining windows.
+ List<BoundedWindow> residualUnprocessedWindows =
+ ImmutableList.copyOf(currentWindowIterator);
Review comment:
`currentWindowIterator.next()`?
##########
File path:
sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java
##########
@@ -941,20 +1047,59 @@ private Progress getProgress() {
convertSplitResultToWindowedSplitResult.apply(result,
watermarkAndState.getValue());
}
+ List<BundleApplication> primaryRoots = new ArrayList<>();
Review comment:
This also takes care of the self-checkpoint case, right?
##########
File path:
sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java
##########
@@ -577,26 +586,83 @@ public Instant timestamp(DoFn<InputT, OutputT> doFn) {
switch (pTransform.getSpec().getUrn()) {
case PTransformTranslation.SPLITTABLE_PROCESS_ELEMENTS_URN:
this.convertSplitResultToWindowedSplitResult =
- (splitResult, watermarkEstimatorState) ->
- WindowedSplitResult.forRoots(
- WindowedValue.of(
- KV.of(
- currentElement.getValue(),
- KV.of(splitResult.getPrimary(),
currentWatermarkEstimatorState)),
- currentElement.getTimestamp(),
- currentWindow,
- currentElement.getPane()),
- WindowedValue.of(
- KV.of(
- currentElement.getValue(),
- KV.of(splitResult.getResidual(),
watermarkEstimatorState)),
- currentElement.getTimestamp(),
- currentWindow,
- currentElement.getPane()));
+ (splitResult, watermarkEstimatorState) -> {
+ List<BoundedWindow> primaryFullyProcessedWindows =
+ ImmutableList.copyOf(
+ Iterables.limit(
+ currentElement.getWindows(),
currentWindowIterator.previousIndex()));
+ // Advances the iterator consuming the remaining windows.
+ List<BoundedWindow> residualUnprocessedWindows =
+ ImmutableList.copyOf(currentWindowIterator);
+ return WindowedSplitResult.forRoots(
+ primaryFullyProcessedWindows.isEmpty()
+ ? null
+ : WindowedValue.of(
+ KV.of(
+ currentElement.getValue(),
+ KV.of(currentRestriction,
currentWatermarkEstimatorState)),
+ currentElement.getTimestamp(),
+ primaryFullyProcessedWindows,
+ currentElement.getPane()),
+ WindowedValue.of(
+ KV.of(
+ currentElement.getValue(),
+ KV.of(splitResult.getPrimary(),
currentWatermarkEstimatorState)),
+ currentElement.getTimestamp(),
+ currentWindow,
+ currentElement.getPane()),
+ WindowedValue.of(
+ KV.of(
+ currentElement.getValue(),
+ KV.of(splitResult.getResidual(),
watermarkEstimatorState)),
+ currentElement.getTimestamp(),
+ currentWindow,
+ currentElement.getPane()),
+ residualUnprocessedWindows.isEmpty()
+ ? null
+ : WindowedValue.of(
+ KV.of(
+ currentElement.getValue(),
+ KV.of(currentRestriction,
currentWatermarkEstimatorState)),
+ currentElement.getTimestamp(),
+ residualUnprocessedWindows,
+ currentElement.getPane()));
+ };
break;
case
PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN:
this.convertSplitResultToWindowedSplitResult =
(splitResult, watermarkEstimatorState) -> {
+ List<BoundedWindow> primaryFullyProcessedWindows =
+ ImmutableList.copyOf(
+ Iterables.limit(
+ currentElement.getWindows(),
currentWindowIterator.previousIndex()));
+ // Advances the iterator consuming the remaining windows.
+ List<BoundedWindow> residualUnprocessedWindows =
+ ImmutableList.copyOf(currentWindowIterator);
Review comment:
`currentWindowIterator.next()`?
##########
File path:
sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java
##########
@@ -632,6 +698,17 @@ public Object restriction() {
}
});
return WindowedSplitResult.forRoots(
+ primaryFullyProcessedWindows.isEmpty()
+ ? null
+ : WindowedValue.of(
+ KV.of(
+ KV.of(
+ currentElement.getValue(),
+ KV.of(currentRestriction,
currentWatermarkEstimatorState)),
+ fullSize),
Review comment:
I'm wondering whether the size should be `fullSize` * number of windows?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org