sahnib commented on code in PR #45376:
URL: https://github.com/apache/spark/pull/45376#discussion_r1592763363
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala:
##########
@@ -347,6 +347,28 @@ class IncrementalExecution(
eventTimeWatermarkForEviction =
inputWatermarkForEviction(m.stateInfo.get)
)
+ // UpdateEventTimeColumnExec is used to tag the eventTime column, and validate
+ // emitted rows adhere to watermark in the output of transformWithState.
+ // Hence, this node shares the same watermark value as TransformWithStateExec.
+ // However, given that UpdateEventTimeColumnExec does not store any state, it
+ // does not have any StateInfo. We simply use the StateInfo of transformWithStateExec
+ // to propagate watermark to both UpdateEventTimeColumnExec and transformWithStateExec.
+ case UpdateEventTimeColumnExec(eventTime, delay, None,
+ SerializeFromObjectExec(serializer,
+ t: TransformWithStateExec)) if t.stateInfo.isDefined =>
+
+ val stateInfo = t.stateInfo.get
+ val eventTimeWatermarkForLateEvents =
inputWatermarkForLateEvents(stateInfo)
+ val eventTimeWatermarkForEviction =
inputWatermarkForLateEvents(stateInfo)
+
+ UpdateEventTimeColumnExec(eventTime, delay,
eventTimeWatermarkForEviction,
Review Comment:
Renamed
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala:
##########
@@ -347,6 +347,28 @@ class IncrementalExecution(
eventTimeWatermarkForEviction =
inputWatermarkForEviction(m.stateInfo.get)
)
+ // UpdateEventTimeColumnExec is used to tag the eventTime column, and validate
+ // emitted rows adhere to watermark in the output of transformWithState.
+ // Hence, this node shares the same watermark value as TransformWithStateExec.
+ // However, given that UpdateEventTimeColumnExec does not store any state, it
+ // does not have any StateInfo. We simply use the StateInfo of transformWithStateExec
+ // to propagate watermark to both UpdateEventTimeColumnExec and transformWithStateExec.
+ case UpdateEventTimeColumnExec(eventTime, delay, None,
+ SerializeFromObjectExec(serializer,
+ t: TransformWithStateExec)) if t.stateInfo.isDefined =>
+
+ val stateInfo = t.stateInfo.get
+ val eventTimeWatermarkForLateEvents =
inputWatermarkForLateEvents(stateInfo)
+ val eventTimeWatermarkForEviction =
inputWatermarkForLateEvents(stateInfo)
+
+ UpdateEventTimeColumnExec(eventTime, delay,
eventTimeWatermarkForEviction,
Review Comment:
Fixed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]