tdas commented on a change in pull request #33093:
URL: https://github.com/apache/spark/pull/33093#discussion_r661565073
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala
##########
@@ -178,6 +257,47 @@ case class FlatMapGroupsWithStateExec(
}
}
+ /**
+ * Process the new data iterator along with the initial state. The initial
state is applied
+ * before processing the new data for every key. The user defined function
is called only
+ * once on the data.
+ */
+ def processNewDataWithInitState(
+ childDataIter: Iterator[InternalRow],
+ initStateIter: Iterator[InternalRow]
+ ): Iterator[InternalRow] = {
+
+ if (!childDataIter.hasNext && !initStateIter.hasNext) return
Iterator.empty
+
+ val groupedChildDataIter = GroupedIterator(childDataIter,
groupingAttributes, child.output)
+ val groupedInitStateIter =
+ GroupedIterator(initStateIter, initStateGroupAttrs,
initialState.output)
+
+ val keyOrderingComparator = GenerateOrdering.generate(
+ groupingAttributes.map(SortOrder(_, Ascending)), groupingAttributes)
+
+ FlatMapGroupsWithStateExec.mergeGroupedIters(
+ groupedChildDataIter,
+ groupedInitStateIter,
+ keyOrderingComparator).flatMap { case (keyRow, valueRowIter,
initStateRowOption) =>
+ val keyUnsafeRow = keyRow.asInstanceOf[UnsafeRow]
+ var foundInitStateForKey = false
Review comment:
Isn't the logic easier to understand (than with this var) if you convert the
init state iterator to an option with `take(2)`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]