tdas commented on a change in pull request #33093:
URL: https://github.com/apache/spark/pull/33093#discussion_r662304369
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala
##########
@@ -199,6 +284,49 @@ case class FlatMapGroupsWithStateExec(
}
}
+ /**
+ * Process the new data iterator along with the initial state. The initial
state is applied
+ * before processing the new data for every key. The user defined function
is called only
+ * once for every key that has either initial state or data or both.
+ */
+ def processNewDataWithInitialState(
+ childDataIter: Iterator[InternalRow],
+ initStateIter: Iterator[InternalRow]
+ ): Iterator[InternalRow] = {
+
+ if (!childDataIter.hasNext && !initStateIter.hasNext) return
Iterator.empty
+
+ // Create iterators for the child data and the initial state grouped by
their grouping
+ // attributes.
+ val groupedChildDataIter = GroupedIterator(childDataIter,
groupingAttributes, child.output)
+ val groupedInitialStateIter =
+ GroupedIterator(initStateIter, initialStateGroupAttrs,
initialState.output)
+
+ // Create a CoGroupedIterator that will group the two iterators together
for every key group.
+ new CoGroupedIterator(
+ groupedChildDataIter, groupedInitialStateIter,
groupingAttributes).flatMap {
+ case (keyRow, valueRowIter, initialStateRowIter) =>
+ val keyUnsafeRow = keyRow.asInstanceOf[UnsafeRow]
+ var foundInitialStateForKey = false
+ initialStateRowIter.foreach { initialStateRow =>
+ if (foundInitialStateForKey) {
+ throw new IllegalArgumentException("The initial state provided
contained " +
Review comment:
nit: this does not have to be done in this PR, but is it possible to print the
key that has the duplicate, so that the user can debug it?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]