tdas commented on a change in pull request #33093:
URL: https://github.com/apache/spark/pull/33093#discussion_r661575428
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala
##########
@@ -1243,6 +1244,214 @@ class FlatMapGroupsWithStateSuite extends
StateStoreMetricsTest {
assert(e.getMessage === "The output mode of function should be append or
update")
}
+ import testImplicits._
+
+ val flatMapGroupsWithStateFunc =
+ (key: String, values: Iterator[String], state: GroupState[RunningCount])
=> {
+ val valList = values.toList
+ val count = state.getOption.map(_.count).getOrElse(0L) + valList.size
+ state.update(new RunningCount(count))
+ Iterator((key, valList))
+ }
+
+ Seq("1", "2", "6").foreach { shufflePartitions =>
+ testWithAllStateVersions(s"flatMapGroupsWithState - initial " +
+ s"state - all cases - shuffle partitions ${shufflePartitions}") {
+ withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> shufflePartitions) {
+ // We will test them on different shuffle partition configuration to
make sure the
+ // grouping by key will still work. On higher number of shuffle
partitions its possible
+ // that all keys end up on different partitions.
+ val initialState: Dataset[(String, RunningCount)] = Seq(
+ ("keyInStateAndData-1", new RunningCount(1)),
+ ("keyInStateAndData-2", new RunningCount(1)),
+ ("keyOnlyInState-1", new RunningCount(2)),
+ ("keyOnlyInState-2", new RunningCount(1))
+ ).toDS()
+
+ val inputData = MemoryStream[String]
+ val result =
+ inputData.toDS()
+ .groupByKey(x => x)
+ .flatMapGroupsWithState(
+ Update, GroupStateTimeout.NoTimeout,
initialState)(flatMapGroupsWithStateFunc)
+
+ testStream(result, Update)(
+ AddData(inputData, "keyOnlyInData", "keyInStateAndData-1"),
+ CheckNewAnswer(
+ ("keyOnlyInState-1", ArrayBuffer[String]()),
Review comment:
Why use `ArrayBuffer[String]()` here and not just `Seq[String]()`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]