ericm-db commented on code in PR #50123:
URL: https://github.com/apache/spark/pull/50123#discussion_r2006158058
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala:
##########
@@ -966,3 +968,38 @@ class RocksDBStateStoreChangeDataReader(
}
}
}
+
+/**
+ * Class used to relay events reported from a RocksDB instance to the state
store coordinator.
+ *
+ * We pass this into the RocksDB instance to report specific events like
snapshot uploads.
+ * This should only be used to report back to the coordinator for metrics and
monitoring purposes.
+ */
+private[state] case class RocksDBEventListener(
+ queryRunId: String,
+ stateStoreId: StateStoreId,
+ storeConf: StateStoreConf) {
+
+ /** ID of the state store provider managing the RocksDB instance */
+ private val stateStoreProviderId: StateStoreProviderId =
+ StateStoreProviderId(stateStoreId, UUID.fromString(queryRunId))
Review Comment:
Remove `queryRunId`.
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala:
##########
@@ -73,7 +74,9 @@ class RocksDB(
hadoopConf: Configuration = new Configuration,
loggingId: String = "",
useColumnFamilies: Boolean = false,
- enableStateStoreCheckpointIds: Boolean = false) extends Logging {
+ enableStateStoreCheckpointIds: Boolean = false,
+ eventListener: Option[RocksDBEventListener] = None)
+ extends Logging {
Review Comment:
nit: move `extends Logging {` up onto the previous line, after the closing parenthesis of the parameter list.
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala:
##########
@@ -385,6 +385,7 @@ private[sql] class RocksDBStateStoreProvider
this.useColumnFamilies = useColumnFamilies
this.stateStoreEncoding = storeConf.stateStoreEncodingFormat
this.stateSchemaProvider = stateSchemaProvider
+ this.rocksDBEventListener = RocksDBEventListener(getRunId(hadoopConf),
stateStoreId, storeConf)
Review Comment:
nit: remove the run ID argument (`getRunId(hadoopConf)`) if it's unnecessary.
##########
sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinatorSuite.scala:
##########
@@ -26,8 +26,9 @@ import org.apache.spark.{SharedSparkContext, SparkContext,
SparkFunSuite}
import org.apache.spark.scheduler.ExecutorCacheTaskLocation
import org.apache.spark.sql.classic.SparkSession
import org.apache.spark.sql.execution.streaming.{MemoryStream,
StreamingQueryWrapper}
-import org.apache.spark.sql.functions.count
-import org.apache.spark.sql.internal.SQLConf.SHUFFLE_PARTITIONS
+import
org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinHelper.{LeftSide,
RightSide}
+import org.apache.spark.sql.functions.{count, expr}
+import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.util.Utils
class StateStoreCoordinatorSuite extends SparkFunSuite with SharedSparkContext
{
Review Comment:
Add a test case that covers a query restart.
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinator.scala:
##########
@@ -129,10 +202,25 @@ class StateStoreCoordinatorRef private(rpcEndpointRef:
RpcEndpointRef) {
* Class for coordinating instances of [[StateStore]]s loaded in executors
across the cluster,
* and get their locations for job scheduling.
*/
-private class StateStoreCoordinator(override val rpcEnv: RpcEnv)
- extends ThreadSafeRpcEndpoint with Logging {
+private class StateStoreCoordinator(
+ override val rpcEnv: RpcEnv,
+ val sqlConf: SQLConf)
+ extends ThreadSafeRpcEndpoint
+ with Logging {
Review Comment:
nit: move `with Logging {` up onto the previous line, so it reads `extends ThreadSafeRpcEndpoint with Logging {`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]