Re: [PR] [SPARK-51358] [SS] Introduce snapshot upload lag detection through StateStoreCoordinator [spark]

via GitHub Thu, 20 Mar 2025 10:58:27 -0700


ericm-db commented on code in PR #50123:
URL: https://github.com/apache/spark/pull/50123#discussion_r2006158058



##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala:
##########
@@ -966,3 +968,38 @@ class RocksDBStateStoreChangeDataReader(
     }
   }
 }
+
+/**
+ * Class used to relay events reported from a RocksDB instance to the state store coordinator.
+ *
+ * We pass this into the RocksDB instance to report specific events like snapshot uploads.
+ * This should only be used to report back to the coordinator for metrics and monitoring purposes.
+ */
+private[state] case class RocksDBEventListener(
+    queryRunId: String,
+    stateStoreId: StateStoreId,
+    storeConf: StateStoreConf) {
+
+  /** ID of the state store provider managing the RocksDB instance */
+  private val stateStoreProviderId: StateStoreProviderId =
+    StateStoreProviderId(stateStoreId, UUID.fromString(queryRunId))

Review Comment:
   Remove `queryRunId` here.



##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala:
##########
@@ -73,7 +74,9 @@ class RocksDB(
     hadoopConf: Configuration = new Configuration,
     loggingId: String = "",
     useColumnFamilies: Boolean = false,
-    enableStateStoreCheckpointIds: Boolean = false) extends Logging {
+    enableStateStoreCheckpointIds: Boolean = false,
+    eventListener: Option[RocksDBEventListener] = None)
+  extends Logging {

Review Comment:
   nit: move `extends Logging {` up to the line above.



##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala:
##########
@@ -385,6 +385,7 @@ private[sql] class RocksDBStateStoreProvider
     this.useColumnFamilies = useColumnFamilies
     this.stateStoreEncoding = storeConf.stateStoreEncodingFormat
     this.stateSchemaProvider = stateSchemaProvider
+    this.rocksDBEventListener = RocksDBEventListener(getRunId(hadoopConf), stateStoreId, storeConf)

Review Comment:
   nit: remove the run ID argument (`getRunId(hadoopConf)`) if it's unnecessary.



##########
sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinatorSuite.scala:
##########
@@ -26,8 +26,9 @@ import org.apache.spark.{SharedSparkContext, SparkContext, SparkFunSuite}
 import org.apache.spark.scheduler.ExecutorCacheTaskLocation
 import org.apache.spark.sql.classic.SparkSession
 import org.apache.spark.sql.execution.streaming.{MemoryStream, StreamingQueryWrapper}
-import org.apache.spark.sql.functions.count
-import org.apache.spark.sql.internal.SQLConf.SHUFFLE_PARTITIONS
+import org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinHelper.{LeftSide, RightSide}
+import org.apache.spark.sql.functions.{count, expr}
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.util.Utils
 
 class StateStoreCoordinatorSuite extends SparkFunSuite with SharedSparkContext {

Review Comment:
   Please add a test case that covers the query-restart scenario.



##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinator.scala:
##########
@@ -129,10 +202,25 @@ class StateStoreCoordinatorRef private(rpcEndpointRef: RpcEndpointRef) {
  * Class for coordinating instances of [[StateStore]]s loaded in executors across the cluster,
  * and get their locations for job scheduling.
  */
-private class StateStoreCoordinator(override val rpcEnv: RpcEnv)
-    extends ThreadSafeRpcEndpoint with Logging {
+private class StateStoreCoordinator(
+    override val rpcEnv: RpcEnv,
+    val sqlConf: SQLConf)
+  extends ThreadSafeRpcEndpoint
+  with Logging {

Review Comment:
   nit: move `with Logging {` up to the line above.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] [SPARK-51358] [SS] Introduce snapshot upload lag detection through StateStoreCoordinator [spark]

Reply via email to