zecookiez commented on code in PR #50123:
URL: https://github.com/apache/spark/pull/50123#discussion_r2007884599
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinator.scala:
##########
@@ -129,10 +194,24 @@ class StateStoreCoordinatorRef private(rpcEndpointRef:
RpcEndpointRef) {
* Class for coordinating instances of [[StateStore]]s loaded in executors
across the cluster,
* and get their locations for job scheduling.
*/
-private class StateStoreCoordinator(override val rpcEnv: RpcEnv)
- extends ThreadSafeRpcEndpoint with Logging {
+private class StateStoreCoordinator(
+ override val rpcEnv: RpcEnv,
+ val sqlConf: SQLConf)
+ extends ThreadSafeRpcEndpoint with Logging {
private val instances = new mutable.HashMap[StateStoreProviderId,
ExecutorCacheTaskLocation]
+ // Stores the latest snapshot upload event for a specific state store
+ private val stateStoreLatestUploadedSnapshot =
+ new mutable.HashMap[StateStoreId, SnapshotUploadEvent]
+
+ // Default snapshot upload event to use when a provider has never uploaded a
snapshot
+ private val defaultSnapshotUploadEvent = SnapshotUploadEvent(-1, 0)
+
+ // Stores the last timestamp in milliseconds where the coordinator did a
full report on
+ // instances lagging behind on snapshot uploads. The initial timestamp is
defaulted to
+ // 0 milliseconds.
+ private var lastFullSnapshotLagReportTimeMs = 0L
Review Comment:
Yeah, that's a good point. I've switched this to keep track of timestamps per
query run ID instead. Thanks!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]