micheal-o commented on code in PR #51484:
URL: https://github.com/apache/spark/pull/51484#discussion_r2211275566
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala:
##########
@@ -90,7 +90,7 @@ private[sql] class HDFSBackedStateStoreProvider extends
StateStoreProvider with
override def abort(): Unit = {}
override def toString(): String = {
-
s"HDFSReadStateStore[id=(op=${id.operatorId},part=${id.partitionId}),dir=$baseDir]"
+ s"HDFSReadStateStore[stateStoreProviderId=$stateStoreProviderId]"
Review Comment:
why are you using provider id here? as the string representation of the
state store?
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala:
##########
@@ -244,7 +244,7 @@ private[sql] class HDFSBackedStateStoreProvider extends
StateStoreProvider with
}
override def toString(): String = {
-
s"HDFSStateStore[id=(op=${id.operatorId},part=${id.partitionId}),dir=$baseDir]"
+ s"HDFSStateStore[stateStoreProviderId=$stateStoreProviderId]"
Review Comment:
ditto
##########
sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala:
##########
@@ -1453,9 +1454,9 @@ abstract class StateStoreSuiteBase[ProviderClass <:
StateStoreProvider]
assert(e.getCondition == "CANNOT_WRITE_STATE_STORE.CANNOT_COMMIT")
if (store.getClass.getName contains ROCKSDB_STATE_STORE) {
- assert(e.getMessage contains "RocksDBStateStore[id=(op=0,part=0)")
+ assert(e.getMessage contains "RocksDBStateStore")
} else {
- assert(e.getMessage contains "HDFSStateStore[id=(op=0,part=0)")
+ assert(e.getMessage contains "HDFSStateStore")
Review Comment:
ditto
##########
sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala:
##########
@@ -1453,9 +1454,9 @@ abstract class StateStoreSuiteBase[ProviderClass <:
StateStoreProvider]
assert(e.getCondition == "CANNOT_WRITE_STATE_STORE.CANNOT_COMMIT")
if (store.getClass.getName contains ROCKSDB_STATE_STORE) {
- assert(e.getMessage contains "RocksDBStateStore[id=(op=0,part=0)")
+ assert(e.getMessage contains "RocksDBStateStore")
Review Comment:
why remove the op and part id in the assert?
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala:
##########
@@ -802,9 +806,7 @@ case class StateStoreId(
}
override def toString: String = {
- s"""StateStoreId[ checkpointRootLocation=$checkpointRootLocation,
operatorId=$operatorId,
- | partitionId=$partitionId, storeName=$storeName ]
- |""".stripMargin.replaceAll("\n", "")
+ s"StateStoreId[ operatorId=$operatorId, partitionId=$partitionId,
storeName=$storeName ]"
Review Comment:
My suggestion here is we shouldn't remove the location in the store id
string representation. Might be needed in other case. Might be better to have a
loggingId func separate from toString, to format id specifically for appending
to log. What do you think?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]