HeartSaVioR commented on code in PR #45038:
URL: https://github.com/apache/spark/pull/45038#discussion_r1487155443
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala:
##########
@@ -69,19 +79,25 @@ private[sql] class RocksDBStateStoreProvider
verify(state == UPDATING, "Cannot put after already committed or
aborted")
verify(key != null, "Key cannot be null")
require(value != null, "Cannot put a null value")
- rocksDB.put(encoder.encodeKey(key), encoder.encodeValue(value),
colFamilyName)
+ val kvEncoder = keyValueEncoderMap.get(colFamilyName)
+ rocksDB.put(kvEncoder._1.encodeKey(key),
+ kvEncoder._2.encodeValue(value), colFamilyName)
}
override def remove(key: UnsafeRow, colFamilyName: String): Unit = {
verify(state == UPDATING, "Cannot remove after already committed or
aborted")
verify(key != null, "Key cannot be null")
- rocksDB.remove(encoder.encodeKey(key), colFamilyName)
+ val kvEncoder = keyValueEncoderMap.get(colFamilyName)
+ rocksDB.remove(kvEncoder._1.encodeKey(key), colFamilyName)
}
override def iterator(colFamilyName: String): Iterator[UnsafeRowPair] = {
+ val kvEncoder = keyValueEncoderMap.get(colFamilyName)
rocksDB.iterator(colFamilyName).map { kv =>
- val rowPair = encoder.decode(kv)
- if (!isValidated && rowPair.value != null) {
+ val rowPair = new UnsafeRowPair()
Review Comment:
Same here (sorry, I reviewed this in reverse order).
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala:
##########
@@ -215,7 +240,9 @@ private[sql] class RocksDBStateStoreProvider
(keySchema.length > numColsPrefixKey), "The number of columns in the key
must be " +
"greater than the number of columns for prefix key!")
- this.encoder = RocksDBStateEncoder.getEncoder(keySchema, valueSchema,
numColsPrefixKey)
+ keyValueEncoderMap.putIfAbsent(StateStore.DEFAULT_COL_FAMILY_NAME,
Review Comment:
(A microbenchmark might show that this regresses the case where only the
default column family is used - a map lookup with a carefully crafted lock
operation on every op - though I'd rather not worry about it until we see an
actual regression.)
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala:
##########
@@ -287,7 +314,8 @@ private[sql] class RocksDBStateStoreProvider
useColumnFamilies)
}
- @volatile private var encoder: RocksDBStateEncoder = _
+ @volatile private var keyValueEncoderMap = new
java.util.concurrent.ConcurrentHashMap[String,
Review Comment:
nit: this could be a `val`, and it probably doesn't need to be `@volatile` either.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]