liviazhu commented on code in PR #54298:
URL: https://github.com/apache/spark/pull/54298#discussion_r2806513149


##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala:
##########
@@ -1408,6 +1408,30 @@ class RocksDBStateStoreChangeDataReader(
 
   override protected val changelogSuffix: String = "changelog"
 
+  /**
+   * Read the next changelog record, skipping DELETE_RANGE_RECORD entries as they cannot
+   * be represented as individual key-value change records in the state change data feed.
+   * Returns null if there are no more records.
+   */
+  private def readNextChangelogRecord():
+      (RecordType.Value, Array[Byte], Array[Byte]) = {
+    while (true) {

Review Comment:
   Can we refactor this a little bit to use `while (reader != null)` or 
something so we don't have the funky unreachable statement?



##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala:
##########
@@ -1408,6 +1408,30 @@ class RocksDBStateStoreChangeDataReader(
 
   override protected val changelogSuffix: String = "changelog"
 
+  /**
+   * Read the next changelog record, skipping DELETE_RANGE_RECORD entries as they cannot
+   * be represented as individual key-value change records in the state change data feed.
+   * Returns null if there are no more records.
+   */
+  private def readNextChangelogRecord():
+      (RecordType.Value, Array[Byte], Array[Byte]) = {
+    while (true) {
+      val reader = currentChangelogReader()
+      if (reader == null) {
+        return null
+      }
+      val nextRecord = reader.next()
+      if (nextRecord._1 == RecordType.DELETE_RANGE_RECORD) {
+        logWarning(log"Skipping DELETE_RANGE_RECORD in state change data feed " +
Review Comment:
   I don't know if a warning is enough. Should we not fail the query?



##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala:
##########
@@ -1438,29 +1442,33 @@ class RocksDB(
    * Delete all keys in the range [beginKey, endKey).
    * Uses RocksDB's native deleteRange for efficient bulk deletion.
    *
-   * @param beginKey The start key of the range (inclusive)
-   * @param endKey   The end key of the range (exclusive)
-   * @param cfName   The column family name
+   * @param beginKey       The start key of the range (inclusive)
+   * @param endKey         The end key of the range (exclusive)
+   * @param cfName         The column family name
+   * @param includesPrefix Whether the keys already include the column family prefix.
+   *                       Set to true during changelog replay to avoid double-encoding.
    */
   def deleteRange(
       beginKey: Array[Byte],
       endKey: Array[Byte],
-      cfName: String = StateStore.DEFAULT_COL_FAMILY_NAME): Unit = {
+      cfName: String = StateStore.DEFAULT_COL_FAMILY_NAME,
+      includesPrefix: Boolean = false): Unit = {
     updateMemoryUsageIfNeeded()
 
-    val beginKeyWithPrefix = if (useColumnFamilies) {
+    val beginKeyWithPrefix = if (useColumnFamilies && !includesPrefix) {
       encodeStateRowWithPrefix(beginKey, cfName)
     } else {
       beginKey
     }
 
-    val endKeyWithPrefix = if (useColumnFamilies) {
+    val endKeyWithPrefix = if (useColumnFamilies && !includesPrefix) {
       encodeStateRowWithPrefix(endKey, cfName)
     } else {
       endKey
     }
 
     db.deleteRange(writeOptions, beginKeyWithPrefix, endKeyWithPrefix)
+    changelogWriter.foreach(_.deleteRange(beginKeyWithPrefix, endKeyWithPrefix))

Review Comment:
   Let's also add a TODO for the metrics update



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to