jingz-db commented on code in PR #47107:
URL: https://github.com/apache/spark/pull/47107#discussion_r1668934152


##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateEncoder.scala:
##########
@@ -194,43 +207,35 @@ class PrefixKeyScanStateEncoder(
     val prefixKeyEncoded = encodeUnsafeRow(extractPrefixKey(row))
     val remainingEncoded = encodeUnsafeRow(remainingKeyProjection(row))
 
-    val encodedBytes = new Array[Byte](prefixKeyEncoded.length +
-      remainingEncoded.length + 4 + offsetForColFamilyPrefix)
-    encodeColumnFamilyPrefix(encodedBytes, useColumnFamilies)
+    val (encodedBytes, startingOffset) = encodeColumnFamilyPrefix(
+      prefixKeyEncoded.length + remainingEncoded.length + 4
+    )
 
-    Platform.putInt(encodedBytes, Platform.BYTE_ARRAY_OFFSET + 
offsetForColFamilyPrefix,
-      prefixKeyEncoded.length)
+    Platform.putInt(encodedBytes, startingOffset, prefixKeyEncoded.length)
     Platform.copyMemory(prefixKeyEncoded, Platform.BYTE_ARRAY_OFFSET,
-      encodedBytes, Platform.BYTE_ARRAY_OFFSET + 4 + offsetForColFamilyPrefix,
-      prefixKeyEncoded.length)
+      encodedBytes, startingOffset + 4, prefixKeyEncoded.length)
     // NOTE: We don't put the length of remainingEncoded as we can calculate 
later
     // on deserialization.
     Platform.copyMemory(remainingEncoded, Platform.BYTE_ARRAY_OFFSET,
-      encodedBytes,
-      Platform.BYTE_ARRAY_OFFSET + 4 + offsetForColFamilyPrefix + 
prefixKeyEncoded.length,
+      encodedBytes, startingOffset + 4 + prefixKeyEncoded.length,
       remainingEncoded.length)
 
     encodedBytes
   }
 
   override def decodeKey(keyBytes: Array[Byte]): UnsafeRow = {
-    val prefixKeyEncodedLen = Platform.getInt(
-      keyBytes, Platform.BYTE_ARRAY_OFFSET + offsetForColFamilyPrefix)
+    val prefixKeyEncodedLen = Platform.getInt(keyBytes, decodeKeyStartOffset)
     val prefixKeyEncoded = new Array[Byte](prefixKeyEncodedLen)
-    Platform.copyMemory(keyBytes,
-      Platform.BYTE_ARRAY_OFFSET + 4 + offsetForColFamilyPrefix,
-      prefixKeyEncoded,
-      Platform.BYTE_ARRAY_OFFSET, prefixKeyEncodedLen)
+    Platform.copyMemory(keyBytes, decodeKeyStartOffset + 4,
+      prefixKeyEncoded, Platform.BYTE_ARRAY_OFFSET, prefixKeyEncodedLen)
 
     // Here we calculate the remainingKeyEncodedLen leveraging the length of 
keyBytes
     val remainingKeyEncodedLen = keyBytes.length - 4 - prefixKeyEncodedLen -

Review Comment:
   Thanks for taking a close look :) 
   IIUC, the `prefixKeyEncoded` handling is part of the original implementation 
of `PrefixKeyScanStateEncoder.decodeKey` (`prefixKeyEncodedLen` is the length 
of the prefix of the key itself, not of the virtual column family prefix). So 
the virtual column family prefix is already dealt with by `decodeKeyStartOffset`.
   
   Though for `decodeKey` I found it hard to completely rid the subclasses of 
dealing with the column family prefix - e.g. for `remainingKeyEncodedLen` 
[here](https://github.com/apache/spark/blob/95e5f99c1c5aa731f202fff24e824302f1067b2a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateEncoder.scala#L233),
 we still need to subtract the length of the column family prefix.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to