HeartSaVioR commented on code in PR #53911:
URL: https://github.com/apache/spark/pull/53911#discussion_r2801094343


##########
sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBTimestampEncoderOperationsSuite.scala:
##########
@@ -0,0 +1,414 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.streaming.state
+
+import java.util.UUID
+
+import scala.util.Random
+
+import org.apache.hadoop.conf.Configuration
+import org.scalatest.BeforeAndAfterEach
+import org.scalatest.matchers.should.Matchers
+
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{UnsafeProjection, UnsafeRow}
+import org.apache.spark.sql.execution.streaming.runtime.StreamExecution
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types._
+import org.apache.spark.tags.ExtendedSQLTest
+import org.apache.spark.unsafe.types.UTF8String
+import org.apache.spark.util.Utils
+
+@ExtendedSQLTest
+class RocksDBTimestampEncoderOperationsSuite extends SharedSparkSession
+  with BeforeAndAfterEach with Matchers {
+
+  // Test schemas
+  private val keySchema = StructType(Seq(
+    StructField("key", StringType, nullable = true),
+    StructField("partitionId", IntegerType, nullable = true)
+  ))
+  private val valueSchema = StructType(Seq(StructField("value", IntegerType, nullable = true)))
+
+  // Column family names for testing
+  private val testColFamily = "test_cf"
+
+  override def beforeEach(): Unit = {
+    super.beforeEach()
+    StateStore.stop()
+    require(!StateStore.isMaintenanceRunning)
+    spark.streams.stateStoreCoordinator // initialize the lazy coordinator
+  }
+
+  override def afterEach(): Unit = {
+    StateStore.stop()
+    require(!StateStore.isMaintenanceRunning)
+    super.afterEach()
+  }
+
+  private def newDir(): String = Utils.createTempDir().getCanonicalPath
+
+  // TODO: Address the new state format with Avro and enable the test with Avro encoding
+  Seq("unsaferow").foreach { encoding =>
+    Seq("prefix", "postfix").foreach { encoderType =>
+      test(s"Event time as $encoderType: basic put and get operations 
(encoding = $encoding)") {
+        tryWithProviderResource(
+          newStoreProviderWithTimestampEncoder(
+            encoderType = encoderType, dataEncoding = encoding)) { provider =>
+          val store = provider.getStore(0)
+
+          try {
+            // Test put and get
+            val keyWithTimestamp1 = keyAndTimestampToRow("key1", 1, 1000L)
+            val value1 = valueToRow(100)
+
+            store.put(keyWithTimestamp1, value1)
+            val retrievedValue = store.get(keyWithTimestamp1)
+
+            assert(retrievedValue != null)
+            assert(retrievedValue.getInt(0) === 100)
+
+            // Test get with different event time should return null
+            val keyWithTimestamp2 = keyAndTimestampToRow("key1", 1, 2000L)
+            assert(store.get(keyWithTimestamp2) === null)
+
+            // Test with different key should return null
+            val keyWithTimestamp3 = keyAndTimestampToRow("key2", 1, 1000L)
+            assert(store.get(keyWithTimestamp3) === null)
+          } finally {
+            store.abort()
+          }
+        }
+      }
+
+      test(s"Event time as $encoderType: remove operations (encoding = 
$encoding)") {
+        tryWithProviderResource(
+          newStoreProviderWithTimestampEncoder(
+            encoderType = encoderType, dataEncoding = encoding)) { provider =>
+          val store = provider.getStore(0)
+
+          try {
+            val keyWithTimestamp1 = keyAndTimestampToRow("key1", 1, 1000L)
+            val value1 = valueToRow(100)
+
+            // Put and verify
+            store.put(keyWithTimestamp1, value1)
+            assert(store.get(keyWithTimestamp1) != null)
+
+            // Remove and verify
+            store.remove(keyWithTimestamp1)
+            assert(store.get(keyWithTimestamp1) === null)
+
+            // Removing non-existent key should not throw error
+            store.remove(keyAndTimestampToRow("nonexistent", 1, 2000L))
+          } finally {
+            store.abort()
+          }
+        }
+      }
+
+      test(s"Event time as $encoderType: multiple values per key (encoding = 
$encoding)") {
+        tryWithProviderResource(
+          newStoreProviderWithTimestampEncoder(
+            encoderType = encoderType,
+            useMultipleValuesPerKey = true,
+            dataEncoding = encoding)
+        ) { provider =>
+          val store = provider.getStore(0)
+
+          try {
+            val keyWithTimestamp1 = keyAndTimestampToRow("key1", 1, 1000L)
+            val values = Array(valueToRow(100), valueToRow(200), valueToRow(300))
+
+            // Test putList
+            store.putList(keyWithTimestamp1, values)
+
+            // Test valuesIterator
+            val retrievedValues =
+              store.valuesIterator(keyWithTimestamp1).map(_.copy()).toList
+            assert(retrievedValues.length === 3)
+            assert(
+              retrievedValues.map(_.getInt(0)).sorted === Array(
+                100,
+                200,
+                300
+              ).sorted
+            )
+
+            // Test with different event time should return empty iterator
+            val keyWithTimestamp2 = keyAndTimestampToRow("key1", 1, 2000L)
+            val emptyIterator = store.valuesIterator(keyWithTimestamp2)
+            assert(!emptyIterator.hasNext)
+          } finally {
+            store.abort()
+          }
+        }
+      }
+
+      test(s"Event time as $encoderType: merge operations (encoding = 
$encoding)") {
+        tryWithProviderResource(
+          newStoreProviderWithTimestampEncoder(
+            encoderType = encoderType,
+            useMultipleValuesPerKey = true,
+            dataEncoding = encoding)
+        ) { provider =>
+          val store = provider.getStore(0)
+
+          try {
+            val keyWithTimestamp1 = keyAndTimestampToRow("key1", 1, 1000L)
+            val value1 = valueToRow(100)
+            val value2 = valueToRow(200)
+
+            // Test merge single values
+            store.merge(keyWithTimestamp1, value1)
+            store.merge(keyWithTimestamp1, value2)
+
+            val retrievedValues =
+              store.valuesIterator(keyWithTimestamp1).map(_.copy()).toList
+            assert(retrievedValues.length === 2)
+            assert(retrievedValues.map(_.getInt(0)).toSet === Set(100, 200))
+
+            // Test mergeList
+            val additionalValues = Array(valueToRow(300), valueToRow(400))
+            store.mergeList(keyWithTimestamp1, additionalValues)
+
+            val allValues = store.valuesIterator(keyWithTimestamp1).map(_.copy()).toList
+            assert(allValues.length === 4)
+            assert(allValues.map(_.getInt(0)).toSet === Set(100, 200, 300, 400))
+          } finally {
+            store.abort()
+          }
+        }
+      }
+
+      test(s"Event time as $encoderType: null value validation (encoding = 
$encoding)") {
+        tryWithProviderResource(
+          newStoreProviderWithTimestampEncoder(
+            encoderType = encoderType, dataEncoding = encoding)) { provider =>
+          val store = provider.getStore(0)
+
+          try {
+            val keyWithTimestamp = keyAndTimestampToRow("key1", 1, 1000L)
+
+            // Test null value should throw exception
+            intercept[IllegalArgumentException] {
+              store.put(keyWithTimestamp, null)
+            }
+          } finally {
+            store.abort()
+          }
+        }
+      }
+    }
+  }
+
+  // TODO: Address the new state format with Avro and enable the test with Avro encoding
+  Seq("unsaferow").foreach { encoding =>
+    test(s"Event time as prefix: iterator operations (encoding = $encoding)") {
+      tryWithProviderResource(
+        newStoreProviderWithTimestampEncoder(
+          encoderType = "prefix", dataEncoding = encoding)) { provider =>
+        val store = provider.getStore(0)
+
+        try {
+          val entries = Map(
+            keyAndTimestampToRow("key1", 1, 2000L) -> valueToRow(100),
+            keyAndTimestampToRow("key2", 1, 1000L) -> valueToRow(200),
+            keyAndTimestampToRow("key1", 2, -3000L) -> valueToRow(300)
+          )
+
+          // Put all entries (in non-sorted order)
+          entries.foreach { case (keyAndTimestampRow, value) =>
+            store.put(keyAndTimestampRow, value)
+          }
+
+          // Test iterator - should return all entries ordered by event time
+          val iterator = store.iterator()
+          val results = iterator.map { pair =>
+            assert(pair.key.numFields() === 3) // key fields + timestamp
+
+            val keyString = pair.key.getString(0)
+            val partitionId = pair.key.getInt(1)
+            // The timestamp will be placed at the end of the key row.
+            val timestamp = pair.key.getLong(2)
+            val value = pair.value.getInt(0)
+            (keyString, partitionId, timestamp, value)
+          }.toList
+
+          iterator.close()
+
+          assert(results.length === 3)
+
+          // Verify results are ordered by event time (ascending)
+          val eventTimes = results.map(_._3)
+          assert(
+            eventTimes === Seq(-3000L, 1000L, 2000L),
+            "Results should be ordered by event time"
+          )
+
+          // Verify all expected entries are present
+          val retrievedEntries = results.map {
+            case (key, partId, time, value) =>
+              ((key, partId, time), value)
+          }.toMap
+          assert(retrievedEntries(("key1", 2, -3000L)) === 300)
+          assert(retrievedEntries(("key2", 1, 1000L)) === 200)
+          assert(retrievedEntries(("key1", 1, 2000L)) === 100)
+        } finally {
+          store.abort()
+        }
+      }
+    }
+
+    test(
+      s"Event time as postfix: prefix scan operations (encoding = $encoding)"
+    ) {

Review Comment:
   Cursor does this all the time. Will change.
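   For context, the comment refers to the `test(` call above being split across three lines even though it fits on one. A minimal sketch of the presumably intended formatting, assuming the call stays within Spark's 100-character line limit and the test body is unchanged:
   
   ```scala
   test(s"Event time as postfix: prefix scan operations (encoding = $encoding)") {
     // ... test body unchanged ...
   }
   ```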



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

