uros-b commented on code in PR #56718:
URL: https://github.com/apache/spark/pull/56718#discussion_r3466585469


##########
sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala:
##########
@@ -1826,6 +1826,29 @@ class RocksDBSuite extends AlsoTestWithRocksDBFeatures 
with SharedSparkSession
     }
   }
 
+  test("RocksDBFileManager: missing snapshot during load reports the available 
versions") {
+    // Loading a snapshot version that has not been uploaded yet (e.g. the 
asynchronous
+    // maintenance thread has not finished uploading it when reading state with
+    // snapshotStartBatchId) should fail with a FileNotFoundException whose 
message lists the
+    // snapshot/changelog files that ARE present, so intermittent failures in 
scheduled jobs are
+    // diagnosable straight from the logs.
+    val hadoopConf = new Configuration()
+    val remoteDir = Utils.createTempDir().toString
+    val fileManager = new RocksDBFileManager(remoteDir, Utils.createTempDir(), 
hadoopConf)
+    val fileMapping = new RocksDBFileMapping()
+    // Upload only snapshot version 1, leaving version 2 absent.
+    saveCheckpointFiles(
+      fileManager, Seq("001.sst" -> 10, "002.sst" -> 20), version = 1, numKeys 
= 10, fileMapping)
+
+    val ex = intercept[FileNotFoundException] {
+      fileManager.loadCheckpointFromDfs(2, Utils.createTempDir(), fileMapping)
+    }
+    assert(ex.getMessage.contains("Failed to load the snapshot file for 
version 2"))
+    assert(ex.getMessage.contains("Files currently present"))
+    // The version-1 snapshot that does exist must be surfaced in the 
diagnostic.
+    assert(ex.getMessage.contains("snapshots=[1.zip]"))
+  }
+

Review Comment:
   Nit (coverage gap): the new test only exercises the else-branch of 
   `loadCheckpointFromDfs` (no `checkpointUniqueId`, so the file is opened 
   via the local `fs.open`). The `if (checkpointUniqueId.isDefined)` branch, 
   which goes through `fm.open` (the V2/checksum path), is not directly tested.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to