anishshri-db commented on code in PR #41099:
URL: https://github.com/apache/spark/pull/41099#discussion_r1190249907
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala:
##########
@@ -286,44 +322,49 @@ class RocksDB(
*/
def commit(): Long = {
val newVersion = loadedVersion + 1
- val checkpointDir = createTempDir("checkpoint")
- var rocksDBBackgroundThreadPaused = false
try {
- // Make sure the directory does not exist. Native RocksDB fails if the
directory to
- // checkpoint exists.
- Utils.deleteRecursively(checkpointDir)
logInfo(s"Flushing updates for $newVersion")
- val flushTimeMs = timeTakenMs { db.flush(flushOptions) }
-
- val compactTimeMs = if (conf.compactOnCommit) {
Review Comment:
We still need to retain this option for the non-changelog-checkpointing case,
right? Otherwise, where would the `compactOnCommit` option be used?
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala:
##########
@@ -286,44 +322,49 @@ class RocksDB(
*/
def commit(): Long = {
val newVersion = loadedVersion + 1
- val checkpointDir = createTempDir("checkpoint")
- var rocksDBBackgroundThreadPaused = false
try {
- // Make sure the directory does not exist. Native RocksDB fails if the
directory to
- // checkpoint exists.
- Utils.deleteRecursively(checkpointDir)
logInfo(s"Flushing updates for $newVersion")
- val flushTimeMs = timeTakenMs { db.flush(flushOptions) }
-
- val compactTimeMs = if (conf.compactOnCommit) {
- logInfo("Compacting")
- timeTakenMs { db.compactRange() }
- } else 0
- logInfo("Pausing background work")
- val pauseTimeMs = timeTakenMs {
- db.pauseBackgroundWork() // To avoid files being changed while
committing
- rocksDBBackgroundThreadPaused = true
- }
-
- logInfo(s"Creating checkpoint for $newVersion in $checkpointDir")
- val checkpointTimeMs = timeTakenMs {
- val cp = Checkpoint.create(db)
- cp.createCheckpoint(checkpointDir.toString)
+ var flushTimeMs = 0L
+ var checkpointTimeMs = 0L
+ if (shouldCreateSnapshot()) {
+ flushTimeMs = timeTakenMs { db.flush(flushOptions) }
Review Comment:
Let's add a comment saying that the flush and checkpoint creation will happen
on every commit if changelog checkpointing is not enabled.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]