baohe-zhang commented on a change in pull request #28412: URL: https://github.com/apache/spark/pull/28412#discussion_r437598233
########## File path: core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala ########## @@ -1197,6 +1213,78 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) KVUtils.open(newStorePath, metadata) } + private def createHybridStore( + dm: HistoryServerDiskManager, + appId: String, + attempt: AttemptInfoWrapper, + metadata: AppStatusStoreMetadata): KVStore = { + + var retried = false + var hybridStore: HybridStore = null + while (hybridStore == null) { + val reader = EventLogFileReader(fs, new Path(logDir, attempt.logPath), + attempt.lastIndex) + val isCompressed = reader.compressionCodec.isDefined + + // Throws an exception if the memory space is not enough + memoryManager.lease(appId, attempt.info.attemptId, reader.totalSize, isCompressed) + + logInfo(s"Leasing disk manager space for app $appId / ${attempt.info.attemptId}...") + val lease = dm.lease(reader.totalSize, isCompressed) + val isLeaseRolledBack = new java.util.concurrent.atomic.AtomicBoolean(false) + var store: HybridStore = null + try { + store = new HybridStore() + val levelDB = KVUtils.open(lease.tmpPath, metadata) + store.setLevelDB(levelDB) + rebuildAppStore(store, reader, attempt.info.lastUpdated.getTime()) + + // Start the background thread to dump data to levelDB when writing to + // InMemoryStore is completed. + store.switchToLevelDB(new HybridStore.SwitchToLevelDBListener { Review comment: I would prefer to keep store.switchToLevelDB() in the try block and remove isLeaseRolledBack variable. If we move it out of the try block, we would need one more variable to track whether we have encountered exceptions during the rebuildAppStore(), otherwise, store.switchToLevelDB() will be executed when the rebuildAppStore() is failed due to IOException. What do you think? I added a closed variable to indicate whether the hybrid store is closed, and not to start the background thread when the closed is true. This can address the issue above. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org