HeartSaVioR commented on a change in pull request #26821: 
[SPARK-20656][CORE]Support Incremental parsing of event logs in SHS
URL: https://github.com/apache/spark/pull/26821#discussion_r357000598
 
 

 ##########
 File path: 
core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
 ##########
 @@ -1095,11 +1139,30 @@ private[history] class FsHistoryProvider(conf: 
SparkConf, clock: Clock)
     KVUtils.open(newStorePath, metadata)
   }
 
-  private def createInMemoryStore(attempt: AttemptInfoWrapper): KVStore = {
-    val store = new InMemoryStore()
+  private def createInMemoryStore(appId: String, attempt: AttemptInfoWrapper): 
KVStore = {
+    val store = if (isIncrementalParsingEnabled) {
+      storeMap.getOrDefault(appId -> attempt.info.attemptId, new 
InMemoryStore())
+    } else {
+      new InMemoryStore
+    }
     val reader = EventLogFileReader(fs, new Path(logDir, attempt.logPath),
       attempt.lastIndex)
-    rebuildAppStore(store, reader, attempt.info.lastUpdated.getTime())
+    // Incremental info is valid only if incremental parsing feature is 
enabled.
+    val info: Option[IncrimentalMetaInfo] = try {
+      if (isIncrementalParsingEnabled) {
+        Some(listing.read(classOf[IncrimentalMetaInfo], Array(Some(appId), 
attempt.info.attemptId)))
+      } else None
+    } catch {
+      case _: NoSuchElementException =>
+        val info = IncrimentalMetaInfo(appId, attempt.info.attemptId,
+          fileIndex = 0, lineToSkip = -1)
+        listing.write(info)
+        Some(info)
+    }
+    rebuildAppStore(store, reader, attempt.info.lastUpdated.getTime(), info)
 
 Review comment:
   We may want to discard `store` when any exception is thrown from 
`rebuildAppStore()`, since the incremental meta info (`IncrimentalMetaInfo`) 
also becomes invalid in that case. Both should be discarded: the store from 
`storeMap` and the meta info from `listing`.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to