vanzin commented on a change in pull request #27208: [SPARK-30481][CORE] 
Integrate event log compactor into Spark History Server
URL: https://github.com/apache/spark/pull/27208#discussion_r369671282
 
 

 ##########
 File path: 
core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
 ##########
 @@ -795,15 +800,42 @@ private[history] class FsHistoryProvider(conf: 
SparkConf, clock: Clock)
         // mean the end event is before the configured threshold, so call the 
method again to
         // re-parse the whole log.
         logInfo(s"Reparsing $logPath since end event was not found.")
-        doMergeApplicationListing(reader, scanTime, enableOptimizations = 
false)
+        doMergeApplicationListing(reader, scanTime, enableOptimizations = 
false,
+          lastEvaluatedForCompaction)
 
       case _ =>
         // If the app hasn't written down its app ID to the logs, still record 
the entry in the
         // listing db, with an empty ID. This will make the log eligible for 
deletion if the app
         // does not make progress after the configured max log age.
         listing.write(
           LogInfo(logPath.toString(), scanTime, LogType.EventLogs, None, None,
-            reader.fileSizeForLastIndex, reader.lastIndex, reader.completed))
+            reader.fileSizeForLastIndex, reader.lastIndex, 
lastEvaluatedForCompaction,
+            reader.completed))
+    }
+  }
+
+  private def compact(reader: EventLogFileReader): Unit = {
+    val rootPath = reader.rootPath
+    try {
+      reader.lastIndex match {
+        case Some(lastIndex) =>
+          try {
+            val info = listing.read(classOf[LogInfo], reader.rootPath.toString)
+            if (info.lastEvaluatedForCompaction.isEmpty ||
+              info.lastEvaluatedForCompaction.get < lastIndex) {
 
 Review comment:
   indent more

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to