Github user ankuriitg commented on a diff in the pull request:
https://github.com/apache/spark/pull/22504#discussion_r226718233
--- Diff:
core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala ---
@@ -800,14 +817,33 @@ private[history] class FsHistoryProvider(conf:
SparkConf, clock: Clock)
stale.foreach { log =>
if (log.appId.isEmpty) {
logInfo(s"Deleting invalid / corrupt event log ${log.logPath}")
- deleteLog(new Path(log.logPath))
+ deleteLog(fs, new Path(log.logPath))
listing.delete(classOf[LogInfo], log.logPath)
}
}
// Clean the blacklist from the expired entries.
clearBlacklist(CLEAN_INTERVAL_S)
}
+ /**
+ * Delete driver logs from the configured spark dfs dir that exceed the
configured max age
+ */
+ private[history] def cleanDriverLogs(): Unit = Utils.tryLog {
+ val driverLogDir = conf.get(DRIVER_LOG_DFS_DIR)
+ driverLogDir.foreach { dl =>
+ val maxTime = clock.getTimeMillis() -
+ conf.get(MAX_DRIVER_LOG_AGE_S) * 1000
+ val appDirs = driverLogFs.get.listLocatedStatus(new Path(dl))
+ while (appDirs.hasNext()) {
+ val appDirStatus = appDirs.next()
+ if (appDirStatus.getModificationTime() < maxTime) {
+ logInfo(s"Deleting expired driver log for:
${appDirStatus.getPath().getName()}")
+ deleteLog(driverLogFs.get, appDirStatus.getPath())
--- End diff --
Oh right, the fix is similar to what exists currently. I just wanted to
add a note that logs will be deleted after that time period.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]