Github user zsxwing commented on a diff in the pull request:
https://github.com/apache/spark/pull/19984#discussion_r158119082
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala
---
@@ -266,6 +266,21 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path:
}
}
+ /**
+ * Removes all log entries later than thresholdBatchId (exclusive).
+ */
+ def purgeAfter(thresholdBatchId: Long): Unit = {
+ val batchIds = fileManager.list(metadataPath, batchFilesFilter)
+ .map(f => pathToBatchId(f.getPath))
+
+ for (batchId <- batchIds if batchId > thresholdBatchId) {
+ print(s"AAAAA purging\n")
--- End diff --
nit: remove this leftover debug `print` statement.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]