This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push: new 16a7cda [SPARK-33790][CORE][3.1] Reduce the rpc call of getFileStatus in SingleFileEventLogFileReader 16a7cda is described below commit 16a7cdae647cbad587f47468ca61e694c8907de9 Author: sychen <syc...@ctrip.com> AuthorDate: Sat Jan 16 00:49:30 2021 +0900 [SPARK-33790][CORE][3.1] Reduce the rpc call of getFileStatus in SingleFileEventLogFileReader ### What changes were proposed in this pull request? `FsHistoryProvider#checkForLogs` already has the `FileStatus` when constructing `SingleFileEventLogFileReader`, so there is no need to fetch the `FileStatus` again when `SingleFileEventLogFileReader#fileSizeForLastIndex` is called. ### Why are the changes needed? This can eliminate a large number of RPC calls and improve the speed of the history server. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing unit tests. Closes #31186 from HeartSaVioR/SPARK-33790-branch-3.1. Authored-by: sychen <syc...@ctrip.com> Signed-off-by: HyukjinKwon <gurwls...@apache.org> --- .../org/apache/spark/deploy/history/EventLogFileReaders.scala | 7 ++++--- project/MimaExcludes.scala | 5 ++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala index 30ff2c3..6fe3a7b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala @@ -116,7 +116,7 @@ object EventLogFileReader { def apply(fs: FileSystem, status: FileStatus): Option[EventLogFileReader] = { if (isSingleEventLog(status)) { - Some(new SingleFileEventLogFileReader(fs, status.getPath)) + Some(new SingleFileEventLogFileReader(fs, status.getPath, Option(status))) } else if (isRollingEventLogs(status)) { Some(new RollingEventLogFilesFileReader(fs, status.getPath)) } else { @@ -166,8 
+166,9 @@ object EventLogFileReader { */ private[history] class SingleFileEventLogFileReader( fs: FileSystem, - path: Path) extends EventLogFileReader(fs, path) { - private lazy val status = fileSystem.getFileStatus(rootPath) + path: Path, + maybeStatus: Option[FileStatus] = None) extends EventLogFileReader(fs, path) { + private lazy val status = maybeStatus.getOrElse(fileSystem.getFileStatus(rootPath)) override def lastIndex: Option[Long] = None diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index c29dd9e..820eda7 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -105,7 +105,10 @@ object MimaExcludes { ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.weightCol"), // [SPARK-32879] Pass SparkSession.Builder options explicitly to SparkSession - ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SparkSession.this") + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SparkSession.this"), + + // [SPARK-33790][CORE] Reduce the rpc call of getFileStatus in SingleFileEventLogFileReader + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.history.SingleFileEventLogFileReader.this") ) // Exclude rules for 3.0.x --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org