This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 16a7cda  [SPARK-33790][CORE][3.1] Reduce the rpc call of getFileStatus 
in SingleFileEventLogFileReader
16a7cda is described below

commit 16a7cdae647cbad587f47468ca61e694c8907de9
Author: sychen <syc...@ctrip.com>
AuthorDate: Sat Jan 16 00:49:30 2021 +0900

    [SPARK-33790][CORE][3.1] Reduce the rpc call of getFileStatus in 
SingleFileEventLogFileReader
    
    ### What changes were proposed in this pull request?
    `FsHistoryProvider#checkForLogs` already has the `FileStatus` when 
constructing `SingleFileEventLogFileReader`, so there is no need to fetch the 
`FileStatus` again when `SingleFileEventLogFileReader#fileSizeForLastIndex` is 
called.
    
    ### Why are the changes needed?
    This avoids many redundant RPC calls and improves the speed of the history 
server.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Existing unit tests.
    
    Closes #31186 from HeartSaVioR/SPARK-33790-branch-3.1.
    
    Authored-by: sychen <syc...@ctrip.com>
    Signed-off-by: HyukjinKwon <gurwls...@apache.org>
---
 .../org/apache/spark/deploy/history/EventLogFileReaders.scala      | 7 ++++---
 project/MimaExcludes.scala                                         | 5 ++++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git 
a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala 
b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala
index 30ff2c3..6fe3a7b 100644
--- 
a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala
+++ 
b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala
@@ -116,7 +116,7 @@ object EventLogFileReader {
 
   def apply(fs: FileSystem, status: FileStatus): Option[EventLogFileReader] = {
     if (isSingleEventLog(status)) {
-      Some(new SingleFileEventLogFileReader(fs, status.getPath))
+      Some(new SingleFileEventLogFileReader(fs, status.getPath, 
Option(status)))
     } else if (isRollingEventLogs(status)) {
       Some(new RollingEventLogFilesFileReader(fs, status.getPath))
     } else {
@@ -166,8 +166,9 @@ object EventLogFileReader {
  */
 private[history] class SingleFileEventLogFileReader(
     fs: FileSystem,
-    path: Path) extends EventLogFileReader(fs, path) {
-  private lazy val status = fileSystem.getFileStatus(rootPath)
+    path: Path,
+    maybeStatus: Option[FileStatus] = None) extends EventLogFileReader(fs, 
path) {
+  private lazy val status = 
maybeStatus.getOrElse(fileSystem.getFileStatus(rootPath))
 
   override def lastIndex: Option[Long] = None
 
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index c29dd9e..820eda7 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -105,7 +105,10 @@ object MimaExcludes {
     
ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.weightCol"),
 
     // [SPARK-32879] Pass SparkSession.Builder options explicitly to 
SparkSession
-    
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SparkSession.this")
+    
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SparkSession.this"),
+
+    // [SPARK-33790][CORE] Reduce the rpc call of getFileStatus in 
SingleFileEventLogFileReader
+    
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.history.SingleFileEventLogFileReader.this")
   )
 
   // Exclude rules for 3.0.x


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to