This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 6a604a4e31a [SPARK-44936][CORE] Simplify the log when Spark 
HybridStore hits the memory limit
6a604a4e31a is described below

commit 6a604a4e31afa8af619a451c1b6b033b3b0eed19
Author: Dongjoon Hyun <dh...@apple.com>
AuthorDate: Wed Aug 23 20:12:58 2023 -0700

    [SPARK-44936][CORE] Simplify the log when Spark HybridStore hits the memory 
limit
    
    ### What changes were proposed in this pull request?
    
    This PR aims to simplify the log when Spark HybridStore hits the memory 
limit.
    
    ### Why are the changes needed?
    
    `HistoryServerMemoryManager.lease` throws `RuntimeException`s frequently 
when the current usage is high.
    
    
https://github.com/apache/spark/blob/d382c6b3aef28bde6adcdf62b7be565ff1152942/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerMemoryManager.scala#L52-L55
    
    In this case, although Apache Spark shows the `RuntimeException` as an `INFO`-
level log, HybridStore works fine by falling back to the disk store. So, there is no 
need to surprise the users with a `RuntimeException` in the log. After this PR, 
we will provide a simpler message containing all the information, without the StackTrace and 
`RuntimeException`.
    
    **BEFORE**
    ```
    23/08/23 22:40:34 INFO FsHistoryProvider: Failed to create HybridStore for 
spark-xxx/None. Using ROCKSDB.
    java.lang.RuntimeException: Not enough memory to create hybrid store for 
app spark-xxx / None.
            at 
org.apache.spark.deploy.history.HistoryServerMemoryManager.lease(HistoryServerMemoryManager.scala:54)
            at 
org.apache.spark.deploy.history.FsHistoryProvider.createHybridStore(FsHistoryProvider.scala:1256)
            at 
org.apache.spark.deploy.history.FsHistoryProvider.loadDiskStore(FsHistoryProvider.scala:1231)
            at 
org.apache.spark.deploy.history.FsHistoryProvider.getAppUI(FsHistoryProvider.scala:342)
            at 
org.apache.spark.deploy.history.HistoryServer.getAppUI(HistoryServer.scala:199)
            at 
org.apache.spark.deploy.history.ApplicationCache.$anonfun$loadApplicationEntry$2(ApplicationCache.scala:163)
            at 
org.apache.spark.deploy.history.ApplicationCache.time(ApplicationCache.scala:134)
            at 
org.apache.spark.deploy.history.ApplicationCache.org$apache$spark$deploy$history$ApplicationCache$$loadApplicationEntry(ApplicationCache.scala:161)
            at 
org.apache.spark.deploy.history.ApplicationCache$$anon$1.load(ApplicationCache.scala:55)
            at 
org.apache.spark.deploy.history.ApplicationCache$$anon$1.load(ApplicationCache.scala:51)
            at 
org.sparkproject.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
            at 
org.sparkproject.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
            at 
org.sparkproject.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
            at 
org.sparkproject.guava.cache.LocalCache$Segment.get(LocalCache.java:2257)
            at org.sparkproject.guava.cache.LocalCache.get(LocalCache.java:4000)
            at 
org.sparkproject.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
            at 
org.sparkproject.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
            at 
org.apache.spark.deploy.history.ApplicationCache.get(ApplicationCache.scala:88)
            at 
org.apache.spark.deploy.history.ApplicationCache.withSparkUI(ApplicationCache.scala:100)
            at 
org.apache.spark.deploy.history.HistoryServer.org$apache$spark$deploy$history$HistoryServer$$loadAppUi(HistoryServer.scala:256)
            at 
org.apache.spark.deploy.history.HistoryServer$$anon$1.doGet(HistoryServer.scala:104)
            at javax.servlet.http.HttpServlet.service(HttpServlet.java:503)
            at javax.servlet.http.HttpServlet.service(HttpServlet.java:590)
            at 
org.sparkproject.jetty.servlet.ServletHolder.handle(ServletHolder.java:799)
            at 
org.sparkproject.jetty.servlet.ServletHandler$ChainEnd.doFilter(ServletHandler.java:1656)
            at 
org.apache.spark.ui.HttpSecurityFilter.doFilter(HttpSecurityFilter.scala:95)
            at 
org.sparkproject.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193)
            at 
org.sparkproject.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626)
            at 
org.sparkproject.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:552)
            at 
org.sparkproject.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:233)
            at 
org.sparkproject.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1440)
            at 
org.sparkproject.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:188)
            at 
org.sparkproject.jetty.servlet.ServletHandler.doScope(ServletHandler.java:505)
            at 
org.sparkproject.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:186)
            at 
org.sparkproject.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1355)
            at 
org.sparkproject.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
            at 
org.sparkproject.jetty.server.handler.gzip.GzipHandler.handle(GzipHandler.java:772)
            at 
org.sparkproject.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:234)
            at 
org.sparkproject.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)
            at org.sparkproject.jetty.server.Server.handle(Server.java:516)
            at 
org.sparkproject.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:487)
            at 
org.sparkproject.jetty.server.HttpChannel.dispatch(HttpChannel.java:732)
            at 
org.sparkproject.jetty.server.HttpChannel.handle(HttpChannel.java:479)
            at 
org.sparkproject.jetty.server.HttpConnection.onFillable(HttpConnection.java:277)
            at 
org.sparkproject.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311)
            at 
org.sparkproject.jetty.io.FillInterest.fillable(FillInterest.java:105)
            at 
org.sparkproject.jetty.io.ChannelEndPoint$1.run(ChannelEndPoint.java:104)
            at 
org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:338)
            at 
org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:315)
            at 
org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173)
            at 
org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
            at 
org.sparkproject.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:409)
            at 
org.sparkproject.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883)
            at 
org.sparkproject.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034)
            at java.base/java.lang.Thread.run(Thread.java:833)
    ```
    
    **AFTER**
    ```
    23/08/23 15:49:45 INFO FsHistoryProvider: Failed to create HybridStore for 
spark-xxx/None. Using ROCKSDB. Not enough memory to create hybrid store for app 
spark-xxx / None.
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Manually.
    
    ```
    spark.history.fs.logDirectory YOUR_HISTORY_DIR
    spark.history.store.path /tmp/rocksdb
    spark.history.store.hybridStore.enabled true
    spark.history.store.hybridStore.maxMemoryUsage 0g
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #42638 from dongjoon-hyun/SPARK-44936.
    
    Lead-authored-by: Dongjoon Hyun <dh...@apple.com>
    Co-authored-by: Dongjoon Hyun <dongj...@apache.org>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../scala/org/apache/spark/deploy/history/FsHistoryProvider.scala    | 5 +++++
 1 file changed, 5 insertions(+)

diff --git 
a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala 
b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index 49b479f3124..7b226137070 100644
--- 
a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ 
b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -1230,6 +1230,11 @@ private[history] class FsHistoryProvider(conf: 
SparkConf, clock: Clock)
       try {
         return createHybridStore(dm, appId, attempt, metadata)
       } catch {
+        case e: RuntimeException if e.getMessage != null &&
+            e.getMessage.contains("Not enough memory to create hybrid") =>
+          // Handle exception from `HistoryServerMemoryManager.lease`.
+          logInfo(s"Failed to create HybridStore for 
$appId/${attempt.info.attemptId}." +
+            s" Using $hybridStoreDiskBackend. " + e.getMessage)
         case e: Exception =>
           logInfo(s"Failed to create HybridStore for 
$appId/${attempt.info.attemptId}." +
             s" Using $hybridStoreDiskBackend.", e)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to