This is an automated email from the ASF dual-hosted git repository.

nicholasjiang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/celeborn.git


The following commit(s) were added to refs/heads/main by this push:
     new 1d3558bd1 [CELEBORN-1385] HttpServer support idle timeout 
configuration of Jetty
1d3558bd1 is described below

commit 1d3558bd143b4f690c8f615e3bae6a4c539f607d
Author: SteNicholas <[email protected]>
AuthorDate: Sun Apr 14 12:40:57 2024 +0800

    [CELEBORN-1385] HttpServer support idle timeout configuration of Jetty
    
    ### What changes were proposed in this pull request?
    
    Introduce `celeborn.master.http.idleTimeout` and 
`celeborn.worker.http.idleTimeout` to support idle timeout configuration of 
Jetty for `HttpServer`.
    
    ### Why are the changes needed?
    
    Jetty's `ServerConnector` supports HTTP idle timeout configuration via 
`jetty.http.idleTimeout`, whose default value is 30000 ms (i.e. 
`jetty.http.idleTimeout=30000`). `HttpServer` should also support configuring 
Jetty's idle timeout. With the default value, an idle connection times out as 
shown in the following log:
    
    ```
    2024-04-12 16:04:00,926 [DEBUG] [master-JettyScheduler-1] - 
org.eclipse.jetty.io.IdleTimeout -IdleTimeout.java(161) 
-SocketChannelEndPoint567d3f82{l=/127.0.0.1:9097,r=/127.0.0.1:35276,OPEN,fill=FI,flush=-,to=29999/30000}{io=1/1,kio=1,kro=1}->HttpConnection2f88da0c[p=HttpParser{s=START,0
 of 
-1},g=HttpGenerator796c3666{s=START}]=>HttpChannelOverHttp63815646{s=HttpChannelState5c192497{s=IDLE
 rs=BLOCKING os=OPEN is=IDLE awp=false se=false i=true 
al=0},r=5,c=false/false,a=IDLE,uri=null,age [...]
    2024-04-12 16:04:00,927 [DEBUG] [master-JettyScheduler-1] - 
org.eclipse.jetty.io.IdleTimeout -IdleTimeout.java(161) 
-SocketChannelEndPoint567d3f82{l=/127.0.0.1:9097,r=/127.0.0.1:35276,OPEN,fill=FI,flush=-,to=30001/30000}{io=1/1,kio=1,kro=1}->HttpConnection2f88da0c[p=HttpParser{s=START,0
 of 
-1},g=HttpGenerator796c3666{s=START}]=>HttpChannelOverHttp63815646{s=HttpChannelState5c192497{s=IDLE
 rs=BLOCKING os=OPEN is=IDLE awp=false se=false i=true 
al=0},r=5,c=false/false,a=IDLE,uri=null,age [...]
    2024-04-12 16:04:00,927 [DEBUG] [master-JettyScheduler-1] - 
org.eclipse.jetty.io.IdleTimeout -IdleTimeout.java(168) 
-SocketChannelEndPoint567d3f82{l=/127.0.0.1:9097,r=/127.0.0.1:35276,OPEN,fill=FI,flush=-,to=30001/30000}{io=1/1,kio=1,kro=1}->HttpConnection2f88da0c[p=HttpParser{s=START,0
 of 
-1},g=HttpGenerator796c3666{s=START}]=>HttpChannelOverHttp63815646{s=HttpChannelState5c192497{s=IDLE
 rs=BLOCKING os=OPEN is=IDLE awp=false se=false i=true 
al=0},r=5,c=false/false,a=IDLE,uri=null,age [...]
    2024-04-12 16:04:00,927 [DEBUG] [master-JettyScheduler-1] - 
org.eclipse.jetty.io.FillInterest -FillInterest.java(136) -onFail 
FillInterest6cc48840{AC.ReadCB2f88da0c{HttpConnection2f88da0c::SocketChannelEndPoint567d3f82{l=/127.0.0.1:9097,r=/127.0.0.1:35276,OPEN,fill=FI,flush=-,to=30001/30000}{io=1/1,kio=1,kro=1}->HttpConnection2f88da0c[p=HttpParser{s=START,0
 of 
-1},g=HttpGenerator796c3666{s=START}]=>HttpChannelOverHttp63815646{s=HttpChannelState5c192497{s=IDLE
 rs=BLOCKING os=OPEN is=ID [...]
    java.util.concurrent.TimeoutException: Idle timeout expired: 30001/30000 ms
        at 
org.eclipse.jetty.io.IdleTimeout.checkIdleTimeout(IdleTimeout.java:171) 
~[jetty-io-9.4.52.v20230823.jar:9.4.52.v20230823]
        at org.eclipse.jetty.io.IdleTimeout.idleCheck(IdleTimeout.java:113) 
~[jetty-io-9.4.52.v20230823.jar:9.4.52.v20230823]
        at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) 
~[?:1.8.0_162]
        at java.util.concurrent.FutureTask.run(FutureTask.java:266) 
~[?:1.8.0_162]
        at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
 ~[?:1.8.0_162]
        at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
 ~[?:1.8.0_162]
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 
~[?:1.8.0_162]
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 
~[?:1.8.0_162]
        at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_162]
    2024-04-12 16:04:00,927 [DEBUG] [master-JettyScheduler-1] - 
org.eclipse.jetty.http.HttpParser -HttpParser.java(1883) -close 
HttpParser{s=START,0 of -1}
    2024-04-12 16:04:00,927 [DEBUG] [master-JettyScheduler-1] - 
org.eclipse.jetty.http.HttpParser -HttpParser.java(1912) -START --> CLOSE
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
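    No change in default behavior. The new `celeborn.master.http.idleTimeout` 
and `celeborn.worker.http.idleTimeout` options default to 30s, which matches 
Jetty's default idle timeout.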
    
    ### How was this patch tested?
    
    No.
    
    Closes #2455 from SteNicholas/CELEBORN-1385.
    
    Authored-by: SteNicholas <[email protected]>
    Signed-off-by: SteNicholas <[email protected]>
---
 .../org/apache/celeborn/common/CelebornConf.scala     | 19 +++++++++++++++++++
 docs/configuration/master.md                          |  1 +
 docs/configuration/worker.md                          |  1 +
 .../apache/celeborn/server/common/HttpService.scala   | 12 +++++++++++-
 .../celeborn/server/common/http/HttpServer.scala      |  9 ++++++++-
 5 files changed, 40 insertions(+), 2 deletions(-)

diff --git 
a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala 
b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
index 6ab67fb89..d6acd09a4 100644
--- a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
+++ b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
@@ -590,6 +590,8 @@ class CelebornConf(loadDefaults: Boolean) extends Cloneable 
with Logging with Se
 
   def masterHttpStopTimeout: Long = get(MASTER_HTTP_STOP_TIMEOUT)
 
+  def masterHttpIdleTimeout: Long = get(MASTER_HTTP_IDLE_TIMEOUT)
+
   def haEnabled: Boolean = get(HA_ENABLED)
 
   def haMasterNodeId: Option[String] = get(HA_MASTER_NODE_ID)
@@ -682,6 +684,7 @@ class CelebornConf(loadDefaults: Boolean) extends Cloneable 
with Logging with Se
   def workerHttpPort: Int = get(WORKER_HTTP_PORT)
   def workerHttpMaxWorkerThreads: Int = get(WORKER_HTTP_MAX_WORKER_THREADS)
   def workerHttpStopTimeout: Long = get(WORKER_HTTP_STOP_TIMEOUT)
+  def workerHttpIdleTimeout: Long = get(WORKER_HTTP_IDLE_TIMEOUT)
   def workerRpcPort: Int = get(WORKER_RPC_PORT)
   def workerPushPort: Int = get(WORKER_PUSH_PORT)
   def workerFetchPort: Int = get(WORKER_FETCH_PORT)
@@ -2001,6 +2004,14 @@ object CelebornConf extends Logging {
       .timeConf(TimeUnit.MILLISECONDS)
       .createWithDefaultString("5s")
 
+  val MASTER_HTTP_IDLE_TIMEOUT: ConfigEntry[Long] =
+    buildConf("celeborn.master.http.idleTimeout")
+      .categories("master")
+      .version("0.5.0")
+      .doc("Master http server idle timeout.")
+      .timeConf(TimeUnit.MILLISECONDS)
+      .createWithDefaultString("30s")
+
   val HA_ENABLED: ConfigEntry[Boolean] =
     buildConf("celeborn.master.ha.enabled")
       .withAlternative("celeborn.ha.enabled")
@@ -2576,6 +2587,14 @@ object CelebornConf extends Logging {
       .timeConf(TimeUnit.MILLISECONDS)
       .createWithDefaultString("5s")
 
+  val WORKER_HTTP_IDLE_TIMEOUT: ConfigEntry[Long] =
+    buildConf("celeborn.worker.http.idleTimeout")
+      .categories("worker")
+      .version("0.5.0")
+      .doc("Worker http server idle timeout.")
+      .timeConf(TimeUnit.MILLISECONDS)
+      .createWithDefaultString("30s")
+
   val WORKER_RPC_PORT: ConfigEntry[Int] =
     buildConf("celeborn.worker.rpc.port")
       .categories("worker")
diff --git a/docs/configuration/master.md b/docs/configuration/master.md
index 582f3f8b3..97aae4333 100644
--- a/docs/configuration/master.md
+++ b/docs/configuration/master.md
@@ -44,6 +44,7 @@ license: |
 | celeborn.master.heartbeat.worker.timeout | 120s | false | Worker heartbeat 
timeout. | 0.3.0 | celeborn.worker.heartbeat.timeout | 
 | celeborn.master.host | &lt;localhost&gt; | false | Hostname for master to 
bind. | 0.2.0 |  | 
 | celeborn.master.http.host | &lt;localhost&gt; | false | Master's http host. 
| 0.4.0 | 
celeborn.metrics.master.prometheus.host,celeborn.master.metrics.prometheus.host 
| 
+| celeborn.master.http.idleTimeout | 30s | false | Master http server idle 
timeout. | 0.5.0 |  | 
 | celeborn.master.http.maxWorkerThreads | 200 | false | Maximum number of 
threads in the master http worker thread pool. | 0.5.0 |  | 
 | celeborn.master.http.port | 9098 | false | Master's http port. | 0.4.0 | 
celeborn.metrics.master.prometheus.port,celeborn.master.metrics.prometheus.port 
| 
 | celeborn.master.http.stopTimeout | 5s | false | Master http server stop 
timeout. | 0.5.0 |  | 
diff --git a/docs/configuration/worker.md b/docs/configuration/worker.md
index 63bd69b7a..93cbae431 100644
--- a/docs/configuration/worker.md
+++ b/docs/configuration/worker.md
@@ -85,6 +85,7 @@ license: |
 | celeborn.worker.graceful.shutdown.saveCommittedFileInfo.sync | false | false 
| Whether to call sync method to save committed file infos into Level DB to 
handle OS crash. | 0.3.1 |  | 
 | celeborn.worker.graceful.shutdown.timeout | 600s | false | The worker's 
graceful shutdown timeout time. | 0.2.0 |  | 
 | celeborn.worker.http.host | &lt;localhost&gt; | false | Worker's http host. 
| 0.4.0 | 
celeborn.metrics.worker.prometheus.host,celeborn.worker.metrics.prometheus.host 
| 
+| celeborn.worker.http.idleTimeout | 30s | false | Worker http server idle 
timeout. | 0.5.0 |  | 
 | celeborn.worker.http.maxWorkerThreads | 200 | false | Maximum number of 
threads in the worker http worker thread pool. | 0.5.0 |  | 
 | celeborn.worker.http.port | 9096 | false | Worker's http port. | 0.4.0 | 
celeborn.metrics.worker.prometheus.port,celeborn.worker.metrics.prometheus.port 
| 
 | celeborn.worker.http.stopTimeout | 5s | false | Worker http server stop 
timeout. | 0.5.0 |  | 
diff --git 
a/service/src/main/scala/org/apache/celeborn/server/common/HttpService.scala 
b/service/src/main/scala/org/apache/celeborn/server/common/HttpService.scala
index 540b3eadd..f11d03092 100644
--- a/service/src/main/scala/org/apache/celeborn/server/common/HttpService.scala
+++ b/service/src/main/scala/org/apache/celeborn/server/common/HttpService.scala
@@ -181,7 +181,8 @@ abstract class HttpService extends Service with Logging {
       httpHost(),
       httpPort(),
       httpMaxWorkerThreads(),
-      httpStopTimeout())
+      httpStopTimeout(),
+      httpIdleTimeout())
     httpServer.start()
     startInternal()
     // block until the HTTP server is started, otherwise, we may get
@@ -228,6 +229,15 @@ abstract class HttpService extends Service with Logging {
     }
   }
 
+  private def httpIdleTimeout(): Long = {
+    serviceName match {
+      case Service.MASTER =>
+        conf.masterHttpIdleTimeout
+      case Service.WORKER =>
+        conf.workerHttpIdleTimeout
+    }
+  }
+
   def connectionUrl: String = {
     httpServer.getServerUri
   }
diff --git 
a/service/src/main/scala/org/apache/celeborn/server/common/http/HttpServer.scala
 
b/service/src/main/scala/org/apache/celeborn/server/common/http/HttpServer.scala
index 8592d696c..8dc4e5136 100644
--- 
a/service/src/main/scala/org/apache/celeborn/server/common/http/HttpServer.scala
+++ 
b/service/src/main/scala/org/apache/celeborn/server/common/http/HttpServer.scala
@@ -107,7 +107,13 @@ private[celeborn] case class HttpServer(
 
 object HttpServer {
 
-  def apply(role: String, host: String, port: Int, poolSize: Int, stopTimeout: 
Long): HttpServer = {
+  def apply(
+      role: String,
+      host: String,
+      port: Int,
+      poolSize: Int,
+      stopTimeout: Long,
+      idleTimeout: Long): HttpServer = {
     val pool = new QueuedThreadPool(math.max(poolSize, 8))
     pool.setName(s"$role-JettyThreadPool")
     pool.setDaemon(true)
@@ -137,6 +143,7 @@ object HttpServer {
     connector.setReuseAddress(!SystemUtils.IS_OS_WINDOWS)
     connector.setAcceptQueueSize(math.min(connector.getAcceptors, 8))
     connector.setStopTimeout(stopTimeout)
+    connector.setIdleTimeout(idleTimeout)
 
     new HttpServer(role, server, connector, collection)
   }

Reply via email to