This is an automated email from the ASF dual-hosted git repository.

zhouky pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-celeborn.git


The following commit(s) were added to refs/heads/main by this push:
     new f93be4550 [CELEBORN-1060] Fix the master's http port conflicts with 
rpc port in celeborn default template file
f93be4550 is described below

commit f93be45506d3ec4a0aa2b4cfb9327a05926071cd
Author: xleoken <[email protected]>
AuthorDate: Fri Oct 20 19:26:42 2023 +0800

    [CELEBORN-1060] Fix the master's http port conflicts with rpc port in 
celeborn default template file
    
    ### What changes were proposed in this pull request?
    
    When starting up an HA Celeborn cluster, a java.net.BindException is thrown 
if the default template conf file is copied. The reason is that the master's 
http port conflicts with the rpc port.
    
    It's better to keep these rpc ports the same: `9097`.
    
    ```
    17:53:24.417 [main] ERROR org.apache.celeborn.service.deploy.master.Master 
- Initialize master failed.
    java.net.BindException: Address already in use
            at sun.nio.ch.Net.bind0(Native Method) ~[?:1.8.0_371]
            at sun.nio.ch.Net.bind(Net.java:438) ~[?:1.8.0_371]
            at sun.nio.ch.Net.bind(Net.java:430) ~[?:1.8.0_371]
            at 
sun.nio.ch.ServerSocketChannelImpl.bind(ServerSocketChannelImpl.java:225) 
~[?:1.8.0_371]
            at 
io.netty.channel.socket.nio.NioServerSocketChannel.doBind(NioServerSocketChannel.java:141)
 ~[netty-transport-4.1.93.Final.jar:4.1.93.Final]
            at 
io.netty.channel.AbstractChannel$AbstractUnsafe.bind(AbstractChannel.java:562) 
~[netty-transport-4.1.93.Final.jar:4.1.93.Final]
            at 
io.netty.channel.DefaultChannelPipeline$HeadContext.bind(DefaultChannelPipeline.java:1334)
 ~[netty-transport-4.1.93.Final.jar:4.1.93.Final]
            at 
io.netty.channel.AbstractChannelHandlerContext.invokeBind(AbstractChannelHandlerContext.java:600)
 ~[netty-transport-4.1.93.Final.jar:4.1.93.Final]
            at 
io.netty.channel.AbstractChannelHandlerContext.bind(AbstractChannelHandlerContext.java:579)
 ~[netty-transport-4.1.93.Final.jar:4.1.93.Final]
            at 
io.netty.handler.logging.LoggingHandler.bind(LoggingHandler.java:230) 
~[netty-handler-4.1.93.Final.jar:4.1.93.Final]
            at 
io.netty.channel.AbstractChannelHandlerContext.invokeBind(AbstractChannelHandlerContext.java:602)
 ~[netty-transport-4.1.93.Final.jar:4.1.93.Final]
            at 
io.netty.channel.AbstractChannelHandlerContext.bind(AbstractChannelHandlerContext.java:579)
 ~[netty-transport-4.1.93.Final.jar:4.1.93.Final]
            at 
io.netty.channel.DefaultChannelPipeline.bind(DefaultChannelPipeline.java:973) 
~[netty-transport-4.1.93.Final.jar:4.1.93.Final]
            at io.netty.channel.AbstractChannel.bind(AbstractChannel.java:260) 
~[netty-transport-4.1.93.Final.jar:4.1.93.Final]
            at 
io.netty.bootstrap.AbstractBootstrap$2.run(AbstractBootstrap.java:356) 
~[netty-transport-4.1.93.Final.jar:4.1.93.Final]
            at 
io.netty.util.concurrent.AbstractEventExecutor.runTask(AbstractEventExecutor.java:174)
 ~[netty-common-4.1.93.Final.jar:4.1.93.Final]
            at 
io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:167)
 ~[netty-common-4.1.93.Final.jar:4.1.93.Final]
            at 
io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:470)
 ~[netty-common-4.1.93.Final.jar:4.1.93.Final]
            at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:569) 
~[netty-transport-4.1.93.Final.jar:4.1.93.Final]
            at 
io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:997)
 ~[netty-common-4.1.93.Final.jar:4.1.93.Final]
            at 
io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) 
~[netty-common-4.1.93.Final.jar:4.1.93.Final]
            at 
io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
 ~[netty-common-4.1.93.Final.jar:4.1.93.Final]
            at java.lang.Thread.run(Thread.java:750) ~[?:1.8.0_371]
    
    ```
    
    ### Why are the changes needed?
    
    After this patch, the default template is friendlier for new users.
    
    ### How was this patch tested?
    
    Tested locally.
    
    Closes #2008 from xleoken/patch6.
    
    Authored-by: xleoken <[email protected]>
    Signed-off-by: zky.zhoukeyong <[email protected]>
---
 conf/celeborn-defaults.conf.template | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/conf/celeborn-defaults.conf.template 
b/conf/celeborn-defaults.conf.template
index c08428fdb..8a9312047 100644
--- a/conf/celeborn-defaults.conf.template
+++ b/conf/celeborn-defaults.conf.template
@@ -26,10 +26,10 @@ celeborn.worker.push.io.threads                  8
 
 celeborn.metrics.enabled                         true
 
-celeborn.worker.storage.dirs                     
/mnt/disk1,/mnt/disk2,/mnt/disk3,/mnt/disk4,/mnt/disk5,/mnt/disk6,/mnt/disk7,/mnt/disk8,/mnt/disk9,/mnt/disk10
+celeborn.worker.storage.dirs                     
/mnt/disk1,/mnt/disk2,/mnt/disk3,/mnt/disk4,/mnt/disk5
 celeborn.worker.http.port                        9096
 
-celeborn.master.endpoints                        
clb-1:9097,clb-2:9098,clb-3:9099
+celeborn.master.endpoints                        
clb-1:9097,clb-2:9097,clb-3:9097
 celeborn.master.http.port                        9098
 
 celeborn.master.ha.enabled                       true
@@ -37,11 +37,11 @@ celeborn.master.ha.node.1.host                   clb-1
 celeborn.master.ha.node.1.port                   9097
 celeborn.master.ha.node.1.ratis.port             9872
 celeborn.master.ha.node.2.host                   clb-2
-celeborn.master.ha.node.2.port                   9098
-celeborn.master.ha.node.2.ratis.port             9873
+celeborn.master.ha.node.2.port                   9097
+celeborn.master.ha.node.2.ratis.port             9872
 celeborn.master.ha.node.3.host                   clb-3
-celeborn.master.ha.node.3.port                   9099
-celeborn.master.ha.node.3.ratis.port             9874
+celeborn.master.ha.node.3.port                   9097
+celeborn.master.ha.node.3.ratis.port             9872
 celeborn.master.ha.ratis.raft.server.storage.dir                           
/mnt/disk1/celeborn_ratis/
 celeborn.master.ha.ratis.raft.server.snapshot.auto.trigger.enabled         true
 celeborn.master.ha.ratis.raft.server.snapshot.auto.trigger.threshold       
200000

Reply via email to