NEUpanning commented on PR #9181: URL: https://github.com/apache/incubator-gluten/pull/9181#issuecomment-2785116866
@baibaichen could you please share the log of the failed [ClickHouse CI](https://opencicd.kyligence.com/job/gluten/job/gluten-ci/15516/)? Also, the failed CI job `run-tpc-test-ubuntu-2204-celeborn` seems unrelated to this PR: ``` 25/04/07 09:45:30 ERROR CelebornShuffleReader: Exception caught when readPartition 72! org.apache.celeborn.common.exception.CelebornIOException: createPartitionReader failed! PartitionLocation[ id-epoch:72-0 host-rpcPort-pushPort-fetchPort-replicatePort:172.18.0.2-41173-42161-33177-42025 mode:PRIMARY peer:(empty) storage hint:StorageInfo{type=HDD, mountPoint='', finalResult=true, filePath=} mapIdBitMap:null] at org.apache.celeborn.client.read.CelebornInputStream$CelebornInputStreamImpl.createReaderWithRetry(CelebornInputStream.java:370) at org.apache.celeborn.client.read.CelebornInputStream$CelebornInputStreamImpl.moveToNextReader(CelebornInputStream.java:273) at org.apache.celeborn.client.read.CelebornInputStream$CelebornInputStreamImpl.<init>(CelebornInputStream.java:222) at org.apache.celeborn.client.read.CelebornInputStream.create(CelebornInputStream.java:72) at org.apache.celeborn.client.ShuffleClientImpl.readPartition(ShuffleClientImpl.java:1675) at org.apache.spark.shuffle.celeborn.CelebornShuffleReader$$anon$3.run(CelebornShuffleReader.scala:125) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Caused by: java.io.IOException: Exception in sendRpcSync to: /172.18.0.2:33177 at org.apache.celeborn.common.network.client.TransportClient.sendRpcSync(TransportClient.java:324) at org.apache.celeborn.client.read.WorkerPartitionReader.<init>(WorkerPartitionReader.java:129) at 
org.apache.celeborn.client.read.CelebornInputStream$CelebornInputStreamImpl.createReader(CelebornInputStream.java:444) at org.apache.celeborn.client.read.CelebornInputStream$CelebornInputStreamImpl.createReaderWithRetry(CelebornInputStream.java:341) ... 10 more Caused by: java.util.concurrent.ExecutionException: java.io.IOException: org.apache.celeborn.common.exception.PartitionUnRetryAbleException: Could not find file 72-0-0 for local-1744017116595-72. at org.apache.celeborn.common.util.ExceptionUtils.wrapIOExceptionToUnRetryable(ExceptionUtils.java:41) at org.apache.celeborn.service.deploy.worker.FetchHandler.handleRpcException(FetchHandler.scala:350) at org.apache.celeborn.service.deploy.worker.FetchHandler.handleRpcIOException(FetchHandler.scala:342) at org.apache.celeborn.service.deploy.worker.FetchHandler.handleOpenStreamInternal(FetchHandler.scala:293) at org.apache.celeborn.service.deploy.worker.FetchHandler.handleRpcRequest(FetchHandler.scala:138) at org.apache.celeborn.service.deploy.worker.FetchHandler.receive(FetchHandler.scala:97) at org.apache.celeborn.common.network.server.TransportRequestHandler.processRpcRequest(TransportRequestHandler.java:96) at org.apache.celeborn.common.network.server.TransportRequestHandler.handle(TransportRequestHandler.java:84) at org.apache.celeborn.common.network.server.TransportChannelHandler.channelRead(TransportChannelHandler.java:156) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) at io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:286) at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) at 
io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1410) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:440) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:919) at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:166) at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:788) at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:724) at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:650) at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:562) at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:997) at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30) at java.lang.Thread.run(Thread.java:748) Caused by: java.io.FileNotFoundException: Could not find file 72-0-0 for local-1744017116595-72. at org.apache.celeborn.service.deploy.worker.FetchHandler.getRawFileInfo(FetchHandler.scala:88) at org.apache.celeborn.service.deploy.worker.FetchHandler.handleOpenStreamInternal(FetchHandler.scala:214) ... 
29 more at org.apache.celeborn.common.network.client.TransportResponseHandler.handle(TransportResponseHandler.java:390) at org.apache.celeborn.common.network.server.TransportChannelHandler.channelRead(TransportChannelHandler.java:158) at org.apache.celeborn.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444) at org.apache.celeborn.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) at org.apache.celeborn.shaded.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) at org.apache.celeborn.shaded.io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:286) at org.apache.celeborn.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:442) at org.apache.celeborn.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) at org.apache.celeborn.shaded.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) at org.apache.celeborn.common.network.util.TransportFrameDecoder.channelRead(TransportFrameDecoder.java:74) at org.apache.celeborn.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444) at org.apache.celeborn.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) at org.apache.celeborn.shaded.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) at org.apache.celeborn.shaded.io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1410) at org.apache.celeborn.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:440) at 
org.apache.celeborn.shaded.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) at org.apache.celeborn.shaded.io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:919) at org.apache.celeborn.shaded.io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:166) at org.apache.celeborn.shaded.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:788) at org.apache.celeborn.shaded.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:724) at org.apache.celeborn.shaded.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:650) at org.apache.celeborn.shaded.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:562) at org.apache.celeborn.shaded.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:997) at org.apache.celeborn.shaded.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) at org.apache.celeborn.shaded.io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30) ... 1 more ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
