This is an automated email from the ASF dual-hosted git repository.
rexxiong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/celeborn.git
The following commit(s) were added to refs/heads/main by this push:
new 6fdeced15 [CELEBORN-1359] Support Netty Logging at the network layer
6fdeced15 is described below
commit 6fdeced158606a3554331a86cc03bcff46468814
Author: SteNicholas <[email protected]>
AuthorDate: Thu Mar 28 16:11:37 2024 +0800
[CELEBORN-1359] Support Netty Logging at the network layer
### What changes were proposed in this pull request?
Support Netty-level logging at the network layer for Celeborn. To configure
Netty-level logging, a `LoggingHandler` must be added to the channel pipeline.
`NettyLogger` is introduced as a new class which constructs a log
handler depending on the log level:
- In case of `<Logger
name="org.apache.celeborn.common.network.util.NettyLogger" level="DEBUG"
additivity="false">`: a custom log handler is created which does not dump the
message contents. This way the log is a bit more compact. Moreover, when
network-level encryption is switched on, this level might be sufficient.
- In case of `<Logger
name="org.apache.celeborn.common.network.util.NettyLogger" level="TRACE"
additivity="false">`: Netty's own log handler is used which dumps the message
contents.
- Otherwise (when the logger is neither `TRACE` nor `DEBUG`) the pipeline does
not contain a log handler (there is no runtime penalty for the default setting,
but a long-running service must be restarted for a new log level to take
effect).
Backport:
- [[SPARK-36719][CORE] Supporting Netty Logging at the network
layer](https://github.com/apache/spark/pull/33962)
- [[SPARK-45377][CORE] Handle InputStream in
NettyLogger](https://github.com/apache/spark/pull/43165)
### Why are the changes needed?
This level of logging proved to be sufficient while debugging some
external-shuffle-related problems. Compared with tcpdump, these log lines can
be more easily correlated with the Celeborn internal calls. Moreover, the log
layout can be configured to contain the thread names, so that for a timeout a
busy thread could be identified.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Tested manually in a local environment.
Closes #2423 from SteNicholas/CELEBORN-1359.
Authored-by: SteNicholas <[email protected]>
Signed-off-by: Shuang <[email protected]>
---
LICENSE | 2 +
.../celeborn/common/network/TransportContext.java | 12 +++-
.../common/network/protocol/MessageWithHeader.java | 5 ++
.../celeborn/common/network/util/NettyLogger.java | 81 ++++++++++++++++++++++
4 files changed, 97 insertions(+), 3 deletions(-)
diff --git a/LICENSE b/LICENSE
index 95c7a9c72..76555a026 100644
--- a/LICENSE
+++ b/LICENSE
@@ -213,6 +213,7 @@ Apache Spark
./client-spark/spark-2/src/main/java/org/apache/spark/shuffle/celeborn/SparkUtils.java
./client-spark/spark-3/src/main/java/org/apache/spark/shuffle/celeborn/SparkUtils.java
./common/src/main/java/org/apache/celeborn/common/network/ssl/ReloadingX509TrustManager.java
+./common/src/main/java/org/apache/celeborn/common/network/util/NettyLogger.java
./common/src/main/java/org/apache/celeborn/common/unsafe/Platform.java
./common/src/main/java/org/apache/celeborn/common/util/JavaUtils.java
./common/src/main/scala/org/apache/celeborn/common/util/SignalUtils.scala
@@ -229,6 +230,7 @@ Apache Spark
./worker/src/main/java/org/apache/celeborn/service/deploy/worker/shuffledb/LevelDBIterator.java
./worker/src/main/java/org/apache/celeborn/service/deploy/worker/shuffledb/RocksDB.java
./worker/src/main/java/org/apache/celeborn/service/deploy/worker/shuffledb/RocksDBProvider.java
+./worker/src/main/scala/org/apache/celeborn/service/deploy/worker/profiler/JVMProfiler.scala
Apache Kyuubi
./common/src/main/java/org/apache/celeborn/reflect/DynClasses.java
diff --git
a/common/src/main/java/org/apache/celeborn/common/network/TransportContext.java
b/common/src/main/java/org/apache/celeborn/common/network/TransportContext.java
index 488e0fd04..ec0d1fd87 100644
---
a/common/src/main/java/org/apache/celeborn/common/network/TransportContext.java
+++
b/common/src/main/java/org/apache/celeborn/common/network/TransportContext.java
@@ -23,6 +23,7 @@ import java.util.List;
import io.netty.channel.Channel;
import io.netty.channel.ChannelDuplexHandler;
import io.netty.channel.ChannelInboundHandlerAdapter;
+import io.netty.channel.ChannelPipeline;
import io.netty.channel.socket.SocketChannel;
import io.netty.handler.timeout.IdleStateHandler;
import org.slf4j.Logger;
@@ -36,6 +37,7 @@ import
org.apache.celeborn.common.network.client.TransportResponseHandler;
import org.apache.celeborn.common.network.protocol.MessageEncoder;
import org.apache.celeborn.common.network.server.*;
import org.apache.celeborn.common.network.util.FrameDecoder;
+import org.apache.celeborn.common.network.util.NettyLogger;
import org.apache.celeborn.common.network.util.TransportConf;
import org.apache.celeborn.common.network.util.TransportFrameDecoder;
@@ -55,6 +57,7 @@ import
org.apache.celeborn.common.network.util.TransportFrameDecoder;
public class TransportContext {
private static final Logger logger =
LoggerFactory.getLogger(TransportContext.class);
+ private static final NettyLogger nettyLogger = new NettyLogger();
private final TransportConf conf;
private final BaseMessageHandler msgHandler;
private final ChannelDuplexHandler channelsLimiter;
@@ -147,12 +150,15 @@ public class TransportContext {
ChannelInboundHandlerAdapter decoder,
BaseMessageHandler resolvedMsgHandler) {
try {
+ ChannelPipeline pipeline = channel.pipeline();
+ if (nettyLogger.getLoggingHandler() != null) {
+ pipeline.addLast("loggingHandler", nettyLogger.getLoggingHandler());
+ }
if (channelsLimiter != null) {
- channel.pipeline().addLast("limiter", channelsLimiter);
+ pipeline.addLast("limiter", channelsLimiter);
}
TransportChannelHandler channelHandler = createChannelHandler(channel,
resolvedMsgHandler);
- channel
- .pipeline()
+ pipeline
.addLast("encoder", ENCODER)
.addLast(FrameDecoder.HANDLER_NAME, decoder)
.addLast(
diff --git
a/common/src/main/java/org/apache/celeborn/common/network/protocol/MessageWithHeader.java
b/common/src/main/java/org/apache/celeborn/common/network/protocol/MessageWithHeader.java
index 21f11d49f..c2de26128 100644
---
a/common/src/main/java/org/apache/celeborn/common/network/protocol/MessageWithHeader.java
+++
b/common/src/main/java/org/apache/celeborn/common/network/protocol/MessageWithHeader.java
@@ -192,4 +192,9 @@ class MessageWithHeader extends AbstractFileRegion {
}
return super.release(decrement);
}
+
+ @Override
+ public String toString() {
+ return "MessageWithHeader [headerLength: " + headerLength + ", bodyLength:
" + bodyLength + "]";
+ }
}
diff --git
a/common/src/main/java/org/apache/celeborn/common/network/util/NettyLogger.java
b/common/src/main/java/org/apache/celeborn/common/network/util/NettyLogger.java
new file mode 100644
index 000000000..eb7c5ffbf
--- /dev/null
+++
b/common/src/main/java/org/apache/celeborn/common/network/util/NettyLogger.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.celeborn.common.network.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import io.netty.buffer.ByteBuf;
+import io.netty.buffer.ByteBufHolder;
+import io.netty.channel.ChannelHandlerContext;
+import io.netty.handler.logging.LogLevel;
+import io.netty.handler.logging.LoggingHandler;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A Netty logger that constructs a log handler depending on the log level.
+ *
+ * <p>Note: code copied from Apache Spark.
+ */
+public class NettyLogger {
+ private static final Logger logger =
LoggerFactory.getLogger(NettyLogger.class);
+
+ /** A Netty LoggingHandler which does not dump the message contents. */
+ private static class NoContentLoggingHandler extends LoggingHandler {
+
+ NoContentLoggingHandler(Class<?> clazz, LogLevel level) {
+ super(clazz, level);
+ }
+
+    @Override
+    protected String format(ChannelHandlerContext ctx, String eventName, Object arg) {
+      if (arg instanceof ByteBuf) {
+        return format(ctx, eventName) + " " + ((ByteBuf) arg).readableBytes() + "B";
+      } else if (arg instanceof ByteBufHolder) {
+        return format(ctx, eventName) + " " + ((ByteBufHolder) arg).content().readableBytes() + "B";
+      } else if (arg instanceof InputStream) {
+        int available = -1;
+        try {
+          available = ((InputStream) arg).available();
+        } catch (IOException ignored) {
+          // Swallow, but return -1 to indicate an error happened
+        }
+        return format(ctx, eventName) + " " + available + "B"; // 2-arg form; 3-arg recurses here
+      } else {
+        return super.format(ctx, eventName, arg);
+      }
+    }
+ }
+
+ private final LoggingHandler loggingHandler;
+
+ public NettyLogger() {
+ if (logger.isTraceEnabled()) {
+ loggingHandler = new LoggingHandler(NettyLogger.class, LogLevel.TRACE);
+ } else if (logger.isDebugEnabled()) {
+ loggingHandler = new NoContentLoggingHandler(NettyLogger.class,
LogLevel.DEBUG);
+ } else {
+ loggingHandler = null;
+ }
+ }
+
+ public LoggingHandler getLoggingHandler() {
+ return loggingHandler;
+ }
+}