This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new ba81b92 [SPARK-36719][CORE] Supporting Netty Logging at the network layer ba81b92 is described below commit ba81b92402eb5b912d8d365d3b0720dd6a719dfd Author: attilapiros <piros.attila.zs...@gmail.com> AuthorDate: Sat Sep 11 16:14:02 2021 -0700 [SPARK-36719][CORE] Supporting Netty Logging at the network layer ### What changes were proposed in this pull request? Supporting Netty level logging at the network layer. To configure Netty level logging a `LogHandler` must be added to the channel pipeline. In this PR I have introduced a new class `NettyLogger` which is able to construct a log handler depending on the log level: - in case of `log4j.logger.org.apache.spark.network.util.NettyLogger=DEBUG`: a custom log handler is created which does not dump the message contents. This way the log is a bit more compact. Moreover when network level encryption is switched on this level might be sufficient. - in case of `log4j.logger.org.apache.spark.network.util.NettyLogger=TRACE`: Netty's own log handler is used which dumps the message contents. - otherwise (when the logger is not TRACE or DEBUG) the pipeline does not contain a log handler (there is no runtime penalty for the default setting but a long running app/service must be restarted along with the new log level to have an effect). ### Why are the changes needed? This level of logging proved to be sufficient during debugging some external shuffle related problem. Compared with the tcpdump this log lines can be more easily correlated with the Spark internal calls. Moreover the log layout can be configured to contain the thread names that way for a timeout a busy thread could be identified. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually. #### DEBUG level ``` ╭─attilazsoltpirosapiros-MBP16 ~/git/attilapiros/spark ‹SPARK-36719*› ╰─$ tail -1 ./conf/log4j.properties log4j.logger.org.apache.spark.network.util.NettyLogger=DEBUG ╭─attilazsoltpirosapiros-MBP16 ~/git/attilapiros/spark ‹SPARK-36719*› ╰─$ ./bin/spark-submit --class org.apache.spark.examples.JavaWordCount --master local\[8\] ./examples/target/original-spark-examples_2.12-3.3.0-SNAPSHOT.jar README.md 2> >(grep NettyLogger) 1> /dev/null 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf] REGISTERED 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf] CONNECT: /172.30.64.219:61014 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] ACTIVE 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0x28101520, L:/172.30.64.219:61014 - R:/172.30.64.219:61015] REGISTERED 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0x28101520, L:/172.30.64.219:61014 - R:/172.30.64.219:61015] ACTIVE 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] WRITE 66B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] FLUSH 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0x28101520, L:/172.30.64.219:61014 - R:/172.30.64.219:61015] READ 66B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0x28101520, L:/172.30.64.219:61014 - R:/172.30.64.219:61015] WRITE: MessageWithHeader [headerLength: 74, bodyLength: 1552705] 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0x28101520, L:/172.30.64.219:61014 - R:/172.30.64.219:61015] FLUSH 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 74B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0x28101520, L:/172.30.64.219:61014 - R:/172.30.64.219:61015] READ COMPLETE 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ COMPLETE 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 2048B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 32768B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ COMPLETE 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 65536B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ 10561B 21/09/10 15:24:35 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ COMPLETE 21/09/10 15:24:40 DEBUG NettyLogger: [id: 0x28101520, L:/172.30.64.219:61014 ! R:/172.30.64.219:61015] INACTIVE 21/09/10 15:24:40 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 - R:/172.30.64.219:61014] READ COMPLETE 21/09/10 15:24:40 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 ! R:/172.30.64.219:61014] INACTIVE 21/09/10 15:24:40 DEBUG NettyLogger: [id: 0xb9d94fcf, L:/172.30.64.219:61015 ! R:/172.30.64.219:61014] UNREGISTERED 21/09/10 15:24:40 DEBUG NettyLogger: [id: 0x28101520, L:/172.30.64.219:61014 ! R:/172.30.64.219:61015] UNREGISTERED ``` #### TRACE level ``` ╭─attilazsoltpirosapiros-MBP16 ~/git/attilapiros/spark ‹SPARK-36719*› ╰─$ tail -1 ./conf/log4j.properties log4j.logger.org.apache.spark.network.util.NettyLogger=TRACE ╭─attilazsoltpirosapiros-MBP16 ~/git/attilapiros/spark ‹SPARK-36719*› ╰─$ ./bin/spark-submit --class org.apache.spark.examples.JavaWordCount --master local\[8\] ./examples/target/original-spark-examples_2.12-3.3.0-SNAPSHOT.jar README.md 1> /dev/null 2>&1 ... 21/09/10 15:29:14 TRACE NettyLogger: [id: 0xf1d25786] REGISTERED 21/09/10 15:29:14 TRACE NettyLogger: [id: 0xf1d25786] CONNECT: /172.30.64.219:61044 21/09/10 15:29:14 TRACE NettyLogger: [id: 0xf1d25786, L:/172.30.64.219:61045 - R:/172.30.64.219:61044] ACTIVE 21/09/10 15:29:14 INFO TransportClientFactory: Successfully created connection to /172.30.64.219:61044 after 37 ms (0 ms spent in bootstraps) 21/09/10 15:29:14 TRACE NettyLogger: [id: 0x362fc693, L:/172.30.64.219:61044 - R:/172.30.64.219:61045] REGISTERED 21/09/10 15:29:14 TRACE NettyLogger: [id: 0x362fc693, L:/172.30.64.219:61044 - R:/172.30.64.219:61045] ACTIVE 21/09/10 15:29:14 INFO Utils: Fetching spark://172.30.64.219:61044/jars/original-spark-examples_2.12-3.3.0-SNAPSHOT.jar to /private/var/folders/t_/fr_vqcyx23vftk81ftz1k5hw0000gn/T/spark-91e059f5-1e29-4727-8602-f81206bbe48b/userFiles-50b48490-8950-4c46-b3d3-61a2c85412a3/fetchFileTemp8803030587223485061.tmp 21/09/10 15:29:14 TRACE NettyLogger: [id: 0xf1d25786, L:/172.30.64.219:61045 - R:/172.30.64.219:61044] WRITE: 66B +-------------------------------------------------+ | 0 1 2 3 4 5 6 7 8 9 a b c d e f | +--------+-------------------------------------------------+----------------+ |00000000| 00 00 00 00 00 00 00 42 06 00 00 00 35 2f 6a 61 |.......B....5/ja| |00000010| 72 73 2f 6f 72 69 67 69 6e 61 6c 2d 73 70 61 72 |rs/original-spar| |00000020| 6b 2d 65 78 61 6d 70 6c 65 73 5f 32 2e 31 32 2d |k-examples_2.12-| |00000030| 33 2e 33 2e 30 2d 53 4e 41 50 53 48 4f 54 2e 6a |3.3.0-SNAPSHOT.j| |00000040| 61 72 |ar | +--------+-------------------------------------------------+----------------+ 21/09/10 15:29:14 TRACE NettyLogger: [id: 0xf1d25786, L:/172.30.64.219:61045 - R:/172.30.64.219:61044] FLUSH 21/09/10 15:29:14 TRACE NettyLogger: [id: 0x362fc693, L:/172.30.64.219:61044 - R:/172.30.64.219:61045] READ: 66B +-------------------------------------------------+ | 0 1 2 3 4 5 6 7 8 9 a b c d e f | +--------+-------------------------------------------------+----------------+ |00000000| 00 00 00 00 00 00 00 42 06 00 00 00 35 2f 6a 61 |.......B....5/ja| |00000010| 72 73 2f 6f 72 69 67 69 6e 61 6c 2d 73 70 61 72 |rs/original-spar| |00000020| 6b 2d 65 78 61 6d 70 6c 65 73 5f 32 2e 31 32 2d |k-examples_2.12-| |00000030| 33 2e 33 2e 30 2d 53 4e 41 50 53 48 4f 54 2e 6a |3.3.0-SNAPSHOT.j| |00000040| 61 72 |ar | +--------+-------------------------------------------------+----------------+ 21/09/10 15:29:14 TRACE NettyLogger: [id: 0x362fc693, L:/172.30.64.219:61044 - R:/172.30.64.219:61045] WRITE: MessageWithHeader [headerLength: 74, bodyLength: 1552705] 21/09/10 15:29:14 TRACE NettyLogger: [id: 0x362fc693, L:/172.30.64.219:61044 - R:/172.30.64.219:61045] FLUSH 21/09/10 15:29:14 TRACE NettyLogger: [id: 0xf1d25786, L:/172.30.64.219:61045 - R:/172.30.64.219:61044] READ: 74B ... ``` Closes #33962 from attilapiros/SPARK-36719. Authored-by: attilapiros <piros.attila.zs...@gmail.com> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../org/apache/spark/network/TransportContext.java | 8 ++- .../spark/network/protocol/MessageWithHeader.java | 6 ++ .../org/apache/spark/network/util/NettyLogger.java | 65 ++++++++++++++++++++++ 3 files changed, 78 insertions(+), 1 deletion(-) diff --git a/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java b/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java index a0de9df..6948e59 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java +++ b/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java @@ -44,6 +44,7 @@ import org.apache.spark.network.server.TransportServer; import org.apache.spark.network.server.TransportServerBootstrap; import org.apache.spark.network.util.IOMode; import org.apache.spark.network.util.NettyUtils; +import org.apache.spark.network.util.NettyLogger; import org.apache.spark.network.util.TransportConf; import org.apache.spark.network.util.TransportFrameDecoder; @@ -64,6 +65,7 @@ import org.apache.spark.network.util.TransportFrameDecoder; public class TransportContext implements Closeable { private static final Logger logger = LoggerFactory.getLogger(TransportContext.class); + private static final NettyLogger nettyLogger = new NettyLogger(); private final TransportConf conf; private final RpcHandler rpcHandler; private final boolean closeIdleConnections; @@ -187,7 +189,11 @@ public class TransportContext implements Closeable { RpcHandler channelRpcHandler) { try { TransportChannelHandler channelHandler = createChannelHandler(channel, channelRpcHandler); - ChannelPipeline pipeline = channel.pipeline() + ChannelPipeline pipeline = channel.pipeline(); + if (nettyLogger.getLoggingHandler() != null) { + pipeline.addLast("loggingHandler", nettyLogger.getLoggingHandler()); + } + pipeline .addLast("encoder", ENCODER) .addLast(TransportFrameDecoder.HANDLER_NAME, NettyUtils.createFrameDecoder()) .addLast("decoder", DECODER) diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java index b81c25af..19eeddb 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java @@ -194,4 +194,10 @@ class MessageWithHeader extends AbstractFileRegion { } return super.release(decrement); } + + @Override + public String toString() { + return "MessageWithHeader [headerLength: " + headerLength + ", bodyLength: " + bodyLength + "]"; + } + } diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/NettyLogger.java b/common/network-common/src/main/java/org/apache/spark/network/util/NettyLogger.java new file mode 100644 index 0000000..914c970 --- /dev/null +++ b/common/network-common/src/main/java/org/apache/spark/network/util/NettyLogger.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.util; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufHolder; +import io.netty.channel.ChannelHandlerContext; +import io.netty.handler.logging.LoggingHandler; +import io.netty.handler.logging.LogLevel; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class NettyLogger { + private static final Logger logger = LoggerFactory.getLogger(NettyLogger.class); + + /** A Netty LoggingHandler which does not dump the message contents. */ + private static class NoContentLoggingHandler extends LoggingHandler { + + NoContentLoggingHandler(Class<?> clazz, LogLevel level) { + super(clazz, level); + } + + protected String format(ChannelHandlerContext ctx, String eventName, Object arg) { + if (arg instanceof ByteBuf) { + return format(ctx, eventName) + " " + ((ByteBuf) arg).readableBytes() + "B"; + } else if (arg instanceof ByteBufHolder) { + return format(ctx, eventName) + " " + + ((ByteBufHolder) arg).content().readableBytes() + "B"; + } else { + return super.format(ctx, eventName, arg); + } + } + } + + private final LoggingHandler loggingHandler; + + public NettyLogger() { + if (logger.isTraceEnabled()) { + loggingHandler = new LoggingHandler(NettyLogger.class, LogLevel.TRACE); + } else if (logger.isDebugEnabled()) { + loggingHandler = new NoContentLoggingHandler(NettyLogger.class, LogLevel.DEBUG); + } else { + loggingHandler = null; + } + } + + public LoggingHandler getLoggingHandler() { + return loggingHandler; + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org