rkhachatryan commented on a change in pull request #14721:
URL: https://github.com/apache/flink/pull/14721#discussion_r562029491
##########
File path:
flink-runtime/src/main/java/org/apache/flink/runtime/io/network/logger/NetworkActionsLogger.java
##########
@@ -18,51 +18,89 @@
package org.apache.flink.runtime.io.network.logger;
+import org.apache.flink.runtime.checkpoint.channel.InputChannelInfo;
+import org.apache.flink.runtime.checkpoint.channel.ResultSubpartitionInfo;
import org.apache.flink.runtime.io.network.buffer.Buffer;
import org.apache.flink.runtime.io.network.buffer.BufferConsumer;
+import
org.apache.flink.runtime.io.network.partition.consumer.ChannelStatePersister;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.util.Arrays;
-
-import static org.apache.flink.util.Preconditions.checkState;
-
/** Utility class for logging actions that happened in the network stack for
debugging purposes. */
public class NetworkActionsLogger {
private static final Logger LOG =
LoggerFactory.getLogger(NetworkActionsLogger.class);
-
private static final boolean ENABLED = LOG.isTraceEnabled();
private static final boolean INCLUDE_HASH = true;
- public static void log(Class<?> clazz, String action, Buffer buffer) {
+ public static void traceInput(
+ Class<?> clazz,
+ String action,
+ Buffer buffer,
+ InputChannelInfo channelInfo,
+ ChannelStatePersister channelStatePersister,
+ int sequenceNumber) {
+ if (ENABLED) {
+ LOG.trace(
+ "{}#{} {}, seq {}, {} @ {}",
+ clazz.getSimpleName(),
Review comment:
This call has some overhead, as does `getClass()` (although this code
shouldn't execute in prod, it can still affect debugging).
How about passing a string constant from the caller instead?
##########
File path:
flink-runtime/src/main/java/org/apache/flink/runtime/io/network/logger/NetworkActionsLogger.java
##########
@@ -18,51 +18,89 @@
package org.apache.flink.runtime.io.network.logger;
+import org.apache.flink.runtime.checkpoint.channel.InputChannelInfo;
+import org.apache.flink.runtime.checkpoint.channel.ResultSubpartitionInfo;
import org.apache.flink.runtime.io.network.buffer.Buffer;
import org.apache.flink.runtime.io.network.buffer.BufferConsumer;
+import
org.apache.flink.runtime.io.network.partition.consumer.ChannelStatePersister;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.util.Arrays;
-
-import static org.apache.flink.util.Preconditions.checkState;
-
/** Utility class for logging actions that happened in the network stack for
debugging purposes. */
public class NetworkActionsLogger {
private static final Logger LOG =
LoggerFactory.getLogger(NetworkActionsLogger.class);
-
private static final boolean ENABLED = LOG.isTraceEnabled();
private static final boolean INCLUDE_HASH = true;
- public static void log(Class<?> clazz, String action, Buffer buffer) {
+ public static void traceInput(
+ Class<?> clazz,
+ String action,
+ Buffer buffer,
+ InputChannelInfo channelInfo,
+ ChannelStatePersister channelStatePersister,
+ int sequenceNumber) {
+ if (ENABLED) {
+ LOG.trace(
+ "{}#{} {}, seq {}, {} @ {}",
+ clazz.getSimpleName(),
+ action,
+ buffer.toDebugString(INCLUDE_HASH),
+ sequenceNumber,
+ channelStatePersister,
+ channelInfo);
+ }
+ }
+
+ public static void traceOutput(
+ Class<?> clazz, String action, Buffer buffer,
ResultSubpartitionInfo channelInfo) {
+ if (ENABLED) {
+ LOG.trace(
+ "{}#{} {} @ {}",
+ clazz.getSimpleName(),
+ action,
+ buffer.toDebugString(INCLUDE_HASH),
+ channelInfo);
+ }
+ }
+
+ public static void traceRecover(
+ Class<?> clazz, String action, Buffer buffer, InputChannelInfo
channelInfo) {
if (ENABLED) {
- LOG.trace("{}#{} buffer = [{}]", clazz.getSimpleName(), action,
toPrettyString(buffer));
+ LOG.trace(
+ "{}#{} {} @ {}",
+ clazz.getSimpleName(),
+ action,
+ buffer.toDebugString(INCLUDE_HASH),
+ channelInfo);
}
}
- public static void log(Class<?> clazz, String action, BufferConsumer
bufferConsumer) {
+ public static void traceRecover(
+ Class<?> clazz,
+ String action,
+ BufferConsumer bufferConsumer,
+ ResultSubpartitionInfo channelInfo) {
if (ENABLED) {
- Buffer buffer = null;
- try (BufferConsumer copiedBufferConsumer = bufferConsumer.copy()) {
- buffer = copiedBufferConsumer.build();
- log(clazz, action, buffer);
- checkState(copiedBufferConsumer.isFinished());
- } finally {
- if (buffer != null) {
- buffer.recycleBuffer();
- }
- }
+ LOG.trace(
+ "{}#{} {} @ {}",
+ clazz.getSimpleName(),
+ action,
+ bufferConsumer.toDebugString(INCLUDE_HASH),
+ channelInfo);
}
}
- private static String toPrettyString(Buffer buffer) {
- StringBuilder prettyString = new
StringBuilder("size=").append(buffer.getSize());
- if (INCLUDE_HASH) {
- byte[] bytes = new byte[buffer.getSize()];
- buffer.readOnlySlice().asByteBuf().readBytes(bytes);
- prettyString.append(", hash=").append(Arrays.hashCode(bytes));
+ public static void traceWrite(
Review comment:
nit: `tracePersist`?
##########
File path:
flink-runtime/src/main/java/org/apache/flink/runtime/io/network/partition/consumer/ChannelStatePersister.java
##########
@@ -62,9 +68,18 @@
}
protected void startPersisting(long barrierId, List<Buffer> knownBuffers) {
+ LOG.debug(
Review comment:
Does it make sense to log `channelInfo`?
nit: extract log method?
##########
File path:
flink-runtime/src/main/java/org/apache/flink/runtime/io/network/partition/consumer/ChannelStatePersister.java
##########
@@ -95,16 +115,39 @@ protected void maybePersist(Buffer buffer) {
protected Optional<Long> checkForBarrier(Buffer buffer) throws IOException
{
final AbstractEvent event = parseEvent(buffer);
if (event instanceof CheckpointBarrier) {
- if (((CheckpointBarrier) event).getId() >= lastSeenBarrier) {
+ final long barrierId = ((CheckpointBarrier) event).getId();
+ long expectedBarrierId =
+ checkpointStatus == CheckpointStatus.COMPLETED
+ ? lastSeenBarrier + 1
Review comment:
Good catch! :1st_place_medal: :)
##########
File path:
flink-runtime/src/main/java/org/apache/flink/runtime/io/network/logger/NetworkActionsLogger.java
##########
@@ -18,51 +18,89 @@
package org.apache.flink.runtime.io.network.logger;
+import org.apache.flink.runtime.checkpoint.channel.InputChannelInfo;
+import org.apache.flink.runtime.checkpoint.channel.ResultSubpartitionInfo;
import org.apache.flink.runtime.io.network.buffer.Buffer;
import org.apache.flink.runtime.io.network.buffer.BufferConsumer;
+import
org.apache.flink.runtime.io.network.partition.consumer.ChannelStatePersister;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.util.Arrays;
-
-import static org.apache.flink.util.Preconditions.checkState;
-
/** Utility class for logging actions that happened in the network stack for
debugging purposes. */
public class NetworkActionsLogger {
private static final Logger LOG =
LoggerFactory.getLogger(NetworkActionsLogger.class);
-
private static final boolean ENABLED = LOG.isTraceEnabled();
private static final boolean INCLUDE_HASH = true;
- public static void log(Class<?> clazz, String action, Buffer buffer) {
+ public static void traceInput(
+ Class<?> clazz,
+ String action,
+ Buffer buffer,
+ InputChannelInfo channelInfo,
+ ChannelStatePersister channelStatePersister,
+ int sequenceNumber) {
+ if (ENABLED) {
+ LOG.trace(
+ "{}#{} {}, seq {}, {} @ {}",
+ clazz.getSimpleName(),
+ action,
Review comment:
nit: I think `action` is not needed if we have a method per action.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org