pnowojski commented on a change in pull request #6417: [FLINK-9913][runtime]
Improve output serialization only once in RecordWriter
URL: https://github.com/apache/flink/pull/6417#discussion_r215615035
##########
File path:
flink-runtime/src/main/java/org/apache/flink/runtime/io/network/api/writer/RecordWriter.java
##########
@@ -89,77 +88,101 @@ public RecordWriter(ResultPartitionWriter writer,
ChannelSelector<T> channelSele
this.numChannels = writer.getNumberOfSubpartitions();
- /*
- * The runtime exposes a channel abstraction for the produced
results
- * (see {@link ChannelSelector}). Every channel has an
independent
- * serializer.
- */
- this.serializers = new SpanningRecordSerializer[numChannels];
+ this.serializer = new SpanningRecordSerializer<T>();
this.bufferBuilders = new Optional[numChannels];
+ this.broadcastChannels = new int[numChannels];
for (int i = 0; i < numChannels; i++) {
- serializers[i] = new SpanningRecordSerializer<T>();
+ broadcastChannels[i] = i;
bufferBuilders[i] = Optional.empty();
}
}
public void emit(T record) throws IOException, InterruptedException {
- for (int targetChannel : channelSelector.selectChannels(record,
numChannels)) {
- sendToTarget(record, targetChannel);
- }
+ emitToTargetChannels(record,
channelSelector.selectChannels(record, numChannels));
}
/**
* This is used to broadcast Streaming Watermarks in-band with records.
This ignores
* the {@link ChannelSelector}.
*/
public void broadcastEmit(T record) throws IOException,
InterruptedException {
- for (int targetChannel = 0; targetChannel < numChannels;
targetChannel++) {
- sendToTarget(record, targetChannel);
- }
+ emitToTargetChannels(record, broadcastChannels);
}
/**
* This is used to send LatencyMarks to a random target channel.
*/
public void randomEmit(T record) throws IOException,
InterruptedException {
- sendToTarget(record, rng.nextInt(numChannels));
+ serializer.serializeRecord(record);
+
+ if (copyToTargetBuffers(rng.nextInt(numChannels))) {
+ serializer.prune();
+ }
}
- private void sendToTarget(T record, int targetChannel) throws
IOException, InterruptedException {
- RecordSerializer<T> serializer = serializers[targetChannel];
+ private void emitToTargetChannels(T record, int[] targetChannels)
throws IOException, InterruptedException {
+ serializer.serializeRecord(record);
+
+ boolean pruneAfterCopying = false;
+ for (int channel : targetChannels) {
+ if (copyToTargetBuffers(channel)) {
+ pruneAfterCopying = true;
+ }
+ }
- SerializationResult result = serializer.addRecord(record);
+ // Make sure we don't hold onto the large intermediate
serialization buffer for too long
+ if (pruneAfterCopying) {
+ serializer.prune();
+ }
+ }
+ /**
+ * Copies the intermediate serialization buffer to the BufferBuilder of
the target channel, also
+ * checks to prune the intermediate buffer iif the target BufferBuilder
is fulfilled and the record
+ * is full.
+ *
+ * @param targetChannel the target channel to get BufferBuilder
+ * @return <tt>true</tt> if the intermediate serialization buffer
should be pruned
+ */
+ private boolean copyToTargetBuffers(int targetChannel) throws
IOException, InterruptedException {
Review comment:
This is the third time I'm looking at this PR, and for the third time I had to
think for a minute about what this method does. I keep forgetting that
`serializer` is a class field and that this method copies from it.
Maybe rename it to `copyFromSerializerToTargetChannel`? IMO the rename would
allow us to drop most of the Javadoc and simplify it to just:
```
/**
* @param targetChannel
* @return <tt>true</tt> if the intermediate serialization buffer
should be pruned
*/
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services