deniskuzZ commented on code in PR #5174:
URL: https://github.com/apache/hive/pull/5174#discussion_r1551589164
##########
ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java:
##########
@@ -362,13 +461,42 @@ private List<Event> createEventList(boolean
sendSerializedEvents, InputSplitInfo
events.add(configureVertexEvent);
if (sendSerializedEvents) {
- MRSplitsProto splitsProto = inputSplitInfo.getSplitsProto();
int count = 0;
- for (MRSplitProto mrSplit : splitsProto.getSplitsList()) {
- InputDataInformationEvent diEvent =
InputDataInformationEvent.createWithSerializedPayload(
- count++, mrSplit.toByteString().asReadOnlyByteBuffer());
+ long inMemoryPayloadSize = 0;
+ long serializedPayloadSize = 0;
+
+ int payloadSerializationThresholdBytes =
+ HiveConf.getIntVar(jobConf,
HiveConf.ConfVars.HIVE_TEZ_SPLIT_FS_SERIALIZATION_THRESHOLD);
+ SplitSerializer splitSerializer = new SplitSerializer();
+
+ List<MRSplitProto> splits =
inputSplitInfo.getSplitsProto().getSplitsList();
+
+ LOG.info("Start creating events for {} splits", splits.size());
+
+ for (MRSplitProto mrSplit : splits) {
+ ByteBuffer payloadBuffer =
mrSplit.toByteString().asReadOnlyByteBuffer();
+ int payloadSize = payloadBuffer.limit();
+ boolean shouldSerializeEventToFile =
+ payloadSerializationThresholdBytes != -1 && payloadSize >
payloadSerializationThresholdBytes;
+ LOG.debug("Split #{} ByteBuffer size: {} bytes, serialize to file: {}
(threshold: {} bytes)", count, payloadSize,
Review Comment:
Suggested log message: "Split #{}, byteBuffer size: {} bytes, threshold: {} bytes".
You can also add an extra log statement in the `if` branch, where the split is serialized to a file, that includes the path.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]