This is an automated email from the ASF dual-hosted git repository.

abstractdog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tez.git


The following commit(s) were added to refs/heads/master by this push:
     new 642f4c052 TEZ-4542: Tez application may fail due to int overflow when record size is large and sort memory is low. (#336) (zhengchenyu reviewed by Rajesh Balamohan)
642f4c052 is described below

commit 642f4c052c68e2a3e45641150965fa3321563949
Author: zhengchenyu <zhengcheny...@163.com>
AuthorDate: Tue May 14 15:28:52 2024 +0800

    TEZ-4542: Tez application may fail due to int overflow when record size is large and sort memory is low. (#336) (zhengchenyu reviewed by Rajesh Balamohan)
---
 .../library/common/sort/impl/PipelinedSorter.java  |  2 +-
 .../common/sort/impl/TestPipelinedSorter.java      | 26 ++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java
index 067dcca0c..1815006cd 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/sort/impl/PipelinedSorter.java
@@ -357,7 +357,7 @@ public class PipelinedSorter extends ExternalSorter {
       }
       Preconditions.checkArgument(buffers.get(bufferIndex) != null, "block should not be empty");
       //TODO: fix per item being passed.
-      span = new SortSpan((ByteBuffer)buffers.get(bufferIndex).clear(), (1024*1024),
+      span = new SortSpan((ByteBuffer)buffers.get(bufferIndex).clear(), items,
           perItem, ConfigUtils.getIntermediateOutputKeyComparator(this.conf));
     } else {
       // queue up the sort
diff --git a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestPipelinedSorter.java b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestPipelinedSorter.java
index 84ec14380..395567607 100644
--- a/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestPipelinedSorter.java
+++ b/tez-runtime-library/src/test/java/org/apache/tez/runtime/library/common/sort/impl/TestPipelinedSorter.java
@@ -858,6 +858,32 @@ public class TestPipelinedSorter {
     basicTest(1, 5, (2 << 20), (48 * 1024l * 1024l), 16 << 20);
   }
 
+  @Test
+  public void testWithLargeRecordAndLowMemory() throws IOException {
+    this.numOutputs = 1;
+    this.initialAvailableMem = 1 * 1024 * 1024;
+    conf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_ENABLE_FINAL_MERGE_IN_OUTPUT, true);
+    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_PIPELINED_SORTER_MIN_BLOCK_SIZE_IN_MB, 1);
+    PipelinedSorter sorter = new PipelinedSorter(this.outputContext, conf, numOutputs, initialAvailableMem);
+
+    // Set the record size to exceed 2k to trigger bug described in TEZ-4542.
+    StringBuilder builder = new StringBuilder();
+    for (int i = 0; i < 3072; i++) {
+      builder.append("1");
+    }
+    Text value = new Text(builder.toString());
+    long size = 50 * 1024 * 1024;
+    while (size > 0) {
+      Text key = RandomTextGenerator.generateSentence();
+      sorter.write(key, value);
+      size -= key.getLength();
+    }
+
+    sorter.flush();
+    sorter.close();
+    verifyOutputPermissions(outputContext.getUniqueIdentifier());
+  }
+
   private void verifyOutputPermissions(String spillId) throws IOException {
     String subpath = Constants.TEZ_RUNTIME_TASK_OUTPUT_DIR + "/" + spillId
         + "/" + Constants.TEZ_RUNTIME_TASK_OUTPUT_FILENAME_STRING;

Reply via email to