This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new e55476580 ORC-1528: Fix readBytes potential overflow in 
RecordReaderUtils.ChunkReader#create
e55476580 is described below

commit e554765801c8cc0f25713aaee09701ca47250d01
Author: yebukong <[email protected]>
AuthorDate: Mon Nov 27 12:37:46 2023 -0800

    ORC-1528: Fix readBytes potential overflow in 
RecordReaderUtils.ChunkReader#create
    
    ### What changes were proposed in this pull request?
    
    - I have adjusted the calculation of `reqBytes` and `readBytes` ranges in 
the `org.apache.orc.impl.RecordReaderUtils.ChunkReader#create` method from 
`Integer` to `Long`, and added validation for `Integer` overflow
    
    - Adds more test cases
    
    ### Why are the changes needed?
    
    This PR aims to fix 
[ORC-1528](https://issues.apache.org/jira/browse/ORC-1528)
    
    ### How was this patch tested?
    
    org.apache.orc.impl.TestRecordReaderUtils#testBufferChunkOffsetExceedsMaxInt
    
    Closes #1662 from yebukong/main.
    
    Lead-authored-by: yebukong <[email protected]>
    Co-authored-by: yebukong <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../core/src/java/org/apache/orc/impl/IOUtils.java |  5 ++
 .../org/apache/orc/impl/RecordReaderUtils.java     | 21 +++++---
 .../org/apache/orc/impl/TestRecordReaderUtils.java | 59 ++++++++++++++++++++++
 3 files changed, 78 insertions(+), 7 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/impl/IOUtils.java 
b/java/core/src/java/org/apache/orc/impl/IOUtils.java
index d58029415..d28ea6c66 100644
--- a/java/core/src/java/org/apache/orc/impl/IOUtils.java
+++ b/java/core/src/java/org/apache/orc/impl/IOUtils.java
@@ -30,6 +30,11 @@ public final class IOUtils {
 
   public static final int DEFAULT_BUFFER_SIZE = 8192;
 
+  /**
+   * The maximum size of array to allocate, value being the same as {@link 
java.util.Hashtable}
+   */
+  public static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
+
   /**
    * Returns a new byte array of size {@link #DEFAULT_BUFFER_SIZE}.
    *
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java 
b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java
index 8c37246b7..1b76f0d8b 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java
@@ -770,18 +770,18 @@ public class RecordReaderUtils {
     }
 
     static ChunkReader create(BufferChunk from, BufferChunk to) {
-      long f = Integer.MAX_VALUE;
-      long e = Integer.MIN_VALUE;
+      long f = Long.MAX_VALUE;
+      long e = Long.MIN_VALUE;
 
-      long cf = Integer.MAX_VALUE;
-      long ef = Integer.MIN_VALUE;
-      int reqBytes = 0;
+      long cf = Long.MAX_VALUE;
+      long ef = Long.MIN_VALUE;
+      long reqBytes = 0L;
 
       BufferChunk current = from;
       while (current != to.next) {
         f = Math.min(f, current.getOffset());
         e = Math.max(e, current.getEnd());
-        if (ef == Integer.MIN_VALUE || current.getOffset() <= ef) {
+        if (ef == Long.MIN_VALUE || current.getOffset() <= ef) {
           cf = Math.min(cf, current.getOffset());
           ef = Math.max(ef, current.getEnd());
         } else {
@@ -792,7 +792,14 @@ public class RecordReaderUtils {
         current = (BufferChunk) current.next;
       }
       reqBytes += ef - cf;
-      return new ChunkReader(from, to, (int) (e - f), reqBytes);
+      if (reqBytes > IOUtils.MAX_ARRAY_SIZE) {
+        throw new IllegalArgumentException("invalid reqBytes value " + 
reqBytes + ",out of bounds " + IOUtils.MAX_ARRAY_SIZE);
+      }
+      long readBytes = e - f;
+      if (readBytes > IOUtils.MAX_ARRAY_SIZE) {
+        throw new IllegalArgumentException("invalid readBytes value " + 
readBytes + ",out of bounds " + IOUtils.MAX_ARRAY_SIZE);
+      }
+      return new ChunkReader(from, to, (int) readBytes, (int) reqBytes);
     }
 
     static ChunkReader create(BufferChunk from, int minSeekSize) {
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java 
b/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java
index d5f3550d3..54a961550 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java
@@ -25,6 +25,7 @@ import org.junit.jupiter.api.function.Executable;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
+import java.util.List;
 import java.util.Objects;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -177,6 +178,64 @@ class TestRecordReaderUtils {
     assertTrue(dis.isAllReleased());
   }
 
+  @Test
+  public void testChunkReaderCreateOffsetExceedsMaxInt() {
+    List<long[]> mockData = Arrays.asList(
+      new long[]{15032282586L, 15032298848L},
+      new long[]{15032298848L, 15032299844L},
+      new long[]{15032299844L, 15032377804L},
+      new long[]{15058260587L, 15058261632L},
+      new long[]{15058261632L, 15058288409L},
+      new long[]{15058288409L, 15058288862L},
+      new long[]{15058339730L, 15058340775L},
+      new long[]{15058340775L, 15058342439L},
+      new long[]{15058449794L, 15058449982L},
+      new long[]{15058449982L, 15058451700L},
+      new long[]{15058451700L, 15058451749L},
+      new long[]{15058484358L, 15058484422L},
+      new long[]{15058484422L, 15058484862L},
+      new long[]{15058484862L, 15058484878L}
+    );
+    TestOrcLargeStripe.RangeBuilder rangeBuilder = new 
TestOrcLargeStripe.RangeBuilder();
+    mockData.forEach(e -> rangeBuilder.range(e[0], (int) (e[1] - e[0])));
+    BufferChunkList rangeList = rangeBuilder.build();
+
+    RecordReaderUtils.ChunkReader chunkReader = 
RecordReaderUtils.ChunkReader.create(rangeList.get(), 134217728);
+    long readBytes = mockData.get(mockData.size() - 1)[1] - mockData.get(0)[0];
+    long reqBytes = mockData.stream().mapToLong(e -> (int) (e[1] - 
e[0])).sum();
+    assertEquals(chunkReader.getReadBytes(), readBytes);
+    assertEquals(chunkReader.getReqBytes(), reqBytes);
+  }
+
+  @Test
+  public void testChunkReaderCreateReqBytesAndReadBytesValidation() {
+    BufferChunkList rangeList = new TestOrcLargeStripe.RangeBuilder()
+      .range(0, IOUtils.MAX_ARRAY_SIZE)
+      .range(1L + IOUtils.MAX_ARRAY_SIZE, IOUtils.MAX_ARRAY_SIZE + 1)
+      .range(2L * IOUtils.MAX_ARRAY_SIZE, IOUtils.MAX_ARRAY_SIZE - 4)
+      .range(3L * IOUtils.MAX_ARRAY_SIZE, 2)
+      .build();
+
+    // reqBytes,readBytes boundary value
+    RecordReaderUtils.ChunkReader chunkReader = 
RecordReaderUtils.ChunkReader.create(rangeList.get(0), 0);
+    assertEquals(chunkReader.getReadBytes(), IOUtils.MAX_ARRAY_SIZE);
+    assertEquals(chunkReader.getReqBytes(), IOUtils.MAX_ARRAY_SIZE);
+
+    // reqBytes > IOUtils.MAX_ARRAY_SIZE validation
+    assertThrowsExactly(IllegalArgumentException.class,
+      () -> RecordReaderUtils.ChunkReader.create(rangeList.get(1), 0),
+      () -> String.format("invalid reqBytes value %d,out of bounds %d",
+                          rangeList.get(1).getLength(), IOUtils.MAX_ARRAY_SIZE)
+    );
+
+    // readBytes > IOUtils.MAX_ARRAY_SIZE validation
+    assertThrowsExactly(IllegalArgumentException.class,
+      () -> RecordReaderUtils.ChunkReader.create(rangeList.get(2), 100),
+      () -> String.format("invalid readBytes value %d,out of bounds %d",
+                          rangeList.get(3).getEnd() - 
rangeList.get(2).getOffset(), IOUtils.MAX_ARRAY_SIZE)
+    );
+  }
+
   private static byte[] byteBufferToArray(ByteBuffer buf) {
     byte[] resultArray = new byte[buf.remaining()];
     ByteBuffer buffer = buf.slice();

Reply via email to