This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new c4bd03a1c ORC-1373: Provide directional exceptions instead of NPE in
case of `DynamicByteArray` length overflow
c4bd03a1c is described below
commit c4bd03a1c7805bfc2088ba31c8bccc986d2d511c
Author: sychen <[email protected]>
AuthorDate: Sun Feb 19 13:33:24 2023 -0800
ORC-1373: Provide directional exceptions instead of NPE in case of
`DynamicByteArray` length overflow
### What changes were proposed in this pull request?
When `DynamicByteArray` calculates `chunkIndex` overflow, it will throw NPE.
We can throw exception to remind users to avoid this problem by configuring
what ORC parameters.
### Why are the changes needed?
When the written string is very large, the grow calculation may overflow,
causing the array data not to be expanded, and then NPE.
org.apache.orc.impl.DynamicByteArray#add(byte[], int, int)
```java
grow((length + valueLength) / chunkSize);
```
#### Log
```java
Caused by: java.lang.NullPointerException
at java.lang.System.arraycopy(Native Method)
at
org.apache.orc.impl.DynamicByteArray.add(DynamicByteArray.java:115)
at
org.apache.orc.impl.StringRedBlackTree.addNewKey(StringRedBlackTree.java:48)
at
org.apache.orc.impl.StringRedBlackTree.add(StringRedBlackTree.java:60)
at
org.apache.orc.impl.writer.StringTreeWriter.writeBatch(StringTreeWriter.java:69)
at
org.apache.orc.impl.writer.StructTreeWriter.writeRootBatch(StructTreeWriter.java:56)
at org.apache.orc.impl.WriterImpl.addRowBatch(WriterImpl.java:696)
```
### How was this patch tested?
#### Local Test
org.apache.orc.impl.TestDynamicArray#testBigByteArray
```java
Test
public void testBigByteArray() {
DynamicByteArray dba = new DynamicByteArray(128, 32 * 1024);
byte[] val = new byte[1024];
dba.add(val, 0, val.length);
byte[] bigVal = new byte[Integer.MAX_VALUE - 16];
dba.add(bigVal, 0, bigVal.length);
}
```
#### Output
```java
java.lang.RuntimeException: chunkIndex overflow:-65535. You can set
orc.column.encoding.direct=columnName, or orc.dictionary.key.threshold=0 to
turn off dictionary encoding.
at
org.apache.orc.impl.DynamicByteArray.grow(DynamicByteArray.java:73)
at
org.apache.orc.impl.DynamicByteArray.add(DynamicByteArray.java:122)
at
org.apache.orc.impl.TestDynamicArray.testBigByteArray(TestDynamicArray.java:36)
```
Closes #1412 from cxzl25/ORC-1373.
Authored-by: sychen <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../java/org/apache/orc/impl/DynamicByteArray.java | 11 ++++++++++-
.../test/org/apache/orc/impl/TestDynamicArray.java | 22 ++++++++++++++++++++++
2 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/java/core/src/java/org/apache/orc/impl/DynamicByteArray.java
b/java/core/src/java/org/apache/orc/impl/DynamicByteArray.java
index 851f0f144..d88cbcd45 100644
--- a/java/core/src/java/org/apache/orc/impl/DynamicByteArray.java
+++ b/java/core/src/java/org/apache/orc/impl/DynamicByteArray.java
@@ -18,6 +18,7 @@
package org.apache.orc.impl;
import org.apache.hadoop.io.Text;
+import org.apache.orc.OrcConf;
import java.io.IOException;
import java.io.InputStream;
@@ -52,14 +53,22 @@ public final class DynamicByteArray {
/**
* Ensure that the given index is valid.
+ * Throws an exception if chunkIndex is negative.
*/
private void grow(int chunkIndex) {
+ if (chunkIndex < 0) {
+ throw new RuntimeException(String.format("chunkIndex overflow:%d. " +
+ "You can set %s=columnName, or %s=0 to turn off dictionary encoding.",
+ chunkIndex,
+ OrcConf.DIRECT_ENCODING_COLUMNS.getAttribute(),
+ OrcConf.DICTIONARY_KEY_SIZE_THRESHOLD.getAttribute()));
+ }
if (chunkIndex >= initializedChunks) {
if (chunkIndex >= data.length) {
int newSize = Math.max(chunkIndex + 1, 2 * data.length);
data = Arrays.copyOf(data, newSize);
}
- for(int i=initializedChunks; i <= chunkIndex; ++i) {
+ for (int i = initializedChunks; i <= chunkIndex; ++i) {
data[i] = new byte[chunkSize];
}
initializedChunks = chunkIndex + 1;
diff --git a/java/core/src/test/org/apache/orc/impl/TestDynamicArray.java
b/java/core/src/test/org/apache/orc/impl/TestDynamicArray.java
index 1179b333c..5c4f199e6 100644
--- a/java/core/src/test/org/apache/orc/impl/TestDynamicArray.java
+++ b/java/core/src/test/org/apache/orc/impl/TestDynamicArray.java
@@ -22,6 +22,7 @@ import org.junit.jupiter.api.Test;
import java.util.Random;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
public class TestDynamicArray {
@@ -91,4 +92,25 @@ public class TestDynamicArray {
DynamicIntArray dia = new DynamicIntArray();
assertEquals("{}", dia.toString());
}
+
+ @Test
+ public void testByteArrayOverflow() {
+ DynamicByteArray dba = new DynamicByteArray();
+
+ byte[] val = new byte[1024];
+ dba.add(val, 0, val.length);
+
+ byte[] bigVal = new byte[2048];
+ RuntimeException exception = assertThrows(
+ RuntimeException.class,
+ // Need to construct a large array, limited by the heap limit of UT, may
cause OOM.
+ // The add method does not check whether byte[] and length are
consistent,
+ // so it is a bit hacky.
+ () -> dba.add(bigVal, 0, Integer.MAX_VALUE - 16));
+
+ assertEquals("chunkIndex overflow:-65535. " +
+ "You can set orc.column.encoding.direct=columnName, " +
+ "or orc.dictionary.key.threshold=0 to turn off dictionary encoding.",
+ exception.getMessage());
+ }
}