This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.7 by this push:
new 354076a35 ORC-1205: `nextVector` should invoke `ensureSize` when reusing vectors
354076a35 is described below
commit 354076a350eda3af5d5986e5444d8b67d0afdd83
Author: Laszlo Bodor <[email protected]>
AuthorDate: Sat Jun 25 06:25:02 2022 -0700
ORC-1205: `nextVector` should invoke `ensureSize` when reusing vectors
### What changes were proposed in this pull request?
The fix simply calls `ensureSize` on the column vectors that `ConvertTreeReader` instances reuse, without any API change.
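The recurring pattern, condensed from one representative hunk below (the other conversions are analogous; all field names are taken from the diff):

```java
// Condensed from the diff below: each ConvertTreeReader.nextVector override
// allocates its intermediate vector on first use, and with this patch grows
// the reused vector whenever a later batch is larger.
if (doubleColVector == null) {
  // Allocate column vector for file; cast column vector for reader.
  doubleColVector = new DoubleColumnVector(batchSize);
  longColVector = (LongColumnVector) previousVector;
} else {
  // New: reallocate the backing array if batchSize exceeds its capacity;
  // 'false' means the old contents need not be preserved.
  doubleColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(doubleColVector, isNull, batchSize, filterContext, readPhase);
```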
### Why are the changes needed?
As described in the JIRA, when batches of increasing size are read and those batches fall in different stripes, we can hit an edge case where `nextBatch` does not force the reused column vectors to be resized.
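A hedged sketch of the failing sequence, mirroring the new unit tests below (reader setup elided; the 30/50 stripe sizes are the tests' constants):

```java
// Sketch of the edge case: the first stripe holds 30 rows, so the converting
// reader sizes its intermediate vectors for a 30-row batch. The next stripe
// returns 50 rows into the same reused vectors, which overflow unless
// nextVector grows them via ensureSize.
RecordReader rows = reader.rows(options);
VectorizedRowBatch batch = schema.createRowBatchV2();
rows.nextBatch(batch);  // stripe 1: batch.size == 30, vectors sized for 30
rows.nextBatch(batch);  // stripe 2: batch.size == 50, too small without the fix
```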
### How was this patch tested?
Unit tests were added for every affected `ConvertTreeReader` conversion; the change was also tested locally with Hive.
Closes #1168 from abstractdog/ORC-1205-branch-1.7.
Authored-by: Laszlo Bodor <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../apache/orc/impl/ConvertTreeReaderFactory.java | 52 ++++
.../orc/impl/TestConvertTreeReaderFactory.java | 284 +++++++++++++++++++--
2 files changed, 320 insertions(+), 16 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index 7ead469df..477cee155 100644
--- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -462,6 +462,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
doubleColVector = new DoubleColumnVector(batchSize);
longColVector = (LongColumnVector) previousVector;
+ } else {
+ doubleColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(doubleColVector, isNull, batchSize, filterContext, readPhase);
@@ -548,6 +550,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
decimalColVector = new DecimalColumnVector(batchSize, precision, scale);
longColVector = (LongColumnVector) previousVector;
+ } else {
+ decimalColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(decimalColVector, isNull, batchSize, filterContext, readPhase);
@@ -589,6 +593,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
bytesColVector = new BytesColumnVector(batchSize);
longColVector = (LongColumnVector) previousVector;
+ } else {
+ bytesColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(bytesColVector, isNull, batchSize, filterContext, readPhase);
@@ -626,6 +632,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
timestampColVector = new TimestampColumnVector(batchSize);
longColVector = (LongColumnVector) previousVector;
+ } else {
+ timestampColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(timestampColVector, isNull, batchSize, filterContext, readPhase);
@@ -666,6 +674,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
longColVector = new LongColumnVector(batchSize);
doubleColVector = (DoubleColumnVector) previousVector;
+ } else {
+ longColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(longColVector, isNull, batchSize, filterContext, readPhase);
@@ -704,6 +714,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
decimalColVector = new DecimalColumnVector(batchSize, precision, scale);
doubleColVector = (DoubleColumnVector) previousVector;
+ } else {
+ decimalColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(decimalColVector, isNull, batchSize, filterContext, readPhase);
@@ -743,6 +755,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
bytesColVector = new BytesColumnVector(batchSize);
doubleColVector = (DoubleColumnVector) previousVector;
+ } else {
+ bytesColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(bytesColVector, isNull, batchSize, filterContext, readPhase);
@@ -781,6 +795,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
timestampColVector = new TimestampColumnVector(batchSize);
doubleColVector = (DoubleColumnVector) previousVector;
+ } else {
+ timestampColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(timestampColVector, isNull, batchSize, filterContext, readPhase);
@@ -844,6 +860,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
longColVector = new LongColumnVector(batchSize);
decimalColVector = previousVector;
+ } else {
+ longColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(longColVector, isNull, batchSize, filterContext, readPhase);
@@ -892,6 +910,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
doubleColVector = new DoubleColumnVector(batchSize);
decimalColVector = previousVector;
+ } else {
+ doubleColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(doubleColVector, isNull, batchSize, filterContext, readPhase);
@@ -936,6 +956,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
bytesColVector = new BytesColumnVector(batchSize);
decimalColVector = previousVector;
+ } else {
+ bytesColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(bytesColVector, isNull, batchSize, filterContext, readPhase);
@@ -982,6 +1004,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
timestampColVector = new TimestampColumnVector(batchSize);
decimalColVector = previousVector;
+ } else {
+ timestampColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(timestampColVector, isNull, batchSize, filterContext, readPhase);
@@ -1028,6 +1052,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
fileDecimalColVector = new DecimalColumnVector(batchSize, filePrecision, fileScale);
decimalColVector = previousVector;
+ } else {
+ fileDecimalColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(fileDecimalColVector, isNull, batchSize, filterContext, readPhase);
@@ -1064,6 +1090,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
longColVector = new LongColumnVector(batchSize);
bytesColVector = (BytesColumnVector) previousVector;
+ } else {
+ longColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(longColVector, isNull, batchSize, filterContext, readPhase);
@@ -1125,6 +1153,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
doubleColVector = new DoubleColumnVector(batchSize);
bytesColVector = (BytesColumnVector) previousVector;
+ } else {
+ doubleColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(doubleColVector, isNull, batchSize, filterContext, readPhase);
@@ -1175,6 +1205,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
decimalColVector = new DecimalColumnVector(batchSize, precision, scale);
bytesColVector = (BytesColumnVector) previousVector;
+ } else {
+ decimalColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(decimalColVector, isNull, batchSize, filterContext, readPhase);
@@ -1298,6 +1330,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
timestampColVector = new TimestampColumnVector(batchSize);
bytesColVector = (BytesColumnVector) previousVector;
+ } else {
+ timestampColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(timestampColVector, isNull, batchSize, filterContext, readPhase);
@@ -1337,6 +1371,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
longColVector = new DateColumnVector(batchSize);
bytesColVector = (BytesColumnVector) previousVector;
+ } else {
+ longColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(longColVector, isNull, batchSize, filterContext, readPhase);
@@ -1427,6 +1463,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
inBytesColVector = new BytesColumnVector(batchSize);
outBytesColVector = (BytesColumnVector) previousVector;
+ } else {
+ inBytesColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(inBytesColVector, isNull, batchSize, filterContext, readPhase);
@@ -1472,6 +1510,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
longColVector = new LongColumnVector(batchSize);
timestampColVector = (TimestampColumnVector) previousVector;
+ } else {
+ longColVector.ensureSize(batchSize, false);
}
timestampColVector.changeCalendar(fileUsedProlepticGregorian, false);
// Read present/isNull stream
@@ -1534,6 +1574,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
doubleColVector = new DoubleColumnVector(batchSize);
timestampColVector = (TimestampColumnVector) previousVector;
+ } else {
+ doubleColVector.ensureSize(batchSize, false);
}
timestampColVector.changeCalendar(fileUsedProlepticGregorian, false);
// Read present/isNull stream
@@ -1594,6 +1636,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
decimalColVector = new DecimalColumnVector(batchSize, precision, scale);
timestampColVector = (TimestampColumnVector) previousVector;
+ } else {
+ decimalColVector.ensureSize(batchSize, false);
}
timestampColVector.changeCalendar(fileUsedProlepticGregorian, false);
// Read present/isNull stream
@@ -1653,6 +1697,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
bytesColVector = new BytesColumnVector(batchSize);
timestampColVector = (TimestampColumnVector) previousVector;
+ } else {
+ bytesColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(bytesColVector, isNull, batchSize, filterContext, readPhase);
@@ -1697,6 +1743,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
// Allocate column vector for file; cast column vector for reader.
longColVector = new DateColumnVector(batchSize);
timestampColVector = (TimestampColumnVector) previousVector;
+ } else {
+ longColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(longColVector, isNull, batchSize, filterContext, readPhase);
@@ -1750,6 +1798,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
" proleptic Gregorian
dates.");
}
}
+ } else {
+ bytesColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(bytesColVector, isNull, batchSize, filterContext, readPhase);
@@ -1798,6 +1848,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
throw new IllegalArgumentException("Can't use LongColumnVector with" +
" proleptic Gregorian dates.");
}
+ } else {
+ timestampColVector.ensureSize(batchSize, false);
}
// Read present/isNull stream
fromReader.nextVector(timestampColVector, isNull, batchSize, filterContext, readPhase);
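For context, the guard relies on ColumnVector.ensureSize(int size, boolean preserveData) from hive-storage-api, which reallocates the backing arrays only when the requested size exceeds the current capacity. A minimal standalone sketch of that behavior (hypothetical demo class, not part of this patch):

```java
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

// Hypothetical demo of ensureSize semantics; not part of this patch.
public class EnsureSizeDemo {
  public static void main(String[] args) {
    LongColumnVector v = new LongColumnVector(30); // capacity for 30 rows
    v.ensureSize(50, false);  // grows the backing array; false = old data dropped
    v.vector[49] = 42L;       // safe after the grow; out of bounds before it
    v.ensureSize(10, false);  // no-op: capacity is already sufficient
  }
}
```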
diff --git a/java/core/src/test/org/apache/orc/impl/TestConvertTreeReaderFactory.java b/java/core/src/test/org/apache/orc/impl/TestConvertTreeReaderFactory.java
index 684d26039..4efe79d0c 100644
--- a/java/core/src/test/org/apache/orc/impl/TestConvertTreeReaderFactory.java
+++ b/java/core/src/test/org/apache/orc/impl/TestConvertTreeReaderFactory.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
+import org.apache.orc.OrcFile.WriterOptions;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.TestProlepticConversions;
@@ -62,6 +63,8 @@ public class TestConvertTreeReaderFactory {
private FileSystem fs;
private Path testFilePath;
private int LARGE_BATCH_SIZE;
+ private static final int INCREASING_BATCH_SIZE_FIRST = 30;
+ private static final int INCREASING_BATCH_SIZE_SECOND = 50;
@BeforeEach
public void setupPath(TestInfo testInfo) throws Exception {
@@ -92,22 +95,7 @@ public class TestConvertTreeReaderFactory {
TExpectedColumnVector dcv = (TExpectedColumnVector) (listCol).child;
batch.size = 1;
for (int row = 0; row < LARGE_BATCH_SIZE; ++row) {
- if (dcv instanceof DecimalColumnVector) {
- ((DecimalColumnVector) dcv).set(row, HiveDecimal.create(row * 2 + 1));
- } else if (dcv instanceof DoubleColumnVector) {
- ((DoubleColumnVector) dcv).vector[row] = row * 2 + 1;
- } else if (dcv instanceof BytesColumnVector) {
- ((BytesColumnVector) dcv).setVal(row, ((row * 2 + 1) + "").getBytes(StandardCharsets.UTF_8));
- } else if (dcv instanceof LongColumnVector) {
- ((LongColumnVector) dcv).vector[row] = row * 2 + 1;
- } else if (dcv instanceof TimestampColumnVector) {
- ((TimestampColumnVector) dcv).set(row, Timestamp.valueOf((1900 + row) + "-04-01 12:34:56.9"));
- } else if (dcv instanceof DateColumnVector) {
- String date = String.format("%04d-01-23", row * 2 + 1);
- ((DateColumnVector) dcv).vector[row] = TimeUnit.MILLISECONDS.toDays(dateFormat.parse(date).getTime());
- } else {
- throw new IllegalStateException("Writing File with a large array of "+ expectedColumnType + " is not supported!");
- }
+ setElementInVector(expectedColumnType, dateFormat, dcv, row);
}
listCol.childCount = 1;
@@ -120,6 +108,65 @@ public class TestConvertTreeReaderFactory {
return (TExpectedColumnVector) ((ListColumnVector) batch.cols[0]).child;
}
+ public <TExpectedColumnVector extends ColumnVector> TExpectedColumnVector createORCFileWithBatchesOfIncreasingSizeInDifferentStripes(
+ TypeDescription schema, Class<TExpectedColumnVector> typeClass, boolean useDecimal64)
+ throws IOException, ParseException {
+ conf = new Configuration();
+ fs = FileSystem.getLocal(conf);
+ fs.setWorkingDirectory(workDir);
+ WriterOptions options = OrcFile.writerOptions(conf);
+ Writer w = OrcFile.createWriter(testFilePath, options.setSchema(schema));
+
+ SimpleDateFormat dateFormat = TestProlepticConversions.createParser("yyyy-MM-dd", new GregorianCalendar());
+ VectorizedRowBatch batch = schema.createRowBatch(
+ useDecimal64 ? TypeDescription.RowBatchVersion.USE_DECIMAL64 : TypeDescription.RowBatchVersion.ORIGINAL,
+ INCREASING_BATCH_SIZE_FIRST);
+
+ TExpectedColumnVector columnVector = (TExpectedColumnVector) batch.cols[0];
+ batch.size = INCREASING_BATCH_SIZE_FIRST;
+ for (int row = 0; row < INCREASING_BATCH_SIZE_FIRST; ++row) {
+ setElementInVector(typeClass, dateFormat, columnVector, row);
+ }
+
+ w.addRowBatch(batch);
+ w.writeIntermediateFooter(); //forcing a new stripe
+
+ batch = schema.createRowBatch(
+ useDecimal64 ? TypeDescription.RowBatchVersion.USE_DECIMAL64 : TypeDescription.RowBatchVersion.ORIGINAL,
+ INCREASING_BATCH_SIZE_SECOND);
+
+ columnVector = (TExpectedColumnVector) batch.cols[0];
+ batch.size = INCREASING_BATCH_SIZE_SECOND;
+ for (int row = 0; row < INCREASING_BATCH_SIZE_SECOND; ++row) {
+ setElementInVector(typeClass, dateFormat, columnVector, row);
+ }
+
+ w.addRowBatch(batch);
+ w.close();
+ return (TExpectedColumnVector) batch.cols[0];
+ }
+
+ private void setElementInVector(
+ Class<?> expectedColumnType, SimpleDateFormat dateFormat, ColumnVector dcv, int row)
+ throws ParseException {
+ if (dcv instanceof DecimalColumnVector) {
+ ((DecimalColumnVector) dcv).set(row, HiveDecimal.create(row * 2 + 1));
+ } else if (dcv instanceof DoubleColumnVector) {
+ ((DoubleColumnVector) dcv).vector[row] = row * 2 + 1;
+ } else if (dcv instanceof BytesColumnVector) {
+ ((BytesColumnVector) dcv).setVal(row, ((row * 2 + 1) + "").getBytes(StandardCharsets.UTF_8));
+ } else if (dcv instanceof LongColumnVector) {
+ ((LongColumnVector) dcv).vector[row] = row * 2 + 1;
+ } else if (dcv instanceof TimestampColumnVector) {
+ ((TimestampColumnVector) dcv).set(row, Timestamp.valueOf((1900 + row) + "-04-01 12:34:56.9"));
+ } else if (dcv instanceof DateColumnVector) {
+ String date = String.format("%04d-01-23", row * 2 + 1);
+ ((DateColumnVector) dcv).vector[row] = TimeUnit.MILLISECONDS.toDays(dateFormat.parse(date).getTime());
+ } else {
+ throw new IllegalStateException("Writing File with a large array of "+ expectedColumnType + " is not supported!");
+ }
+ }
+
public <TExpectedColumnVector extends ColumnVector> TExpectedColumnVector readORCFileWithLargeArray(
String typeString, Class<TExpectedColumnVector> expectedColumnType) throws Exception {
Reader.Options options = new Reader.Options();
@@ -143,6 +190,31 @@ public class TestConvertTreeReaderFactory {
return (TExpectedColumnVector) ((ListColumnVector) batch.cols[0]).child;
}
+ public void readORCFileIncreasingBatchSize(String typeString, Class<?> expectedColumnType) throws Exception {
+ Reader.Options options = new Reader.Options();
+ TypeDescription schema = TypeDescription.fromString("struct<col1:" + typeString + ">");
+ options.schema(schema);
+ String expected = options.toString();
+
+ Configuration conf = new Configuration();
+
+ Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
+ RecordReader rows = reader.rows(options);
+ VectorizedRowBatch batch = schema.createRowBatchV2();
+
+ rows.nextBatch(batch);
+ assertEquals(INCREASING_BATCH_SIZE_FIRST , batch.size);
+ assertEquals(expected, options.toString());
+ assertEquals(batch.cols.length, 1);
+ assertEquals(batch.cols[0].getClass(), expectedColumnType);
+
+ rows.nextBatch(batch);
+ assertEquals(INCREASING_BATCH_SIZE_SECOND , batch.size);
+ assertEquals(expected, options.toString());
+ assertEquals(batch.cols.length, 1);
+ assertEquals(batch.cols[0].getClass(), expectedColumnType);
+ }
+
public void testConvertToDecimal() throws Exception {
Decimal64ColumnVector columnVector =
readORCFileWithLargeArray("decimal(6,1)", Decimal64ColumnVector.class);
@@ -344,9 +416,189 @@ public class TestConvertTreeReaderFactory {
// check ConvertTreeReaderFactory.createDateConvertTreeReader
testConvertToVarchar();
testConvertToTimestamp();
+ } finally {
+ fs.delete(testFilePath, false);
+ }
+ }
+
+ @Test
+ public void testDecimalVectorIncreasingSizeInDifferentStripes() throws Exception {
+ String typeStr = "decimal(6,1)";
+ Class typeClass = DecimalColumnVector.class;
+
+ TypeDescription schema = TypeDescription.fromString("struct<col1:" + typeStr + ">");
+ createORCFileWithBatchesOfIncreasingSizeInDifferentStripes(schema, typeClass, typeClass.equals(Decimal64ColumnVector.class));
+ try {
+ testConvertToIntegerIncreasingSize();
+ testConvertToDoubleIncreasingSize();
+ testConvertToVarcharIncreasingSize();
+ testConvertToTimestampIncreasingSize();
+ testConvertToDecimalIncreasingSize();
+ } finally {
+ fs.delete(testFilePath, false);
+ }
+ }
+
+ @Test
+ public void testDecimal64VectorIncreasingSizeInDifferentStripes() throws Exception {
+ String typeStr = "decimal(6,1)";
+ Class typeClass = Decimal64ColumnVector.class;
+
+ TypeDescription schema = TypeDescription.fromString("struct<col1:" + typeStr + ">");
+ createORCFileWithBatchesOfIncreasingSizeInDifferentStripes(schema, typeClass,
+ typeClass.equals(Decimal64ColumnVector.class));
+ try {
+ testConvertToIntegerIncreasingSize();
+ testConvertToDoubleIncreasingSize();
+ testConvertToVarcharIncreasingSize();
+ testConvertToTimestampIncreasingSize();
+ testConvertToDecimalIncreasingSize();
} finally {
// Make sure we delete file across tests
fs.delete(testFilePath, false);
}
}
+
+ @Test
+ public void testStringVectorIncreasingSizeInDifferentStripes() throws Exception {
+ String typeStr = "varchar(10)";
+ Class typeClass = BytesColumnVector.class;
+
+ TypeDescription schema = TypeDescription.fromString("struct<col1:" + typeStr + ">");
+ createORCFileWithBatchesOfIncreasingSizeInDifferentStripes(schema, typeClass,
+ typeClass.equals(Decimal64ColumnVector.class));
+ try {
+ testConvertToIntegerIncreasingSize();
+ testConvertToDoubleIncreasingSize();
+ testConvertToDecimalIncreasingSize();
+ testConvertToVarcharIncreasingSize();
+ testConvertToBinaryIncreasingSize();
+ testConvertToTimestampIncreasingSize();
+ testConvertToDateIncreasingSize();
+ } finally {
+ // Make sure we delete file across tests
+ fs.delete(testFilePath, false);
+ }
+ }
+
+ public void testBinaryVectorIncreasingSizeInDifferentStripes() throws Exception {
+ String typeStr = "binary";
+ Class typeClass = BytesColumnVector.class;
+
+ TypeDescription schema = TypeDescription.fromString("struct<col1:" + typeStr + ">");
+ createORCFileWithBatchesOfIncreasingSizeInDifferentStripes(schema, typeClass,
+ typeClass.equals(Decimal64ColumnVector.class));
+ try {
+ testConvertToVarcharIncreasingSize();
+ } finally {
+ fs.delete(testFilePath, false);
+ }
+ }
+
+ @Test
+ public void testDoubleVectorIncreasingSizeInDifferentStripes() throws Exception {
+ String typeStr = "double";
+ Class typeClass = DoubleColumnVector.class;
+
+ TypeDescription schema = TypeDescription.fromString("struct<col1:" + typeStr + ">");
+ createORCFileWithBatchesOfIncreasingSizeInDifferentStripes(schema, typeClass,
+ typeClass.equals(Decimal64ColumnVector.class));
+ try {
+ testConvertToDoubleIncreasingSize();
+ testConvertToIntegerIncreasingSize();
+ testConvertToFloatIncreasingSize();
+ testConvertToDecimalIncreasingSize();
+ testConvertToVarcharIncreasingSize();
+ testConvertToTimestampIncreasingSize();
+ } finally {
+ fs.delete(testFilePath, false);
+ }
+ }
+
+ @Test
+ public void testIntVectorIncreasingSizeInDifferentStripes() throws Exception {
+ String typeStr = "int";
+ Class typeClass = LongColumnVector.class;
+
+ TypeDescription schema = TypeDescription.fromString("struct<col1:" + typeStr + ">");
+ createORCFileWithBatchesOfIncreasingSizeInDifferentStripes(schema, typeClass,
+ typeClass.equals(Decimal64ColumnVector.class));
+ try {
+ testConvertToIntegerIncreasingSize();
+ testConvertToDoubleIncreasingSize();
+ testConvertToDecimalIncreasingSize();
+ testConvertToVarcharIncreasingSize();
+ testConvertToTimestampIncreasingSize();
+ } finally {
+ fs.delete(testFilePath, false);
+ }
+ }
+
+ @Test
+ public void testTimestampVectorIncreasingSizeInDifferentStripes() throws Exception {
+ String typeStr = "timestamp";
+ Class typeClass = TimestampColumnVector.class;
+
+ TypeDescription schema = TypeDescription.fromString("struct<col1:" + typeStr + ">");
+ createORCFileWithBatchesOfIncreasingSizeInDifferentStripes(schema, typeClass,
+ typeClass.equals(Decimal64ColumnVector.class));
+ try {
+ testConvertToIntegerIncreasingSize();
+ testConvertToDoubleIncreasingSize();
+ testConvertToDecimalIncreasingSize();
+ testConvertToVarcharIncreasingSize();
+ testConvertToTimestampIncreasingSize();
+ testConvertToDateIncreasingSize();
+ } finally {
+ fs.delete(testFilePath, false);
+ }
+ }
+
+ @Test
+ public void testDateVectorIncreasingSizeInDifferentStripes() throws Exception {
+ String typeStr = "date";
+ Class typeClass = DateColumnVector.class;
+
+ TypeDescription schema = TypeDescription.fromString("struct<col1:" + typeStr + ">");
+ createORCFileWithBatchesOfIncreasingSizeInDifferentStripes(schema, typeClass,
+ typeClass.equals(Decimal64ColumnVector.class));
+ try {
+ testConvertToVarcharIncreasingSize();
+ testConvertToTimestampIncreasingSize();
+ } finally {
+ fs.delete(testFilePath, false);
+ }
+ }
+
+ private void testConvertToDoubleIncreasingSize() throws Exception {
+ readORCFileIncreasingBatchSize("double", DoubleColumnVector.class);
+ }
+
+ private void testConvertToIntegerIncreasingSize() throws Exception {
+ readORCFileIncreasingBatchSize("int", LongColumnVector.class);
+ }
+
+ private void testConvertToFloatIncreasingSize() throws Exception {
+ readORCFileIncreasingBatchSize("float", DoubleColumnVector.class);
+ }
+
+ public void testConvertToDecimalIncreasingSize() throws Exception {
+ readORCFileIncreasingBatchSize("decimal(6,1)", Decimal64ColumnVector.class);
+ }
+
+ private void testConvertToVarcharIncreasingSize() throws Exception {
+ readORCFileIncreasingBatchSize("varchar(10)", BytesColumnVector.class);
+ }
+
+ private void testConvertToTimestampIncreasingSize() throws Exception {
+ readORCFileIncreasingBatchSize("timestamp", TimestampColumnVector.class);
+ }
+
+ private void testConvertToDateIncreasingSize() throws Exception {
+ readORCFileIncreasingBatchSize("date", DateColumnVector.class);
+ }
+
+ private void testConvertToBinaryIncreasingSize() throws Exception {
+ readORCFileIncreasingBatchSize("binary", BytesColumnVector.class);
+ }
}