This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/master by this push:
new 40495ba ORC-709: FIX Boolean to StringGroup schema evolution (#594)
40495ba is described below
commit 40495baa71fe4ce267e77e27f3ee4cd2c1b9b57e
Author: Panagiotis Garefalakis <[email protected]>
AuthorDate: Wed Dec 30 12:28:26 2020 +0200
ORC-709: FIX Boolean to StringGroup schema evolution (#594)
### What changes were proposed in this pull request?
Special ConvertTreeReader for Boolean using
StringGroupFromAnyIntegerTreeReader for String/Char/Varchar types
### Why are the changes needed?
Properly handle Boolean to String/Char/Varchar conversions
### How was this patch tested?
TestSchemaEvolution.testBooleanToStringEvolution
---
.../apache/orc/impl/ConvertTreeReaderFactory.java | 64 ++++++++++++++++++++--
.../org/apache/orc/impl/TestSchemaEvolution.java | 31 +++++++++++
2 files changed, 91 insertions(+), 4 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index a1c0d6b..b2b651b 100644
--- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -1774,12 +1774,64 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
}
}
+ private static TypeReader createBooleanConvertTreeReader(int columnId,
+ TypeDescription
fileType,
+ TypeDescription
readerType,
+ Context context)
throws IOException {
+
+ // CONVERT from BOOLEAN to schema type.
+ //
+ switch (readerType.getCategory()) {
+
+ case BOOLEAN:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ if (fileType.getCategory() == readerType.getCategory()) {
+ throw new IllegalArgumentException("No conversion of type " +
+ readerType.getCategory() + " to self needed");
+ }
+ return new AnyIntegerFromAnyIntegerTreeReader(columnId, fileType,
readerType,
+ context);
+
+ case FLOAT:
+ case DOUBLE:
+ return new DoubleFromAnyIntegerTreeReader(columnId, fileType, context);
+
+ case DECIMAL:
+ return new DecimalFromAnyIntegerTreeReader(columnId, fileType, context);
+
+ case STRING:
+ case CHAR:
+ case VARCHAR:
+ return new StringGroupFromBooleanTreeReader(columnId, fileType,
readerType,
+ context);
+
+ case TIMESTAMP:
+ case TIMESTAMP_INSTANT:
+ return new TimestampFromAnyIntegerTreeReader(columnId, fileType, context,
+ readerType.getCategory() == Category.TIMESTAMP_INSTANT);
+
+ // Not currently supported conversion(s):
+ case BINARY:
+ case DATE:
+ case STRUCT:
+ case LIST:
+ case MAP:
+ case UNION:
+ default:
+ throw new IllegalArgumentException("Unsupported type " +
+ readerType.getCategory());
+ }
+ }
+
private static TypeReader createAnyIntegerConvertTreeReader(int columnId,
TypeDescription
fileType,
TypeDescription
readerType,
Context context)
throws IOException {
- // CONVERT from (BOOLEAN, BYTE, SHORT, INT, LONG) to schema type.
+ // CONVERT from (BYTE, SHORT, INT, LONG) to schema type.
//
switch (readerType.getCategory()) {
@@ -2065,7 +2117,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
TypeDescription
readerType,
Context context)
throws IOException {
- // CONVERT from DATE to schema type.
+ // CONVERT from BINARY to schema type.
switch (readerType.getCategory()) {
case STRING:
@@ -2145,7 +2197,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
* DecimalFromStringGroupTreeReader (written)
*
* To STRING, CHAR, VARCHAR:
- * Convert from (BOOLEAN, BYTE, SHORT, INT, LONG) using to string conversion
+ * Convert from (BYTE, SHORT, INT, LONG) using to string conversion
+ * Convert from BOOLEAN using boolean (True/False) conversion
* Convert from (FLOAT, DOUBLE) using to string conversion
* Convert from DECIMAL using HiveDecimal.toString
* Convert from CHAR by stripping pads
@@ -2155,6 +2208,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
* Convert from BINARY using Text.decode
*
* StringGroupFromAnyIntegerTreeReader (written)
+ * StringGroupFromBooleanTreeReader (written)
* StringGroupFromFloatTreeReader (written)
* StringGroupFromDoubleTreeReader (written)
* StringGroupFromDecimalTreeReader (written)
@@ -2233,13 +2287,15 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
switch (fileType.getCategory()) {
- case BOOLEAN:
case BYTE:
case SHORT:
case INT:
case LONG:
return createAnyIntegerConvertTreeReader(columnId, fileType, readerType,
context);
+ case BOOLEAN:
+ return createBooleanConvertTreeReader(columnId, fileType, readerType,
context);
+
case FLOAT:
case DOUBLE:
return createDoubleConvertTreeReader(columnId, fileType, readerType,
context);
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index 2ba72bd..8e7de66 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -580,6 +580,37 @@ public class TestSchemaEvolution {
}
@Test
+ public void testBooleanToStringEvolution() throws Exception {
+ testFilePath = new Path(workDir, "TestSchemaEvolution." +
+ testCaseName.getMethodName() + ".orc");
+ TypeDescription schema = TypeDescription.createBoolean();
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
+ .bufferSize(10000));
+ VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024);
+ LongColumnVector lcv = new LongColumnVector(1024);
+ batch.cols[0] = lcv;
+ batch.reset();
+ batch.size = 3;
+ lcv.vector[0] = 1L; // True
+ lcv.vector[1] = 0L; // False
+ lcv.vector[2] = 1L; // True
+ writer.addRowBatch(batch);
+ writer.close();
+
+ Reader reader = OrcFile.createReader(testFilePath,
+ OrcFile.readerOptions(conf).filesystem(fs));
+ TypeDescription schemaOnRead = TypeDescription.createString();
+ RecordReader rows = reader.rows(reader.options().schema(schemaOnRead));
+ batch = schemaOnRead.createRowBatch();
+ rows.nextBatch(batch);
+ assertEquals("TRUE", ((BytesColumnVector) batch.cols[0]).toString(0));
+ assertEquals("FALSE", ((BytesColumnVector) batch.cols[0]).toString(1));
+ assertEquals("TRUE", ((BytesColumnVector) batch.cols[0]).toString(2));
+ rows.close();
+ }
+
+ @Test
public void testCharToStringEvolution() throws IOException {
TypeDescription fileType =
TypeDescription.fromString("struct<x:char(10)>");
TypeDescription readType = TypeDescription.fromString("struct<x:string>");