[orc] branch master updated: ORC-709: FIX Boolean to StringGroup schema evolution (#594)

dongjoon Wed, 30 Dec 2020 02:28:38 -0800

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git



The following commit(s) were added to refs/heads/master by this push:
     new 40495ba  ORC-709: FIX Boolean to StringGroup schema evolution (#594)
40495ba is described below

commit 40495baa71fe4ce267e77e27f3ee4cd2c1b9b57e
Author: Panagiotis Garefalakis <[email protected]>
AuthorDate: Wed Dec 30 12:28:26 2020 +0200

    ORC-709: FIX Boolean to StringGroup schema evolution (#594)
    
    ### What changes were proposed in this pull request?
    Special ConvertTreeReader for Boolean using StringGroupFromAnyIntegerTreeReader for String/Char/Varchar types
    
    ### Why are the changes needed?
    Properly handle Boolean to String/Char/Varchar conversions
    
    ### How was this patch tested?
    TestSchemaEvolution.testBooleanToStringEvolution
---
 .../apache/orc/impl/ConvertTreeReaderFactory.java  | 64 ++++++++++++++++++++--
 .../org/apache/orc/impl/TestSchemaEvolution.java   | 31 +++++++++++
 2 files changed, 91 insertions(+), 4 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index a1c0d6b..b2b651b 100644
--- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -1774,12 +1774,64 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
     }
   }
 
+  private static TypeReader createBooleanConvertTreeReader(int columnId,
+                                                           TypeDescription fileType,
+                                                           TypeDescription readerType,
+                                                           Context context) throws IOException {
+
+    // CONVERT from BOOLEAN to schema type.
+    //
+    switch (readerType.getCategory()) {
+
+    case BOOLEAN:
+    case BYTE:
+    case SHORT:
+    case INT:
+    case LONG:
+      if (fileType.getCategory() == readerType.getCategory()) {
+        throw new IllegalArgumentException("No conversion of type " +
+            readerType.getCategory() + " to self needed");
+      }
+      return new AnyIntegerFromAnyIntegerTreeReader(columnId, fileType, readerType,
+          context);
+
+    case FLOAT:
+    case DOUBLE:
+      return new DoubleFromAnyIntegerTreeReader(columnId, fileType, context);
+
+    case DECIMAL:
+      return new DecimalFromAnyIntegerTreeReader(columnId, fileType, context);
+
+    case STRING:
+    case CHAR:
+    case VARCHAR:
+      return new StringGroupFromBooleanTreeReader(columnId, fileType, readerType,
+          context);
+
+    case TIMESTAMP:
+    case TIMESTAMP_INSTANT:
+      return new TimestampFromAnyIntegerTreeReader(columnId, fileType, context,
+          readerType.getCategory() == Category.TIMESTAMP_INSTANT);
+
+    // Not currently supported conversion(s):
+    case BINARY:
+    case DATE:
+    case STRUCT:
+    case LIST:
+    case MAP:
+    case UNION:
+    default:
+      throw new IllegalArgumentException("Unsupported type " +
+          readerType.getCategory());
+    }
+  }
+
   private static TypeReader createAnyIntegerConvertTreeReader(int columnId,
                                                               TypeDescription fileType,
                                                               TypeDescription readerType,
                                                               Context context) throws IOException {
 
-    // CONVERT from (BOOLEAN, BYTE, SHORT, INT, LONG) to schema type.
+    // CONVERT from (BYTE, SHORT, INT, LONG) to schema type.
     //
     switch (readerType.getCategory()) {
 
@@ -2065,7 +2117,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
                                                           TypeDescription readerType,
                                                           Context context) throws IOException {
 
-    // CONVERT from DATE to schema type.
+    // CONVERT from BINARY to schema type.
     switch (readerType.getCategory()) {
 
     case STRING:
@@ -2145,7 +2197,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
    *   DecimalFromStringGroupTreeReader (written)
    *
    * To STRING, CHAR, VARCHAR:
-   *   Convert from (BOOLEAN, BYTE, SHORT, INT, LONG) using to string conversion
+   *   Convert from (BYTE, SHORT, INT, LONG) using to string conversion
+   *   Convert from BOOLEAN using boolean (True/False) conversion
    *   Convert from (FLOAT, DOUBLE) using to string conversion
    *   Convert from DECIMAL using HiveDecimal.toString
    *   Convert from CHAR by stripping pads
@@ -2155,6 +2208,7 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
    *   Convert from BINARY using Text.decode
    *
    *   StringGroupFromAnyIntegerTreeReader (written)
+   *   StringGroupFromBooleanTreeReader (written)
    *   StringGroupFromFloatTreeReader (written)
    *   StringGroupFromDoubleTreeReader (written)
    *   StringGroupFromDecimalTreeReader (written)
@@ -2233,13 +2287,15 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
 
     switch (fileType.getCategory()) {
 
-    case BOOLEAN:
     case BYTE:
     case SHORT:
     case INT:
     case LONG:
       return createAnyIntegerConvertTreeReader(columnId, fileType, readerType, context);
 
+    case BOOLEAN:
+      return createBooleanConvertTreeReader(columnId, fileType, readerType, context);
+
     case FLOAT:
     case DOUBLE:
       return createDoubleConvertTreeReader(columnId, fileType, readerType, context);
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index 2ba72bd..8e7de66 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -580,6 +580,37 @@ public class TestSchemaEvolution {
   }
 
   @Test
+  public void testBooleanToStringEvolution() throws Exception {
+    testFilePath = new Path(workDir, "TestSchemaEvolution." +
+      testCaseName.getMethodName() + ".orc");
+    TypeDescription schema = TypeDescription.createBoolean();
+    Writer writer = OrcFile.createWriter(testFilePath,
+      OrcFile.writerOptions(conf).setSchema(schema).stripeSize(100000)
+        .bufferSize(10000));
+    VectorizedRowBatch batch = new VectorizedRowBatch(1, 1024);
+    LongColumnVector lcv = new LongColumnVector(1024);
+    batch.cols[0] = lcv;
+    batch.reset();
+    batch.size = 3;
+    lcv.vector[0] = 1L; // True
+    lcv.vector[1] = 0L; // False
+    lcv.vector[2] = 1L; // True
+    writer.addRowBatch(batch);
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath,
+      OrcFile.readerOptions(conf).filesystem(fs));
+    TypeDescription schemaOnRead = TypeDescription.createString();
+    RecordReader rows = reader.rows(reader.options().schema(schemaOnRead));
+    batch = schemaOnRead.createRowBatch();
+    rows.nextBatch(batch);
+    assertEquals("TRUE", ((BytesColumnVector) batch.cols[0]).toString(0));
+    assertEquals("FALSE", ((BytesColumnVector) batch.cols[0]).toString(1));
+    assertEquals("TRUE", ((BytesColumnVector) batch.cols[0]).toString(2));
+    rows.close();
+  }
+
+  @Test
   public void testCharToStringEvolution() throws IOException {
     TypeDescription fileType = TypeDescription.fromString("struct<x:char(10)>");
     TypeDescription readType = TypeDescription.fromString("struct<x:string>");

[orc] branch master updated: ORC-709: FIX Boolean to StringGroup schema evolution (#594)

Reply via email to