[orc] branch master updated: ORC-670. RecordReaderImpl.findColumns should respect orc.schema.evolution.case.sensitive

omalley Mon, 05 Oct 2020 11:56:17 -0700

This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git



The following commit(s) were added to refs/heads/master by this push:
     new 85b3660  ORC-670. RecordReaderImpl.findColumns should respect orc.schema.evolution.case.sensitive
85b3660 is described below

commit 85b36607c35579fdfb6879eaea0bf92786d8eb89
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Sun Oct 4 23:14:13 2020 -0700

    ORC-670. RecordReaderImpl.findColumns should respect orc.schema.evolution.case.sensitive
    
    Fixes #548
    
    Signed-off-by: Owen O'Malley <[email protected]>
---
 .../core/src/java/org/apache/orc/TypeDescription.java |  8 +++++++-
 .../src/java/org/apache/orc/impl/ParserUtils.java     | 19 ++++++++++++++++++-
 .../java/org/apache/orc/impl/RecordReaderImpl.java    |  4 ++--
 .../src/java/org/apache/orc/impl/SchemaEvolution.java |  2 +-
 .../org/apache/orc/impl/TestRecordReaderImpl.java     | 10 ++++++++++
 5 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/TypeDescription.java b/java/core/src/java/org/apache/orc/TypeDescription.java
index ebdbb1c..72beb45 100644
--- a/java/core/src/java/org/apache/orc/TypeDescription.java
+++ b/java/core/src/java/org/apache/orc/TypeDescription.java
@@ -818,8 +818,14 @@ public class TypeDescription
    * @return the subtype
    */
   public TypeDescription findSubtype(String columnName) {
+    return findSubtype(columnName, true);
+  }
+
+  public TypeDescription findSubtype(String columnName,
+      boolean isSchemaEvolutionCaseAware) {
     ParserUtils.StringPosition source = new ParserUtils.StringPosition(columnName);
-    TypeDescription result = ParserUtils.findSubtype(this, source);
+    TypeDescription result = ParserUtils.findSubtype(this, source,
+        isSchemaEvolutionCaseAware);
     if (source.hasCharactersLeft()) {
       throw new IllegalArgumentException("Remaining text in parsing field name "
           + source);
diff --git a/java/core/src/java/org/apache/orc/impl/ParserUtils.java b/java/core/src/java/org/apache/orc/impl/ParserUtils.java
index c6a31ad..1231818 100644
--- a/java/core/src/java/org/apache/orc/impl/ParserUtils.java
+++ b/java/core/src/java/org/apache/orc/impl/ParserUtils.java
@@ -246,6 +246,12 @@ public class ParserUtils {
 
   public static TypeDescription findSubtype(TypeDescription schema,
                                             ParserUtils.StringPosition source) {
+    return findSubtype(schema, source, true);
+  }
+
+  public static TypeDescription findSubtype(TypeDescription schema,
+                                            ParserUtils.StringPosition source,
+                                            boolean isSchemaEvolutionCaseAware) {
     List<String> names = ParserUtils.splitName(source);
     if (names.size() == 1 && INTEGER_PATTERN.matcher(names.get(0)).matches()) {
       return schema.findSubtype(Integer.parseInt(names.get(0)));
@@ -256,7 +262,18 @@ public class ParserUtils {
       String first = names.remove(0);
       switch (current.getCategory()) {
         case STRUCT: {
-          int posn = current.getFieldNames().indexOf(first);
+          int posn = -1;
+          if (isSchemaEvolutionCaseAware) {
+            posn = current.getFieldNames().indexOf(first);
+          } else {
+            // Case-insensitive search like ORC 1.5
+            for (int i = 0; i < current.getFieldNames().size(); i++) {
+              if (current.getFieldNames().get(i).equalsIgnoreCase(first)) {
+                posn = i;
+                break;
+              }
+            }
+          }
           if (posn == -1) {
             throw new IllegalArgumentException("Field " + first +
                 " not found in " + current.toString());
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index 2c0998a..65c9c0f 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -107,8 +107,8 @@ public class RecordReaderImpl implements RecordReader {
   static int findColumns(SchemaEvolution evolution,
                          String columnName) {
     try {
-      TypeDescription readerColumn =
-          evolution.getReaderBaseSchema().findSubtype(columnName);
+      TypeDescription readerColumn = evolution.getReaderBaseSchema().findSubtype(
+          columnName, evolution.isSchemaEvolutionCaseAware);
       TypeDescription fileColumn = evolution.getFileType(readerColumn);
       return fileColumn == null ? -1 : fileColumn.getId();
     } catch (IllegalArgumentException e) {
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index d2df1b9..fa13e26 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -49,7 +49,7 @@ public class SchemaEvolution {
   private boolean hasConversion;
   private boolean isOnlyImplicitConversion;
   private final boolean isAcid;
-  private final boolean isSchemaEvolutionCaseAware;
+  final boolean isSchemaEvolutionCaseAware;
   /**
    * {@code true} if acid metadata columns should be decoded otherwise they will
    * be set to {@code null}.  {@link #acidEventFieldNames}.
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index 9f3ef90..b11036e 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -113,6 +113,16 @@ public class TestRecordReaderImpl {
   }
 
   @Test
+  public void testFindColumnCaseInsensitively() throws Exception {
+    Configuration conf = new Configuration();
+    TypeDescription file = TypeDescription.fromString("struct<A:int>");
+    TypeDescription reader = TypeDescription.fromString("struct<a:int>");
+    conf.setBoolean("orc.schema.evolution.case.sensitive", false);
+    SchemaEvolution evo = new SchemaEvolution(file, reader, new Reader.Options(conf));
+    assertEquals(1, RecordReaderImpl.findColumns(evo, "A"));
+  }
+
+  @Test
   public void testForcePositionalEvolution() throws Exception {
     Configuration conf = new Configuration();

[orc] branch master updated: ORC-670. RecordReaderImpl.findColumns should respect orc.schema.evolution.case.sensitive

Reply via email to