This is an automated email from the ASF dual-hosted git repository.
omalley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/master by this push:
new 85b3660 ORC-670. RecordReaderImpl.findColumns should respect orc.schema.evolution.case.sensitive
85b3660 is described below
commit 85b36607c35579fdfb6879eaea0bf92786d8eb89
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Sun Oct 4 23:14:13 2020 -0700
ORC-670. RecordReaderImpl.findColumns should respect orc.schema.evolution.case.sensitive
Fixes #548
Signed-off-by: Owen O'Malley <[email protected]>
---
.../core/src/java/org/apache/orc/TypeDescription.java | 8 +++++++-
.../src/java/org/apache/orc/impl/ParserUtils.java | 19 ++++++++++++++++++-
.../java/org/apache/orc/impl/RecordReaderImpl.java | 4 ++--
.../src/java/org/apache/orc/impl/SchemaEvolution.java | 2 +-
.../org/apache/orc/impl/TestRecordReaderImpl.java | 10 ++++++++++
5 files changed, 38 insertions(+), 5 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/TypeDescription.java b/java/core/src/java/org/apache/orc/TypeDescription.java
index ebdbb1c..72beb45 100644
--- a/java/core/src/java/org/apache/orc/TypeDescription.java
+++ b/java/core/src/java/org/apache/orc/TypeDescription.java
@@ -818,8 +818,14 @@ public class TypeDescription
* @return the subtype
*/
public TypeDescription findSubtype(String columnName) {
+ return findSubtype(columnName, true);
+ }
+
+ public TypeDescription findSubtype(String columnName,
+ boolean isSchemaEvolutionCaseAware) {
ParserUtils.StringPosition source = new ParserUtils.StringPosition(columnName);
- TypeDescription result = ParserUtils.findSubtype(this, source);
+ TypeDescription result = ParserUtils.findSubtype(this, source,
+ isSchemaEvolutionCaseAware);
if (source.hasCharactersLeft()) {
throw new IllegalArgumentException("Remaining text in parsing field name "
+ source);
diff --git a/java/core/src/java/org/apache/orc/impl/ParserUtils.java b/java/core/src/java/org/apache/orc/impl/ParserUtils.java
index c6a31ad..1231818 100644
--- a/java/core/src/java/org/apache/orc/impl/ParserUtils.java
+++ b/java/core/src/java/org/apache/orc/impl/ParserUtils.java
@@ -246,6 +246,12 @@ public class ParserUtils {
public static TypeDescription findSubtype(TypeDescription schema,
ParserUtils.StringPosition source)
{
+ return findSubtype(schema, source, true);
+ }
+
+ public static TypeDescription findSubtype(TypeDescription schema,
+ ParserUtils.StringPosition source,
+ boolean isSchemaEvolutionCaseAware) {
List<String> names = ParserUtils.splitName(source);
if (names.size() == 1 && INTEGER_PATTERN.matcher(names.get(0)).matches()) {
return schema.findSubtype(Integer.parseInt(names.get(0)));
@@ -256,7 +262,18 @@ public class ParserUtils {
String first = names.remove(0);
switch (current.getCategory()) {
case STRUCT: {
- int posn = current.getFieldNames().indexOf(first);
+ int posn = -1;
+ if (isSchemaEvolutionCaseAware) {
+ posn = current.getFieldNames().indexOf(first);
+ } else {
+ // Case-insensitive search like ORC 1.5
+ for (int i = 0; i < current.getFieldNames().size(); i++) {
+ if (current.getFieldNames().get(i).equalsIgnoreCase(first)) {
+ posn = i;
+ break;
+ }
+ }
+ }
if (posn == -1) {
throw new IllegalArgumentException("Field " + first +
" not found in " + current.toString());
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index 2c0998a..65c9c0f 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -107,8 +107,8 @@ public class RecordReaderImpl implements RecordReader {
static int findColumns(SchemaEvolution evolution,
String columnName) {
try {
- TypeDescription readerColumn =
- evolution.getReaderBaseSchema().findSubtype(columnName);
+ TypeDescription readerColumn = evolution.getReaderBaseSchema().findSubtype(
+ columnName, evolution.isSchemaEvolutionCaseAware);
TypeDescription fileColumn = evolution.getFileType(readerColumn);
return fileColumn == null ? -1 : fileColumn.getId();
} catch (IllegalArgumentException e) {
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index d2df1b9..fa13e26 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -49,7 +49,7 @@ public class SchemaEvolution {
private boolean hasConversion;
private boolean isOnlyImplicitConversion;
private final boolean isAcid;
- private final boolean isSchemaEvolutionCaseAware;
+ final boolean isSchemaEvolutionCaseAware;
/**
* {@code true} if acid metadata columns should be decoded otherwise they will
* be set to {@code null}. {@link #acidEventFieldNames}.
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index 9f3ef90..b11036e 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -113,6 +113,16 @@ public class TestRecordReaderImpl {
}
@Test
+ public void testFindColumnCaseInsensitively() throws Exception {
+ Configuration conf = new Configuration();
+ TypeDescription file = TypeDescription.fromString("struct<A:int>");
+ TypeDescription reader = TypeDescription.fromString("struct<a:int>");
+ conf.setBoolean("orc.schema.evolution.case.sensitive", false);
+ SchemaEvolution evo = new SchemaEvolution(file, reader, new Reader.Options(conf));
+ assertEquals(1, RecordReaderImpl.findColumns(evo, "A"));
+ }
+
+ @Test
public void testForcePositionalEvolution() throws Exception {
Configuration conf = new Configuration();