This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push: new e22f537 ORC-1121: Fix column coversion check bug which causes column filters don't work (#1055) e22f537 is described below commit e22f5370fa7e579573e1c627cbdfa6173b206b76 Author: shipenglei <shipengle...@163.com> AuthorDate: Wed Mar 9 04:41:25 2022 +0800 ORC-1121: Fix column coversion check bug which causes column filters don't work (#1055) ### What changes were proposed in this pull request? Add a map in `SchemaEvolution` which contains the mapping from the file column id to the reader column id, the mapping will be used in `SchemaEvolution.isPPDSafeConversion()` ### Why are the changes needed? `RecordReaderImpl.pickRowGroups()` calls `SchemaEvolution.isPPDSafeConversion()` with file column id rather than reader column id which is required, this causes column filters can't work effectively and recordReader can't skip row groups which are not matched, so we need find the corresponding reader column id via file column id to ensure `SchemaEvolution.isPPDSafeConversion()` can work correctly. ### How was this patch tested? UT --- .../java/org/apache/orc/impl/SchemaEvolution.java | 20 +++++++++++--------- .../org/apache/orc/impl/TestSchemaEvolution.java | 10 ++++++++++ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java index 168f391..cb35f2a 100644 --- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java +++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java @@ -56,7 +56,7 @@ public class SchemaEvolution { */ private final boolean includeAcidColumns; - // indexed by reader column id + // indexed by file column id private final boolean[] ppdSafeConversion; // columns are indexed, not named between Reader & File schema @@ -296,13 +296,13 @@ public class SchemaEvolution { /** * Check if column is safe for ppd evaluation - * @param colId reader column id + * @param fileColId file column id * @return true if the specified column is safe for ppd evaluation else false */ - public boolean isPPDSafeConversion(final int colId) { + public boolean isPPDSafeConversion(final int fileColId) { if (hasConversion()) { - return !(colId < 0 || colId >= ppdSafeConversion.length) && - ppdSafeConversion[colId]; + return !(fileColId < 0 || fileColId >= ppdSafeConversion.length) && + ppdSafeConversion[fileColId]; } // when there is no schema evolution PPD is safe @@ -314,9 +314,9 @@ public class SchemaEvolution { return null; } - boolean[] result = new boolean[readerSchema.getMaximumId() + 1]; + boolean[] result = new boolean[fileSchema.getMaximumId() + 1]; boolean safePpd = validatePPDConversion(fileSchema, readerSchema); - result[readerSchema.getId()] = safePpd; + result[fileSchema.getId()] = safePpd; return populatePpdSafeConversionForChildren(result, readerSchema.getChildren()); } @@ -337,12 +337,14 @@ public class SchemaEvolution { for (TypeDescription child : children) { TypeDescription fileType = getFileType(child.getId()); safePpd = validatePPDConversion(fileType, child); - ppdSafeConversion[child.getId()] = safePpd; + if (fileType != null) { + ppdSafeConversion[fileType.getId()] = safePpd; + } populatePpdSafeConversionForChildren(ppdSafeConversion, child.getChildren()); } } - return ppdSafeConversion; + return ppdSafeConversion; } private boolean validatePPDConversion(final TypeDescription fileType, diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java index 5c2d7d2..3a82fb5 100644 --- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java +++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java @@ -853,6 +853,16 @@ public class TestSchemaEvolution { assertTrue(both1.isPPDSafeConversion(2)); assertTrue(both1.isPPDSafeConversion(3)); assertFalse(both1.isPPDSafeConversion(4)); + + // column pruning + readerStruct1 = TypeDescription.createStruct() + .addField("f2", TypeDescription.createString()); + both1 = new SchemaEvolution(fileStruct1, readerStruct1, options); + assertTrue(both1.hasConversion()); + assertFalse(both1.isPPDSafeConversion(0)); + assertFalse(both1.isPPDSafeConversion(1)); + assertTrue(both1.isPPDSafeConversion(2)); + assertFalse(both1.isPPDSafeConversion(3)); } @Test