This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.7 by this push:
     new 1593a9e  ORC-1121: Fix column coversion check bug which causes column 
filters don't work (#1055)
1593a9e is described below

commit 1593a9edcdaea0b8e56cd7a023153b499bfb8c2e
Author: shipenglei <shipengle...@163.com>
AuthorDate: Wed Mar 9 04:41:25 2022 +0800

    ORC-1121: Fix column coversion check bug which causes column filters don't 
work (#1055)
    
    ### What changes were proposed in this pull request?
    
    Add a map in `SchemaEvolution` which contains the mapping from the file 
column id to the reader column id, the mapping will be used in 
`SchemaEvolution.isPPDSafeConversion()`
    
    ### Why are the changes needed?
    
    `RecordReaderImpl.pickRowGroups()`  calls 
`SchemaEvolution.isPPDSafeConversion()` with file column id rather than reader 
column id which is required, this causes column filters can't work effectively 
and recordReader can't skip row groups which are not matched, so we need find 
the corresponding reader column id via file column id to ensure 
`SchemaEvolution.isPPDSafeConversion()` can work correctly.
    
    ### How was this patch tested?
    
    UT
    
    (cherry picked from commit e22f5370fa7e579573e1c627cbdfa6173b206b76)
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../java/org/apache/orc/impl/SchemaEvolution.java    | 20 +++++++++++---------
 .../org/apache/orc/impl/TestSchemaEvolution.java     | 10 ++++++++++
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java 
b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index 168f391..cb35f2a 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -56,7 +56,7 @@ public class SchemaEvolution {
    */
   private final boolean includeAcidColumns;
 
-  // indexed by reader column id
+  // indexed by file column id
   private final boolean[] ppdSafeConversion;
 
   // columns are indexed, not named between Reader & File schema
@@ -296,13 +296,13 @@ public class SchemaEvolution {
 
   /**
    * Check if column is safe for ppd evaluation
-   * @param colId reader column id
+   * @param fileColId file column id
    * @return true if the specified column is safe for ppd evaluation else false
    */
-  public boolean isPPDSafeConversion(final int colId) {
+  public boolean isPPDSafeConversion(final int fileColId) {
     if (hasConversion()) {
-      return !(colId < 0 || colId >= ppdSafeConversion.length) &&
-          ppdSafeConversion[colId];
+      return !(fileColId < 0 || fileColId >= ppdSafeConversion.length) &&
+          ppdSafeConversion[fileColId];
     }
 
     // when there is no schema evolution PPD is safe
@@ -314,9 +314,9 @@ public class SchemaEvolution {
       return null;
     }
 
-    boolean[] result = new boolean[readerSchema.getMaximumId() + 1];
+    boolean[] result = new boolean[fileSchema.getMaximumId() + 1];
     boolean safePpd = validatePPDConversion(fileSchema, readerSchema);
-    result[readerSchema.getId()] = safePpd;
+    result[fileSchema.getId()] = safePpd;
     return populatePpdSafeConversionForChildren(result,
         readerSchema.getChildren());
   }
@@ -337,12 +337,14 @@ public class SchemaEvolution {
       for (TypeDescription child : children) {
         TypeDescription fileType = getFileType(child.getId());
         safePpd = validatePPDConversion(fileType, child);
-        ppdSafeConversion[child.getId()] = safePpd;
+        if (fileType != null) {
+          ppdSafeConversion[fileType.getId()] = safePpd;
+        }
         populatePpdSafeConversionForChildren(ppdSafeConversion,
             child.getChildren());
       }
     }
-    return  ppdSafeConversion;
+    return ppdSafeConversion;
   }
 
   private boolean validatePPDConversion(final TypeDescription fileType,
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java 
b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index 5c2d7d2..3a82fb5 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -853,6 +853,16 @@ public class TestSchemaEvolution {
     assertTrue(both1.isPPDSafeConversion(2));
     assertTrue(both1.isPPDSafeConversion(3));
     assertFalse(both1.isPPDSafeConversion(4));
+
+    // column pruning
+    readerStruct1 = TypeDescription.createStruct()
+        .addField("f2", TypeDescription.createString());
+    both1 = new SchemaEvolution(fileStruct1, readerStruct1, options);
+    assertTrue(both1.hasConversion());
+    assertFalse(both1.isPPDSafeConversion(0));
+    assertFalse(both1.isPPDSafeConversion(1));
+    assertTrue(both1.isPPDSafeConversion(2));
+    assertFalse(both1.isPPDSafeConversion(3));
   }
 
   @Test

Reply via email to