[orc] branch branch-1.6 updated: ORC-676. Add getRawDataSizeFromColIndices back to ReaderImpl (#555)

dongjoon Tue, 27 Oct 2020 21:16:25 -0700

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/orc.git



The following commit(s) were added to refs/heads/branch-1.6 by this push:
     new 72aaadc  ORC-676. Add getRawDataSizeFromColIndices back to ReaderImpl (#555)
72aaadc is described below

commit 72aaadc45060d61cc824d6207b64b366fdb3853c
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Tue Oct 27 19:36:31 2020 -0700

    ORC-676. Add getRawDataSizeFromColIndices back to ReaderImpl (#555)
    
    ORC-676: Add getRawDataSizeFromColIndices back to ReaderImpl
    
    This closes #555
    
    (cherry picked from commit d068d50659c22cad2c16a0871a1b17e77a52b215)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../src/java/org/apache/orc/impl/ReaderImpl.java   | 16 +++++++++++++++
 .../test/org/apache/orc/impl/TestReaderImpl.java   | 24 ++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index e062239..11cdefd 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -791,6 +791,22 @@ public class ReaderImpl implements Reader {
     return getRawDataSizeFromColIndices(include, schema, fileStats);
   }
 
+  public static long getRawDataSizeFromColIndices(
+      List<Integer> colIndices,
+      List<OrcProto.Type> types,
+      List<OrcProto.ColumnStatistics> stats)
+      throws FileFormatException {
+    TypeDescription schema = OrcUtils.convertTypeFromProtobuf(types, 0);
+    boolean[] include = new boolean[schema.getMaximumId() + 1];
+    for(Integer rootId: colIndices) {
+      TypeDescription root = schema.findSubtype(rootId);
+      for(int c = root.getId(); c <= root.getMaximumId(); ++c) {
+        include[c] = true;
+      }
+    }
+    return getRawDataSizeFromColIndices(include, schema, stats);
+  }
+
   static long getRawDataSizeFromColIndices(boolean[] include,
                                            TypeDescription schema,
                                            List<OrcProto.ColumnStatistics> stats) {
diff --git a/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java
index 5e8c9cf..8e5085b 100644
--- a/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java
@@ -44,11 +44,13 @@ import org.apache.hadoop.util.Progressable;
 import org.apache.orc.FileFormatException;
 import org.apache.orc.OrcFile;
 import org.apache.orc.OrcProto;
+import org.apache.orc.OrcUtils;
 import org.apache.orc.Reader;
 import org.apache.orc.RecordReader;
 import org.apache.orc.StripeStatistics;
 import org.apache.orc.TestVectorOrcFile;
 import org.apache.orc.TimestampColumnStatistics;
+import org.apache.orc.TypeDescription;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
@@ -368,4 +370,26 @@ public class TestReaderImpl {
       assertEquals(-28550000, tsStats.getMaximumUtc());
     }
   }
+
+  @Test
+  public void testGetRawDataSizeFromColIndices() throws Exception {
+    Configuration conf = new Configuration();
+    Path path = new Path(workDir, "orc_split_elim_new.orc");
+    FileSystem fs = path.getFileSystem(conf);
+    try (ReaderImpl reader = (ReaderImpl) OrcFile.createReader(path,
+        OrcFile.readerOptions(conf).filesystem(fs))) {
+      TypeDescription schema = reader.getSchema();
+      List<OrcProto.Type> types = OrcUtils.getOrcTypes(schema);
+      boolean[] include = new boolean[schema.getMaximumId() + 1];
+      List<Integer> list = new ArrayList<Integer>();
+      for (int i = 0; i < include.length; i++) {
+        include[i] = true;
+        list.add(i);
+      }
+      List<OrcProto.ColumnStatistics> stats = reader.getFileTail().getFooter().getStatisticsList();
+      assertEquals(
+        ReaderImpl.getRawDataSizeFromColIndices(include, schema, stats),
+        ReaderImpl.getRawDataSizeFromColIndices(list, types, stats));
+    }
+  }
 }

[orc] branch branch-1.6 updated: ORC-676. Add getRawDataSizeFromColIndices back to ReaderImpl (#555)

Reply via email to