This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.6 by this push:
new 72aaadc ORC-676. Add getRawDataSizeFromColIndices back to ReaderImpl
(#555)
72aaadc is described below
commit 72aaadc45060d61cc824d6207b64b366fdb3853c
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Tue Oct 27 19:36:31 2020 -0700
ORC-676. Add getRawDataSizeFromColIndices back to ReaderImpl (#555)
ORC-676: Add getRawDataSizeFromColIndices back to ReaderImpl
This closes #555
(cherry picked from commit d068d50659c22cad2c16a0871a1b17e77a52b215)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../src/java/org/apache/orc/impl/ReaderImpl.java | 16 +++++++++++++++
.../test/org/apache/orc/impl/TestReaderImpl.java | 24 ++++++++++++++++++++++
2 files changed, 40 insertions(+)
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index e062239..11cdefd 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -791,6 +791,22 @@ public class ReaderImpl implements Reader {
return getRawDataSizeFromColIndices(include, schema, fileStats);
}
+ public static long getRawDataSizeFromColIndices( // overload taking column ids + protobuf types; builds an include mask and delegates to the boolean[] overload
+ List<Integer> colIndices, // ids passed to schema.findSubtype(); each selects that subtype and the id range below it
+ List<OrcProto.Type> types, // protobuf type list the schema is rebuilt from
+ List<OrcProto.ColumnStatistics> stats) // statistics forwarded unchanged to the boolean[] overload
+ throws FileFormatException {
+ TypeDescription schema = OrcUtils.convertTypeFromProtobuf(types, 0); // NOTE(review): 0 is presumably the root type's index in `types` - confirm
+ boolean[] include = new boolean[schema.getMaximumId() + 1]; // one flag per id from 0 through schema.getMaximumId(), all false initially
+ for(Integer rootId: colIndices) {
+ TypeDescription root = schema.findSubtype(rootId); // resolve the requested id to its TypeDescription
+ for(int c = root.getId(); c <= root.getMaximumId(); ++c) { // walk the inclusive id range [getId(), getMaximumId()] - presumably the subtype plus all of its descendants
+ include[c] = true;
+ }
+ }
+ return getRawDataSizeFromColIndices(include, schema, stats); // the actual size computation lives in the boolean[] overload
+ }
+
static long getRawDataSizeFromColIndices(boolean[] include,
TypeDescription schema,
List<OrcProto.ColumnStatistics>
stats) {
diff --git a/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java
index 5e8c9cf..8e5085b 100644
--- a/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java
@@ -44,11 +44,13 @@ import org.apache.hadoop.util.Progressable;
import org.apache.orc.FileFormatException;
import org.apache.orc.OrcFile;
import org.apache.orc.OrcProto;
+import org.apache.orc.OrcUtils;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.StripeStatistics;
import org.apache.orc.TestVectorOrcFile;
import org.apache.orc.TimestampColumnStatistics;
+import org.apache.orc.TypeDescription;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
@@ -368,4 +370,26 @@ public class TestReaderImpl {
assertEquals(-28550000, tsStats.getMaximumUtc());
}
}
+
+ @Test
+ public void testGetRawDataSizeFromColIndices() throws Exception { // checks that the List-based overload agrees with the boolean[] overload when every id is selected
+ Configuration conf = new Configuration();
+ Path path = new Path(workDir, "orc_split_elim_new.orc");
+ FileSystem fs = path.getFileSystem(conf);
+ try (ReaderImpl reader = (ReaderImpl) OrcFile.createReader(path, // try-with-resources closes the reader when the block exits
+ OrcFile.readerOptions(conf).filesystem(fs))) {
+ TypeDescription schema = reader.getSchema();
+ List<OrcProto.Type> types = OrcUtils.getOrcTypes(schema); // protobuf form of the same schema, as input for the List-based overload
+ boolean[] include = new boolean[schema.getMaximumId() + 1];
+ List<Integer> list = new ArrayList<Integer>();
+ for (int i = 0; i < include.length; i++) { // select every id in both representations: mask entry set and id listed
+ include[i] = true;
+ list.add(i);
+ }
+ List<OrcProto.ColumnStatistics> stats =
reader.getFileTail().getFooter().getStatisticsList();
+ assertEquals( // both overloads must report the same size for the same selection and statistics
+ ReaderImpl.getRawDataSizeFromColIndices(include, schema, stats),
+ ReaderImpl.getRawDataSizeFromColIndices(list, types, stats));
+ }
+ }
}