This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 93d51b9  IcebergGenerics: Support ORC format (#851)
93d51b9 is described below

commit 93d51b94d00e028f93334fa511551f35efb5b639
Author: Xuedong Luan <[email protected]>
AuthorDate: Mon Mar 23 07:35:58 2020 +0800

    IcebergGenerics: Support ORC format (#851)
---
 .../org/apache/iceberg/data/TableScanIterable.java | 10 ++++++++
 .../org/apache/iceberg/data/TestLocalScan.java     | 29 ++++++++++++++++++----
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/data/src/main/java/org/apache/iceberg/data/TableScanIterable.java b/data/src/main/java/org/apache/iceberg/data/TableScanIterable.java
index 27a625d..75c479e 100644
--- a/data/src/main/java/org/apache/iceberg/data/TableScanIterable.java
+++ b/data/src/main/java/org/apache/iceberg/data/TableScanIterable.java
@@ -35,6 +35,7 @@ import org.apache.iceberg.TableOperations;
 import org.apache.iceberg.TableScan;
 import org.apache.iceberg.avro.Avro;
 import org.apache.iceberg.data.avro.DataReader;
+import org.apache.iceberg.data.orc.GenericOrcReader;
 import org.apache.iceberg.data.parquet.GenericParquetReaders;
 import org.apache.iceberg.exceptions.RuntimeIOException;
 import org.apache.iceberg.expressions.Evaluator;
@@ -42,6 +43,7 @@ import org.apache.iceberg.expressions.Expressions;
 import org.apache.iceberg.io.CloseableGroup;
 import org.apache.iceberg.io.CloseableIterable;
 import org.apache.iceberg.io.InputFile;
+import org.apache.iceberg.orc.ORC;
 import org.apache.iceberg.parquet.Parquet;
 
 class TableScanIterable extends CloseableGroup implements CloseableIterable<Record> {
@@ -99,6 +101,14 @@ class TableScanIterable extends CloseableGroup implements CloseableIterable<Reco
 
         return parquet.build();
 
+      case ORC:
+        ORC.ReadBuilder orc = ORC.read(input)
+                .schema(projection)
+                .createReaderFunc(fileSchema -> 
GenericOrcReader.buildReader(projection, fileSchema))
+                .split(task.start(), task.length());
+
+        return orc.build();
+
       default:
         throw new UnsupportedOperationException(String.format("Cannot read %s 
file: %s",
             task.file().format().name(), task.file().path()));
diff --git a/data/src/test/java/org/apache/iceberg/data/TestLocalScan.java b/data/src/test/java/org/apache/iceberg/data/TestLocalScan.java
index e41c41d..6736730 100644
--- a/data/src/test/java/org/apache/iceberg/data/TestLocalScan.java
+++ b/data/src/test/java/org/apache/iceberg/data/TestLocalScan.java
@@ -46,11 +46,13 @@ import org.apache.iceberg.TableProperties;
 import org.apache.iceberg.Tables;
 import org.apache.iceberg.avro.Avro;
 import org.apache.iceberg.data.avro.DataWriter;
+import org.apache.iceberg.data.orc.GenericOrcWriter;
 import org.apache.iceberg.data.parquet.GenericParquetWriter;
 import org.apache.iceberg.expressions.Expressions;
 import org.apache.iceberg.hadoop.HadoopInputFile;
 import org.apache.iceberg.hadoop.HadoopTables;
 import org.apache.iceberg.io.FileAppender;
+import org.apache.iceberg.orc.ORC;
 import org.apache.iceberg.parquet.Parquet;
 import org.apache.iceberg.types.Types;
 import org.junit.Assert;
@@ -87,6 +89,7 @@ public class TestLocalScan {
   public static Object[][] parameters() {
     return new Object[][] {
         new Object[] { "parquet" },
+        new Object[] { "orc" },
         new Object[] { "avro" }
     };
   }
@@ -393,7 +396,7 @@ public class TestLocalScan {
     Preconditions.checkNotNull(fileFormat, "Cannot determine format for file: %s", filename);
     switch (fileFormat) {
       case AVRO:
-        FileAppender avroAppender = Avro.write(fromPath(path, CONF))
+        FileAppender<Record> avroAppender = Avro.write(fromPath(path, CONF))
             .schema(SCHEMA)
             .createWriterFunc(DataWriter::create)
             .named(fileFormat.name())
@@ -410,20 +413,36 @@ public class TestLocalScan {
             .build();
 
       case PARQUET:
-        FileAppender<Record> orcAppender = Parquet.write(fromPath(path, CONF))
+        FileAppender<Record> parquetAppender = Parquet.write(fromPath(path, CONF))
             .schema(SCHEMA)
             .createWriterFunc(GenericParquetWriter::buildWriter)
             .build();
         try {
-          orcAppender.addAll(records);
+          parquetAppender.addAll(records);
         } finally {
-          orcAppender.close();
+          parquetAppender.close();
         }
 
         return DataFiles.builder(PartitionSpec.unpartitioned())
             .withInputFile(HadoopInputFile.fromPath(path, CONF))
-            .withMetrics(orcAppender.metrics())
+            .withMetrics(parquetAppender.metrics())
+            .build();
+
+      case ORC:
+        FileAppender<Record> orcAppender = ORC.write(fromPath(path, CONF))
+            .schema(SCHEMA)
+            .createWriterFunc(GenericOrcWriter::buildWriter)
             .build();
+        try {
+          orcAppender.addAll(records);
+        } finally {
+          orcAppender.close();
+        }
+
+        return DataFiles.builder(PartitionSpec.unpartitioned())
+                .withInputFile(HadoopInputFile.fromPath(path, CONF))
+                .withMetrics(orcAppender.metrics())
+                .build();
 
       default:
         throw new UnsupportedOperationException("Cannot write format: " + 
fileFormat);

Reply via email to