[ 
https://issues.apache.org/jira/browse/ORC-363?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16677014#comment-16677014
 ] 

ASF GitHub Bot commented on ORC-363:
------------------------------------

xndai closed pull request #268: ORC-363 Enable zstd decompression in ORC Java reader
URL: https://github.com/apache/orc/pull/268
 
 
   

This is a PR merged from a forked repository (a foreign pull request).
Because GitHub hides the original diff of such a pull request once it is
merged, the diff is reproduced below for the sake of provenance:

diff --git a/java/core/src/java/org/apache/orc/CompressionKind.java 
b/java/core/src/java/org/apache/orc/CompressionKind.java
index 3cffe57ee9..4a1cd5c883 100644
--- a/java/core/src/java/org/apache/orc/CompressionKind.java
+++ b/java/core/src/java/org/apache/orc/CompressionKind.java
@@ -23,5 +23,5 @@
  * can be applied to ORC files.
  */
 public enum CompressionKind {
-  NONE, ZLIB, SNAPPY, LZO, LZ4
+  NONE, ZLIB, SNAPPY, LZO, LZ4, ZSTD
 }
diff --git a/java/core/src/java/org/apache/orc/OrcFile.java 
b/java/core/src/java/org/apache/orc/OrcFile.java
index b07355a970..6edb1e7c8d 100644
--- a/java/core/src/java/org/apache/orc/OrcFile.java
+++ b/java/core/src/java/org/apache/orc/OrcFile.java
@@ -423,6 +423,11 @@ protected WriterOptions(Properties tableProperties, 
Configuration conf) {
       compressValue =
           CompressionKind.valueOf(OrcConf.COMPRESS.getString(tableProperties,
               conf).toUpperCase());
+      // Zstd compression is not supported currently
+      // Will enable this once it's released through aircompressor lib
+      if (compressValue == CompressionKind.ZSTD) {
+          throw new IllegalArgumentException("Zstd compressor is not 
supported.");
+      }
       enforceBufferSize = 
OrcConf.ENFORCE_COMPRESSION_BUFFER_SIZE.getBoolean(tableProperties, conf);
       String versionName = OrcConf.WRITE_FORMAT.getString(tableProperties,
           conf);
@@ -581,6 +586,11 @@ public WriterOptions bloomFilterFpp(double fpp) {
      * Sets the generic compression that is used to compress the data.
      */
     public WriterOptions compress(CompressionKind value) {
+      // Zstd compression is not supported currently
+      // Will enable this once it's released through aircompressor lib
+      if (value == CompressionKind.ZSTD) {
+          throw new IllegalArgumentException("Zstd compressor is not 
supported.");
+      }
       compressValue = value;
       return this;
     }
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java 
b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index bba580faee..2517daaaba 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -443,6 +443,7 @@ public ReaderImpl(Path path, OrcFile.ReaderOptions options) 
throws IOException {
       case SNAPPY:
       case LZO:
       case LZ4:
+      case ZSTD:
         break;
       default:
         throw new IllegalArgumentException("Unknown compression");
diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java 
b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
index d6239f2f36..f1e530f8e3 100644
--- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
@@ -31,6 +31,7 @@
 import io.airlift.compress.lz4.Lz4Decompressor;
 import io.airlift.compress.lzo.LzoCompressor;
 import io.airlift.compress.lzo.LzoDecompressor;
+import io.airlift.compress.zstd.ZstdDecompressor;
 import org.apache.orc.ColumnStatistics;
 import org.apache.orc.CompressionCodec;
 import org.apache.orc.CompressionKind;
@@ -241,6 +242,10 @@ public static CompressionCodec createCodec(CompressionKind 
kind) {
       case LZ4:
         return new AircompressorCodec(new Lz4Compressor(),
             new Lz4Decompressor());
+      case ZSTD:
+        // Zstd compressor is not availiable currently
+        // Will add it back after it's released
+        return new AircompressorCodec(null, new ZstdDecompressor());
       default:
         throw new IllegalArgumentException("Unknown compression codec: " +
             kind);
diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java 
b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index 658c1cea71..67d01d141a 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -371,6 +371,49 @@ public void testReadFormat_0_11() throws Exception {
     rows.close();
   }
 
+  @Test
+  public void testReadZstd() throws Exception {
+    Path filePath =
+        new Path(getFileFromClasspath("orc-file-zstd.orc"));
+    Reader reader = OrcFile.createReader(filePath,
+        OrcFile.readerOptions(conf).filesystem(fs));
+
+    int stripeCount = 0;
+    int rowCount = 0;
+    long currentOffset = -1;
+    for(StripeInformation stripe : reader.getStripes()) {
+      stripeCount += 1;
+      rowCount += stripe.getNumberOfRows();
+    }
+    Assert.assertEquals(reader.getNumberOfRows(), rowCount);
+    assertEquals(1, stripeCount);
+
+    // check schema and read file contents
+    TypeDescription schema = reader.getSchema();
+    assertEquals(TypeDescription.Category.STRUCT, schema.getCategory());
+    assertEquals(
+                 "struct<c1:bigint,c2:string,c3:bigint,c4:string>",
+                 schema.toString());
+    VectorizedRowBatch batch = schema.createRowBatch();
+    RecordReader rows = reader.rows();
+    Assert.assertEquals(true, rows.nextBatch(batch));
+    assertEquals(25, batch.size);
+
+    // check the contents of the first row
+    assertEquals(0, ((LongColumnVector)batch.cols[0]).vector[0]);
+    assertEquals(
+                 "ALGERIA",
+                 getText((BytesColumnVector)batch.cols[1], 0).toString());
+    assertEquals(0, ((LongColumnVector)batch.cols[2]).vector[0]);
+    assertEquals(
+                 " haggle. carefully final deposits detect slyly agai",
+                 getText((BytesColumnVector)batch.cols[3], 0).toString());
+
+    // handle the close up
+    Assert.assertEquals(false, rows.nextBatch(batch));
+    rows.close();
+  }
+
   @Test
   public void testTimestamp() throws Exception {
     TypeDescription schema = TypeDescription.createTimestamp();
diff --git a/java/core/src/test/org/apache/orc/impl/TestWriterImpl.java 
b/java/core/src/test/org/apache/orc/impl/TestWriterImpl.java
index 343ce5fd91..de8721990b 100644
--- a/java/core/src/test/org/apache/orc/impl/TestWriterImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestWriterImpl.java
@@ -23,6 +23,7 @@
 import org.apache.hadoop.fs.FileAlreadyExistsException;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.orc.CompressionKind;
 import org.apache.orc.OrcConf;
 import org.apache.orc.OrcFile;
 import org.apache.orc.TypeDescription;
@@ -70,4 +71,16 @@ public void testOverriddenOverwriteFlagForWriter() throws 
Exception {
     Writer w = OrcFile.createWriter(testFilePath, 
OrcFile.writerOptions(conf).setSchema(schema));
     w.close();
   }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testZstdConfThrowsException() throws Exception {
+    conf.set(OrcConf.COMPRESS.getAttribute(), "ZSTD");
+    Writer w = OrcFile.createWriter(testFilePath, 
OrcFile.writerOptions(conf).setSchema(schema));
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testZstdWriterOptionThrowsException() throws Exception {
+    OrcFile.WriterOptions options = OrcFile.writerOptions(conf);
+    options.compress(CompressionKind.ZSTD);
+  }
 }
diff --git a/java/core/src/test/resources/orc-file-zstd.orc 
b/java/core/src/test/resources/orc-file-zstd.orc
new file mode 100644
index 0000000000..daca5d45ec
Binary files /dev/null and b/java/core/src/test/resources/orc-file-zstd.orc 
differ
diff --git a/java/pom.xml b/java/pom.xml
index 67e20f84a8..470c16a800 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -415,7 +415,7 @@
       <dependency>
         <groupId>io.airlift</groupId>
         <artifactId>aircompressor</artifactId>
-        <version>0.10</version>
+        <version>0.11</version>
         <exclusions>
           <exclusion>
            <groupId>io.airlift</groupId>


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> Enable zstd decompression in ORC Java reader
> --------------------------------------------
>
>                 Key: ORC-363
>                 URL: https://issues.apache.org/jira/browse/ORC-363
>             Project: ORC
>          Issue Type: Bug
>            Reporter: Xiening Dai
>            Assignee: Xiening Dai
>            Priority: Major
>
> Update to aircompressor library 0.11 and enable zstd decompression.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to