This is an automated email from the ASF dual-hosted git repository.

william pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new 725fbc513 ORC-1961: Support `orc.compression.zstd.strategy`
725fbc513 is described below

commit 725fbc5133601a5433ec3901b6e4682d14244009
Author: Dongjoon Hyun <dongj...@apache.org>
AuthorDate: Wed Jul 23 23:42:12 2025 -0700

    ORC-1961: Support `orc.compression.zstd.strategy`
    
    ### What changes were proposed in this pull request?
    
    This PR aims to support `orc.compression.zstd.strategy`.
    
    ### Why are the changes needed?
    
    To allow a user to choose a proper strategy based on their data.
    
    https://facebook.github.io/zstd/zstd_manual.html#Chapter5
    
    ```
    typedef enum { ZSTD_fast=1,
                   ZSTD_dfast=2,
                   ZSTD_greedy=3,
                   ZSTD_lazy=4,
                   ZSTD_lazy2=5,
                   ZSTD_btlazy2=6,
                   ZSTD_btopt=7,
                   ZSTD_btultra=8,
                   ZSTD_btultra2=9
                   /* note : new strategies _might_ be added in the future.
                             Only the order (from fast to strong) is guaranteed */
    } ZSTD_strategy;
    ```
    
    ### How was this patch tested?
    
    Pass the CIs.
    
    ```
    $ cd java
    $ mvn package -DskipTests -Pbenchmark
    $ cd bench
    
    $ time java -Dorc.compression.zstd.strategy=1 -jar core/target/orc-benchmarks-core-*-uber.jar generate data -d sales -c zstd -f orc
    ...
    54.51s user 1.28s system 103% cpu 53.984 total
    
    $ time java -Dorc.compression.zstd.strategy=9 -jar core/target/orc-benchmarks-core-*-uber.jar generate data -d sales -c zstd -f orc
    ...
    148.21s user 1.75s system 101% cpu 2:28.13 total
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #2338 from dongjoon-hyun/ORC-1961.
    
    Authored-by: Dongjoon Hyun <dongj...@apache.org>
    Signed-off-by: William Hyun <will...@apache.org>
---
 java/core/src/java/org/apache/orc/OrcConf.java     |  4 ++++
 java/core/src/java/org/apache/orc/OrcFile.java     | 11 ++++++++++
 .../java/org/apache/orc/impl/PhysicalFsWriter.java |  1 +
 .../src/java/org/apache/orc/impl/ZstdCodec.java    | 24 ++++++++++++++++------
 4 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java
index 6516517ba..26d1b7881 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -80,6 +80,10 @@ public enum OrcConf {
       "hive.exec.orc.compression.zstd.windowlog", 0,
       "Set the maximum allowed back-reference distance for "
           + "ZStandard codec, expressed as power of 2."),
+  COMPRESSION_ZSTD_STRATEGY("orc.compression.zstd.strategy",
+      "hive.exec.orc.compression.zstd.strategy", 0,
+      "Define the compression strategy to use with ZStandard codec "
+          + "while writing data. The valid range is 0~9."),
   BLOCK_PADDING_TOLERANCE("orc.block.padding.tolerance",
       "hive.exec.orc.block.padding.tolerance", 0.05,
       "Define the tolerance for block padding as a decimal fraction of\n" +
diff --git a/java/core/src/java/org/apache/orc/OrcFile.java b/java/core/src/java/org/apache/orc/OrcFile.java
index 278c0813e..160aaf1f9 100644
--- a/java/core/src/java/org/apache/orc/OrcFile.java
+++ b/java/core/src/java/org/apache/orc/OrcFile.java
@@ -429,6 +429,7 @@ public class OrcFile {
   public static class ZstdCompressOptions {
     private int compressionZstdLevel;
     private int compressionZstdWindowLog;
+    private int compressionZstdStrategy;
 
     public int getCompressionZstdLevel() {
       return compressionZstdLevel;
@@ -445,6 +446,14 @@ public class OrcFile {
     public void setCompressionZstdWindowLog(int compressionZstdWindowLog) {
       this.compressionZstdWindowLog = compressionZstdWindowLog;
     }
+
+    public int getCompressionZstdStrategy() {
+      return compressionZstdStrategy;
+    }
+
+    public void setCompressionZstdStrategy(int compressionZstdStrategy) {
+      this.compressionZstdStrategy = compressionZstdStrategy;
+    }
   }
 
   /**
@@ -520,6 +529,8 @@ public class OrcFile {
               OrcConf.COMPRESSION_ZSTD_LEVEL.getInt(tableProperties, conf));
       zstdCompressOptions.setCompressionZstdWindowLog(
               OrcConf.COMPRESSION_ZSTD_WINDOWLOG.getInt(tableProperties, conf));
+      zstdCompressOptions.setCompressionZstdStrategy(
+          OrcConf.COMPRESSION_ZSTD_STRATEGY.getInt(tableProperties, conf));
 
       paddingTolerance =
           OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(tableProperties, conf);
diff --git a/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java b/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
index 87f777a7e..d6fb296bd 100644
--- a/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
+++ b/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java
@@ -121,6 +121,7 @@ public class PhysicalFsWriter implements PhysicalWriter {
         if (zstdCompressOptions != null) {
           options.setLevel(zstdCompressOptions.getCompressionZstdLevel());
           options.setWindowLog(zstdCompressOptions.getCompressionZstdWindowLog());
+          options.setStrategy(zstdCompressOptions.getCompressionZstdStrategy());
         }
       }
       compress.withCodec(codec, tempOptions);
diff --git a/java/core/src/java/org/apache/orc/impl/ZstdCodec.java b/java/core/src/java/org/apache/orc/impl/ZstdCodec.java
index d352c860f..186e5696f 100644
--- a/java/core/src/java/org/apache/orc/impl/ZstdCodec.java
+++ b/java/core/src/java/org/apache/orc/impl/ZstdCodec.java
@@ -29,12 +29,12 @@ public class ZstdCodec implements CompressionCodec, DirectDecompressionCodec {
   private ZstdOptions zstdOptions = null;
   private ZstdCompressCtx zstdCompressCtx = null;
 
-  public ZstdCodec(int level, int windowLog) {
-    this.zstdOptions = new ZstdOptions(level, windowLog);
+  public ZstdCodec(int level, int windowLog, int strategy) {
+    this.zstdOptions = new ZstdOptions(level, windowLog, strategy);
   }
 
   public ZstdCodec() {
-    this(3, 0);
+    this(3, 0, 0);
   }
 
   public ZstdOptions getZstdOptions() {
@@ -57,15 +57,17 @@ public class ZstdCodec implements CompressionCodec, DirectDecompressionCodec {
   static class ZstdOptions implements Options {
     private int level;
     private int windowLog;
+    private int strategy;
 
-    ZstdOptions(int level, int windowLog) {
+    ZstdOptions(int level, int windowLog, int strategy) {
       this.level = level;
       this.windowLog = windowLog;
+      this.strategy = strategy;
     }
 
     @Override
     public ZstdOptions copy() {
-      return new ZstdOptions(level, windowLog);
+      return new ZstdOptions(level, windowLog, strategy);
     }
 
     @Override
@@ -123,6 +125,13 @@ public class ZstdCodec implements CompressionCodec, DirectDecompressionCodec {
       return this;
     }
 
+    public ZstdOptions setStrategy(int newValue) {
+      // https://facebook.github.io/zstd/zstd_manual.html#Chapter5
+      // Although the value is between 1 and 9 and 0 means `use default`, ZStd can change it.
+      strategy = newValue;
+      return this;
+    }
+
     @Override
     public ZstdOptions setData(DataKind newValue) {
       return this; // We don't support setting DataKind in ZstdCodec.
@@ -136,6 +145,7 @@ public class ZstdCodec implements CompressionCodec, DirectDecompressionCodec {
       ZstdOptions that = (ZstdOptions) o;
 
       if (level != that.level) return false;
+      if (strategy != that.strategy) return false;
       return windowLog == that.windowLog;
     }
 
@@ -143,12 +153,13 @@ public class ZstdCodec implements CompressionCodec, DirectDecompressionCodec {
     public int hashCode() {
       int result = level;
       result = 31 * result + windowLog;
+      result = 31 * result + strategy;
       return result;
     }
   }
 
   private static final ZstdOptions DEFAULT_OPTIONS =
-      new ZstdOptions(3, 0);
+      new ZstdOptions(3, 0, 0);
 
   @Override
   public Options getDefaultOptions() {
@@ -183,6 +194,7 @@ public class ZstdCodec implements CompressionCodec, DirectDecompressionCodec {
     zstdCompressCtx.setLevel(zso.level);
     zstdCompressCtx.setLong(zso.windowLog);
     zstdCompressCtx.setChecksum(false);
+    zstdCompressCtx.setStrategy(zso.strategy);
 
     try {
       byte[] compressed = getBuffer((int) Zstd.compressBound(inBytes));

Reply via email to