This is an automated email from the ASF dual-hosted git repository. william pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push: new 725fbc513 ORC-1961: Support `orc.compression.zstd.strategy` 725fbc513 is described below commit 725fbc5133601a5433ec3901b6e4682d14244009 Author: Dongjoon Hyun <dongj...@apache.org> AuthorDate: Wed Jul 23 23:42:12 2025 -0700 ORC-1961: Support `orc.compression.zstd.strategy` ### What changes were proposed in this pull request? This PR aims to support `orc.compression.zstd.strategy`. ### Why are the changes needed? To allow a user to choose a proper strategy based on their data. https://facebook.github.io/zstd/zstd_manual.html#Chapter5 ``` typedef enum { ZSTD_fast=1, ZSTD_dfast=2, ZSTD_greedy=3, ZSTD_lazy=4, ZSTD_lazy2=5, ZSTD_btlazy2=6, ZSTD_btopt=7, ZSTD_btultra=8, ZSTD_btultra2=9 /* note : new strategies _might_ be added in the future. Only the order (from fast to strong) is guaranteed */ } ZSTD_strategy; ``` ### How was this patch tested? Pass the CIs. ``` $ cd java $ mvn package -DskipTests -Pbenchmark $ cd bench $ time java -Dorc.compression.zstd.strategy=1 -jar core/target/orc-benchmarks-core-*-uber.jar generate data -d sales -c zstd -f orc ... 54.51s user 1.28s system 103% cpu 53.984 total $ time java -Dorc.compression.zstd.strategy=9 -jar core/target/orc-benchmarks-core-*-uber.jar generate data -d sales -c zstd -f orc ... 148.21s user 1.75s system 101% cpu 2:28.13 total ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #2338 from dongjoon-hyun/ORC-1961. Authored-by: Dongjoon Hyun <dongj...@apache.org> Signed-off-by: William Hyun <will...@apache.org> --- java/core/src/java/org/apache/orc/OrcConf.java | 4 ++++ java/core/src/java/org/apache/orc/OrcFile.java | 11 ++++++++++ .../java/org/apache/orc/impl/PhysicalFsWriter.java | 1 + .../src/java/org/apache/orc/impl/ZstdCodec.java | 24 ++++++++++++++++------ 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java index 6516517ba..26d1b7881 100644 --- a/java/core/src/java/org/apache/orc/OrcConf.java +++ b/java/core/src/java/org/apache/orc/OrcConf.java @@ -80,6 +80,10 @@ public enum OrcConf { "hive.exec.orc.compression.zstd.windowlog", 0, "Set the maximum allowed back-reference distance for " + "ZStandard codec, expressed as power of 2."), + COMPRESSION_ZSTD_STRATEGY("orc.compression.zstd.strategy", + "hive.exec.orc.compression.zstd.strategy", 0, + "Define the compression strategy to use with ZStandard codec " + + "while writing data. The valid range is 0~9."), BLOCK_PADDING_TOLERANCE("orc.block.padding.tolerance", "hive.exec.orc.block.padding.tolerance", 0.05, "Define the tolerance for block padding as a decimal fraction of\n" + diff --git a/java/core/src/java/org/apache/orc/OrcFile.java b/java/core/src/java/org/apache/orc/OrcFile.java index 278c0813e..160aaf1f9 100644 --- a/java/core/src/java/org/apache/orc/OrcFile.java +++ b/java/core/src/java/org/apache/orc/OrcFile.java @@ -429,6 +429,7 @@ public class OrcFile { public static class ZstdCompressOptions { private int compressionZstdLevel; private int compressionZstdWindowLog; + private int compressionZstdStrategy; public int getCompressionZstdLevel() { return compressionZstdLevel; @@ -445,6 +446,14 @@ public class OrcFile { public void setCompressionZstdWindowLog(int compressionZstdWindowLog) { this.compressionZstdWindowLog = compressionZstdWindowLog; } + + public int getCompressionZstdStrategy() { + return compressionZstdStrategy; + } + + public void setCompressionZstdStrategy(int compressionZstdStrategy) { + this.compressionZstdStrategy = compressionZstdStrategy; + } } /** @@ -520,6 +529,8 @@ public class OrcFile { OrcConf.COMPRESSION_ZSTD_LEVEL.getInt(tableProperties, conf)); zstdCompressOptions.setCompressionZstdWindowLog( OrcConf.COMPRESSION_ZSTD_WINDOWLOG.getInt(tableProperties, conf)); + zstdCompressOptions.setCompressionZstdStrategy( + OrcConf.COMPRESSION_ZSTD_STRATEGY.getInt(tableProperties, conf)); paddingTolerance = OrcConf.BLOCK_PADDING_TOLERANCE.getDouble(tableProperties, conf); diff --git a/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java b/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java index 87f777a7e..d6fb296bd 100644 --- a/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java +++ b/java/core/src/java/org/apache/orc/impl/PhysicalFsWriter.java @@ -121,6 +121,7 @@ public class PhysicalFsWriter implements PhysicalWriter { if (zstdCompressOptions != null) { options.setLevel(zstdCompressOptions.getCompressionZstdLevel()); options.setWindowLog(zstdCompressOptions.getCompressionZstdWindowLog()); + options.setStrategy(zstdCompressOptions.getCompressionZstdStrategy()); } } compress.withCodec(codec, tempOptions); diff --git a/java/core/src/java/org/apache/orc/impl/ZstdCodec.java b/java/core/src/java/org/apache/orc/impl/ZstdCodec.java index d352c860f..186e5696f 100644 --- a/java/core/src/java/org/apache/orc/impl/ZstdCodec.java +++ b/java/core/src/java/org/apache/orc/impl/ZstdCodec.java @@ -29,12 +29,12 @@ public class ZstdCodec implements CompressionCodec, DirectDecompressionCodec { private ZstdOptions zstdOptions = null; private ZstdCompressCtx zstdCompressCtx = null; - public ZstdCodec(int level, int windowLog) { - this.zstdOptions = new ZstdOptions(level, windowLog); + public ZstdCodec(int level, int windowLog, int strategy) { + this.zstdOptions = new ZstdOptions(level, windowLog, strategy); } public ZstdCodec() { - this(3, 0); + this(3, 0, 0); } public ZstdOptions getZstdOptions() { @@ -57,15 +57,17 @@ public class ZstdCodec implements CompressionCodec, DirectDecompressionCodec { static class ZstdOptions implements Options { private int level; private int windowLog; + private int strategy; - ZstdOptions(int level, int windowLog) { + ZstdOptions(int level, int windowLog, int strategy) { this.level = level; this.windowLog = windowLog; + this.strategy = strategy; } @Override public ZstdOptions copy() { - return new ZstdOptions(level, windowLog); + return new ZstdOptions(level, windowLog, strategy); } @Override @@ -123,6 +125,13 @@ public class ZstdCodec implements CompressionCodec, DirectDecompressionCodec { return this; } + public ZstdOptions setStrategy(int newValue) { + // https://facebook.github.io/zstd/zstd_manual.html#Chapter5 + // Although the value is between 1 and 9 and 0 means `use default`, ZStd can change it. + strategy = newValue; + return this; + } + @Override public ZstdOptions setData(DataKind newValue) { return this; // We don't support setting DataKind in ZstdCodec. @@ -136,6 +145,7 @@ public class ZstdCodec implements CompressionCodec, DirectDecompressionCodec { ZstdOptions that = (ZstdOptions) o; if (level != that.level) return false; + if (strategy != that.strategy) return false; return windowLog == that.windowLog; } @@ -143,12 +153,13 @@ public class ZstdCodec implements CompressionCodec, DirectDecompressionCodec { public int hashCode() { int result = level; result = 31 * result + windowLog; + result = 31 * result + strategy; return result; } } private static final ZstdOptions DEFAULT_OPTIONS = - new ZstdOptions(3, 0); + new ZstdOptions(3, 0, 0); @Override public Options getDefaultOptions() { @@ -183,6 +194,7 @@ public class ZstdCodec implements CompressionCodec, DirectDecompressionCodec { zstdCompressCtx.setLevel(zso.level); zstdCompressCtx.setLong(zso.windowLog); zstdCompressCtx.setChecksum(false); + zstdCompressCtx.setStrategy(zso.strategy); try { byte[] compressed = getBuffer((int) Zstd.compressBound(inBytes));