This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-1.9
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.9 by this push:
new b1f894b82 ORC-1602: [C++] limit compression block size
b1f894b82 is described below
commit b1f894b826f661b84ec5a9c78287f00554f5f567
Author: ffacs <[email protected]>
AuthorDate: Wed Jan 31 08:52:32 2024 -0800
ORC-1602: [C++] limit compression block size
### What changes were proposed in this pull request?
Limit the compression block size on the C++ side.
### Why are the changes needed?
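To fix https://github.com/apache/orc/issues/1727: the 3-byte compression block header leaves only 23 bits for the length, so a block size of 8M or more cannot be represented.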
### How was this patch tested?
Unit tests passed.
### Was this patch authored or co-authored using generative AI tooling?
NO
Closes #1774 from ffacs/ORC-1602.
Authored-by: ffacs <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit f8b15aaa0664925c862705a7b55cb451d651cb1c)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
c++/include/orc/Writer.hh | 2 ++
c++/src/Writer.cc | 3 +++
c++/test/TestWriter.cc | 10 ++++++++++
3 files changed, 15 insertions(+)
diff --git a/c++/include/orc/Writer.hh b/c++/include/orc/Writer.hh
index d1f7b4d18..047ee9ffc 100644
--- a/c++/include/orc/Writer.hh
+++ b/c++/include/orc/Writer.hh
@@ -77,6 +77,8 @@ namespace orc {
/**
* Set the data compression block size.
+ * Should be less than 1 << 23 bytes (8M), which is limited by the
+ * 3-byte size of the compression block header (1 bit for isOriginal and 23 bits for length)
*/
WriterOptions& setCompressionBlockSize(uint64_t size);
diff --git a/c++/src/Writer.cc b/c++/src/Writer.cc
index 84c8a502e..bf8a76011 100644
--- a/c++/src/Writer.cc
+++ b/c++/src/Writer.cc
@@ -114,6 +114,9 @@ namespace orc {
}
WriterOptions& WriterOptions::setCompressionBlockSize(uint64_t size) {
+ if (size >= (1 << 23)) {
+ throw std::invalid_argument("Compression block size cannot be greater or equal than 8M");
+ }
privateBits->compressionBlockSize = size;
return *this;
}
diff --git a/c++/test/TestWriter.cc b/c++/test/TestWriter.cc
index dcc7f49a0..d160f82ff 100644
--- a/c++/test/TestWriter.cc
+++ b/c++/test/TestWriter.cc
@@ -2191,6 +2191,16 @@ namespace orc {
}
}
+ TEST_P(WriterTest, testValidateOptions) {
+ WriterOptions options;
+ constexpr uint64_t compressionBlockSizeThreshold = (1 << 23) - 1;
+ EXPECT_NO_THROW(options.setCompressionBlockSize(compressionBlockSizeThreshold));
+ EXPECT_THROW(options.setCompressionBlockSize(compressionBlockSizeThreshold + 1),
+ std::invalid_argument);
+ EXPECT_THROW(options.setCompressionBlockSize(compressionBlockSizeThreshold + 2),
+ std::invalid_argument);
+ }
+
INSTANTIATE_TEST_SUITE_P(OrcTest, WriterTest,
Values(FileVersion::v_0_11(), FileVersion::v_0_12(),
FileVersion::UNSTABLE_PRE_2_0()));