This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-1.8
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.8 by this push:
new 3c20534a0 ORC-1602: [C++] limit compression block size
3c20534a0 is described below
commit 3c20534a0b328c2e31cec64cfd58385e0a6106be
Author: ffacs <[email protected]>
AuthorDate: Sat Feb 3 01:15:03 2024 -0800
ORC-1602: [C++] limit compression block size
### What changes were proposed in this pull request?
Limit the compression block size on the C++ side.
### Why are the changes needed?
to fix https://github.com/apache/orc/issues/1727
### How was this patch tested?
Unit tests (UT) passed.
### Was this patch authored or co-authored using generative AI tooling?
NO
Closes #1779 from ffacs/branch-1.8.
Authored-by: ffacs <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
c++/include/orc/Writer.hh | 2 ++
c++/src/Writer.cc | 3 +++
c++/test/TestWriter.cc | 12 +++++++++++-
3 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/c++/include/orc/Writer.hh b/c++/include/orc/Writer.hh
index 78b0b97d2..a4589595a 100644
--- a/c++/include/orc/Writer.hh
+++ b/c++/include/orc/Writer.hh
@@ -73,6 +73,8 @@ namespace orc {
/**
* Set the data compression block size.
+ * Should less then 1 << 23 bytes (8M) which is limited by the
+ * 3 bytes size of compression block header (1 bit for isOriginal and 23 bits for length)
*/
WriterOptions& setCompressionBlockSize(uint64_t size);
diff --git a/c++/src/Writer.cc b/c++/src/Writer.cc
index 05adb4744..39f9e0f11 100644
--- a/c++/src/Writer.cc
+++ b/c++/src/Writer.cc
@@ -110,6 +110,9 @@ namespace orc {
}
WriterOptions& WriterOptions::setCompressionBlockSize(uint64_t size) {
+ if (size >= (1 << 23)) {
+ throw std::invalid_argument("Compression block size cannot be greater or equal than 8M");
+ }
privateBits->compressionBlockSize = size;
return *this;
}
diff --git a/c++/test/TestWriter.cc b/c++/test/TestWriter.cc
index 3554c9047..42336278a 100644
--- a/c++/test/TestWriter.cc
+++ b/c++/test/TestWriter.cc
@@ -2122,5 +2122,15 @@ namespace orc {
}
}
+ TEST_P(WriterTest, testValidateOptions) {
+ WriterOptions options;
+ constexpr uint64_t compressionBlockSizeThreshold = (1 << 23) - 1;
+ EXPECT_NO_THROW(options.setCompressionBlockSize(compressionBlockSizeThreshold));
+ EXPECT_THROW(options.setCompressionBlockSize(compressionBlockSizeThreshold + 1),
+ std::invalid_argument);
+ EXPECT_THROW(options.setCompressionBlockSize(compressionBlockSizeThreshold + 2),
+ std::invalid_argument);
+ }
+
INSTANTIATE_TEST_CASE_P(OrcTest, WriterTest, Values(FileVersion::v_0_11(),
FileVersion::v_0_12(), FileVersion::UNSTABLE_PRE_2_0()));
-}
+} // namespace orc