This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.8
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.8 by this push:
     new 3c20534a0 ORC-1602: [C++] limit compression block size
3c20534a0 is described below

commit 3c20534a0b328c2e31cec64cfd58385e0a6106be
Author: ffacs <[email protected]>
AuthorDate: Sat Feb 3 01:15:03 2024 -0800

    ORC-1602: [C++] limit compression block size
    
    ### What changes were proposed in this pull request?
    limit compression block size on c++ side.
    
    ### Why are the changes needed?
    to fix https://github.com/apache/orc/issues/1727
    
    ### How was this patch tested?
    UT passed
    
    ### Was this patch authored or co-authored using generative AI tooling?
    NO
    
    Closes #1779 from ffacs/branch-1.8.
    
    Authored-by: ffacs <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 c++/include/orc/Writer.hh |  2 ++
 c++/src/Writer.cc         |  3 +++
 c++/test/TestWriter.cc    | 12 +++++++++++-
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/c++/include/orc/Writer.hh b/c++/include/orc/Writer.hh
index 78b0b97d2..a4589595a 100644
--- a/c++/include/orc/Writer.hh
+++ b/c++/include/orc/Writer.hh
@@ -73,6 +73,8 @@ namespace orc {
 
     /**
      * Set the data compression block size.
+     * Should be less than 1 << 23 bytes (8M), which is limited by the
+     * 3 bytes size of compression block header (1 bit for isOriginal and 23 bits for length)
      */
     WriterOptions& setCompressionBlockSize(uint64_t size);
 
diff --git a/c++/src/Writer.cc b/c++/src/Writer.cc
index 05adb4744..39f9e0f11 100644
--- a/c++/src/Writer.cc
+++ b/c++/src/Writer.cc
@@ -110,6 +110,9 @@ namespace orc {
   }
 
   WriterOptions& WriterOptions::setCompressionBlockSize(uint64_t size) {
+    if (size >= (1 << 23)) {
+      throw std::invalid_argument("Compression block size cannot be greater or equal than 8M");
+    }
     privateBits->compressionBlockSize = size;
     return *this;
   }
diff --git a/c++/test/TestWriter.cc b/c++/test/TestWriter.cc
index 3554c9047..42336278a 100644
--- a/c++/test/TestWriter.cc
+++ b/c++/test/TestWriter.cc
@@ -2122,5 +2122,15 @@ namespace orc {
     }
   }
 
+  TEST_P(WriterTest, testValidateOptions) {
+    WriterOptions options;
+    constexpr uint64_t compressionBlockSizeThreshold = (1 << 23) - 1;
+    EXPECT_NO_THROW(options.setCompressionBlockSize(compressionBlockSizeThreshold));
+    EXPECT_THROW(options.setCompressionBlockSize(compressionBlockSizeThreshold + 1),
+                 std::invalid_argument);
+    EXPECT_THROW(options.setCompressionBlockSize(compressionBlockSizeThreshold + 2),
+                 std::invalid_argument);
+  }
+
   INSTANTIATE_TEST_CASE_P(OrcTest, WriterTest, Values(FileVersion::v_0_11(), FileVersion::v_0_12(), FileVersion::UNSTABLE_PRE_2_0()));
-}
+}  // namespace orc

Reply via email to