This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new f8b15aaa0 ORC-1602: [C++] limit compression block size
f8b15aaa0 is described below

commit f8b15aaa0664925c862705a7b55cb451d651cb1c
Author: ffacs <[email protected]>
AuthorDate: Wed Jan 31 08:52:32 2024 -0800

    ORC-1602: [C++] limit compression block size
    
    ### What changes were proposed in this pull request?
    Limit the compression block size on the C++ side.
    
    ### Why are the changes needed?
    to fix https://github.com/apache/orc/issues/1727
    
    ### How was this patch tested?
    Unit tests passed.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    NO
    
    Closes #1774 from ffacs/ORC-1602.
    
    Authored-by: ffacs <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 c++/include/orc/Writer.hh |  2 ++
 c++/src/Writer.cc         |  3 +++
 c++/test/TestWriter.cc    | 10 ++++++++++
 3 files changed, 15 insertions(+)

diff --git a/c++/include/orc/Writer.hh b/c++/include/orc/Writer.hh
index d1f7b4d18..047ee9ffc 100644
--- a/c++/include/orc/Writer.hh
+++ b/c++/include/orc/Writer.hh
@@ -77,6 +77,8 @@ namespace orc {
 
     /**
      * Set the data compression block size.
+     * Should be less than 1 << 23 bytes (8M), which is limited by the
+     * 3-byte size of the compression block header (1 bit for isOriginal and 23 bits for length)
      */
     WriterOptions& setCompressionBlockSize(uint64_t size);
 
diff --git a/c++/src/Writer.cc b/c++/src/Writer.cc
index f485e78d9..89eb3781c 100644
--- a/c++/src/Writer.cc
+++ b/c++/src/Writer.cc
@@ -114,6 +114,9 @@ namespace orc {
   }
 
   WriterOptions& WriterOptions::setCompressionBlockSize(uint64_t size) {
+    if (size >= (1 << 23)) {
+      throw std::invalid_argument("Compression block size cannot be greater or equal than 8M");
+    }
     privateBits->compressionBlockSize = size;
     return *this;
   }
diff --git a/c++/test/TestWriter.cc b/c++/test/TestWriter.cc
index dcc7f49a0..d160f82ff 100644
--- a/c++/test/TestWriter.cc
+++ b/c++/test/TestWriter.cc
@@ -2191,6 +2191,16 @@ namespace orc {
     }
   }
 
+  TEST_P(WriterTest, testValidateOptions) {
+    WriterOptions options;
+    constexpr uint64_t compressionBlockSizeThreshold = (1 << 23) - 1;
+    EXPECT_NO_THROW(options.setCompressionBlockSize(compressionBlockSizeThreshold));
+    EXPECT_THROW(options.setCompressionBlockSize(compressionBlockSizeThreshold + 1),
+                 std::invalid_argument);
+    EXPECT_THROW(options.setCompressionBlockSize(compressionBlockSizeThreshold + 2),
+                 std::invalid_argument);
+  }
+
   INSTANTIATE_TEST_SUITE_P(OrcTest, WriterTest,
                            Values(FileVersion::v_0_11(), FileVersion::v_0_12(),
                                   FileVersion::UNSTABLE_PRE_2_0()));

Reply via email to