This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new e0fee1edb ORC-1834: [C++] Fix undefined behavior
e0fee1edb is described below

commit e0fee1edb34abcd6849f5ae57800580b3a8c8674
Author: Yuriy Chernyshov <[email protected]>
AuthorDate: Thu Jan 16 15:55:45 2025 +0800

    ORC-1834: [C++] Fix undefined behavior
    
    ### What changes were proposed in this pull request?
    Unaligned reads are UB in C++, and calling memcpy with a zero byte count is also UB when either pointer is invalid (e.g. null).
    
    ### How was this patch tested?
    An internal UBSan report was used to detect and fix this bug.
    
    Closes #2112 from georgthegreat/patch-3.
    
    Authored-by: Yuriy Chernyshov <[email protected]>
    Signed-off-by: Gang Wu <[email protected]>
    (cherry picked from commit ab084b507a70d4da16ef1dc7cfd0fec186083761)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 c++/src/ColumnReader.cc | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index e70f916ff..af434c37c 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -395,7 +395,7 @@ namespace orc {
       int64_t bits = 0;
       if (bufferEnd_ - bufferPointer_ >= 8) {
         if (isLittleEndian) {
-          bits = *(reinterpret_cast<const int64_t*>(bufferPointer_));
+          memcpy(&bits, bufferPointer_, sizeof(bits));
         } else {
           bits = static_cast<int64_t>(static_cast<unsigned char>(bufferPointer_[0]));
           bits |= static_cast<int64_t>(static_cast<unsigned char>(bufferPointer_[1])) << 8;
@@ -509,8 +509,10 @@ namespace orc {
           bufferNum = std::min(numValues,
                                static_cast<size_t>(bufferEnd_ - bufferPointer_) / bytesPerValue_);
           uint64_t bufferBytes = bufferNum * bytesPerValue_;
-          memcpy(outArray, bufferPointer_, bufferBytes);
-          bufferPointer_ += bufferBytes;
+          if (bufferBytes > 0) {
+            memcpy(outArray, bufferPointer_, bufferBytes);
+            bufferPointer_ += bufferBytes;
+          }
         }
         for (size_t i = bufferNum; i < numValues; ++i) {
           outArray[i] = readDouble<ValueType>();

Reply via email to