This is an automated email from the ASF dual-hosted git repository.

taiyangli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 0fd495717c fix crc32 failure in bzip2 (#7772)
0fd495717c is described below

commit 0fd495717ce24b6dee1d2e6b6fa1592182daa719
Author: 李扬 <[email protected]>
AuthorDate: Fri Nov 1 17:53:27 2024 +0800

    fix crc32 failure in bzip2 (#7772)
---
 cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.cpp | 18 ++++++++++++------
 cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.h   |  4 +++-
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.cpp b/cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.cpp
index 779e79416f..ba36baaf4c 100644
--- a/cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.cpp
+++ b/cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.cpp
@@ -192,7 +192,7 @@ SplittableBzip2ReadBuffer::SplittableBzip2ReadBuffer(
     changeStateToProcessABlock();
     LOG_DEBUG(
         getLogger("SplittableBzip2ReadBuffer"),
-        "adjusted_start: {} first_block_need_special_process: {} last_block_need_special_process: {}",
+        "adjusted_start:{} first_block_need_special_process:{} last_block_need_special_process:{}",
         *adjusted_start,
         first_block_need_special_process,
         last_block_need_special_process);
@@ -217,8 +217,6 @@ Int32 SplittableBzip2ReadBuffer::read(char * dest, size_t dest_size, size_t offs
         result = b;
         skipResult = skipToNextMarker(SplittableBzip2ReadBuffer::BLOCK_DELIMITER, DELIMITER_BIT_LENGTH);
 
-        // auto * seekable = dynamic_cast<SeekableReadBuffer*>(in.get());
-        // std::cout << "skipResult:" << skipResult << " position:" << seekable->getPosition() << " b:" << b << std::endl;
         changeStateToProcessABlock();
     }
     return result;
@@ -413,7 +411,13 @@ bool SplittableBzip2ReadBuffer::skipToNextMarker(Int64 marker, Int32 markerBitLe
 
 void SplittableBzip2ReadBuffer::reportCRCError()
 {
-    throw Exception(ErrorCodes::LOGICAL_ERROR, "CRC error");
+    auto * seekable = dynamic_cast<SeekableReadBuffer*>(in.get());
+    throw Exception(
+        ErrorCodes::LOGICAL_ERROR,
+        "CRC error in position:{} computedBlockCRC:{} storedBlockCRC:{}",
+        seekable->getPosition(),
+        computedBlockCRC,
+        storedBlockCRC);
 }
 
 void SplittableBzip2ReadBuffer::makeMaps()
@@ -440,6 +444,8 @@ void SplittableBzip2ReadBuffer::changeStateToProcessABlock()
 
 void SplittableBzip2ReadBuffer::initBlock()
 {
+    auto * seekable = dynamic_cast<SeekableReadBuffer*>(in.get());
+    size_t position = seekable->getPosition();
     storedBlockCRC = bsGetInt();
     blockRandomised = (bsR(1) == 1);
 
@@ -914,7 +920,7 @@ void SplittableBzip2ReadBuffer::setupRandPartB()
     }
     else if (++su_count >= 4)
     {
-        su_z = static_cast<char>(data->ll8[su_tPos] & 0xff);
+        su_z = data->ll8[su_tPos] & 0xff;
         su_tPos = data->tt[su_tPos];
         if (su_rNToGo == 0)
         {
@@ -965,7 +971,7 @@ void SplittableBzip2ReadBuffer::setupNoRandPartB()
     }
     else if (++su_count >= 4)
     {
-        su_z = static_cast<char>(data->ll8[su_tPos] & 0xff);
+        su_z = data->ll8[su_tPos] & 0xff;
         su_tPos = data->tt[su_tPos];
         su_j2 = 0;
         setupNoRandPartC();
diff --git a/cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.h b/cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.h
index 93a7ca64df..375a6c8858 100644
--- a/cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.h
+++ b/cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.h
@@ -23,6 +23,7 @@
 #include <vector>
 #include <IO/CompressedReadBufferWrapper.h>
 #include <base/StringRef.h>
+#include <iostream>
 
 namespace DB
 {
@@ -157,6 +158,7 @@ private:
             if (temp < 0)
                 temp = 256 + temp;
             globalCrc = (globalCrc << 8) ^ static_cast<Int32>(crc32Table[temp]);
+            // std::cout << "input:" << inCh << " crc:" << globalCrc << std::endl;
         }
         void updateCRC(Int32 inCh, Int32 repeat)
         {
@@ -237,7 +239,7 @@ private:
     Int32 su_rNToGo;
     Int32 su_rTPos;
     Int32 su_tPos;
-    char su_z;
+    UInt16 su_z;
 
     /// SplittableBzip2ReadBuffer will skip bytes before the first block header. adjusted_start records file position after skipping.
     /// It is only valid when input stream is seekable and block header could be found in input stream.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to