This is an automated email from the ASF dual-hosted git repository.
taiyangli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 0fd495717c fix crc32 failure in bzip2 (#7772)
0fd495717c is described below
commit 0fd495717ce24b6dee1d2e6b6fa1592182daa719
Author: 李扬 <[email protected]>
AuthorDate: Fri Nov 1 17:53:27 2024 +0800
fix crc32 failure in bzip2 (#7772)
---
cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.cpp | 18 ++++++++++++------
cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.h | 4 +++-
2 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.cpp b/cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.cpp
index 779e79416f..ba36baaf4c 100644
--- a/cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.cpp
+++ b/cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.cpp
@@ -192,7 +192,7 @@ SplittableBzip2ReadBuffer::SplittableBzip2ReadBuffer(
changeStateToProcessABlock();
LOG_DEBUG(
getLogger("SplittableBzip2ReadBuffer"),
- "adjusted_start: {} first_block_need_special_process: {} last_block_need_special_process: {}",
+ "adjusted_start:{} first_block_need_special_process:{} last_block_need_special_process:{}",
*adjusted_start,
first_block_need_special_process,
last_block_need_special_process);
@@ -217,8 +217,6 @@ Int32 SplittableBzip2ReadBuffer::read(char * dest, size_t dest_size, size_t offs
result = b;
skipResult =
skipToNextMarker(SplittableBzip2ReadBuffer::BLOCK_DELIMITER,
DELIMITER_BIT_LENGTH);
- // auto * seekable = dynamic_cast<SeekableReadBuffer*>(in.get());
- // std::cout << "skipResult:" << skipResult << " position:" << seekable->getPosition() << " b:" << b << std::endl;
changeStateToProcessABlock();
}
return result;
@@ -413,7 +411,13 @@ bool SplittableBzip2ReadBuffer::skipToNextMarker(Int64 marker, Int32 markerBitLe
void SplittableBzip2ReadBuffer::reportCRCError()
{
- throw Exception(ErrorCodes::LOGICAL_ERROR, "CRC error");
+ auto * seekable = dynamic_cast<SeekableReadBuffer*>(in.get());
+ throw Exception(
+ ErrorCodes::LOGICAL_ERROR,
+ "CRC error in position:{} computedBlockCRC:{} storedBlockCRC:{}",
+ seekable->getPosition(),
+ computedBlockCRC,
+ storedBlockCRC);
}
void SplittableBzip2ReadBuffer::makeMaps()
@@ -440,6 +444,8 @@ void SplittableBzip2ReadBuffer::changeStateToProcessABlock()
void SplittableBzip2ReadBuffer::initBlock()
{
+ auto * seekable = dynamic_cast<SeekableReadBuffer*>(in.get());
+ size_t position = seekable->getPosition();
storedBlockCRC = bsGetInt();
blockRandomised = (bsR(1) == 1);
@@ -914,7 +920,7 @@ void SplittableBzip2ReadBuffer::setupRandPartB()
}
else if (++su_count >= 4)
{
- su_z = static_cast<char>(data->ll8[su_tPos] & 0xff);
+ su_z = data->ll8[su_tPos] & 0xff;
su_tPos = data->tt[su_tPos];
if (su_rNToGo == 0)
{
@@ -965,7 +971,7 @@ void SplittableBzip2ReadBuffer::setupNoRandPartB()
}
else if (++su_count >= 4)
{
- su_z = static_cast<char>(data->ll8[su_tPos] & 0xff);
+ su_z = data->ll8[su_tPos] & 0xff;
su_tPos = data->tt[su_tPos];
su_j2 = 0;
setupNoRandPartC();
diff --git a/cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.h b/cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.h
index 93a7ca64df..375a6c8858 100644
--- a/cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.h
+++ b/cpp-ch/local-engine/IO/SplittableBzip2ReadBuffer.h
@@ -23,6 +23,7 @@
#include <vector>
#include <IO/CompressedReadBufferWrapper.h>
#include <base/StringRef.h>
+#include <iostream>
namespace DB
{
@@ -157,6 +158,7 @@ private:
if (temp < 0)
temp = 256 + temp;
globalCrc = (globalCrc << 8) ^
static_cast<Int32>(crc32Table[temp]);
+ // std::cout << "input:" << inCh << " crc:" << globalCrc << std::endl;
}
void updateCRC(Int32 inCh, Int32 repeat)
{
@@ -237,7 +239,7 @@ private:
Int32 su_rNToGo;
Int32 su_rTPos;
Int32 su_tPos;
- char su_z;
+ UInt16 su_z;
/// SplittableBzip2ReadBuffer will skip bytes before the first block header. adjusted_start records file position after skipping.
/// It is only valid when input stream is seekable and block header could be found in input stream.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]