This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new dcda37e79 ORC-1711: [C++] Fix recordPosition with unexpected value
dcda37e79 is described below
commit dcda37e79ec4a37dfd8dd53bbf6302f22a3e7761
Author: luffy-zh <[email protected]>
AuthorDate: Fri Oct 11 23:16:13 2024 +0800
ORC-1711: [C++] Fix recordPosition with unexpected value
### What changes were proposed in this pull request?
Make sure the recorded position in blockCompressStream is correct by returning `bufferSize` instead of `rawInputBuffer.size()` from `getRawInputBufferSize()`.
### Why are the changes needed?
Fix the issue discussed [here](https://github.com/apache/orc/issues/2037).
### How was this patch tested?
Unit tests in c++/test/TestWriter.cc cover this patch.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #2038 from luffy-zh/ORC-1711.
Lead-authored-by: luffy-zh <[email protected]>
Co-authored-by: Gang Wu <[email protected]>
Signed-off-by: Gang Wu <[email protected]>
---
c++/src/Compression.cc | 2 +-
c++/test/TestWriter.cc | 13 +++++++------
2 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/c++/src/Compression.cc b/c++/src/Compression.cc
index 535018dcb..b5ca5a4c9 100644
--- a/c++/src/Compression.cc
+++ b/c++/src/Compression.cc
@@ -954,7 +954,7 @@ namespace orc {
virtual uint64_t flush() override;
virtual std::string getName() const override = 0;
uint64_t getRawInputBufferSize() const override {
- return rawInputBuffer.size();
+ return bufferSize;
}
virtual void finishStream() override;
diff --git a/c++/test/TestWriter.cc b/c++/test/TestWriter.cc
index 5bcb0cdff..8bc4032a5 100644
--- a/c++/test/TestWriter.cc
+++ b/c++/test/TestWriter.cc
@@ -416,11 +416,11 @@ namespace orc {
uint64_t memoryBlockSize = 64;
std::unique_ptr<Writer> writer =
- createWriter(stripeSize, memoryBlockSize, compressionBlockSize,
CompressionKind_ZLIB, *type,
- pool, &memStream, fileVersion);
+ createWriter(stripeSize, memoryBlockSize, compressionBlockSize,
CompressionKind_ZSTD, *type,
+ pool, &memStream, fileVersion, 1024, "GMT", true);
std::unique_ptr<ColumnVectorBatch> batch =
writer->createRowBatch(rowCount);
StructVectorBatch* structBatch =
dynamic_cast<StructVectorBatch*>(batch.get());
- LongVectorBatch* byteBatch =
dynamic_cast<LongVectorBatch*>(structBatch->fields[0]);
+ ByteVectorBatch* byteBatch =
dynamic_cast<ByteVectorBatch*>(structBatch->fields[0]);
int64_t sum = 0;
for (uint64_t i = 0; i < rowCount; ++i) {
@@ -440,12 +440,13 @@ namespace orc {
EXPECT_EQ(rowCount, reader->getNumberOfRows());
batch = rowReader->createRowBatch(rowCount);
+ rowReader->seekToRow(20);
EXPECT_EQ(true, rowReader->next(*batch));
structBatch = dynamic_cast<StructVectorBatch*>(batch.get());
- byteBatch = dynamic_cast<LongVectorBatch*>(structBatch->fields[0]);
- for (uint64_t i = 0; i < rowCount; ++i) {
- EXPECT_EQ(static_cast<int8_t>(i),
static_cast<int8_t>(byteBatch->data[i]));
+ auto outByteBatch = dynamic_cast<LongVectorBatch*>(structBatch->fields[0]);
+ for (uint64_t i = 0; i < rowCount - 20; ++i) {
+ EXPECT_EQ(static_cast<int8_t>(i + 20),
static_cast<int8_t>(outByteBatch->data[i]));
}
auto col_stats = reader->getColumnStatistics(1);