This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new dcda37e79 ORC-1711: [C++] Fix recordPosition with unexpected value
dcda37e79 is described below

commit dcda37e79ec4a37dfd8dd53bbf6302f22a3e7761
Author: luffy-zh <[email protected]>
AuthorDate: Fri Oct 11 23:16:13 2024 +0800

    ORC-1711: [C++] Fix recordPosition with unexpected value
    
    ### What changes were proposed in this pull request?
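    Make `getRawInputBufferSize()` in Compression.cc return `bufferSize` instead of
    `rawInputBuffer.size()`, so that the position recorded for the block compression
    stream is correct.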
    
    ### Why are the changes needed?
    They fix the issue discussed in [apache/orc#2037](https://github.com/apache/orc/issues/2037).
    
    ### How was this patch tested?
    The updated unit tests in c++/test/TestWriter.cc cover this patch.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #2038 from luffy-zh/ORC-1711.
    
    Lead-authored-by: luffy-zh <[email protected]>
    Co-authored-by: Gang Wu <[email protected]>
    Signed-off-by: Gang Wu <[email protected]>
---
 c++/src/Compression.cc |  2 +-
 c++/test/TestWriter.cc | 13 +++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/c++/src/Compression.cc b/c++/src/Compression.cc
index 535018dcb..b5ca5a4c9 100644
--- a/c++/src/Compression.cc
+++ b/c++/src/Compression.cc
@@ -954,7 +954,7 @@ namespace orc {
     virtual uint64_t flush() override;
     virtual std::string getName() const override = 0;
     uint64_t getRawInputBufferSize() const override {
-      return rawInputBuffer.size();
+      return bufferSize;
     }
 
     virtual void finishStream() override;
diff --git a/c++/test/TestWriter.cc b/c++/test/TestWriter.cc
index 5bcb0cdff..8bc4032a5 100644
--- a/c++/test/TestWriter.cc
+++ b/c++/test/TestWriter.cc
@@ -416,11 +416,11 @@ namespace orc {
     uint64_t memoryBlockSize = 64;
 
     std::unique_ptr<Writer> writer =
-        createWriter(stripeSize, memoryBlockSize, compressionBlockSize, 
CompressionKind_ZLIB, *type,
-                     pool, &memStream, fileVersion);
+        createWriter(stripeSize, memoryBlockSize, compressionBlockSize, 
CompressionKind_ZSTD, *type,
+                     pool, &memStream, fileVersion, 1024, "GMT", true);
     std::unique_ptr<ColumnVectorBatch> batch = 
writer->createRowBatch(rowCount);
     StructVectorBatch* structBatch = 
dynamic_cast<StructVectorBatch*>(batch.get());
-    LongVectorBatch* byteBatch = 
dynamic_cast<LongVectorBatch*>(structBatch->fields[0]);
+    ByteVectorBatch* byteBatch = 
dynamic_cast<ByteVectorBatch*>(structBatch->fields[0]);
 
     int64_t sum = 0;
     for (uint64_t i = 0; i < rowCount; ++i) {
@@ -440,12 +440,13 @@ namespace orc {
     EXPECT_EQ(rowCount, reader->getNumberOfRows());
 
     batch = rowReader->createRowBatch(rowCount);
+    rowReader->seekToRow(20);
     EXPECT_EQ(true, rowReader->next(*batch));
 
     structBatch = dynamic_cast<StructVectorBatch*>(batch.get());
-    byteBatch = dynamic_cast<LongVectorBatch*>(structBatch->fields[0]);
-    for (uint64_t i = 0; i < rowCount; ++i) {
-      EXPECT_EQ(static_cast<int8_t>(i), 
static_cast<int8_t>(byteBatch->data[i]));
+    auto outByteBatch = dynamic_cast<LongVectorBatch*>(structBatch->fields[0]);
+    for (uint64_t i = 0; i < rowCount - 20; ++i) {
+      EXPECT_EQ(static_cast<int8_t>(i + 20), 
static_cast<int8_t>(outByteBatch->data[i]));
     }
 
     auto col_stats = reader->getColumnStatistics(1);

Reply via email to