This is an automated email from the ASF dual-hosted git repository.

jiangtian pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git


The following commit(s) were added to refs/heads/develop by this push:
     new fa3e01f1 fix(cpp): aligned VECTOR row-offset skip only when time/value counts match (#778)
fa3e01f1 is described below

commit fa3e01f1895dd2d498a63c552f50bf27eb85b797
Author: Hongzhi Gao <[email protected]>
AuthorDate: Sun Apr 12 15:57:11 2026 +0800

    fix(cpp): aligned VECTOR row-offset skip only when time/value counts match (#778)
    
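    Whole-chunk and whole-page skips by statistic count previously consulted
    only one side of an aligned series (the value ChunkMeta for chunks; the
    time page header, falling back to the value page header, for pages), which
    could desynchronize row_offset from decoded rows when ChunkMeta or page
    header counts differed.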
    
    Require both time and value statistics to be present, positive, and equal
    before applying count-based skip; otherwise decode and rely on page/row
    handling.
    
    Made-with: Cursor
---
 cpp/src/reader/aligned_chunk_reader.cc        | 18 ++++++++++++------
 cpp/src/reader/tsfile_series_scan_iterator.cc | 27 ++++++++++++++++++++++++++-
 cpp/src/reader/tsfile_series_scan_iterator.h  |  8 ++++++++
 3 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/cpp/src/reader/aligned_chunk_reader.cc b/cpp/src/reader/aligned_chunk_reader.cc
index 955715d4..d79bc781 100644
--- a/cpp/src/reader/aligned_chunk_reader.cc
+++ b/cpp/src/reader/aligned_chunk_reader.cc
@@ -740,15 +740,21 @@ bool AlignedChunkReader::should_skip_page_by_offset(int& row_offset) {
     if (row_offset <= 0) {
         return false;
     }
-    // Use time page statistic for count.
-    Statistic* stat = cur_time_page_header_.statistic_;
-    if (stat == nullptr) {
-        stat = cur_value_page_header_.statistic_;
+    // Aligned TV pages: only skip a whole page by count when both page headers
+    // expose the same positive row count. Using a single side (or min) when
+    // the other is missing or unequal can desynchronize row_offset from
+    // decoded row order vs. the paired time/value stream.
+    Statistic* ts = cur_time_page_header_.statistic_;
+    Statistic* vs = cur_value_page_header_.statistic_;
+    if (ts == nullptr || vs == nullptr) {
+        return false;
     }
-    if (stat == nullptr || stat->count_ == 0) {
+    int32_t tc = ts->count_;
+    int32_t vc = vs->count_;
+    if (tc <= 0 || vc <= 0 || tc != vc) {
         return false;
     }
-    int32_t count = stat->count_;
+    int32_t count = tc;
     if (row_offset >= count) {
         row_offset -= count;
         return true;
diff --git a/cpp/src/reader/tsfile_series_scan_iterator.cc b/cpp/src/reader/tsfile_series_scan_iterator.cc
index c363d0a4..5add1e9a 100644
--- a/cpp/src/reader/tsfile_series_scan_iterator.cc
+++ b/cpp/src/reader/tsfile_series_scan_iterator.cc
@@ -60,6 +60,30 @@ bool TsFileSeriesScanIterator::should_skip_chunk_by_offset(ChunkMeta* cm) {
     return false;
 }
 
+bool TsFileSeriesScanIterator::should_skip_aligned_chunk_by_offset(
+    ChunkMeta* time_cm, ChunkMeta* value_cm) {
+    if (row_offset_ <= 0) {
+        return false;
+    }
+    if (time_cm->statistic_ == nullptr || value_cm->statistic_ == nullptr) {
+        return false;
+    }
+    int32_t tc = time_cm->statistic_->count_;
+    int32_t vc = value_cm->statistic_->count_;
+    if (tc <= 0 || vc <= 0) {
+        return false;
+    }
+    if (tc != vc) {
+        return false;
+    }
+    int32_t count = tc;
+    if (row_offset_ >= count) {
+        row_offset_ -= count;
+        return true;
+    }
+    return false;
+}
+
 int TsFileSeriesScanIterator::get_next(TsBlock*& ret_tsblock, bool alloc,
                                        Filter* oneshoot_filter,
                                        int64_t min_time_hint) {
@@ -106,7 +130,8 @@ int TsFileSeriesScanIterator::get_next(TsBlock*& ret_tsblock, bool alloc,
                                                       min_time_hint)) {
                             continue;
                         }
-                        if (should_skip_chunk_by_offset(value_cm)) {
+                        if (should_skip_aligned_chunk_by_offset(time_cm,
+                                                                value_cm)) {
                             continue;
                         }
                         chunk_reader_->reset();
diff --git a/cpp/src/reader/tsfile_series_scan_iterator.h b/cpp/src/reader/tsfile_series_scan_iterator.h
index 06b35ba1..9e790a3d 100644
--- a/cpp/src/reader/tsfile_series_scan_iterator.h
+++ b/cpp/src/reader/tsfile_series_scan_iterator.h
@@ -119,6 +119,14 @@ class TsFileSeriesScanIterator {
     }
     bool should_skip_chunk_by_time(ChunkMeta* cm, int64_t min_time_hint);
     bool should_skip_chunk_by_offset(ChunkMeta* cm);
+    /**
+     * Aligned (VECTOR): whole-chunk skip by row count is only safe when the
+     * time ChunkMeta and value ChunkMeta agree on statistic count (>0). If
+     * either side lacks count or counts differ, skip is disabled for this
+     * chunk; pages are loaded and page/row-level offset handling applies.
+     */
+    bool should_skip_aligned_chunk_by_offset(ChunkMeta* time_cm,
+                                             ChunkMeta* value_cm);
     common::TsBlock* alloc_tsblock();
 
    private:

Reply via email to