This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit b19148f4604a5bc8323db64d703e6031b1a4ccc9
Author: Ritik Raj <[email protected]>
AuthorDate: Wed Jul 9 09:51:59 2025 +0530

    [ASTERIXDB-3601][STO] Fixing Merge failure
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    While bulkloading during merge, we calculate the present
    columns in each of the leaves. But there can be a case where
    a cursor gets closed because all the tuples have been read.
    Closing a range cursor releases the page, hence it can be reused.
    
    While bulkloading, even after the rangeCursor was closed, the
    leaf was being asked for the present set of columns. Since the
    page had been reused, it contained a different buffer, which, when
    read, gave wrong column details.
    
    Hence, fix this by calculating the info when reset happens
    with a new leaf, which always comes before closing the cursor.
    
    Ext-ref: MB-67570
    Change-Id: I87b3a084d01986dd5c2abd9452a2ad5619fbab15
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20038
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Peeyush Gupta <[email protected]>
---
 .../zero/readers/DefaultColumnPageZeroReader.java  |  5 ++--
 .../zero/readers/SparseColumnPageZeroReader.java   | 17 +++++++++++---
 .../DefaultColumnMultiPageZeroReader.java          |  5 +++-
 .../multipage/SparseColumnMultiPageZeroReader.java | 27 +++++++++++++++-------
 .../am/lsm/btree/column/cloud/ColumnRanges.java    | 14 ++++++-----
 .../buffercache/read/CloudColumnReadContext.java   |  2 +-
 .../btree/column/cloud/sweep/ColumnSweeper.java    |  1 +
 7 files changed, 50 insertions(+), 21 deletions(-)

diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/DefaultColumnPageZeroReader.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/DefaultColumnPageZeroReader.java
index d756a6cb5c..a8d159f0e0 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/DefaultColumnPageZeroReader.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/DefaultColumnPageZeroReader.java
@@ -150,8 +150,9 @@ public class DefaultColumnPageZeroReader implements 
IColumnPageZeroReader {
 
     @Override
     public void getAllColumns(BitSet presentColumns) {
-        int numberOfColumns = numberOfPresentColumns;
-        presentColumns.set(0, numberOfColumns);
+        //Don't ask for pageZeroBuf.getInt(NUMBER_OF_COLUMNS_OFFSET) here, as 
the cursor might have been closed.
+        //and the cached page might have been recycled.
+        presentColumns.set(0, numberOfPresentColumns);
     }
 
     @Override
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/SparseColumnPageZeroReader.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/SparseColumnPageZeroReader.java
index 5955d5e750..3fd7a117dc 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/SparseColumnPageZeroReader.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/SparseColumnPageZeroReader.java
@@ -29,21 +29,26 @@ import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
 
 public class SparseColumnPageZeroReader extends DefaultColumnPageZeroReader {
     private final Int2IntOpenHashMap columnIndexToRelativeColumnIndex;
+    private final BitSet presentColumnsIndices;
 
     public SparseColumnPageZeroReader() {
         columnIndexToRelativeColumnIndex = new Int2IntOpenHashMap();
+        presentColumnsIndices = new BitSet();
         columnIndexToRelativeColumnIndex.defaultReturnValue(-1);
     }
 
     @Override
     public void reset(ByteBuffer pageZeroBuf, int headerSize) {
         super.reset(pageZeroBuf, headerSize);
+        setPresentColumnsIndices();
         columnIndexToRelativeColumnIndex.clear();
+
     }
 
     @Override
     public void reset(ByteBuffer pageZeroBuf, int numberOfPresentColumns, int 
headerSize) {
         super.reset(pageZeroBuf, numberOfPresentColumns, headerSize);
+        setPresentColumnsIndices();
         columnIndexToRelativeColumnIndex.clear();
     }
 
@@ -126,8 +131,8 @@ public class SparseColumnPageZeroReader extends 
DefaultColumnPageZeroReader {
         return relativeColumnIndex != -1;
     }
 
-    @Override
-    public void getAllColumns(BitSet presentColumns) {
+    private void setPresentColumnsIndices() {
+        presentColumnsIndices.clear();
         if (numberOfPresentColumns == 0) {
             return;
         }
@@ -137,11 +142,17 @@ public class SparseColumnPageZeroReader extends 
DefaultColumnPageZeroReader {
 
         while (columnIndex < limit) {
             int column = pageZeroBuf.getInt(columnIndex);
-            presentColumns.set(column);
+            presentColumnsIndices.set(column);
             columnIndex += SparseColumnPageZeroWriter.COLUMN_OFFSET_SIZE;
         }
     }
 
+    @Override
+    public void getAllColumns(BitSet presentColumns) {
+        //Iterate through the present columns indices and set them in the 
BitSet
+        presentColumns.or(presentColumnsIndices);
+    }
+
     @Override
     public int populateOffsetColumnIndexPairs(long[] offsetColumnIndexPairs) {
         int columnIndex = getColumnIndex(0);
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/DefaultColumnMultiPageZeroReader.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/DefaultColumnMultiPageZeroReader.java
index d4ffbb42b5..8eaaed2b1a 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/DefaultColumnMultiPageZeroReader.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/DefaultColumnMultiPageZeroReader.java
@@ -53,6 +53,7 @@ public class DefaultColumnMultiPageZeroReader extends 
AbstractColumnMultiPageZer
     private int zerothSegmentMaxColumns;
     private int numberOfPageZeroSegments; // includes the zeroth segment
     private ByteBuffer pageZeroBuf;
+    private int numberOfColumns;
 
     private final VoidPointable offsetPointable;
 
@@ -77,6 +78,7 @@ public class DefaultColumnMultiPageZeroReader extends 
AbstractColumnMultiPageZer
         zerothSegmentReader.reset(pageZeroBuf, 
Math.min(zerothSegmentMaxColumns, getNumberOfPresentColumns()),
                 headerSize);
         numberOfPageZeroSegments = 
pageZeroBuf.getInt(NUMBER_OF_PAGE_ZERO_SEGMENTS_OFFSET);
+        numberOfColumns = pageZeroBuf.getInt(NUMBER_OF_COLUMNS_OFFSET);
     }
 
     @Override
@@ -220,7 +222,8 @@ public class DefaultColumnMultiPageZeroReader extends 
AbstractColumnMultiPageZer
 
     @Override
     public void getAllColumns(BitSet presentColumns) {
-        int numberOfColumns = getNumberOfPresentColumns();
+        //Don't ask for pageZeroBuf.getInt(NUMBER_OF_COLUMNS_OFFSET) here, as 
the cursor might have been closed.
+        //and the cached page might have been recycled.
         presentColumns.set(0, numberOfColumns);
     }
 
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/SparseColumnMultiPageZeroReader.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/SparseColumnMultiPageZeroReader.java
index 035db5d6c3..379d81c9d0 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/SparseColumnMultiPageZeroReader.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/SparseColumnMultiPageZeroReader.java
@@ -51,6 +51,8 @@ public class SparseColumnMultiPageZeroReader extends 
AbstractColumnMultiPageZero
     private final int maxNumberOfColumnsInAPage;
     private final BitSet pageZeroSegmentsPages;
     private final Int2IntOpenHashMap columnIndexToRelativeColumnIndex;
+    private final VoidPointable offsetPointable;
+    private final BitSet presentColumnsIndices;
 
     private int maxColumnIndexInZerothSegment;
     private int numberOfColumnInZerothSegment;
@@ -58,10 +60,9 @@ public class SparseColumnMultiPageZeroReader extends 
AbstractColumnMultiPageZero
     private int headerSize;
     private ByteBuffer pageZeroBuf;
 
-    private final VoidPointable offsetPointable;
-
     public SparseColumnMultiPageZeroReader(int bufferCapacity) {
         super();
+        presentColumnsIndices = new BitSet();
         zerothSegmentReader = new SparseColumnPageZeroReader();
         this.pageZeroSegmentsPages = new BitSet();
         this.maxNumberOfColumnsInAPage =
@@ -85,6 +86,7 @@ public class SparseColumnMultiPageZeroReader extends 
AbstractColumnMultiPageZero
         headerSize = MAX_COLUMNS_INDEX_IN_ZEROTH_SEGMENT_OFFSET + 
numberOfPageZeroSegments * Integer.BYTES;
         zerothSegmentReader.reset(pageZeroBuf, 
Math.min(numberOfColumnInZerothSegment, getNumberOfPresentColumns()),
                 headerSize);
+        setPresentColumnsIndices();
         columnIndexToRelativeColumnIndex.clear();
     }
 
@@ -276,23 +278,32 @@ public class SparseColumnMultiPageZeroReader extends 
AbstractColumnMultiPageZero
         return findRelativeColumnIndex(columnIndex) != -1;
     }
 
-    @Override
-    public void getAllColumns(BitSet presentColumns) {
+    private void setPresentColumnsIndices() {
+        presentColumnsIndices.clear();
+        int numberOfPresentColumns = getNumberOfPresentColumns();
+        if (numberOfPresentColumns == 0) {
+            return;
+        }
         int columnOffsetStart = headerSize;
-        for (int i = 0; i < Math.min(getNumberOfPresentColumns(), 
numberOfColumnInZerothSegment); i++) {
+        for (int i = 0; i < Math.min(numberOfPresentColumns, 
numberOfColumnInZerothSegment); i++) {
             int columnIndex = pageZeroBuf.getInt(columnOffsetStart);
-            presentColumns.set(columnIndex);
+            presentColumnsIndices.set(columnIndex);
             columnOffsetStart += SparseColumnPageZeroWriter.COLUMN_OFFSET_SIZE;
         }
-        if (getNumberOfPresentColumns() > numberOfColumnInZerothSegment) {
+        if (numberOfPresentColumns > numberOfColumnInZerothSegment) {
             // read the rest of the columns from the segment stream
             int columnsInLastSegment = getNumberOfPresentColumns() - 
numberOfColumnInZerothSegment
                     - (numberOfPageZeroSegments - 2) * 
maxNumberOfColumnsInAPage;
-            segmentBuffers.readAllColumns(presentColumns, 
numberOfPageZeroSegments, maxNumberOfColumnsInAPage,
+            segmentBuffers.readAllColumns(presentColumnsIndices, 
numberOfPageZeroSegments, maxNumberOfColumnsInAPage,
                     columnsInLastSegment);
         }
     }
 
+    @Override
+    public void getAllColumns(BitSet presentColumns) {
+        presentColumns.or(presentColumnsIndices);
+    }
+
     @Override
     public ByteBuffer getPageZeroBuf() {
         throw new UnsupportedOperationException("This method is not supported 
for multi-page zero readers.");
diff --git 
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/ColumnRanges.java
 
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/ColumnRanges.java
index 87b35bdf3b..e6170a29c4 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/ColumnRanges.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/ColumnRanges.java
@@ -84,7 +84,7 @@ public final class ColumnRanges {
      * @param leafFrame to compute the ranges for
      */
     public void reset(ColumnBTreeReadLeafFrame leafFrame) throws 
HyracksDataException {
-        reset(leafFrame, EMPTY, EMPTY, EMPTY);
+        reset(leafFrame, EMPTY, EMPTY, EMPTY, false);
     }
 
     /**
@@ -94,7 +94,7 @@ public final class ColumnRanges {
      * @param plan      eviction plan
      */
     public void reset(ColumnBTreeReadLeafFrame leafFrame, BitSet plan) throws 
HyracksDataException {
-        reset(leafFrame, plan, EMPTY, EMPTY);
+        reset(leafFrame, plan, EMPTY, EMPTY, false);
     }
 
     /**
@@ -106,7 +106,7 @@ public final class ColumnRanges {
      * @param cloudOnlyColumns locked columns that cannot be read from a local 
disk
      */
     public void reset(ColumnBTreeReadLeafFrame leafFrame, BitSet 
requestedColumns, BitSet evictableColumns,
-            BitSet cloudOnlyColumns) throws HyracksDataException {
+            BitSet cloudOnlyColumns, boolean unPinPageZeroSegments) throws 
HyracksDataException {
         try {
             // Set leafFrame
             this.leafFrame = leafFrame;
@@ -167,8 +167,10 @@ public final class ColumnRanges {
             // to indicate the end
             columnsOrder[columnOrdinal] = -1;
         } finally {
-            //Unpin the not required segment pages
-            leafFrame.unPinNotRequiredPageZeroSegments();
+            if (unPinPageZeroSegments) {
+                //Unpin the not required segment pages
+                leafFrame.unPinNotRequiredPageZeroSegments();
+            }
         }
     }
 
@@ -213,7 +215,7 @@ public final class ColumnRanges {
      *
      * @param pageId page ID
      * @return true of the page should be read from the cloud, false otherwise
-     * @see #reset(ColumnBTreeReadLeafFrame, BitSet, BitSet, BitSet)
+     * @see #reset(ColumnBTreeReadLeafFrame, BitSet, BitSet, BitSet, boolean)
      */
     public boolean isCloudOnly(int pageId) {
         // Compute the relative page ID for this mega leaf node
diff --git 
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudColumnReadContext.java
 
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudColumnReadContext.java
index 181aa060e1..b499a50e09 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudColumnReadContext.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudColumnReadContext.java
@@ -158,7 +158,7 @@ public final class CloudColumnReadContext implements 
IColumnReadContext {
             return;
         }
 
-        columnRanges.reset(leafFrame, projectedColumns, plan, 
cloudOnlyColumns);
+        columnRanges.reset(leafFrame, projectedColumns, plan, 
cloudOnlyColumns, true);
         int pageZeroId = leafFrame.getPageId();
         int numberOfPageZeroSegments = leafFrame.getNumberOfPageZeroSegments();
 
diff --git 
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/sweep/ColumnSweeper.java
 
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/sweep/ColumnSweeper.java
index ba4376f963..2a9dd25b7e 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/sweep/ColumnSweeper.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/sweep/ColumnSweeper.java
@@ -202,6 +202,7 @@ public final class ColumnSweeper {
                             }
                         }
                     }
+                    segmentPagesTempHolder.clear();
                     context.unpin(page0, bcOpCtx);
                 }
             }

Reply via email to