From Ritik Raj <ritik....@couchbase.com>: Ritik Raj has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20013 )
Change subject: [ASTERIXDB-3601][STO] Fixed calculation issues ...................................................................... [ASTERIXDB-3601][STO] Fixed calculation issues - user model changes: no - storage format changes: no - interface changes: no Details: 1. isFull calculation 2. sparse column columnToIndexCache reset 3. bounding the offsetColumnIndexPairs per leaf Ext-ref: MB-66306 Change-Id: I796b74355eca845f006abb5b45789a5136ba8c84 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20013 Reviewed-by: Peeyush Gupta <peeyush.gu...@couchbase.com> Tested-by: Ritik Raj <ritik....@couchbase.com> --- M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWriter.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/SparseColumnMultiPageZeroReader.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/api/AbstractColumnTupleWriter.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/ColumnRanges.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/bytes/stream/in/MultiPageZeroByteBuffersReader.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudColumnReadContext.java M asterixdb/asterix-column/src/test/java/org/apache/asterix/column/test/bytes/AbstractBytesTest.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/DefaultColumnPageZeroReader.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/SparseColumnPageZeroReader.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/DefaultColumnMultiPageZeroReader.java M 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/SparseColumnMultiPageZeroWriter.java M asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleWriter.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/IColumnPageZeroReader.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeBulkloader.java M hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeReadLeafFrame.java 15 files changed, 150 insertions(+), 77 deletions(-) Approvals: Peeyush Gupta: Looks good to me, approved Ritik Raj: Verified Anon. E. Moose #1000171: diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/bytes/stream/in/MultiPageZeroByteBuffersReader.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/bytes/stream/in/MultiPageZeroByteBuffersReader.java index bb02166..4a59073 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/bytes/stream/in/MultiPageZeroByteBuffersReader.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/bytes/stream/in/MultiPageZeroByteBuffersReader.java @@ -82,7 +82,8 @@ pointable.set(buffer.array(), position, length); } - public void readOffset(long[] offsetColumnIndexPairs, int maxColumnsInZerothSegment, int numberOfColumnsInAPage) { + public int readOffset(long[] offsetColumnIndexPairs, int maxColumnsInZerothSegment, int numberOfColumnsInAPage, + int currentColumnIndex) { int numberOfColumns = offsetColumnIndexPairs.length - 1; for (Int2IntMap.Entry pair : segmentDir.int2IntEntrySet()) { int segmentIndex = pair.getIntKey(); @@ -92,18 +93,20 @@ int segmentOffset = 0; for (int j = 0; j < numberOfColumnsInAPage; j++) { int 
columnOffset = buffer.getInt(segmentOffset); - offsetColumnIndexPairs[columnIndex] = IntPairUtil.of(columnOffset, columnIndex); + offsetColumnIndexPairs[currentColumnIndex] = IntPairUtil.of(columnOffset, columnIndex); segmentOffset += DefaultColumnPageZeroWriter.COLUMN_OFFSET_SIZE; + currentColumnIndex++; columnIndex++; if (columnIndex == numberOfColumns) { break; // No need to read more columns from this buffer. } } } + return currentColumnIndex; } - public void readSparseOffset(long[] offsetColumnIndexPairs, int numberOfPageSegments, int numberOfColumnsInAPage, - int numberOfColumnsInLastSegment) { + public int readSparseOffset(long[] offsetColumnIndexPairs, int numberOfPageSegments, int numberOfColumnsInAPage, + int numberOfColumnsInLastSegment, int currentColumnIndex) { for (Int2IntMap.Entry pair : segmentDir.int2IntEntrySet()) { int segmentIndex = pair.getIntKey(); int bufferIndex = pair.getIntValue(); @@ -114,10 +117,11 @@ for (int j = 0; j < numberOfColumnsInSegment; j++) { int columnIndex = buffer.getInt(segmentOffset); int columnOffset = buffer.getInt(segmentOffset + Integer.BYTES); - offsetColumnIndexPairs[columnIndex] = IntPairUtil.of(columnOffset, columnIndex); + offsetColumnIndexPairs[currentColumnIndex++] = IntPairUtil.of(columnOffset, columnIndex); segmentOffset += SparseColumnPageZeroWriter.COLUMN_OFFSET_SIZE; } } + return currentColumnIndex; } public void readAllColumns(BitSet presentColumns, int numberOfPageSegments, int numberOfColumnsInAPage, diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWriter.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWriter.java index a1fcfe3..bf76a6b 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWriter.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWriter.java @@ 
-145,8 +145,8 @@ } @Override - public int getPageZeroWriterOccupiedSpace(int maxColumnsInPageZerothSegment, boolean includeCurrentTupleColumns, - IColumnPageZeroWriter.ColumnPageZeroWriterType writerType) { + public int getPageZeroWriterOccupiedSpace(int maxColumnsInPageZerothSegment, int bufferCapacity, + boolean includeCurrentTupleColumns, IColumnPageZeroWriter.ColumnPageZeroWriterType writerType) { int spaceOccupiedByDefaultWriter; int spaceOccupiedBySparseWriter; @@ -157,13 +157,13 @@ return spaceOccupiedByDefaultWriter; } else if (writerType == IColumnPageZeroWriter.ColumnPageZeroWriterType.SPARSE) { // Maximum space occupied by the columns = maxColumnsInPageZerothSegment * (offset + filter size) - spaceOccupiedBySparseWriter = getSpaceOccupiedBySparseWriter(maxColumnsInPageZerothSegment); + spaceOccupiedBySparseWriter = getSpaceOccupiedBySparseWriter(maxColumnsInPageZerothSegment, bufferCapacity); return spaceOccupiedBySparseWriter; } spaceOccupiedByDefaultWriter = getSpaceOccupiedByDefaultWriter(maxColumnsInPageZerothSegment, includeCurrentTupleColumns); - spaceOccupiedBySparseWriter = getSpaceOccupiedBySparseWriter(maxColumnsInPageZerothSegment); + spaceOccupiedBySparseWriter = getSpaceOccupiedBySparseWriter(maxColumnsInPageZerothSegment, bufferCapacity); pageZeroWriterFlavorSelector.switchPageZeroWriterIfNeeded(spaceOccupiedByDefaultWriter, spaceOccupiedBySparseWriter); @@ -179,11 +179,14 @@ return spaceOccupiedByDefaultWriter; } - private int getSpaceOccupiedBySparseWriter(int maxColumnsInPageZerothSegment) { + private int getSpaceOccupiedBySparseWriter(int maxColumnsInPageZerothSegment, int bufferCapacity) { int presentColumns = transformerForCurrentTuple.getNumberOfVisitedColumnsInBatch(); - int numberOfPagesRequired = (int) Math.ceil( - (double) (presentColumns - maxColumnsInPageZerothSegment) / IColumnPageZeroWriter.MIN_COLUMN_SPACE); - int headerSpace = SparseColumnMultiPageZeroWriter.getHeaderSpace(numberOfPagesRequired); + int 
maximumNumberOfColumnsInASegment = + SparseColumnMultiPageZeroWriter.getMaximumNumberOfColumnsInAPage(bufferCapacity); + int numberOfExtraPagesRequired = presentColumns <= maxColumnsInPageZerothSegment ? 0 + : (int) Math.ceil( + (double) (presentColumns - maxColumnsInPageZerothSegment) / maximumNumberOfColumnsInASegment); + int headerSpace = SparseColumnMultiPageZeroWriter.getHeaderSpace(numberOfExtraPagesRequired); presentColumns = Math.min(presentColumns, maxColumnsInPageZerothSegment); // space occupied by the sparse writer diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleWriter.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleWriter.java index d31e1d3..fb5cfdb 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleWriter.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleWriter.java @@ -155,8 +155,8 @@ } @Override - public int getPageZeroWriterOccupiedSpace(int maxColumnsInPageZerothSegment, boolean includeCurrentTupleColumns, - IColumnPageZeroWriter.ColumnPageZeroWriterType writerType) { + public int getPageZeroWriterOccupiedSpace(int maxColumnsInPageZerothSegment, int bufferCapacity, + boolean includeCurrentTupleColumns, IColumnPageZeroWriter.ColumnPageZeroWriterType writerType) { int spaceOccupiedByDefaultWriter; int spaceOccupiedBySparseWriter; @@ -167,11 +167,11 @@ return spaceOccupiedByDefaultWriter; } else if (writerType == IColumnPageZeroWriter.ColumnPageZeroWriterType.SPARSE) { // Maximum space occupied by the columns = maxColumnsInPageZerothSegment * (offset + filter size) - spaceOccupiedBySparseWriter = getSpaceOccupiedBySparseWriter(maxColumnsInPageZerothSegment); + spaceOccupiedBySparseWriter = getSpaceOccupiedBySparseWriter(maxColumnsInPageZerothSegment, bufferCapacity); return spaceOccupiedBySparseWriter; } 
- spaceOccupiedBySparseWriter = getSpaceOccupiedBySparseWriter(maxColumnsInPageZerothSegment); + spaceOccupiedBySparseWriter = getSpaceOccupiedBySparseWriter(maxColumnsInPageZerothSegment, bufferCapacity); spaceOccupiedByDefaultWriter = getSpaceOccupiedByDefaultWriter(maxColumnsInPageZerothSegment, includeCurrentTupleColumns); pageZeroWriterFlavorSelector.switchPageZeroWriterIfNeeded(spaceOccupiedByDefaultWriter, @@ -188,11 +188,14 @@ return spaceOccupiedByDefaultWriter; } - private int getSpaceOccupiedBySparseWriter(int maxColumnsInPageZerothSegment) { + private int getSpaceOccupiedBySparseWriter(int maxColumnsInPageZerothSegment, int bufferCapacity) { int presentColumns = presentColumnsIndexes.cardinality(); - int numberOfPagesRequired = (int) Math.ceil( - (double) (presentColumns - maxColumnsInPageZerothSegment) / IColumnPageZeroWriter.MIN_COLUMN_SPACE); - int headerSpace = SparseColumnMultiPageZeroWriter.getHeaderSpace(numberOfPagesRequired); + int maximumNumberOfColumnsInASegment = + SparseColumnMultiPageZeroWriter.getMaximumNumberOfColumnsInAPage(bufferCapacity); + int numberOfExtraPagesRequired = presentColumns <= maxColumnsInPageZerothSegment ? 
0 + : (int) Math.ceil( + (double) (presentColumns - maxColumnsInPageZerothSegment) / maximumNumberOfColumnsInASegment); + int headerSpace = SparseColumnMultiPageZeroWriter.getHeaderSpace(numberOfExtraPagesRequired); presentColumns = Math.min(presentColumns, maxColumnsInPageZerothSegment); // space occupied by the sparse writer diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/DefaultColumnPageZeroReader.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/DefaultColumnPageZeroReader.java index a34d8c1..3f729b8 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/DefaultColumnPageZeroReader.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/DefaultColumnPageZeroReader.java @@ -160,13 +160,14 @@ } @Override - public void populateOffsetColumnIndexPairs(long[] offsetColumnIndexPairs) { + public int populateOffsetColumnIndexPairs(long[] offsetColumnIndexPairs) { int columnOffsetStart = headerSize; - for (int i = 0; i < offsetColumnIndexPairs.length; i++) { + for (int i = 0; i < numberOfPresentColumns; i++) { int offset = pageZeroBuf.getInt(columnOffsetStart); offsetColumnIndexPairs[i] = IntPairUtil.of(offset, i); columnOffsetStart += DefaultColumnPageZeroWriter.COLUMN_OFFSET_SIZE; } + return numberOfPresentColumns; } @Override diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/SparseColumnPageZeroReader.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/SparseColumnPageZeroReader.java index 3b4fdc4..5955d5e 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/SparseColumnPageZeroReader.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/readers/SparseColumnPageZeroReader.java @@ -42,6 +42,12 @@ } @Override + public void reset(ByteBuffer pageZeroBuf, int numberOfPresentColumns, int 
headerSize) { + super.reset(pageZeroBuf, numberOfPresentColumns, headerSize); + columnIndexToRelativeColumnIndex.clear(); + } + + @Override public int getColumnOffset(int columnIndex) { int relativeColumnIndex = getRelativeColumnIndex(columnIndex); return pageZeroBuf.getInt( @@ -137,7 +143,7 @@ } @Override - public void populateOffsetColumnIndexPairs(long[] offsetColumnIndexPairs) { + public int populateOffsetColumnIndexPairs(long[] offsetColumnIndexPairs) { int columnIndex = getColumnIndex(0); for (int i = 0; i < numberOfPresentColumns; i++) { int column = pageZeroBuf.getInt(columnIndex); @@ -145,5 +151,6 @@ offsetColumnIndexPairs[i] = IntPairUtil.of(offset, column); columnIndex += SparseColumnPageZeroWriter.COLUMN_OFFSET_SIZE; } + return numberOfPresentColumns; } } diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/DefaultColumnMultiPageZeroReader.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/DefaultColumnMultiPageZeroReader.java index 5a0b180..d29daa7 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/DefaultColumnMultiPageZeroReader.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/DefaultColumnMultiPageZeroReader.java @@ -230,19 +230,24 @@ } @Override - public void populateOffsetColumnIndexPairs(long[] offsetColumnIndexPairs) { + public int populateOffsetColumnIndexPairs(long[] offsetColumnIndexPairs) { int columnOffsetStart = headerSize; - for (int i = 0; i < Math.min(offsetColumnIndexPairs.length, zerothSegmentMaxColumns); i++) { + int numberOfColumns = getNumberOfPresentColumns(); + int currentColumnIndex = 0; + while (currentColumnIndex < Math.min(numberOfColumns, zerothSegmentMaxColumns)) { // search in the 0th segment int offset = pageZeroBuf.getInt(columnOffsetStart); - offsetColumnIndexPairs[i] = IntPairUtil.of(offset, i); + 
offsetColumnIndexPairs[currentColumnIndex] = IntPairUtil.of(offset, currentColumnIndex); columnOffsetStart += DefaultColumnPageZeroWriter.COLUMN_OFFSET_SIZE; + currentColumnIndex++; } - if (offsetColumnIndexPairs.length > zerothSegmentMaxColumns) { + if (numberOfColumns > zerothSegmentMaxColumns) { // read the rest of the columns from the segment stream - segmentBuffers.readOffset(offsetColumnIndexPairs, zerothSegmentMaxColumns, maxNumberOfColumnsInAPage); + currentColumnIndex = segmentBuffers.readOffset(offsetColumnIndexPairs, zerothSegmentMaxColumns, + maxNumberOfColumnsInAPage, currentColumnIndex); } + return currentColumnIndex; } @Override @@ -251,11 +256,12 @@ // Not marking the zeroth segment if (numberOfPageZeroSegments == 1 || markAll) { // mark all segments as required - pageZeroSegmentsPages.set(1, numberOfPageZeroSegments); + pageZeroSegmentsPages.set(0, numberOfPageZeroSegments); } else { // Iterate over the projected columns and mark the segments that contain them int currentIndex = projectedColumns.nextSetBit(zerothSegmentMaxColumns); - while (currentIndex >= 0) { + int totalNumberOfColumns = getNumberOfPresentColumns(); + while (currentIndex >= 0 && currentIndex < totalNumberOfColumns) { int rangeEnd = projectedColumns.nextClearBit(currentIndex); // exclusive int fromSegmentIndex = (currentIndex - zerothSegmentMaxColumns) / maxNumberOfColumnsInAPage + 1; diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/SparseColumnMultiPageZeroReader.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/SparseColumnMultiPageZeroReader.java index 0a0a817..608ff71 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/SparseColumnMultiPageZeroReader.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/SparseColumnMultiPageZeroReader.java @@ -118,28 +118,24 @@ // This method finds the 
segment index (except for 0th segment) for the given columnIndex. if (numberOfPageZeroSegments == 1) { // only zeroth segment is present - return -1; + return 0; } // gives 0 based segment index (0 for zeroth segment, 1 for first segment, etc.) - if (columnIndex <= maxColumnIndexInZerothSegment) { - return 0; - } else { - int start = 0; - int end = numberOfPageZeroSegments - 1; - int resultSegment = -1; - while (start <= end) { - int mid = (start + end) / 2; - int segmentColumnIndex = - pageZeroBuf.getInt(MAX_COLUMNS_INDEX_IN_ZEROTH_SEGMENT_OFFSET + mid * Integer.BYTES); - if (segmentColumnIndex >= columnIndex) { - resultSegment = mid; - end = mid - 1; // continue searching in the left half - } else { - start = mid + 1; - } + int start = 1; + int end = numberOfPageZeroSegments - 1; + int resultSegment = -1; + while (start <= end) { + int mid = (start + end) / 2; + int segmentColumnIndex = + pageZeroBuf.getInt(MAX_COLUMNS_INDEX_IN_ZEROTH_SEGMENT_OFFSET + mid * Integer.BYTES); + if (segmentColumnIndex >= columnIndex) { + resultSegment = mid; + end = mid - 1; // continue searching in the left half + } else { + start = mid + 1; } - return resultSegment; } + return resultSegment; } private int findRelativeColumnIndex(int columnIndex) throws HyracksDataException { @@ -150,7 +146,7 @@ return zerothSegmentReader.getRelativeColumnIndex(columnIndex); } else { int segmentIndex = findSegment(columnIndex); - if (segmentIndex == -1) { + if (segmentIndex <= 0) { return -1; } segmentIndex -= 1; // Adjusting to get the segment index for the segment stream @@ -303,23 +299,30 @@ } @Override - public void populateOffsetColumnIndexPairs(long[] offsetColumnIndexPairs) { - // OffsetColumnIndexPairs is of size getNumberOfPresentColumns() + 1 + public int populateOffsetColumnIndexPairs(long[] offsetColumnIndexPairs) { + // offsetColumnIndexPairs >= getNumberOfPresentColumns() + 1 (maybe because of the previous MegaLeaf). 
+ // Do not rely on offsetColumnIndexPairs.length, as it may be larger than the number of present columns. + // This is because the same array is reused for multiple leaf segments, and previous leaves may have more columns. int columnOffsetStart = headerSize; - for (int i = 0; i < Math.min(offsetColumnIndexPairs.length - 1, numberOfColumnInZerothSegment); i++) { + int currentColumnIndex = 0; + int numberOfColumns = getNumberOfPresentColumns(); + while (currentColumnIndex < Math.min(numberOfColumns, numberOfColumnInZerothSegment)) { int columnIndex = pageZeroBuf.getInt(columnOffsetStart); int columnOffset = pageZeroBuf.getInt(columnOffsetStart + SparseColumnPageZeroWriter.COLUMN_INDEX_SIZE); - offsetColumnIndexPairs[i] = IntPairUtil.of(columnOffset, columnIndex); + offsetColumnIndexPairs[currentColumnIndex++] = IntPairUtil.of(columnOffset, columnIndex); columnOffsetStart += SparseColumnPageZeroWriter.COLUMN_OFFSET_SIZE; } - if (offsetColumnIndexPairs.length - 1 > numberOfColumnInZerothSegment) { + // If the pages are not pinned, we will not read any columnIndex, but the old stuffs will already be present in the offsetColumnIndexPairs. 
+ if (numberOfColumns > numberOfColumnInZerothSegment) { // read the rest of the columns from the segment stream int columnsInLastSegment = getNumberOfPresentColumns() - numberOfColumnInZerothSegment - (numberOfPageZeroSegments - 2) * maxNumberOfColumnsInAPage; - segmentBuffers.readSparseOffset(offsetColumnIndexPairs, numberOfPageZeroSegments, maxNumberOfColumnsInAPage, - columnsInLastSegment); + currentColumnIndex = segmentBuffers.readSparseOffset(offsetColumnIndexPairs, numberOfPageZeroSegments, + maxNumberOfColumnsInAPage, columnsInLastSegment, currentColumnIndex); } + + return currentColumnIndex; } @Override @@ -343,14 +346,26 @@ // Not marking the zeroth segment if (numberOfPageZeroSegments == 1 || markAll) { // mark all segments as required - pageZeroSegmentsPages.set(1, numberOfPageZeroSegments); + pageZeroSegmentsPages.set(0, numberOfPageZeroSegments); } else { // Iterate over the projected columns and mark the segments that contain them int currentIndex = projectedColumns.nextSetBit(maxColumnIndexInZerothSegment + 1); while (currentIndex >= 0) { int rangeEnd = projectedColumns.nextClearBit(currentIndex); // exclusive int startSegmentIndex = findSegment(currentIndex); + if (startSegmentIndex == -1) { + //This indicates that the currentIndex > MaxColumnIndex in the last segment + //Hence this leaf doesn't need to pin the segment for requested column ranges. + + //We can return early as next projectedColumns next set bit will also be out of bounds. + break; + } int endSegmentIndex = findSegment(rangeEnd - 1); + if (endSegmentIndex == -1) { + //This indicates that the rangeEnd - 1 > MaxColumnIndex in the last segment + //but the startSegmentIndex is valid, hence we may pin to the last segment. 
+ endSegmentIndex = numberOfPageZeroSegments - 1; // Last segment index + } if (startSegmentIndex <= endSegmentIndex) { pageZeroSegmentsPages.set(startSegmentIndex, endSegmentIndex + 1); diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/SparseColumnMultiPageZeroWriter.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/SparseColumnMultiPageZeroWriter.java index 695ee6e..5753632 100644 --- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/SparseColumnMultiPageZeroWriter.java +++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/writers/multipage/SparseColumnMultiPageZeroWriter.java @@ -96,8 +96,7 @@ segments = new MultiPersistentPageZeroBufferBytesOutputStream(multiPageOpRef); this.zerothSegmentMaxColumns = zerothSegmentMaxColumns; this.zerothSegmentWriter = new SparseColumnPageZeroWriter(); - this.maximumNumberOfColumnsInAPage = bufferCachePageSize - / (SparseColumnPageZeroWriter.COLUMN_OFFSET_SIZE + SparseColumnPageZeroWriter.FILTER_SIZE); + this.maximumNumberOfColumnsInAPage = getMaximumNumberOfColumnsInAPage(bufferCachePageSize); } @Override @@ -282,7 +281,12 @@ return MAX_COLUMNS_INDEX_IN_ZEROTH_SEGMENT_OFFSET + numberOfPageZeroSegments * Integer.BYTES; } - public static int getHeaderSpace(int numberOfPageZeroSegments) { - return MAX_COLUMNS_INDEX_IN_ZEROTH_SEGMENT_OFFSET + numberOfPageZeroSegments * Integer.BYTES; + public static int getHeaderSpace(int numberOfExtraPagesRequired) { + return MAX_COLUMNS_INDEX_IN_ZEROTH_SEGMENT_OFFSET + numberOfExtraPagesRequired * Integer.BYTES; + } + + public static int getMaximumNumberOfColumnsInAPage(int bufferCachePageSize) { + return bufferCachePageSize + / (SparseColumnPageZeroWriter.COLUMN_OFFSET_SIZE + SparseColumnPageZeroWriter.FILTER_SIZE); } } diff --git a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/test/bytes/AbstractBytesTest.java 
b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/test/bytes/AbstractBytesTest.java index 6e85d29..1ef865e 100644 --- a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/test/bytes/AbstractBytesTest.java +++ b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/test/bytes/AbstractBytesTest.java @@ -202,8 +202,10 @@ } //Reserved for the number of pages int requiredFreeSpace = HEADER_SIZE; + //Since this test uses DefaultWriter, it does not need the bufferCapacity in the calculation + int bufferCapacity = Integer.MAX_VALUE; //Columns' Offsets - requiredFreeSpace += columnWriter.getPageZeroWriterOccupiedSpace(100, true, + requiredFreeSpace += columnWriter.getPageZeroWriterOccupiedSpace(100, bufferCapacity, true, IColumnPageZeroWriter.ColumnPageZeroWriterType.DEFAULT); //Occupied space from previous writes requiredFreeSpace += columnWriter.getPrimaryKeysEstimatedSize(); diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/api/AbstractColumnTupleWriter.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/api/AbstractColumnTupleWriter.java index f5cef05..8b3abd6 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/api/AbstractColumnTupleWriter.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/api/AbstractColumnTupleWriter.java @@ -64,7 +64,7 @@ * * @return the size needed to store columns' offsets */ - public abstract int getPageZeroWriterOccupiedSpace(int maxColumnsInPageZerothSegment, + public abstract int getPageZeroWriterOccupiedSpace(int maxColumnsInPageZerothSegment, int bufferCapacity, boolean includeCurrentTupleColumns, IColumnPageZeroWriter.ColumnPageZeroWriterType adaptive); /** diff --git 
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/ColumnRanges.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/ColumnRanges.java index 6cf43bf..ba3cda4 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/ColumnRanges.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/ColumnRanges.java @@ -112,20 +112,19 @@ // Ensure arrays capacities (given the leafFrame's columns and pages) init(); - // Get the number of columns in a page - int numberOfColumns = leafFrame.getNumberOfColumns(); // Set the first 32-bits to the offset and the second 32-bits to columnIndex - leafFrame.populateOffsetColumnIndexPairs(offsetColumnIndexPairs); + int numberOfPresentColumnsInLeaf = leafFrame.populateOffsetColumnIndexPairs(offsetColumnIndexPairs); // Set artificial offset to determine the last column's length int megaLeafLength = leafFrame.getMegaLeafNodeLengthInBytes(); - offsetColumnIndexPairs[numberOfColumns] = IntPairUtil.of(megaLeafLength, numberOfColumns); + offsetColumnIndexPairs[numberOfPresentColumnsInLeaf] = + IntPairUtil.of(megaLeafLength, numberOfPresentColumnsInLeaf); // Sort the pairs by offset (i.e., lowest offset first) - LongArrays.stableSort(offsetColumnIndexPairs, 0, numberOfColumns, OFFSET_COMPARATOR); + LongArrays.stableSort(offsetColumnIndexPairs, 0, numberOfPresentColumnsInLeaf, OFFSET_COMPARATOR); int columnOrdinal = 0; - for (int i = 0; i < numberOfColumns; i++) { + for (int i = 0; i < numberOfPresentColumnsInLeaf; i++) { if (offsetColumnIndexPairs[i] == 0) { //Any requested column's offset can't be zero //In case a column is not being present in the accessed pageZero segments, it will be defaulted to 0 diff --git 
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudColumnReadContext.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudColumnReadContext.java index 2cefab3..5458807 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudColumnReadContext.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/cloud/buffercache/read/CloudColumnReadContext.java @@ -140,8 +140,14 @@ // pin the required page segments mergedPageRanges.clear(); int pageZeroId = leafFrame.getPageId(); - BitSet pageZeroSegmentRanges = - leafFrame.markRequiredPageZeroSegments(projectedColumns, pageZeroId, operation == MERGE); + // Pinning all the segments of the page zero for now, + // as the column eviction logic is based on the length of the columns which + // gets evaluated from the page zero segments. 
+ + //TODO: find a way to pin only the segments that are required for the operation + // or pin all the segments and then unpin the segments that are not required + boolean markAll = true || operation == MERGE; + BitSet pageZeroSegmentRanges = leafFrame.markRequiredPageZeroSegments(projectedColumns, pageZeroId, markAll); // Merge the page zero segments ranges mergePageZeroSegmentRanges(pageZeroSegmentRanges); mergedPageRanges.pin(columnCtx, bufferCache, fileId, pageZeroId); diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeBulkloader.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeBulkloader.java index 8439373..9ca57dd 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeBulkloader.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeBulkloader.java @@ -133,8 +133,8 @@ } //Columns' Offsets columnWriter.updateColumnMetadataForCurrentTuple(tuple); - int requiredFreeSpace = - columnWriter.getPageZeroWriterOccupiedSpace(maxColumnsInPageZerothSegment, true, pageZeroWriterType); + int requiredFreeSpace = columnWriter.getPageZeroWriterOccupiedSpace(maxColumnsInPageZerothSegment, + columnarFrame.getBuffer().capacity(), true, pageZeroWriterType); //Occupied space from previous writes requiredFreeSpace += columnWriter.getPrimaryKeysEstimatedSize(); //min and max tuples' sizes diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeReadLeafFrame.java 
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeReadLeafFrame.java index c3f8228..f01b28c 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeReadLeafFrame.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeReadLeafFrame.java @@ -119,6 +119,7 @@ public int getColumnOffset(int columnIndex) throws HyracksDataException { // update the exception message. if (!columnPageZeroReader.isValidColumn(columnIndex)) { + printPageZeroReaderInfo(); throw new IndexOutOfBoundsException(columnIndex + " >= " + getNumberOfColumns()); } return columnPageZeroReader.getColumnOffset(columnIndex); @@ -177,8 +178,8 @@ throw new IllegalArgumentException("Use createTupleReference(int)"); } - public void populateOffsetColumnIndexPairs(long[] offsetColumnIndexPairs) { - columnPageZeroReader.populateOffsetColumnIndexPairs(offsetColumnIndexPairs); + public int populateOffsetColumnIndexPairs(long[] offsetColumnIndexPairs) { + return columnPageZeroReader.populateOffsetColumnIndexPairs(offsetColumnIndexPairs); } public BitSet getPageZeroSegmentsPages() { diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/IColumnPageZeroReader.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/IColumnPageZeroReader.java index d9cf11b..fa9b57a 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/IColumnPageZeroReader.java +++ 
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/IColumnPageZeroReader.java @@ -64,7 +64,7 @@ ByteBuffer getPageZeroBuf(); - void populateOffsetColumnIndexPairs(long[] offsetColumnIndexPairs); + int populateOffsetColumnIndexPairs(long[] offsetColumnIndexPairs); int getNumberOfPageZeroSegments(); -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20013 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: ionic Gerrit-Change-Id: I796b74355eca845f006abb5b45789a5136ba8c84 Gerrit-Change-Number: 20013 Gerrit-PatchSet: 6 Gerrit-Owner: Ritik Raj <ritik....@couchbase.com> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Gerrit-Reviewer: Peeyush Gupta <peeyush.gu...@couchbase.com> Gerrit-Reviewer: Ritik Raj <ritik....@couchbase.com> Gerrit-MessageType: merged