From Wail Alkowaileet <[email protected]>:

Wail Alkowaileet has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18176 )


Change subject: [ASTERIXDB-3360][STO] Honor limit when writing compressed pages
......................................................................

[ASTERIXDB-3360][STO] Honor limit when writing compressed pages

- user model changes: no
- storage format changes: yes
- interface changes: no

Details:
The compressed page writer should not reset the limit of the page buffer
being written to a whole page. Instead, it should honor the provided limit.

Change-Id: I9465a20d8f938320aff844bb313a5ee65331ba83
---
M 
hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/file/CompressedBufferedFileHandle.java
M 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/writer/ColumnBatchWriter.java
M 
hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/lsm/tuples/ColumnMultiBufferProvider.java
M 
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/bytes/stream/out/MultiPersistentBufferBytesOutputStream.java
4 files changed, 45 insertions(+), 10 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/76/18176/1

diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/bytes/stream/out/MultiPersistentBufferBytesOutputStream.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/bytes/stream/out/MultiPersistentBufferBytesOutputStream.java
index c910131..90aacc7 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/bytes/stream/out/MultiPersistentBufferBytesOutputStream.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/bytes/stream/out/MultiPersistentBufferBytesOutputStream.java
@@ -38,6 +38,12 @@
     @Override
     protected void preReset() throws HyracksDataException {
         if (allocatedBytes > 0) {
+            /*
+             * Ensure limit is set to current position and position is set to 
0. This is to ensure no unrelated bytes are
+             * persisted to disk. Unrelated bytes are bytes that were in the 
ByteBuffer (when the column writer
+             * confiscated it from the buffer cache) but those bytes do not 
belong to the column that is to be written.
+             */
+            currentBuf.flip();
             //Persist all buffers before resetting the stream
             multiPageOpRef.getValue().persist();
             allocatedBytes = 0;
diff --git 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/writer/ColumnBatchWriter.java
 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/writer/ColumnBatchWriter.java
index 6fbdc27..d5f7270 100644
--- 
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/writer/ColumnBatchWriter.java
+++ 
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/writer/ColumnBatchWriter.java
@@ -119,7 +119,15 @@
     }

     private boolean hasEnoughSpace(int bufferPosition, IColumnValuesWriter 
columnWriter) {
-        //Estimated size mostly overestimate the size
+        if (bufferPosition == 0) {
+            // if the current buffer is empty, then use it
+            return true;
+        } else if (tolerance == 1.0d) {
+            // if tolerance is 100%, then it should avoid doing any 
calculations and start with a new page
+            return false;
+        }
+
+        // Estimated size mostly overestimate the size
         int columnSize = columnWriter.getEstimatedSize();
         float remainingPercentage = (pageSize - bufferPosition) / (float) 
pageSize;
         if (columnSize > pageSize) {
diff --git 
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/lsm/tuples/ColumnMultiBufferProvider.java
 
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/lsm/tuples/ColumnMultiBufferProvider.java
index 34ec856..2e0ccfb 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/lsm/tuples/ColumnMultiBufferProvider.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/lsm/tuples/ColumnMultiBufferProvider.java
@@ -61,9 +61,14 @@
         startOffset = offset % multiPageOp.getPageSize();
         //Duplicate as the buffer could be shared by more than one column
         ByteBuffer firstPage = readNext().duplicate();
+        // Set the limit to read the length of the column
+        firstPage.limit(startOffset + Integer.BYTES);
+        // Set position at the start of column
         firstPage.position(startOffset);
-        //Read the length
+        // Read the length of this column
         length = firstPage.getInt();
+        // Ensure the page limit is at most a full page
+        firstPage.limit(Math.min(length, multiPageOp.getPageSize()));
         int remainingLength = length - firstPage.remaining();
         numberOfPages = (int) Math.ceil((double) remainingLength / 
multiPageOp.getPageSize());
         //+4-bytes after reading the length
diff --git 
a/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/file/CompressedBufferedFileHandle.java
 
b/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/file/CompressedBufferedFileHandle.java
index 01811c7..0cb4c9d 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/file/CompressedBufferedFileHandle.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/file/CompressedBufferedFileHandle.java
@@ -103,7 +103,13 @@
             final long bytesWritten;
             final long expectedBytesWritten;

-            fixBufferPointers(uBuffer, 0);
+            if (cPage.isLargePage()) {
+                // When a page is large, ensure the buffer limit is set to a full 
page length
+                fixBufferPointers(uBuffer, 0);
+            } else {
+                // Otherwise, the limit must be kept as provided
+                uBuffer.position(0);
+            }
             if (compressToWriteBuffer(uBuffer, cBuffer) < 
bufferCache.getPageSize()) {
                 cBuffer.position(0);
                 final long offset = 
compressedFileManager.writePageInfo(pageId, cBuffer.remaining());
@@ -233,7 +239,6 @@
     private void uncompressToPageBuffer(ByteBuffer cBuffer, ByteBuffer 
uBuffer) throws HyracksDataException {
         final ICompressorDecompressor compDecomp = 
compressedFileManager.getCompressorDecompressor();
         compDecomp.uncompress(cBuffer, uBuffer);
-        verifyUncompressionSize(bufferCache.getPageSize(), 
uBuffer.remaining());
     }

     private int compressToWriteBuffer(ByteBuffer uBuffer, ByteBuffer cBuffer) 
throws HyracksDataException {
@@ -247,10 +252,4 @@
         return 
compDecomp.computeCompressedBufferSize(bufferCache.getPageSize());
     }

-    private void verifyUncompressionSize(int expected, int actual) {
-        if (expected != actual) {
-            throwException("Uncompressed", expected, actual);
-        }
-    }
-
 }

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18176
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I9465a20d8f938320aff844bb313a5ee65331ba83
Gerrit-Change-Number: 18176
Gerrit-PatchSet: 1
Gerrit-Owner: Wail Alkowaileet <[email protected]>
Gerrit-MessageType: newchange

Reply via email to