>From Wail Alkowaileet <[email protected]>:
Wail Alkowaileet has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18176 )
Change subject: [ASTERIXDB-3360][STO] Honor limit when writing compressed pages
......................................................................
[ASTERIXDB-3360][STO] Honor limit when writing compressed pages
- user model changes: no
- storage format changes: yes
- interface changes: no
Details:
Compressed page writer should not reset the written page buffer
to be a whole page. Instead, it should honor the provided limit.
Change-Id: I9465a20d8f938320aff844bb313a5ee65331ba83
---
M
hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/file/CompressedBufferedFileHandle.java
M
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/writer/ColumnBatchWriter.java
M
hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/lsm/tuples/ColumnMultiBufferProvider.java
M
asterixdb/asterix-column/src/main/java/org/apache/asterix/column/bytes/stream/out/MultiPersistentBufferBytesOutputStream.java
4 files changed, 45 insertions(+), 10 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/76/18176/1
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/bytes/stream/out/MultiPersistentBufferBytesOutputStream.java
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/bytes/stream/out/MultiPersistentBufferBytesOutputStream.java
index c910131..90aacc7 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/bytes/stream/out/MultiPersistentBufferBytesOutputStream.java
+++
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/bytes/stream/out/MultiPersistentBufferBytesOutputStream.java
@@ -38,6 +38,12 @@
@Override
protected void preReset() throws HyracksDataException {
if (allocatedBytes > 0) {
+ /*
+ * Ensure limit is set to current position and position is set to
0. This is to ensure that no unrelated bytes are
+ * persisted to disk. Unrelated bytes are bytes that were in the
ByteBuffer (when the column writer
+ * confiscated it from the buffer cache) but that do not
belong to the column that is about to be written.
+ */
+ currentBuf.flip();
//Persist all buffers before resetting the stream
multiPageOpRef.getValue().persist();
allocatedBytes = 0;
diff --git
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/writer/ColumnBatchWriter.java
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/writer/ColumnBatchWriter.java
index 6fbdc27..d5f7270 100644
---
a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/writer/ColumnBatchWriter.java
+++
b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/writer/ColumnBatchWriter.java
@@ -119,7 +119,15 @@
}
private boolean hasEnoughSpace(int bufferPosition, IColumnValuesWriter
columnWriter) {
- //Estimated size mostly overestimate the size
+ if (bufferPosition == 0) {
+ // if the current buffer is empty, then use it
+ return true;
+ } else if (tolerance == 1.0d) {
+ // if tolerance is 100%, then it should avoid doing any
calculations and start with a new page
+ return false;
+ }
+
+ // The estimated size mostly overestimates the actual size
int columnSize = columnWriter.getEstimatedSize();
float remainingPercentage = (pageSize - bufferPosition) / (float)
pageSize;
if (columnSize > pageSize) {
diff --git
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/lsm/tuples/ColumnMultiBufferProvider.java
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/lsm/tuples/ColumnMultiBufferProvider.java
index 34ec856..2e0ccfb 100644
---
a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/lsm/tuples/ColumnMultiBufferProvider.java
+++
b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/lsm/tuples/ColumnMultiBufferProvider.java
@@ -61,9 +61,14 @@
startOffset = offset % multiPageOp.getPageSize();
//Duplicate as the buffer could be shared by more than one column
ByteBuffer firstPage = readNext().duplicate();
+ // Set the limit to read the length of the column
+ firstPage.limit(startOffset + Integer.BYTES);
+ // Set position at the start of column
firstPage.position(startOffset);
- //Read the length
+ // Read the length of this column
length = firstPage.getInt();
+ // Ensure the page limit is at most a full page
+ firstPage.limit(Math.min(length, multiPageOp.getPageSize()));
int remainingLength = length - firstPage.remaining();
numberOfPages = (int) Math.ceil((double) remainingLength /
multiPageOp.getPageSize());
//+4-bytes after reading the length
diff --git
a/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/file/CompressedBufferedFileHandle.java
b/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/file/CompressedBufferedFileHandle.java
index 01811c7..0cb4c9d 100644
---
a/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/file/CompressedBufferedFileHandle.java
+++
b/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/file/CompressedBufferedFileHandle.java
@@ -103,7 +103,13 @@
final long bytesWritten;
final long expectedBytesWritten;
- fixBufferPointers(uBuffer, 0);
+ if (cPage.isLargePage()) {
+ // When a page is large, ensure buffer limit is set to a full
page length
+ fixBufferPointers(uBuffer, 0);
+ } else {
+ // Otherwise, the limit must be kept as provided
+ uBuffer.position(0);
+ }
if (compressToWriteBuffer(uBuffer, cBuffer) <
bufferCache.getPageSize()) {
cBuffer.position(0);
final long offset =
compressedFileManager.writePageInfo(pageId, cBuffer.remaining());
@@ -233,7 +239,6 @@
private void uncompressToPageBuffer(ByteBuffer cBuffer, ByteBuffer
uBuffer) throws HyracksDataException {
final ICompressorDecompressor compDecomp =
compressedFileManager.getCompressorDecompressor();
compDecomp.uncompress(cBuffer, uBuffer);
- verifyUncompressionSize(bufferCache.getPageSize(),
uBuffer.remaining());
}
private int compressToWriteBuffer(ByteBuffer uBuffer, ByteBuffer cBuffer)
throws HyracksDataException {
@@ -247,10 +252,4 @@
return
compDecomp.computeCompressedBufferSize(bufferCache.getPageSize());
}
- private void verifyUncompressionSize(int expected, int actual) {
- if (expected != actual) {
- throwException("Uncompressed", expected, actual);
- }
- }
-
}
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18176
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I9465a20d8f938320aff844bb313a5ee65331ba83
Gerrit-Change-Number: 18176
Gerrit-PatchSet: 1
Gerrit-Owner: Wail Alkowaileet <[email protected]>
Gerrit-MessageType: newchange