Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/2149#discussion_r181342780
--- Diff:
core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimensionDataChunkStore.java
---
@@ -78,70 +88,96 @@ public UnsafeVariableLengthDimensionDataChunkStore(long
totalSize, boolean isInv
// start position will be used to store the current data position
int startOffset = 0;
- // position from where offsets will start
- long pointerOffsets = this.dataPointersOffsets;
// as first position will be start from 2 byte as data is stored first
in the memory block
// we need to skip first two bytes this is because first two bytes
will be length of the data
// which we have to skip
- CarbonUnsafe.getUnsafe().putInt(dataPageMemoryBlock.getBaseObject(),
- dataPageMemoryBlock.getBaseOffset() + pointerOffsets,
- CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
- // incrementing the pointers as first value is already filled and as
we are storing as int
- // we need to increment the 4 bytes to set the position of the next
value to set
- pointerOffsets += CarbonCommonConstants.INT_SIZE_IN_BYTE;
+ int [] dataOffsets = new int[numberOfRows];
+ dataOffsets[0] = CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
// creating a byte buffer which will wrap the length of the row
- // using byte buffer as unsafe will return bytes in little-endian
encoding
- ByteBuffer buffer =
ByteBuffer.allocate(CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
- // store length of data
- byte[] length = new byte[CarbonCommonConstants.SHORT_SIZE_IN_BYTE];
- // as first offset is already stored, we need to start from the 2nd
row in data array
+ ByteBuffer buffer = ByteBuffer.wrap(data);
for (int i = 1; i < numberOfRows; i++) {
- // first copy the length of previous row
-
CarbonUnsafe.getUnsafe().copyMemory(dataPageMemoryBlock.getBaseObject(),
- dataPageMemoryBlock.getBaseOffset() + startOffset, length,
CarbonUnsafe.BYTE_ARRAY_OFFSET,
- CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
- buffer.put(length);
- buffer.flip();
+ buffer.position(startOffset);
// so current row position will be
// previous row length + 2 bytes used for storing previous row data
- startOffset += CarbonCommonConstants.SHORT_SIZE_IN_BYTE +
buffer.getShort();
+ startOffset += buffer.getShort() +
CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
// as same byte buffer is used to avoid creating many byte buffer
for each row
// we need to clear the byte buffer
- buffer.clear();
- // now put the offset of current row, here we need to add 2 more
bytes as current will
- // also have length part so we have to skip length
- CarbonUnsafe.getUnsafe().putInt(dataPageMemoryBlock.getBaseObject(),
- dataPageMemoryBlock.getBaseOffset() + pointerOffsets,
- startOffset + CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
- // incrementing the pointers as first value is already filled and as
we are storing as int
- // we need to increment the 4 bytes to set the position of the next
value to set
- pointerOffsets += CarbonCommonConstants.INT_SIZE_IN_BYTE;
+ dataOffsets[i] = startOffset +
CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
}
-
+ CarbonUnsafe.getUnsafe().copyMemory(dataOffsets,
CarbonUnsafe.INT_ARRAY_OFFSET,
+ dataPageMemoryBlock.getBaseObject(),
+ dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets,
+ dataOffsets.length * CarbonCommonConstants.INT_SIZE_IN_BYTE);
}
/**
* Below method will be used to get the row based on row id passed
- *
+ * Getting the row from unsafe works in below logic
+ * 1. if inverted index is present then get the row id based on reverse
inverted index
+ * 2. get the current row id data offset
+ * 3. if it's not a last row- get the next row offset
+ * Subtract the current row offset + 2 bytes(to skip the data length)
with next row offset
+ * 4. if it's last row
+ * subtract the current row offset + 2 bytes(to skip the data length)
with complete data length
* @param rowId
* @return row
*/
@Override public byte[] getRow(int rowId) {
+ // get the actual row id
+ rowId = getRowId(rowId);
+ // get offset of data in unsafe
+ int currentDataOffset = getOffSet(rowId);
+ // get the data length
+ short length = getLength(rowId, currentDataOffset);
+ // create data array
+ byte[] data = new byte[length];
+ // fill the row data
+ fillRowInternal(length, data, currentDataOffset);
+ return data;
+ }
+
+ /**
+ * Returns the actual row id for data
+ * if inverted index is present then get the row id based on reverse
inverted index
+ * otherwise return the same row id
+ * @param rowId
+ * row id
+ * @return actual row id
+ */
+ private int getRowId(int rowId) {
// if column was explicitly sorted we need to get the rowid based
inverted index reverse
if (isExplicitSorted) {
rowId =
CarbonUnsafe.getUnsafe().getInt(dataPageMemoryBlock.getBaseObject(),
dataPageMemoryBlock.getBaseOffset() +
this.invertedIndexReverseOffset + ((long)rowId
* CarbonCommonConstants.INT_SIZE_IN_BYTE));
}
- // now to get the row from memory block we need to do following thing
- // 1. first get the current offset
- // 2. if it's not a last row- get the next row offset
- // Subtract the current row offset + 2 bytes(to skip the data length)
with next row offset
- // else subtract the current row offset + 2 bytes(to skip the data
length)
- // with complete data length
- int currentDataOffset =
CarbonUnsafe.getUnsafe().getInt(dataPageMemoryBlock.getBaseObject(),
- dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets +
(rowId
+ return rowId;
+ }
+
+ /**
+ * get data offset based on current row id
+ * @param rowId
+ * row id
--- End diff --
move it to previous line, please modify all places
---