Github user kumarvishal09 commented on a diff in the pull request: https://github.com/apache/carbondata/pull/2149#discussion_r181554900 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimensionDataChunkStore.java --- @@ -78,70 +88,96 @@ public UnsafeVariableLengthDimensionDataChunkStore(long totalSize, boolean isInv // start position will be used to store the current data position int startOffset = 0; - // position from where offsets will start - long pointerOffsets = this.dataPointersOffsets; // as first position will be start from 2 byte as data is stored first in the memory block // we need to skip first two bytes this is because first two bytes will be length of the data // which we have to skip - CarbonUnsafe.getUnsafe().putInt(dataPageMemoryBlock.getBaseObject(), - dataPageMemoryBlock.getBaseOffset() + pointerOffsets, - CarbonCommonConstants.SHORT_SIZE_IN_BYTE); - // incrementing the pointers as first value is already filled and as we are storing as int - // we need to increment the 4 bytes to set the position of the next value to set - pointerOffsets += CarbonCommonConstants.INT_SIZE_IN_BYTE; + int [] dataOffsets = new int[numberOfRows]; + dataOffsets[0] = CarbonCommonConstants.SHORT_SIZE_IN_BYTE; // creating a byte buffer which will wrap the length of the row - // using byte buffer as unsafe will return bytes in little-endian encoding - ByteBuffer buffer = ByteBuffer.allocate(CarbonCommonConstants.SHORT_SIZE_IN_BYTE); - // store length of data - byte[] length = new byte[CarbonCommonConstants.SHORT_SIZE_IN_BYTE]; - // as first offset is already stored, we need to start from the 2nd row in data array + ByteBuffer buffer = ByteBuffer.wrap(data); for (int i = 1; i < numberOfRows; i++) { - // first copy the length of previous row - CarbonUnsafe.getUnsafe().copyMemory(dataPageMemoryBlock.getBaseObject(), - dataPageMemoryBlock.getBaseOffset() + startOffset, length, CarbonUnsafe.BYTE_ARRAY_OFFSET, - CarbonCommonConstants.SHORT_SIZE_IN_BYTE); - buffer.put(length); - buffer.flip(); + buffer.position(startOffset); // so current row position will be // previous row length + 2 bytes used for storing previous row data - startOffset += CarbonCommonConstants.SHORT_SIZE_IN_BYTE + buffer.getShort(); + startOffset += buffer.getShort() + CarbonCommonConstants.SHORT_SIZE_IN_BYTE; // as same byte buffer is used to avoid creating many byte buffer for each row // we need to clear the byte buffer - buffer.clear(); - // now put the offset of current row, here we need to add 2 more bytes as current will - // also have length part so we have to skip length - CarbonUnsafe.getUnsafe().putInt(dataPageMemoryBlock.getBaseObject(), - dataPageMemoryBlock.getBaseOffset() + pointerOffsets, - startOffset + CarbonCommonConstants.SHORT_SIZE_IN_BYTE); - // incrementing the pointers as first value is already filled and as we are storing as int - // we need to increment the 4 bytes to set the position of the next value to set - pointerOffsets += CarbonCommonConstants.INT_SIZE_IN_BYTE; + dataOffsets[i] = startOffset + CarbonCommonConstants.SHORT_SIZE_IN_BYTE; } - + CarbonUnsafe.getUnsafe().copyMemory(dataOffsets, CarbonUnsafe.INT_ARRAY_OFFSET, + dataPageMemoryBlock.getBaseObject(), + dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets, + dataOffsets.length * CarbonCommonConstants.INT_SIZE_IN_BYTE); } /** * Below method will be used to get the row based on row id passed - * + * Getting the row from unsafe works in below logic + * 1. if inverted index is present then get the row id based on reverse inverted index + * 2. get the current row id data offset + * 3. if it's not a last row- get the next row offset + * Subtract the current row offset + 2 bytes(to skip the data length) with next row offset + * 4. if it's last row + * subtract the current row offset + 2 bytes(to skip the data length) with complete data length * @param rowId * @return row */ @Override public byte[] getRow(int rowId) { + // get the actual row id + rowId = getRowId(rowId); + // get offset of data in unsafe + int currentDataOffset = getOffSet(rowId); + // get the data length + short length = getLength(rowId, currentDataOffset); + // create data array + byte[] data = new byte[length]; + // fill the row data + fillRowInternal(length, data, currentDataOffset); + return data; + } + + /** + * Returns the actual row id for data + * if inverted index is present then get the row id based on reverse inverted index + * otherwise return the same row id + * @param rowId + * row id + * @return actual row id + */ + private int getRowId(int rowId) { // if column was explicitly sorted we need to get the rowid based inverted index reverse if (isExplicitSorted) { rowId = CarbonUnsafe.getUnsafe().getInt(dataPageMemoryBlock.getBaseObject(), dataPageMemoryBlock.getBaseOffset() + this.invertedIndexReverseOffset + ((long)rowId * CarbonCommonConstants.INT_SIZE_IN_BYTE)); } - // now to get the row from memory block we need to do following thing - // 1. first get the current offset - // 2. if it's not a last row- get the next row offset - // Subtract the current row offset + 2 bytes(to skip the data length) with next row offset - // else subtract the current row offset + 2 bytes(to skip the data length) - // with complete data length - int currentDataOffset = CarbonUnsafe.getUnsafe().getInt(dataPageMemoryBlock.getBaseObject(), - dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets + (rowId + return rowId; + } + + /** + * get data offset based on current row id + * @param rowId + * row id --- End diff -- ok
---