[GitHub] carbondata pull request #2149: [CARBONDATA-2325]Page level uncompress and Im...
Github user asfgit closed the pull request at: https://github.com/apache/carbondata/pull/2149 ---
[GitHub] carbondata pull request #2149: [CARBONDATA-2325]Page level uncompress and Im...
Github user kumarvishal09 commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2149#discussion_r181554900

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimensionDataChunkStore.java ---
@@ -78,70 +88,96 @@ public UnsafeVariableLengthDimensionDataChunkStore(long totalSize, boolean isInv
     // start position will be used to store the current data position
     int startOffset = 0;
-    // position from where offsets will start
-    long pointerOffsets = this.dataPointersOffsets;
     // as first position will be start from 2 byte as data is stored first in the memory block
     // we need to skip first two bytes this is because first two bytes will be length of the data
     // which we have to skip
-    CarbonUnsafe.getUnsafe().putInt(dataPageMemoryBlock.getBaseObject(),
-        dataPageMemoryBlock.getBaseOffset() + pointerOffsets,
-        CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
-    // incrementing the pointers as first value is already filled and as we are storing as int
-    // we need to increment the 4 bytes to set the position of the next value to set
-    pointerOffsets += CarbonCommonConstants.INT_SIZE_IN_BYTE;
+    int [] dataOffsets = new int[numberOfRows];
+    dataOffsets[0] = CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
     // creating a byte buffer which will wrap the length of the row
-    // using byte buffer as unsafe will return bytes in little-endian encoding
-    ByteBuffer buffer = ByteBuffer.allocate(CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
-    // store length of data
-    byte[] length = new byte[CarbonCommonConstants.SHORT_SIZE_IN_BYTE];
-    // as first offset is already stored, we need to start from the 2nd row in data array
+    ByteBuffer buffer = ByteBuffer.wrap(data);
     for (int i = 1; i < numberOfRows; i++) {
-      // first copy the length of previous row
-      CarbonUnsafe.getUnsafe().copyMemory(dataPageMemoryBlock.getBaseObject(),
-          dataPageMemoryBlock.getBaseOffset() + startOffset, length, CarbonUnsafe.BYTE_ARRAY_OFFSET,
-          CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
-      buffer.put(length);
-      buffer.flip();
+      buffer.position(startOffset);
       // so current row position will be
       // previous row length + 2 bytes used for storing previous row data
-      startOffset += CarbonCommonConstants.SHORT_SIZE_IN_BYTE + buffer.getShort();
+      startOffset += buffer.getShort() + CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
       // as same byte buffer is used to avoid creating many byte buffer for each row
       // we need to clear the byte buffer
-      buffer.clear();
-      // now put the offset of current row, here we need to add 2 more bytes as current will
-      // also have length part so we have to skip length
-      CarbonUnsafe.getUnsafe().putInt(dataPageMemoryBlock.getBaseObject(),
-          dataPageMemoryBlock.getBaseOffset() + pointerOffsets,
-          startOffset + CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
-      // incrementing the pointers as first value is already filled and as we are storing as int
-      // we need to increment the 4 bytes to set the position of the next value to set
-      pointerOffsets += CarbonCommonConstants.INT_SIZE_IN_BYTE;
+      dataOffsets[i] = startOffset + CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
     }
-
+    CarbonUnsafe.getUnsafe().copyMemory(dataOffsets, CarbonUnsafe.INT_ARRAY_OFFSET,
+        dataPageMemoryBlock.getBaseObject(),
+        dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets,
+        dataOffsets.length * CarbonCommonConstants.INT_SIZE_IN_BYTE);
   }

   /**
    * Below method will be used to get the row based on row id passed
-   *
+   * Getting the row from unsafe works in below logic
+   * 1. if inverted index is present then get the row id based on reverse inverted index
+   * 2. get the current row id data offset
+   * 3. if it's not a last row- get the next row offset
+   * Subtract the current row offset + 2 bytes(to skip the data length) with next row offset
+   * 4. if it's last row
+   * subtract the current row offset + 2 bytes(to skip the data length) with complete data length
    * @param rowId
    * @return row
    */
   @Override public byte[] getRow(int rowId) {
+    // get the actual row id
+    rowId = getRowId(rowId);
+    // get offset of data in unsafe
+    int currentDataOffset = getOffSet(rowId);
+    // get the data length
+    short length = getLength(rowId, currentDataOffset);
+    // create data array
+    byte[] data = new byte[length];
+    // fill the row data
+
[GitHub] carbondata pull request #2149: [CARBONDATA-2325]Page level uncompress and Im...
Github user jackylk commented on a diff in the pull request:

https://github.com/apache/carbondata/pull/2149#discussion_r181342780

--- Diff: core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeVariableLengthDimensionDataChunkStore.java ---
@@ -78,70 +88,96 @@ public UnsafeVariableLengthDimensionDataChunkStore(long totalSize, boolean isInv
     // start position will be used to store the current data position
     int startOffset = 0;
-    // position from where offsets will start
-    long pointerOffsets = this.dataPointersOffsets;
     // as first position will be start from 2 byte as data is stored first in the memory block
     // we need to skip first two bytes this is because first two bytes will be length of the data
     // which we have to skip
-    CarbonUnsafe.getUnsafe().putInt(dataPageMemoryBlock.getBaseObject(),
-        dataPageMemoryBlock.getBaseOffset() + pointerOffsets,
-        CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
-    // incrementing the pointers as first value is already filled and as we are storing as int
-    // we need to increment the 4 bytes to set the position of the next value to set
-    pointerOffsets += CarbonCommonConstants.INT_SIZE_IN_BYTE;
+    int [] dataOffsets = new int[numberOfRows];
+    dataOffsets[0] = CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
     // creating a byte buffer which will wrap the length of the row
-    // using byte buffer as unsafe will return bytes in little-endian encoding
-    ByteBuffer buffer = ByteBuffer.allocate(CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
-    // store length of data
-    byte[] length = new byte[CarbonCommonConstants.SHORT_SIZE_IN_BYTE];
-    // as first offset is already stored, we need to start from the 2nd row in data array
+    ByteBuffer buffer = ByteBuffer.wrap(data);
     for (int i = 1; i < numberOfRows; i++) {
-      // first copy the length of previous row
-      CarbonUnsafe.getUnsafe().copyMemory(dataPageMemoryBlock.getBaseObject(),
-          dataPageMemoryBlock.getBaseOffset() + startOffset, length, CarbonUnsafe.BYTE_ARRAY_OFFSET,
-          CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
-      buffer.put(length);
-      buffer.flip();
+      buffer.position(startOffset);
       // so current row position will be
       // previous row length + 2 bytes used for storing previous row data
-      startOffset += CarbonCommonConstants.SHORT_SIZE_IN_BYTE + buffer.getShort();
+      startOffset += buffer.getShort() + CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
       // as same byte buffer is used to avoid creating many byte buffer for each row
       // we need to clear the byte buffer
-      buffer.clear();
-      // now put the offset of current row, here we need to add 2 more bytes as current will
-      // also have length part so we have to skip length
-      CarbonUnsafe.getUnsafe().putInt(dataPageMemoryBlock.getBaseObject(),
-          dataPageMemoryBlock.getBaseOffset() + pointerOffsets,
-          startOffset + CarbonCommonConstants.SHORT_SIZE_IN_BYTE);
-      // incrementing the pointers as first value is already filled and as we are storing as int
-      // we need to increment the 4 bytes to set the position of the next value to set
-      pointerOffsets += CarbonCommonConstants.INT_SIZE_IN_BYTE;
+      dataOffsets[i] = startOffset + CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
     }
-
+    CarbonUnsafe.getUnsafe().copyMemory(dataOffsets, CarbonUnsafe.INT_ARRAY_OFFSET,
+        dataPageMemoryBlock.getBaseObject(),
+        dataPageMemoryBlock.getBaseOffset() + this.dataPointersOffsets,
+        dataOffsets.length * CarbonCommonConstants.INT_SIZE_IN_BYTE);
   }

   /**
    * Below method will be used to get the row based on row id passed
-   *
+   * Getting the row from unsafe works in below logic
+   * 1. if inverted index is present then get the row id based on reverse inverted index
+   * 2. get the current row id data offset
+   * 3. if it's not a last row- get the next row offset
+   * Subtract the current row offset + 2 bytes(to skip the data length) with next row offset
+   * 4. if it's last row
+   * subtract the current row offset + 2 bytes(to skip the data length) with complete data length
    * @param rowId
    * @return row
    */
   @Override public byte[] getRow(int rowId) {
+    // get the actual row id
+    rowId = getRowId(rowId);
+    // get offset of data in unsafe
+    int currentDataOffset = getOffSet(rowId);
+    // get the data length
+    short length = getLength(rowId, currentDataOffset);
+    // create data array
+    byte[] data = new byte[length];
+    // fill the row data
+