lru object size calculation
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/377dee94 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/377dee94 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/377dee94 Branch: refs/heads/streaming_ingest Commit: 377dee94780a4fe6073c855cc980ac234a576bf6 Parents: ac2168a Author: Raghunandan S <[email protected]> Authored: Wed Jun 14 21:01:07 2017 +0530 Committer: sraghunandan <[email protected]> Committed: Thu Jun 29 01:59:21 2017 +0530 ---------------------------------------------------------------------- .../carbondata/core/cache/CarbonLRUCache.java | 29 +++++++ .../AbstractColumnDictionaryInfo.java | 15 +++- .../dictionary/AbstractDictionaryCache.java | 89 +++++++++++++++++--- .../core/cache/dictionary/DictionaryInfo.java | 12 +++ .../dictionary/ForwardDictionaryCache.java | 42 +++++++++ .../dictionary/ReverseDictionaryCache.java | 54 ++++++++++++ .../datastore/AbstractBlockIndexStoreCache.java | 4 +- .../core/datastore/SegmentTaskIndexStore.java | 19 +++-- .../reader/CarbonDictionaryMetadataReader.java | 13 +++ .../CarbonDictionaryMetadataReaderImpl.java | 17 ++++ .../core/util/ObjectSizeCalculator.java | 71 ++++++++++++++++ .../dictionary/ReverseDictionaryCacheTest.java | 2 +- 12 files changed, 347 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/CarbonLRUCache.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/cache/CarbonLRUCache.java b/core/src/main/java/org/apache/carbondata/core/cache/CarbonLRUCache.java index 23dd814..03838a2 100644 --- a/core/src/main/java/org/apache/carbondata/core/cache/CarbonLRUCache.java +++ b/core/src/main/java/org/apache/carbondata/core/cache/CarbonLRUCache.java @@ -199,6 +199,35 @@ public final class CarbonLRUCache { } /** + * This method will check if required size is available in the memory + * @param columnIdentifier + * @param requiredSize + * @return + */ + public boolean tryPut(String columnIdentifier, long requiredSize) { + if (LOGGER.isDebugEnabled()) { + LOGGER.debug("checking Required size for entry " + columnIdentifier + " :: " + requiredSize + + " Current cache size :: " + currentSize); + } + boolean columnKeyCanBeAdded = false; + if (isLRUCacheSizeConfigured()) { + synchronized (lruCacheMap) { + if (freeMemorySizeForAddingCache(requiredSize)) { + columnKeyCanBeAdded = true; + } else { + LOGGER.error( + "Size check failed.Size not available. Entry cannot be added to lru cache :: " + + columnIdentifier + " .Required Size = " + requiredSize + " Size available " + ( + lruCacheMemorySize - currentSize)); + } + } + } else { + columnKeyCanBeAdded = true; + } + return columnKeyCanBeAdded; + } + + /** * The method will add the cache entry to LRU cache map * * @param columnIdentifier http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractColumnDictionaryInfo.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractColumnDictionaryInfo.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractColumnDictionaryInfo.java index 18f4885..7e8a1c8 100644 --- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractColumnDictionaryInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractColumnDictionaryInfo.java @@ -56,6 +56,11 @@ public abstract class AbstractColumnDictionaryInfo implements DictionaryInfo { protected long offsetTillFileIsRead; /** + * memory size of this object.We store it as calculation everytime is costly + */ + protected long memorySize; + + /** * length of dictionary metadata file */ private long dictionaryMetaFileLength; @@ -91,7 +96,11 @@ public abstract class AbstractColumnDictionaryInfo implements DictionaryInfo { * @return */ @Override public long getMemorySize() { - return offsetTillFileIsRead; + return memorySize; + } + + @Override public void setMemorySize(long memorySize) { + this.memorySize = memorySize; } /** @@ -131,6 +140,10 @@ public abstract class AbstractColumnDictionaryInfo implements DictionaryInfo { this.offsetTillFileIsRead = offsetTillFileIsRead; } + @Override public long getOffsetTillFileIsRead() { + return offsetTillFileIsRead; + } + /** * This method will update the timestamp of a file if a file is modified * like in case of incremental load http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractDictionaryCache.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractDictionaryCache.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractDictionaryCache.java index d7c25f1..f0b8c78 100644 --- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractDictionaryCache.java +++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/AbstractDictionaryCache.java @@ -32,6 +32,7 @@ import org.apache.carbondata.core.service.CarbonCommonFactory; import org.apache.carbondata.core.service.DictionaryService; import org.apache.carbondata.core.service.PathService; import org.apache.carbondata.core.util.CarbonProperties; +import org.apache.carbondata.core.util.ObjectSizeCalculator; import org.apache.carbondata.core.util.path.CarbonTablePath; /** @@ -107,6 +108,34 @@ public abstract class AbstractDictionaryCache<K extends DictionaryColumnUniqueId } /** + * get the dictionary column meta chunk for object already read and stored in LRU cache + * @param dictionaryColumnUniqueIdentifier + * @param offsetRead + * @return + * @throws IOException + */ + protected long getNumRecordsInCarbonDictionaryColumnMetaChunk( + DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier, long offsetRead) + throws IOException { + DictionaryService dictService = CarbonCommonFactory.getDictionaryService(); + CarbonDictionaryMetadataReader columnMetadataReaderImpl = dictService + .getDictionaryMetadataReader( + dictionaryColumnUniqueIdentifier.getCarbonTableIdentifier(), + dictionaryColumnUniqueIdentifier.getColumnIdentifier(), carbonStorePath); + + CarbonDictionaryColumnMetaChunk carbonDictionaryColumnMetaChunk = null; + // read metadata file + try { + carbonDictionaryColumnMetaChunk = + columnMetadataReaderImpl.readEntryOfDictionaryMetaChunk(offsetRead); + } finally { + // close the metadata reader + columnMetadataReaderImpl.close(); + } + return carbonDictionaryColumnMetaChunk.getMax_surrogate_key(); + } + + /** * This method will validate dictionary metadata file for any modification * * @param carbonFile @@ -141,6 +170,12 @@ public abstract class AbstractDictionaryCache<K extends DictionaryColumnUniqueId return dictFile; } + protected long getSortIndexSize(long numOfRecords) { + // sort index has sort index and reverse sort index,each is 4 byte integer. + // 32 byte is the array header of both the integer arrays + return numOfRecords * ObjectSizeCalculator.estimate(new Integer(0), 16) * 2 + 32; + } + /** * This method will get the value for the given key. If value does not exist * for the given key, it will check and load the value. @@ -176,28 +211,37 @@ public abstract class AbstractDictionaryCache<K extends DictionaryColumnUniqueId if (dictionaryMetaFileModified) { CarbonDictionaryColumnMetaChunk carbonDictionaryColumnMetaChunk = readLastChunkFromDictionaryMetadataFile(dictionaryColumnUniqueIdentifier); - // required size will be size total size of file - offset till file is - // already read - long requiredSize = - carbonDictionaryColumnMetaChunk.getEnd_offset() - dictionaryInfo.getMemorySize(); + + long requiredSize = getEstimatedDictionarySize(dictionaryInfo, + carbonDictionaryColumnMetaChunk, + dictionaryColumnUniqueIdentifier, loadSortIndex); + if (requiredSize > 0) { - boolean columnAddedToLRUCache = - carbonLRUCache.put(lruCacheKey, dictionaryInfo, requiredSize); - // if column is successfully added to lru cache then only load the + dictionaryInfo.setMemorySize(requiredSize); + boolean colCanBeAddedToLRUCache = + carbonLRUCache.tryPut(lruCacheKey, requiredSize); + // if column can be added to lru cache then only load the // dictionary data - if (columnAddedToLRUCache) { + if (colCanBeAddedToLRUCache) { // load dictionary data loadDictionaryData(dictionaryInfo, dictionaryColumnUniqueIdentifier, - dictionaryInfo.getMemorySize(), carbonDictionaryColumnMetaChunk.getEnd_offset(), - loadSortIndex); + dictionaryInfo.getOffsetTillFileIsRead(), + carbonDictionaryColumnMetaChunk.getEnd_offset(), + loadSortIndex); // set the end offset till where file is read dictionaryInfo - .setOffsetTillFileIsRead(carbonDictionaryColumnMetaChunk.getEnd_offset()); + .setOffsetTillFileIsRead(carbonDictionaryColumnMetaChunk.getEnd_offset()); + long updateRequiredSize = ObjectSizeCalculator.estimate(dictionaryInfo, requiredSize); + dictionaryInfo.setMemorySize(updateRequiredSize); + if (!carbonLRUCache.put(lruCacheKey, dictionaryInfo, updateRequiredSize)) { + throw new DictionaryBuilderException( + "Cannot load dictionary into memory. Not enough memory available"); + } dictionaryInfo.setFileTimeStamp(carbonFile.getLastModifiedTime()); dictionaryInfo.setDictionaryMetaFileLength(carbonFile.getSize()); } else { throw new DictionaryBuilderException( - "Cannot load dictionary into memory. Not enough memory available"); + "Cannot load dictionary into memory. Not enough memory available"); } } } @@ -262,4 +306,25 @@ public abstract class AbstractDictionaryCache<K extends DictionaryColumnUniqueId dictionary.clear(); } } + + /** + * calculate the probable size of Dictionary in java heap + * Use the value to check if can be added to lru cache + * This helps to avoid unnecessary loading of dictionary files + * if estimated size more than that can be fit into lru cache + * Estimated size can be less or greater than the actual size + * due to java optimizations + * @param dictionaryInfo + * @param carbonDictionaryColumnMetaChunk + * @param dictionaryColumnUniqueIdentifier + * @param readSortIndexSize + * @return + * @throws IOException + */ + protected long getEstimatedDictionarySize(DictionaryInfo dictionaryInfo, + CarbonDictionaryColumnMetaChunk carbonDictionaryColumnMetaChunk, + DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier, boolean + readSortIndexSize) throws IOException { + return 0; + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/dictionary/DictionaryInfo.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/DictionaryInfo.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/DictionaryInfo.java index a14c3d6..0fbb4bb 100644 --- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/DictionaryInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/DictionaryInfo.java @@ -41,6 +41,18 @@ public interface DictionaryInfo extends Cacheable, Dictionary { void setOffsetTillFileIsRead(long offsetTillFileIsRead); /** + * offset till the file is read + * @return + */ + long getOffsetTillFileIsRead(); + + /** + * the memory size of this object after loaded into memory + * @param memorySize + */ + void setMemorySize(long memorySize); + + /** * This method will update the timestamp of a file if a file is modified * like in case of incremental load * http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ForwardDictionaryCache.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ForwardDictionaryCache.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ForwardDictionaryCache.java index e46f3f5..b23bd49 100644 --- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ForwardDictionaryCache.java +++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ForwardDictionaryCache.java @@ -32,6 +32,9 @@ import org.apache.carbondata.common.logging.LogService; import org.apache.carbondata.common.logging.LogServiceFactory; import org.apache.carbondata.core.cache.CacheType; import org.apache.carbondata.core.cache.CarbonLRUCache; +import org.apache.carbondata.core.reader.CarbonDictionaryColumnMetaChunk; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.ObjectSizeCalculator; /** * This class implements methods to create dictionary cache which will hold @@ -49,6 +52,11 @@ public class ForwardDictionaryCache<K extends private static final Map<DictionaryColumnUniqueIdentifier, Object> DICTIONARY_LOCK_OBJECT = new HashMap<>(); + private static final long sizeOfEmptyDictChunks = + ObjectSizeCalculator.estimate(new ArrayList<byte[]>(CarbonUtil.getDictionaryChunkSize()), 16); + + private static final long byteArraySize = ObjectSizeCalculator.estimate(new byte[0], 16); + /** * @param carbonStorePath * @param carbonLRUCache @@ -231,4 +239,38 @@ public class ForwardDictionaryCache<K extends cacheable.clear(); } } + + @Override protected long getEstimatedDictionarySize(DictionaryInfo dictionaryInfo, + CarbonDictionaryColumnMetaChunk carbonDictionaryColumnMetaChunk, + DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier, boolean + readSortIndexSize) throws IOException { + // required size will be size total size of file - offset till file is + // already read + long requiredSize = + carbonDictionaryColumnMetaChunk.getEnd_offset() - + dictionaryInfo.getOffsetTillFileIsRead(); + + long numOfRecords = dictionaryInfo.getOffsetTillFileIsRead() == 0 ? + carbonDictionaryColumnMetaChunk.getMax_surrogate_key() : + carbonDictionaryColumnMetaChunk.getMax_surrogate_key() + - getNumRecordsInCarbonDictionaryColumnMetaChunk( + dictionaryColumnUniqueIdentifier, + dictionaryInfo.getOffsetTillFileIsRead()); + + if (numOfRecords > 0) { + long avgRecordsSize = requiredSize / numOfRecords; + long bytesPerRecord = (long)Math.ceil(avgRecordsSize / 8.0) * 8; + + requiredSize = (bytesPerRecord + byteArraySize) * numOfRecords; + } + + if (readSortIndexSize) { + // every time we are loading all the sort index files.Hence memory calculation for all + // the records + requiredSize = requiredSize + getSortIndexSize( + carbonDictionaryColumnMetaChunk.getMax_surrogate_key()); + } + + return requiredSize + sizeOfEmptyDictChunks; + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCache.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCache.java b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCache.java index d63d42a..28568b5 100644 --- a/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCache.java +++ b/core/src/main/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCache.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -30,6 +31,9 @@ import org.apache.carbondata.common.logging.LogService; import org.apache.carbondata.common.logging.LogServiceFactory; import org.apache.carbondata.core.cache.CacheType; import org.apache.carbondata.core.cache.CarbonLRUCache; +import org.apache.carbondata.core.reader.CarbonDictionaryColumnMetaChunk; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.ObjectSizeCalculator; /** * This class implements methods to create dictionary cache which will hold @@ -45,6 +49,20 @@ public class ReverseDictionaryCache<K extends DictionaryColumnUniqueIdentifier, private static final LogService LOGGER = LogServiceFactory.getLogService(ReverseDictionaryCache.class.getName()); + private static final long sizeOfEmptyDictChunks = + ObjectSizeCalculator.estimate(new ArrayList<byte[]>(CarbonUtil.getDictionaryChunkSize()), 16); + + private static final long sizeOfEmptyHashMap = ObjectSizeCalculator.estimate(new + ConcurrentHashMap<DictionaryByteArrayWrapper, + Integer>(CarbonUtil.getDictionaryChunkSize()), 16); + + private static final long sizeOfHashMapNode = ObjectSizeCalculator.estimate(new + DictionaryByteArrayWrapper(new byte[0]), 16) + + ObjectSizeCalculator.estimate(new Integer(0), 16); + + private static final long byteArraySize = ObjectSizeCalculator.estimate(new byte[0], 16); + + /** * @param carbonStorePath * @param carbonLRUCache @@ -209,4 +227,40 @@ public class ReverseDictionaryCache<K extends DictionaryColumnUniqueIdentifier, cacheable.clear(); } } + + @Override protected long getEstimatedDictionarySize(DictionaryInfo dictionaryInfo, + CarbonDictionaryColumnMetaChunk carbonDictionaryColumnMetaChunk, + DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier, boolean + readSortIndexSize) throws IOException { + // required size will be size total size of file - offset till file is + // already read + long requiredSize = + carbonDictionaryColumnMetaChunk.getEnd_offset() - + dictionaryInfo.getOffsetTillFileIsRead(); + + long numOfRecords = dictionaryInfo.getOffsetTillFileIsRead() == 0 ? + carbonDictionaryColumnMetaChunk.getMax_surrogate_key() : + carbonDictionaryColumnMetaChunk.getMax_surrogate_key() + - getNumRecordsInCarbonDictionaryColumnMetaChunk( + dictionaryColumnUniqueIdentifier, + dictionaryInfo.getOffsetTillFileIsRead()); + + if (numOfRecords > 0) { + long avgRecordsSize = requiredSize / numOfRecords; + long bytesPerRecord = (long)Math.ceil(avgRecordsSize / 8.0) * 8; + + requiredSize = (bytesPerRecord + byteArraySize) * numOfRecords; + } + + if (readSortIndexSize) { + // every time we are loading all the sort index files.Hence memory calculation for all + // the records + requiredSize = requiredSize + getSortIndexSize( + carbonDictionaryColumnMetaChunk.getMax_surrogate_key()); + } + + requiredSize = requiredSize + (sizeOfHashMapNode * numOfRecords); + + return requiredSize + sizeOfEmptyDictChunks + sizeOfEmptyHashMap; + } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/datastore/AbstractBlockIndexStoreCache.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/AbstractBlockIndexStoreCache.java b/core/src/main/java/org/apache/carbondata/core/datastore/AbstractBlockIndexStoreCache.java index 5e8c8a3..8cfc602 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/AbstractBlockIndexStoreCache.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/AbstractBlockIndexStoreCache.java @@ -32,6 +32,7 @@ import org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier; import org.apache.carbondata.core.datastore.exception.IndexBuilderException; import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.ObjectSizeCalculator; /** * This class validate and load the B-Tree in the executor lru cache @@ -92,13 +93,14 @@ public abstract class AbstractBlockIndexStoreCache<K, V> TableBlockInfo blockInfo = tableBlockUniqueIdentifier.getTableBlockInfo(); long requiredMetaSize = CarbonUtil.calculateMetaSize(blockInfo); if (requiredMetaSize > 0) { - tableBlock.setMemorySize(requiredMetaSize); // load table blocks data // getting the data file meta data of the block DataFileFooter footer = CarbonUtil.readMetadatFile(blockInfo); footer.setBlockInfo(new BlockInfo(blockInfo)); // building the block tableBlock.buildIndex(Collections.singletonList(footer)); + requiredMetaSize = ObjectSizeCalculator.estimate(blockInfo, requiredMetaSize); + tableBlock.setMemorySize(requiredMetaSize); tableBlock.incrementAccessCount(); boolean isTableBlockAddedToLruCache = lruCache.put(lruCacheKey, tableBlock, requiredMetaSize); if (!isTableBlockAddedToLruCache) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java index 734aaaf..980ea3e 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/SegmentTaskIndexStore.java @@ -43,6 +43,7 @@ import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; import org.apache.carbondata.core.mutate.UpdateVO; import org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager; import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.ObjectSizeCalculator; import org.apache.carbondata.core.util.path.CarbonTablePath; import org.apache.carbondata.core.util.path.CarbonTablePath.DataFileUtil; @@ -233,11 +234,10 @@ public class SegmentTaskIndexStore taskIdToTableBlockInfoMap.entrySet().iterator(); long requiredSize = calculateRequiredSize(taskIdToTableBlockInfoMap, absoluteTableIdentifier); - segmentTaskIndexWrapper - .setMemorySize(requiredSize + segmentTaskIndexWrapper.getMemorySize()); - boolean isAddedToLruCache = - lruCache.put(lruCacheKey, segmentTaskIndexWrapper, requiredSize); - if (isAddedToLruCache) { + segmentTaskIndexWrapper.setMemorySize(requiredSize); + boolean canAddToLruCache = + lruCache.tryPut(lruCacheKey, requiredSize); + if (canAddToLruCache) { while (iterator.hasNext()) { Map.Entry<TaskBucketHolder, List<TableBlockInfo>> taskToBlockInfoList = iterator.next(); @@ -246,6 +246,15 @@ public class SegmentTaskIndexStore loadBlocks(taskBucketHolder, taskToBlockInfoList.getValue(), absoluteTableIdentifier)); } + long updatedRequiredSize = + ObjectSizeCalculator.estimate(segmentTaskIndexWrapper, requiredSize); + // update the actual size of object + segmentTaskIndexWrapper.setMemorySize(updatedRequiredSize); + if (!lruCache.put(lruCacheKey, segmentTaskIndexWrapper, updatedRequiredSize)) { + throw new IndexBuilderException( + "Can not load the segment. No Enough space available."); + } + } else { throw new IndexBuilderException( "Can not load the segment. No Enough space available."); http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReader.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReader.java b/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReader.java index f2e09ec..cc14187 100644 --- a/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReader.java +++ b/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReader.java @@ -53,4 +53,17 @@ public interface CarbonDictionaryMetadataReader extends Closeable { * @throws IOException if an I/O error occurs */ CarbonDictionaryColumnMetaChunk readLastEntryOfDictionaryMetaChunk() throws IOException; + + /** + * This method will be used to read the last dictionary meta chunk ending at end_Offset. + * Applicable scenarios : + * 1. When loading into LRU cache, we need to calculate the size of Object in memory,for + * this we need the number of records already loaded into LRU cache, so that we can calculate + * the memory required for incremental load + * + * @return last segment entry for dictionary chunk + * @throws IOException if an I/O error occurs + */ + CarbonDictionaryColumnMetaChunk readEntryOfDictionaryMetaChunk(long end_Offset) + throws IOException; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReaderImpl.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReaderImpl.java b/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReaderImpl.java index 98c0ddb..9356974 100644 --- a/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReaderImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/reader/CarbonDictionaryMetadataReaderImpl.java @@ -133,6 +133,23 @@ public class CarbonDictionaryMetadataReaderImpl implements CarbonDictionaryMetad return getNewInstanceOfCarbonDictionaryColumnMetaChunk(dictionaryChunkMeta); } + @Override public CarbonDictionaryColumnMetaChunk readEntryOfDictionaryMetaChunk(long end_Offset) + throws IOException { + ColumnDictionaryChunkMeta dictionaryChunkMeta = null; + // open dictionary meta thrift reader + openThriftReader(); + // at the completion of while loop we will get the last dictionary chunk entry + while (dictionaryMetadataFileReader.hasNext()) { + // get the thrift object for dictionary chunk + dictionaryChunkMeta = (ColumnDictionaryChunkMeta) dictionaryMetadataFileReader.read(); + if (dictionaryChunkMeta.end_offset >= end_Offset) { + break; + } + } + // create a new instance of chunk meta wrapper using thrift object + return getNewInstanceOfCarbonDictionaryColumnMetaChunk(dictionaryChunkMeta); + } + /** * Closes this stream and releases any system resources associated * with it. If the stream is already closed then invoking this http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/main/java/org/apache/carbondata/core/util/ObjectSizeCalculator.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/ObjectSizeCalculator.java b/core/src/main/java/org/apache/carbondata/core/util/ObjectSizeCalculator.java new file mode 100644 index 0000000..513e786 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/util/ObjectSizeCalculator.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.util; + +import java.lang.reflect.Method; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; + +/** + * This wrapper class is created so that core doesnt have direct dependency on spark + * TODO: Need to have carbon implementation if carbon needs to be used without spark + */ +public final class ObjectSizeCalculator { + /** + * Logger object for the class + */ + private static final LogService LOGGER = + LogServiceFactory.getLogService(ObjectSizeCalculator.class.getName()); + + /** + * Class of spark to invoke + */ + private static String className = "org.apache.spark.util.SizeEstimator"; + + private static Method estimateMethod = null; + + private static boolean methodAccessible = true; + + /** + * Invoke the spark's implementation of Object size computation + * return the default value passed if function cannot be invoked + * @param anObject + * @param defValue + * @return + */ + public static long estimate(Object anObject, long defValue) { + try { + if (methodAccessible) { + if (null == estimateMethod) { + estimateMethod = Class.forName(className).getMethod("estimate", Object.class); + estimateMethod.setAccessible(true); + } + return (Long) estimateMethod.invoke(null, anObject); + } else { + return defValue; + } + } catch (Throwable ex) { + // throwable is being caught as external interface is being invoked through reflection + // and runtime exceptions might get thrown + LOGGER.error(ex, "Could not access method SizeEstimator:estimate.Returning default value"); + methodAccessible = false; + return defValue; + } + } +} http://git-wip-us.apache.org/repos/asf/carbondata/blob/377dee94/core/src/test/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCacheTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCacheTest.java b/core/src/test/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCacheTest.java index 628c3ff..b06fc4d 100644 --- a/core/src/test/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCacheTest.java +++ b/core/src/test/java/org/apache/carbondata/core/cache/dictionary/ReverseDictionaryCacheTest.java @@ -206,7 +206,7 @@ public class ReverseDictionaryCacheTest extends AbstractDictionaryCacheTest { // available size limit new MockUp<CarbonDictionaryColumnMetaChunk>() { @Mock public long getEnd_offset() { - return 10485755L; + return 10445000L; } }; columnIdentifier = columnIdentifiers[1];
