This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch cardinality_buffer
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 3b3a64afa761576f3b3132394329f75389118baf
Author: Jackie (Xiaotian) Jiang <[email protected]>
AuthorDate: Mon Jun 24 20:17:52 2019 -0700

    When initializing mutable dictionary, preserve 20% buffer for cardinality 
to reduce the chance of re-sizing the dictionary
    
    The current off-heap mutable dictionary is designed to hold all values in a 
single buffer.
    If the buffer allocated is just large enough for the cardinality estimate, 
there is quite a high possibility that the dictionary will need to be expanded, 
which will impact performance.
    In order to reduce the chance of re-sizing the dictionary, preserve a 20% 
buffer for cardinality.
---
 .../apache/pinot/core/indexsegment/mutable/MutableSegmentImpl.java   | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/indexsegment/mutable/MutableSegmentImpl.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/indexsegment/mutable/MutableSegmentImpl.java
index 972d91c..ca4d18d 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/indexsegment/mutable/MutableSegmentImpl.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/indexsegment/mutable/MutableSegmentImpl.java
@@ -45,7 +45,6 @@ import 
org.apache.pinot.core.realtime.impl.RealtimeSegmentStatsHistory;
 import org.apache.pinot.core.realtime.impl.dictionary.MutableDictionary;
 import org.apache.pinot.core.realtime.impl.dictionary.MutableDictionaryFactory;
 import 
org.apache.pinot.core.realtime.impl.invertedindex.RealtimeInvertedIndexReader;
-import org.apache.pinot.core.realtime.stream.StreamMessageMetadata;
 import org.apache.pinot.core.segment.creator.impl.V1Constants;
 import org.apache.pinot.core.segment.index.SegmentMetadataImpl;
 import org.apache.pinot.core.segment.index.data.source.ColumnDataSource;
@@ -165,9 +164,11 @@ public class MutableSegmentImpl implements MutableSegment {
           dictionaryColumnSize = dataType.size();
         }
         String allocationContext = buildAllocationContext(_segmentName, 
column, V1Constants.Dict.FILE_EXTENSION);
+        // NOTE: preserve 20% buffer for cardinality to reduce the chance of 
re-sizing the dictionary
+        int estimatedCardinality = (int) 
(_statsHistory.getEstimatedCardinality(column) * 1.2);
         MutableDictionary dictionary = MutableDictionaryFactory
             .getMutableDictionary(dataType, _offHeap, _memoryManager, 
dictionaryColumnSize,
-                Math.min(_statsHistory.getEstimatedCardinality(column), 
_capacity), allocationContext);
+                Math.min(estimatedCardinality, _capacity), allocationContext);
         _dictionaryMap.put(column, dictionary);
 
         // Even though the column is defined as 'no-dictionary' in the config, 
we did create dictionary for consuming segment.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to