richardstartin commented on a change in pull request #8195:
URL: https://github.com/apache/pinot/pull/8195#discussion_r806367799



##########
File path: 
pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryMultiColumnGroupKeyGenerator.java
##########
@@ -92,60 +92,62 @@ public int getGlobalGroupKeyUpperBound() {
   @Override
   public void generateKeysForBlock(TransformBlock transformBlock, int[] 
groupKeys) {
     int numDocs = transformBlock.getNumDocs();
-    int[][] keys = new int[numDocs][_numGroupByExpressions];
+    Object[] values = new Object[_numGroupByExpressions];
     for (int i = 0; i < _numGroupByExpressions; i++) {
       BlockValSet blockValSet = 
transformBlock.getBlockValueSet(_groupByExpressions[i]);
       if (_dictionaries[i] != null) {
-        int[] dictIds = blockValSet.getDictionaryIdsSV();
-        for (int j = 0; j < numDocs; j++) {
-          keys[j][i] = dictIds[j];
-        }
+        values[i] = blockValSet.getDictionaryIdsSV();
       } else {
-        ValueToIdMap onTheFlyDictionary = _onTheFlyDictionaries[i];
         switch (_storedTypes[i]) {
           case INT:
-            int[] intValues = blockValSet.getIntValuesSV();
-            for (int j = 0; j < numDocs; j++) {
-              keys[j][i] = onTheFlyDictionary.put(intValues[j]);
-            }
+            values[i] = blockValSet.getIntValuesSV();
             break;
           case LONG:
-            long[] longValues = blockValSet.getLongValuesSV();
-            for (int j = 0; j < numDocs; j++) {
-              keys[j][i] = onTheFlyDictionary.put(longValues[j]);
-            }
+            values[i] = blockValSet.getLongValuesSV();
             break;
           case FLOAT:
-            float[] floatValues = blockValSet.getFloatValuesSV();
-            for (int j = 0; j < numDocs; j++) {
-              keys[j][i] = onTheFlyDictionary.put(floatValues[j]);
-            }
+            values[i] = blockValSet.getFloatValuesSV();
             break;
           case DOUBLE:
-            double[] doubleValues = blockValSet.getDoubleValuesSV();
-            for (int j = 0; j < numDocs; j++) {
-              keys[j][i] = onTheFlyDictionary.put(doubleValues[j]);
-            }
+            values[i] = blockValSet.getDoubleValuesSV();
             break;
           case STRING:
-            String[] stringValues = blockValSet.getStringValuesSV();
-            for (int j = 0; j < numDocs; j++) {
-              keys[j][i] = onTheFlyDictionary.put(stringValues[j]);
-            }
+            values[i] = blockValSet.getStringValuesSV();
             break;
           case BYTES:
-            byte[][] bytesValues = blockValSet.getBytesValuesSV();
-            for (int j = 0; j < numDocs; j++) {
-              keys[j][i] = onTheFlyDictionary.put(new 
ByteArray(bytesValues[j]));
-            }
+            values[i] = blockValSet.getBytesValuesSV();
             break;
           default:
             throw new IllegalArgumentException("Illegal data type for 
no-dictionary key generator: " + _storedTypes[i]);
         }
       }
     }
-    for (int i = 0; i < numDocs; i++) {
-      groupKeys[i] = getGroupIdForKey(new FixedIntArray(keys[i]));
+    int[] keyValues = new int[_numGroupByExpressions];
+    // note that we are mutating its backing array for memory efficiency
+    FixedIntArray flyweightKey = new FixedIntArray(keyValues);
+    for (int row = 0; row < numDocs; row++) {

Review comment:
       I posted benchmark results. Please read them.

##########
File path: 
pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryMultiColumnGroupKeyGenerator.java
##########
@@ -306,7 +325,7 @@ private int getGroupIdForKey(FixedIntArray keyList) {
     if (groupId == INVALID_ID) {
       if (_numGroups < _globalGroupIdUpperBound) {
         groupId = _numGroups;
-        _groupKeyMap.put(keyList, _numGroups++);
+        _groupKeyMap.put(keyList.clone(), _numGroups++);

Review comment:
       Good catch. Note that this does not affect the baseline benchmark 
results, which were taken at commit 2fa525253a62108dbc91874c77e112eb349337d9.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to