This is an automated email from the ASF dual-hosted git repository. xiangfu pushed a commit to branch support_mv_non_dict_groupby in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
commit a978e22c542c0183feb86674288b472a5ee3ebfa Author: Xiang Fu <[email protected]> AuthorDate: Wed Aug 12 01:15:47 2020 -0700 Support multi-value non-dictionary group by --- .../NoDictionaryMultiColumnGroupKeyGenerator.java | 153 ++++++++++++++++++++- 1 file changed, 149 insertions(+), 4 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryMultiColumnGroupKeyGenerator.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryMultiColumnGroupKeyGenerator.java index 1d292c6..5322816 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryMultiColumnGroupKeyGenerator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/groupby/NoDictionaryMultiColumnGroupKeyGenerator.java @@ -51,6 +51,7 @@ public class NoDictionaryMultiColumnGroupKeyGenerator implements GroupKeyGenerat private final Dictionary[] _dictionaries; private final ValueToIdMap[] _onTheFlyDictionaries; private final Object2IntOpenHashMap<FixedIntArray> _groupKeyMap; + private final boolean[] _isSingleValueExpressions; private final int _globalGroupIdUpperBound; private int _numGroups = 0; @@ -62,6 +63,7 @@ public class NoDictionaryMultiColumnGroupKeyGenerator implements GroupKeyGenerat _dataTypes = new DataType[_numGroupByExpressions]; _dictionaries = new Dictionary[_numGroupByExpressions]; _onTheFlyDictionaries = new ValueToIdMap[_numGroupByExpressions]; + _isSingleValueExpressions = new boolean[_numGroupByExpressions]; for (int i = 0; i < _numGroupByExpressions; i++) { ExpressionContext groupByExpression = groupByExpressions[i]; @@ -72,6 +74,7 @@ public class NoDictionaryMultiColumnGroupKeyGenerator implements GroupKeyGenerat } else { _onTheFlyDictionaries[i] = ValueToIdMapFactory.get(_dataTypes[i]); } + _isSingleValueExpressions[i] = transformResultMetadata.isSingleValue(); } _groupKeyMap = new Object2IntOpenHashMap<>(); @@ -146,8 +149,129 @@ public class NoDictionaryMultiColumnGroupKeyGenerator implements GroupKeyGenerat @Override public void generateKeysForBlock(TransformBlock transformBlock, int[][] groupKeys) { - // TODO: Support generating keys for multi-valued columns. - throw new UnsupportedOperationException("Operation not supported"); + int numDocs = transformBlock.getNumDocs(); + int[][][] keys = new int[numDocs][_numGroupByExpressions][]; + for (int i = 0; i < _numGroupByExpressions; i++) { + BlockValSet blockValSet = transformBlock.getBlockValueSet(_groupByExpressions[i]); + if (_dictionaries[i] != null) { + int[] dictIds = blockValSet.getDictionaryIdsSV(); + for (int j = 0; j < numDocs; j++) { + keys[j][i] = new int[]{ dictIds[j] }; + } + } else { + ValueToIdMap onTheFlyDictionary = _onTheFlyDictionaries[i]; + if (_isSingleValueExpressions[i]) { + switch (_dataTypes[i]) { + case INT: + int[] intValues = blockValSet.getIntValuesSV(); + for (int j = 0; j < numDocs; j++) { + keys[j][i] = new int[]{onTheFlyDictionary.put(intValues[j])}; + } + break; + case LONG: + long[] longValues = blockValSet.getLongValuesSV(); + for (int j = 0; j < numDocs; j++) { + keys[j][i] = new int[]{onTheFlyDictionary.put(longValues[j])}; + } + break; + case FLOAT: + float[] floatValues = blockValSet.getFloatValuesSV(); + for (int j = 0; j < numDocs; j++) { + keys[j][i] = new int[]{onTheFlyDictionary.put(floatValues[j])}; + } + break; + case DOUBLE: + double[] doubleValues = blockValSet.getDoubleValuesSV(); + for (int j = 0; j < numDocs; j++) { + keys[j][i] = new int[]{onTheFlyDictionary.put(doubleValues[j])}; + } + break; + case STRING: + String[] stringValues = blockValSet.getStringValuesSV(); + for (int j = 0; j < numDocs; j++) { + keys[j][i] = new int[]{onTheFlyDictionary.put(stringValues[j])}; + } + break; + case BYTES: + byte[][] bytesValues = blockValSet.getBytesValuesSV(); + for (int j = 0; j < numDocs; j++) { + keys[j][i] = new int[]{onTheFlyDictionary.put(new ByteArray(bytesValues[j]))}; + } + break; + default: + throw new IllegalArgumentException("Illegal data type for no-dictionary key generator: " + _dataTypes[i]); + } + } else { + switch (_dataTypes[i]) { + case INT: + int[][] intValues = blockValSet.getIntValuesMV(); + for (int j = 0; j < numDocs; j++) { + int mvSize = intValues[j].length; + int[] mvKeys = new int[mvSize]; + for (int k = 0; k < mvSize; k++) { + mvKeys[k] = onTheFlyDictionary.put(intValues[j][k]); + } + keys[j][i] = mvKeys; + } + break; + case LONG: + long[][] longValues = blockValSet.getLongValuesMV(); + for (int j = 0; j < numDocs; j++) { + int mvSize = longValues[j].length; + int[] mvKeys = new int[mvSize]; + for (int k = 0; k < mvSize; k++) { + mvKeys[k] = onTheFlyDictionary.put(longValues[j][k]); + } + keys[j][i] = mvKeys; + } + break; + case FLOAT: + float[][] floatValues = blockValSet.getFloatValuesMV(); + for (int j = 0; j < numDocs; j++) { + int mvSize = floatValues[j].length; + int[] mvKeys = new int[mvSize]; + for (int k = 0; k < mvSize; k++) { + mvKeys[k] = onTheFlyDictionary.put(floatValues[j][k]); + } + keys[j][i] = mvKeys; + } + break; + case DOUBLE: + double[][] doubleValues = blockValSet.getDoubleValuesMV(); + for (int j = 0; j < numDocs; j++) { + int mvSize = doubleValues[j].length; + int[] mvKeys = new int[mvSize]; + for (int k = 0; k < mvSize; k++) { + mvKeys[k] = onTheFlyDictionary.put(doubleValues[j][k]); + } + keys[j][i] = mvKeys; + } + break; + case STRING: + String[][] stringValues = blockValSet.getStringValuesMV(); + for (int j = 0; j < numDocs; j++) { + int mvSize = stringValues[j].length; + int[] mvKeys = new int[mvSize]; + for (int k = 0; k < mvSize; k++) { + mvKeys[k] = onTheFlyDictionary.put(stringValues[j][k]); + } + keys[j][i] = mvKeys; + } + break; + default: + throw new IllegalArgumentException("Illegal data type for no-dictionary key generator: " + _dataTypes[i]); + } + } + } + } + for (int i = 0; i < numDocs; i++) { + int groupLength = keys[i].length; + FixedIntArray[] keysList = new FixedIntArray[groupLength]; + for (int j = 0; j < groupLength; j++) { + keysList[j] = new FixedIntArray(keys[i][j]); + } + groupKeys[i] = getGroupIdsForKey(keysList); + } } @Override @@ -161,9 +285,9 @@ public class NoDictionaryMultiColumnGroupKeyGenerator implements GroupKeyGenerat } /** - * Helper method to get or create group-id for a group key. + * Helper method to get or create group-id for group key. * - * @param keyList Group key, that is a list of objects to be grouped + * @param keyList Group keys, that is a list of objects to be grouped * @return Group id */ private int getGroupIdForKey(FixedIntArray keyList) { @@ -178,6 +302,27 @@ public class NoDictionaryMultiColumnGroupKeyGenerator implements GroupKeyGenerat } /** + * Helper method to get or create a list of group-id for a list of group key. + * + * @param keysList Group keys, that is a list of list of objects to be grouped + * @return Group ids + */ + private int[] getGroupIdsForKey(FixedIntArray[] keysList) { + int[] groupIds = new int[keysList.length]; + for (int i = 0; i < keysList.length; i++) { + int groupId = _groupKeyMap.getInt(keysList[i]); + if (groupId == INVALID_ID) { + if (_numGroups < _globalGroupIdUpperBound) { + groupId = _numGroups; + _groupKeyMap.put(keysList[i], _numGroups++); + } + } + groupIds[i] = groupId; + } + return groupIds; + } + + /** * Iterator for {Group-Key, Group-id) pair. */ private class GroupKeyIterator implements Iterator<GroupKey> { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
