dario-liberman commented on code in PR #18760:
URL: https://github.com/apache/pinot/pull/18760#discussion_r3465756641
##########
pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/funnel/DictIdsWrapper.java:
##########
@@ -18,19 +18,151 @@
*/
package org.apache.pinot.core.query.aggregation.function.funnel;
+import it.unimi.dsi.fastutil.ints.IntArrayList;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
import org.apache.pinot.segment.spi.index.reader.Dictionary;
import org.roaringbitmap.RoaringBitmap;
+/**
+ * Holds per-step RoaringBitmaps keyed by correlation dictionary IDs.
+ *
+ * <p>For single-key CORRELATE_BY, stores raw dictionary IDs directly in the
bitmaps (compact, fits in one
+ * RoaringBitmap container for typical segment sizes).
+ *
+ * <p>For multi-key CORRELATE_BY, composite IDs are assigned via stride-based
arithmetic (when the combined key
+ * space fits in int) or a HashMap fallback for large key spaces.
+ */
final class DictIdsWrapper {
- final Dictionary _dictionary;
+ final Dictionary[] _dictionaries;
final RoaringBitmap[] _stepsBitmaps;
+ // Stride-based composite mapping (non-null only for multi-key when product
of dict sizes fits in int)
+ private final int[] _strides;
+
+ // HashMap-based composite mapping (non-null only for multi-key when stride
overflows int)
+ private final Map<IntArrayList, Integer> _compositeKeyMap;
+ private final List<int[]> _compositeKeyReverse;
+ private final IntArrayList _lookupKey;
+
DictIdsWrapper(int numSteps, Dictionary dictionary) {
- _dictionary = dictionary;
+ _dictionaries = new Dictionary[]{dictionary};
_stepsBitmaps = new RoaringBitmap[numSteps];
for (int n = 0; n < numSteps; n++) {
_stepsBitmaps[n] = new RoaringBitmap();
}
+ _strides = null;
+ _compositeKeyMap = null;
+ _compositeKeyReverse = null;
+ _lookupKey = null;
+ }
+
+ DictIdsWrapper(int numSteps, Dictionary[] dictionaries) {
+ _dictionaries = dictionaries;
+ _stepsBitmaps = new RoaringBitmap[numSteps];
+ for (int n = 0; n < numSteps; n++) {
+ _stepsBitmaps[n] = new RoaringBitmap();
+ }
+
+ if (dictionaries.length > 1) {
+ long totalSpace = 1;
+ boolean fitsInInt = true;
+ for (Dictionary d : dictionaries) {
+ totalSpace *= d.length();
+ if (totalSpace > Integer.MAX_VALUE) {
+ fitsInInt = false;
+ break;
+ }
+ }
+
+ if (fitsInInt) {
+ _strides = new int[dictionaries.length];
+ _strides[dictionaries.length - 1] = 1;
+ for (int k = dictionaries.length - 2; k >= 0; k--) {
+ _strides[k] = _strides[k + 1] * dictionaries[k + 1].length();
+ }
+ _compositeKeyMap = null;
+ _compositeKeyReverse = null;
+ _lookupKey = null;
+ } else {
+ _strides = null;
+ _compositeKeyMap = new HashMap<>();
+ _compositeKeyReverse = new ArrayList<>();
+ _lookupKey = new IntArrayList(dictionaries.length);
+ }
+ } else {
+ _strides = null;
+ _compositeKeyMap = null;
+ _compositeKeyReverse = null;
+ _lookupKey = null;
+ }
+ }
+
+ boolean isMultiKey() {
+ return _dictionaries.length > 1;
+ }
+
+ boolean isHashMapPath() {
+ return _compositeKeyMap != null;
+ }
+
+ /**
+ * Maps a tuple of per-column dictionary IDs to a single composite int
suitable for RoaringBitmap.
+ * Only used for multi-key; for single-key, callers should add the dictId
directly.
+ */
+ int getCompositeCorrelationId(int[] dictIds) {
+ if (_strides != null) {
+ int id = 0;
+ for (int k = 0; k < dictIds.length; k++) {
+ id += dictIds[k] * _strides[k];
+ }
+ return id;
+ }
+ _lookupKey.clear();
+ for (int dictId : dictIds) {
+ _lookupKey.add(dictId);
+ }
+ Integer existingId = _compositeKeyMap.get(_lookupKey);
+ if (existingId != null) {
+ return existingId;
+ }
+ IntArrayList insertKey = new IntArrayList(dictIds);
Review Comment:
All of this memory allocation and subsequent garbage collection seems to me
completely unnecessary.
We do not have a collision free counting strategy anyways.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]