clintropolis commented on code in PR #13803:
URL: https://github.com/apache/druid/pull/13803#discussion_r1146920835
##########
processing/src/main/java/org/apache/druid/segment/NestedDataColumnMerger.java:
##########
@@ -280,4 +320,122 @@ private <T> boolean allNull(Indexed<T> dimValues)
}
return true;
}
+
+ public static class ArrayDictionaryMergingIterator implements Iterator<int[]> // merges the int[] values of several array dictionaries into one stream, taking the queue head each time and skipping equal heads
+ { // NOTE(review): de-duplication presumably relies on every input iterating in ARRAY_COMPARATOR order — confirm against callers
+ private static final Comparator<PeekingIterator<int[]>>
PEEKING_ITERATOR_COMPARATOR =
+ (lhs, rhs) ->
FrontCodedIntArrayIndexedWriter.ARRAY_COMPARATOR.compare(lhs.peek(),
rhs.peek()); // orders queued iterators by the array each one would return next
+
+ protected final PriorityQueue<PeekingIterator<int[]>> pQueue; // input iterators that still have values
+ protected int counter; // incremented once per next() call; exposed through getCardinality()
+
+ public ArrayDictionaryMergingIterator(Iterable<Object[]>[]
dimValueLookups, GlobalDictionaryIdLookup idLookup) // wraps each non-null lookup in an IdLookupArrayIterator built with idLookup
+ {
+ pQueue = new PriorityQueue<>(PEEKING_ITERATOR_COMPARATOR);
+
+ for (Iterable<Object[]> dimValueLookup : dimValueLookups) {
+ if (dimValueLookup == null) { // null entries are skipped
+ continue;
+ }
+ final PeekingIterator<int[]> iter = Iterators.peekingIterator(
+ new IdLookupArrayIterator(idLookup, dimValueLookup.iterator())
+ );
+ if (iter.hasNext()) { // only iterators with at least one value enter the queue
+ pQueue.add(iter);
+ }
+ }
+ }
+
+ @Override
+ public boolean hasNext()
+ {
+ return !pQueue.isEmpty(); // exhausted iterators are never re-queued, so an empty queue means no values remain
+ }
+
+ @Override
+ public int[] next()
+ {
+ PeekingIterator<int[]> smallest = pQueue.remove(); // head of the queue per PEEKING_ITERATOR_COMPARATOR
+ if (smallest == null) { // NOTE(review): if pQueue is java.util.PriorityQueue, remove() throws NoSuchElementException on an empty queue and never returns null, so this branch looks unreachable — confirm
+ throw new NoSuchElementException();
+ }
+ final int[] value = smallest.next();
+ if (smallest.hasNext()) { // re-queue so the iterator is re-ordered by its new head
+ pQueue.add(smallest);
+ }
+
+ while (!pQueue.isEmpty() && Arrays.equals(value, pQueue.peek().peek())) { // drop duplicates of the value just taken that sit at the head of the queue
+ PeekingIterator<int[]> same = pQueue.remove();
+ same.next();
+ if (same.hasNext()) {
+ pQueue.add(same);
+ }
+ }
+ counter++; // one more value handed out
+
+ return value;
+ }
+
+ public int getCardinality() // number of values returned by next() so far; a full count only once the iterator has been drained
+ {
+ return counter;
+ }
+
+ @Override
+ public void remove() // removal is not supported by this iterator
+ {
+ throw new UnsupportedOperationException("remove");
+ }
+ }
+
+ public static class IdLookupArrayIterator implements Iterator<int[]>
+ {
+ private final GlobalDictionaryIdLookup idLookup;
+ private final Iterator<Object[]> delegate;
+
+ public IdLookupArrayIterator(
+ GlobalDictionaryIdLookup idLookup,
+ Iterator<Object[]> delegate
+ )
+ {
+ this.idLookup = idLookup;
+ this.delegate = delegate;
+ }
+
+ @Override
+ public boolean hasNext()
+ {
+ return delegate.hasNext();
+ }
+
+ @Override
+ public int[] next()
+ {
+ final Object[] next = delegate.next();
+ if (next == null) {
+ return null;
+ }
+ final int[] newIdsWhoDis = new int[next.length];
+ for (int i = 0; i < next.length; i++) {
+ if (next[i] == null) {
+ newIdsWhoDis[i] = 0;
+ } else if (next[i] instanceof String) {
+ newIdsWhoDis[i] = idLookup.lookupString((String) next[i]);
+ } else if (next[i] instanceof Long) {
+ newIdsWhoDis[i] = idLookup.lookupLong((Long) next[i]);
+ } else if (next[i] instanceof Double) {
+ newIdsWhoDis[i] = idLookup.lookupDouble((Double) next[i]);
+ } else {
+ newIdsWhoDis[i] = -1;
+ }
+ Preconditions.checkArgument(
+ newIdsWhoDis[i] >= 0,
+ "unknown global id [%s] for value [%s]",
+ newIdsWhoDis[i],
+ next[i]
+ );
Review Comment:
So, we are dealing with them here like this so that we can look up the new
global id from the newly merged lower scalar value dictionaries. Otherwise we
would need the mappings of old ids to new ids, which we don't currently have
anywhere, and it's a lot more complicated since it's per segment. This way we
just look up the old values and, after the lower dictionaries are merged,
look up the array elements for the newly sorted values.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]