gsmiller commented on a change in pull request #127: URL: https://github.com/apache/lucene/pull/127#discussion_r638247788
########## File path: lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeCounter.java ########## @@ -16,122 +16,64 @@ */ package org.apache.lucene.facet.range; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.Arrays; +import java.util.Comparator; /** - * Counts how many times each range was seen; per-hit it's just a binary search ({@link #add}) - * against the elementary intervals, and in the end we rollup back to the original ranges. + * Counter for numeric ranges. Works for both single- and multi-valued cases (assuming you use it + * correctly). + * + * <p>Usage notes: When counting a document field that only has a single value, callers should call + * addSingleValued() with the value. Whenever a document field has multiple values, callers should + * call startMultiValuedDoc() at the beginning of processing the document, followed by + * addMultiValued() with each value before finally calling endMultiValuedDoc() at the end of + * processing the document. The call to endMultiValuedDoc() will respond with a boolean indicating + * whether-or-not the specific document matched against at least one of the ranges being counted. + * Finally, after processing all documents, the caller should call finish(). This final call will + * ensure the contents of the user-provided {@code countBuffer} contains accurate counts (each index + * corresponding to the provided {@code LongRange} in {@code ranges}). The final call to finish() + * will also report how many additional documents did not match against any ranges. The combination + * of the endMultiValuedDoc() boolean responses and the number reported by finish() communicates the + * total number of missing documents. Note that the call to finish() will not report any documents + * already reported missing by endMultiValuedDoc(). */ -final class LongRangeCounter { - - final LongRangeNode root; - final long[] boundaries; - final int[] leafCounts; - - // Used during rollup - private int leafUpto; - private int missingCount; - - public LongRangeCounter(LongRange[] ranges) { - // Maps all range inclusive endpoints to int flags; 1 - // = start of interval, 2 = end of interval. We need to - // track the start vs end case separately because if a - // given point is both, then it must be its own - // elementary interval: - Map<Long, Integer> endsMap = new HashMap<>(); - - endsMap.put(Long.MIN_VALUE, 1); - endsMap.put(Long.MAX_VALUE, 2); - - for (LongRange range : ranges) { - Integer cur = endsMap.get(range.min); - if (cur == null) { - endsMap.put(range.min, 1); - } else { - endsMap.put(range.min, cur.intValue() | 1); - } - cur = endsMap.get(range.max); - if (cur == null) { - endsMap.put(range.max, 2); - } else { - endsMap.put(range.max, cur.intValue() | 2); - } - } - - List<Long> endsList = new ArrayList<>(endsMap.keySet()); - Collections.sort(endsList); - - // Build elementaryIntervals (a 1D Venn diagram): - List<InclusiveRange> elementaryIntervals = new ArrayList<>(); - int upto0 = 1; - long v = endsList.get(0); - long prev; - if (endsMap.get(v) == 3) { - elementaryIntervals.add(new InclusiveRange(v, v)); - prev = v + 1; - } else { - prev = v; - } - - while (upto0 < endsList.size()) { - v = endsList.get(upto0); - int flags = endsMap.get(v); - // System.out.println(" v=" + v + " flags=" + flags); - if (flags == 3) { - // This point is both an end and a start; we need to - // separate it: - if (v > prev) { - elementaryIntervals.add(new InclusiveRange(prev, v - 1)); - } - elementaryIntervals.add(new InclusiveRange(v, v)); - prev = v + 1; - } else if (flags == 1) { - // This point is only the start of an interval; - // attach it to next interval: - if (v > prev) { - elementaryIntervals.add(new InclusiveRange(prev, v - 1)); - } - prev = v; - } else { - assert flags == 2; - // This point is only the end of an interval; attach - // it to last interval: - elementaryIntervals.add(new InclusiveRange(prev, v)); - prev = v + 1; - } - // System.out.println(" ints=" + elementaryIntervals); - upto0++; - } +abstract class LongRangeCounter { - // Build binary tree on top of intervals: - root = split(0, elementaryIntervals.size(), elementaryIntervals); + /** accumulated counts for all of the ranges */ + private final int[] countBuffer; - // Set outputs, so we know which range to output for - // each node in the tree: - for (int i = 0; i < ranges.length; i++) { - root.addOutputs(i, ranges[i]); - } + /** + * track the last counted leaf so we can skip over ones we've already counted for multi-value doc Review comment: Yes. Maybe there's an opportunity to tidy up terminology here. The leaves in the segment tree are all of the elementary ranges. I'll see if I can clean up some of the terminology. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org