gsmiller commented on a change in pull request #127: URL: https://github.com/apache/lucene/pull/127#discussion_r638246848
########## File path: lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeCounter.java ########## @@ -148,170 +90,139 @@ public void add(long v) { // are guaranteed to find a match because the last // boundary is Long.MAX_VALUE: + long[] boundaries = boundaries(); + int lo = 0; int hi = boundaries.length - 1; while (true) { int mid = (lo + hi) >>> 1; - // System.out.println(" cycle lo=" + lo + " hi=" + hi + " mid=" + mid + " boundary=" + - // boundaries[mid] + " to " + boundaries[mid+1]); if (v <= boundaries[mid]) { if (mid == 0) { - leafCounts[0]++; + processSingleValuedHit(mid); return; } else { hi = mid - 1; } } else if (v > boundaries[mid + 1]) { lo = mid + 1; } else { - leafCounts[mid + 1]++; - // System.out.println(" incr @ " + (mid+1) + "; now " + leafCounts[mid+1]); + processSingleValuedHit(mid + 1); return; } } } - /** - * Fills counts corresponding to the original input ranges, returning the missing count (how many - * hits didn't match any ranges). - */ - public int fillCounts(int[] counts) { - // System.out.println(" rollup"); - missingCount = 0; - leafUpto = 0; - rollup(root, counts, false); - return missingCount; - } + /** Count a multi-valued doc value */ + void addMultiValued(long v) { - private int rollup(LongRangeNode node, int[] counts, boolean sawOutputs) { - int count; - sawOutputs |= node.outputs != null; - if (node.left != null) { - count = rollup(node.left, counts, sawOutputs); - count += rollup(node.right, counts, sawOutputs); - } else { - // Leaf: - count = leafCounts[leafUpto]; - leafUpto++; - if (!sawOutputs) { - // This is a missing count (no output ranges were - // seen "above" us): - missingCount += count; - } + if (rangeCount() == 0) { + return; // don't bother if there aren't any requested ranges + } + + long[] boundaries = boundaries(); + + // First check if we've "advanced" beyond the last leaf we counted for this doc. If + // we haven't, there's no sense doing anything else: + if (multiValuedDocLastSeenLeaf != -1 && v <= boundaries[multiValuedDocLastSeenLeaf]) { + return; } - if (node.outputs != null) { - for (int rangeIndex : node.outputs) { - counts[rangeIndex] += count; + + // Also check if we've already counted the last leaf. If so, there's nothing else to count + // for this doc: + final int nextCandidateLeaf = multiValuedDocLastSeenLeaf + 1; + if (nextCandidateLeaf == boundaries.length) { + return; + } + + // Binary search in the range of the next candidate leaf up to the last leaf: + int lo = nextCandidateLeaf; + int hi = boundaries.length - 1; + while (true) { + int mid = (lo + hi) >>> 1; + if (v <= boundaries[mid]) { + if (mid == nextCandidateLeaf) { + processMultiValuedHit(mid); + multiValuedDocLastSeenLeaf = mid; + return; + } else { + hi = mid - 1; + } + } else if (v > boundaries[mid + 1]) { + lo = mid + 1; + } else { + int idx = mid + 1; + processMultiValuedHit(idx); + multiValuedDocLastSeenLeaf = idx; + return; } } - // System.out.println(" rollup node=" + node.start + " to " + node.end + ": count=" + count); - return count; } - private static LongRangeNode split(int start, int end, List<InclusiveRange> elementaryIntervals) { - if (start == end - 1) { - // leaf - InclusiveRange range = elementaryIntervals.get(start); - return new LongRangeNode(range.start, range.end, null, null, start); - } else { - int mid = (start + end) >>> 1; - LongRangeNode left = split(start, mid, elementaryIntervals); - LongRangeNode right = split(mid, end, elementaryIntervals); - return new LongRangeNode(left.start, right.end, left, right, -1); - } + /** + * Finish processing all documents. This will return the number of docs that didn't contribute to + * any ranges (that weren't already reported when calling endMultiValuedDoc()). + */ + abstract int finish(); + + /** Provide boundary information for elementary segments (max inclusive value per range) */ + protected abstract long[] boundaries(); + + /** Process a single-value "hit" against an elementary segment. */ + protected abstract void processSingleValuedHit(int elementarySegmentNum); + + /** Process a multi-value "hit" against an elementary segment. */ + protected abstract void processMultiValuedHit(int elementarySegmentNum); + + /** Increment the specified range by one. */ + protected final void increment(int rangeNum) { + countBuffer[rangeNum]++; } - private static final class InclusiveRange { - public final long start; - public final long end; + /** Increment the specified range by the specified count. */ + protected final void increment(int rangeNum, int count) { + countBuffer[rangeNum] += count; + } - public InclusiveRange(long start, long end) { - assert end >= start; - this.start = start; - this.end = end; + /** Number of ranges requested by the caller. */ + protected final int rangeCount() { + return countBuffer.length; + } + + /** Determine whether-or-not any requested ranges overlap */ + private static boolean hasOverlappingRanges(LongRange[] ranges) { + if (ranges.length == 0) { + return false; } - @Override - public String toString() { - return start + " to " + end; + // Copy before sorting so we don't mess with the caller's original ranges: + LongRange[] sortedRanges = new LongRange[ranges.length]; + System.arraycopy(ranges, 0, sortedRanges, 0, ranges.length); + Arrays.sort(sortedRanges, Comparator.comparingLong(r -> r.min)); + + long prev = sortedRanges[0].max; + for (int i = 1; i < sortedRanges.length; i++) { + if (sortedRanges[i].min <= prev) { Review comment: Looks like you sorted this out, but I'll leave a comment to make this more readable so a future developer doesn't have to jump around to confirm. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org