yupeng9 commented on a change in pull request #6409:
URL: https://github.com/apache/incubator-pinot/pull/6409#discussion_r557803062
##########
File path:
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java
##########
@@ -90,110 +92,139 @@ public H3IndexFilterOperator(IndexSegment segment,
Predicate predicate, int numD
@Override
protected FilterBlock getNextBlock() {
if (_upperBound < 0 || _lowerBound > _upperBound) {
- // Invalid upper bound
-
+ // Invalid upper bound, return an empty block
return new FilterBlock(EmptyDocIdSet.getInstance());
}
- if (Double.isNaN(_lowerBound) || _lowerBound < 0) {
- // No lower bound
+ try {
+ if (Double.isNaN(_lowerBound) || _lowerBound < 0) {
+ // No lower bound
- if (Double.isNaN(_upperBound)) {
- // No upper bound
- return new FilterBlock(new MatchAllDocIdSet(_numDocs));
- }
-
- List<Long> fullMatchH3Ids = getFullMatchH3Ids(_upperBound);
- HashSet<Long> partialMatchH3Ids = new
HashSet<>(getPartialMatchH3Ids(_upperBound));
- partialMatchH3Ids.removeAll(fullMatchH3Ids);
+ if (Double.isNaN(_upperBound)) {
+ // No bound, return a match-all block
+ return new FilterBlock(new MatchAllDocIdSet(_numDocs));
+ }
- MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap();
- for (long fullMatchH3Id : fullMatchH3Ids) {
- fullMatchDocIds.or(_h3IndexReader.getDocIds(fullMatchH3Id));
- }
+ // Upper bound only
+ List<Long> fullMatchH3Ids = getAlwaysMatchH3Ids(_upperBound);
+ HashSet<Long> partialMatchH3Ids = new
HashSet<>(getPossibleMatchH3Ids(_upperBound));
+ partialMatchH3Ids.removeAll(fullMatchH3Ids);
- MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap();
- for (long partialMatchH3Id : partialMatchH3Ids) {
- partialMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id));
- }
+ MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap();
+ for (long fullMatchH3Id : fullMatchH3Ids) {
+ fullMatchDocIds.or(_h3IndexReader.getDocIds(fullMatchH3Id));
+ }
- return getFilterBlock(fullMatchDocIds, partialMatchDocIds);
- }
+ MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap();
+ for (long partialMatchH3Id : partialMatchH3Ids) {
+ partialMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id));
+ }
- if (Double.isNaN(_upperBound)) {
- // No upper bound
+ return getFilterBlock(fullMatchDocIds, partialMatchDocIds);
+ }
- List<Long> notMatchH3Ids = getFullMatchH3Ids(_lowerBound);
- Set<Long> partialMatchH3Ids = new
HashSet<>(getPartialMatchH3Ids(_lowerBound));
+ if (Double.isNaN(_upperBound)) {
+ // Lower bound only
+
+ List<Long> alwaysNotMatchH3Ids = getAlwaysMatchH3Ids(_lowerBound);
+ Set<Long> possibleNotMatchH3Ids = new
HashSet<>(getPossibleMatchH3Ids(_lowerBound));
+
+ // Flip the result of possible not match doc ids to get the full match
doc ids
+ MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap();
+ for (long partialMatchH3Id : possibleNotMatchH3Ids) {
+ fullMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id));
+ }
+ fullMatchDocIds.flip(0L, _numDocs);
+
+ // Remove the always not match H3 ids from possible not match H3 ids
to get the partial match H3 ids
+ possibleNotMatchH3Ids.removeAll(alwaysNotMatchH3Ids);
+ MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap();
+ for (long partialMatchH3Id : possibleNotMatchH3Ids) {
+ partialMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id));
+ }
+
+ return getFilterBlock(fullMatchDocIds, partialMatchDocIds);
+ }
+ // Both lower bound and upper bound exist
+ List<Long> lowerAlwaysMatchH3Ids = getAlwaysMatchH3Ids(_lowerBound);
+ List<Long> lowerPossibleMatchH3Ids = getPossibleMatchH3Ids(_lowerBound);
+ List<Long> upperAlwaysMatchH3Ids = getAlwaysMatchH3Ids(_upperBound);
+ List<Long> upperPossibleMatchH3Ids = getPossibleMatchH3Ids(_upperBound);
+
+ // Remove the possible match H3 ids for the lower bound from the always
match H3 ids for the upper bound to get
+ // the full match H3 ids
+ Set<Long> fullMatchH3Ids;
+ if (upperAlwaysMatchH3Ids.size() > lowerPossibleMatchH3Ids.size()) {
+ fullMatchH3Ids = new HashSet<>(upperAlwaysMatchH3Ids);
+ fullMatchH3Ids.removeAll(lowerPossibleMatchH3Ids);
+ } else {
+ fullMatchH3Ids = Collections.emptySet();
+ }
MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap();
- for (long partialMatchH3Id : partialMatchH3Ids) {
- fullMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id));
+ for (long fullMatchH3Id : fullMatchH3Ids) {
+ fullMatchDocIds.or(_h3IndexReader.getDocIds(fullMatchH3Id));
}
- fullMatchDocIds.flip(0L, _numDocs);
- partialMatchH3Ids.removeAll(notMatchH3Ids);
+ // Remove the always match H3 ids for the lower bound (always not match
H3 ids) and the full match H3 ids from the
+ // possible match H3 ids for the upper bound to get the partial match H3
ids
+ Set<Long> partialMatchH3Ids = new HashSet<>(upperPossibleMatchH3Ids);
+ partialMatchH3Ids.removeAll(lowerAlwaysMatchH3Ids);
+ partialMatchH3Ids.removeAll(fullMatchH3Ids);
MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap();
for (long partialMatchH3Id : partialMatchH3Ids) {
partialMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id));
}
return getFilterBlock(fullMatchDocIds, partialMatchDocIds);
+ } catch (Exception e) {
+ // Fall back to ExpressionFilterOperator when the execution encounters
exception (e.g. numRings is too large)
+ return new ExpressionFilterOperator(_segment, _predicate,
_numDocs).getNextBlock();
}
-
- // Both lower bound and upper bound exist
- List<Long> lowerFullMatchH3Ids = getFullMatchH3Ids(_lowerBound);
- List<Long> lowerPartialMatchH3Ids = getPartialMatchH3Ids(_lowerBound);
- List<Long> upperFullMatchH3Ids = getFullMatchH3Ids(_upperBound);
- List<Long> upperPartialMatchH3Ids = getPartialMatchH3Ids(_upperBound);
-
- Set<Long> fullMatchH3Ids;
- if (upperFullMatchH3Ids.size() > lowerPartialMatchH3Ids.size()) {
- fullMatchH3Ids = new HashSet<>(upperFullMatchH3Ids);
- fullMatchH3Ids.removeAll(lowerPartialMatchH3Ids);
- } else {
- fullMatchH3Ids = Collections.emptySet();
- }
- MutableRoaringBitmap fullMatchDocIds = new MutableRoaringBitmap();
- for (long fullMatchH3Id : fullMatchH3Ids) {
- fullMatchDocIds.or(_h3IndexReader.getDocIds(fullMatchH3Id));
- }
-
- Set<Long> partialMatchH3Ids = new HashSet<>(upperPartialMatchH3Ids);
- partialMatchH3Ids.removeAll(lowerFullMatchH3Ids);
- partialMatchH3Ids.removeAll(fullMatchH3Ids);
- MutableRoaringBitmap partialMatchDocIds = new MutableRoaringBitmap();
- for (long partialMatchH3Id : partialMatchH3Ids) {
- partialMatchDocIds.or(_h3IndexReader.getDocIds(partialMatchH3Id));
- }
-
- return getFilterBlock(fullMatchDocIds, partialMatchDocIds);
}
/**
* Returns the H3 ids that is ALWAYS fully covered by the circle with the
given distance as the radius and a point
* within the _h3Id hexagon as the center.
+ * <p>The farthest distance from the center of the center hexagon to the
center of a hexagon in the nth ring is
+ * {@code sqrt(3) * n * edgeLength}. Counting the distance from the center
to a point in the hexagon, which is up
+ * to the edge length, it is guaranteed that the hexagons in the nth ring
are always fully covered if:
+ * {@code distance >= (sqrt(3) * n + 2) * edgeLength}.
*/
- private List<Long> getFullMatchH3Ids(double distance) {
+ private List<Long> getAlwaysMatchH3Ids(double distance) {
// NOTE: Pick a constant slightly larger than sqrt(3) to be conservative
int numRings = (int) Math.floor((distance / _edgeLength - 2) / 1.7321);
- if (numRings >= 0) {
- return H3Utils.H3_CORE.kRing(_h3Id, numRings);
- } else {
- return Collections.emptyList();
- }
+ return numRings >= 0 ? getH3Ids(numRings) : Collections.emptyList();
}
/**
- * Returns the H3 ids that MIGHT BE partially/fully covered by the circle
with the given distance as the radius and a
+ * Returns the H3 ids that MIGHT BE fully/partially covered by the circle
with the given distance as the radius and a
* point within the _h3Id hexagon as the center.
+ * <p>The shortest distance from the center of the center hexagon to the
center of a hexagon in the nth ring is
+ * {@code >= 1.5 * n * edgeLength}. Counting the distance from the center to
a point in the hexagon, which is up
+ * to the edge length, it is guaranteed that the hexagons in the nth ring
are always not fully/partially covered if:
+ * {@code distance < (1.5 * n - 2) * edgeLength}.
*/
- private List<Long> getPartialMatchH3Ids(double distance) {
+ private List<Long> getPossibleMatchH3Ids(double distance) {
// NOTE: Add a small delta (0.001) to be conservative
int numRings = (int) Math.floor((distance / _edgeLength + 2) / 1.5 +
0.001);
+ return getH3Ids(numRings);
+ }
+
+ /**
+ * Returns the H3 ids for the given number of rings around the _h3Id.
+ * IMPORTANT: Throw exception when number of rings is too large because H3
library might send SIGILL for large number
+ * of rings, which can terminate the JVM. Also, the result won't be accurate
when the distance is too large. In such
+ * case, the operator will fallback to ExpressionFilterOperator.
+ */
+ private List<Long> getH3Ids(int numRings) {
+ Preconditions.checkState(numRings <= 100, "Expect numRings <= 100, got:
%s", numRings);
Review comment:
Will this fall back to scan-based approach?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]