This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 498387a323 Support st_contains using H3 index (#8498)
498387a323 is described below
commit 498387a3239d611539aad1badbbabb5d5f66b75c
Author: WangCHX <[email protected]>
AuthorDate: Sat Apr 30 09:06:12 2022 +0800
Support st_contains using H3 index (#8498)
The idea is to convert input geometry into a list of h3 cells by using
polyfill. But h3 polyfill only fills with the hexagons whose centers are
contained by the geometry.
so creating a method coverGeometryInH3 to return the set of H3 cells at the
specified resolution which completely cover the input shape.
---
.../filter/H3InclusionIndexFilterOperator.java | 152 ++++++++++++++
.../org/apache/pinot/core/plan/FilterPlanNode.java | 59 +++++-
.../apache/pinot/queries/H3IndexQueriesTest.java | 226 +++++++++++++++++++--
.../apache/pinot/segment/local/utils/H3Utils.java | 92 +++++++++
pom.xml | 2 +-
5 files changed, 501 insertions(+), 30 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java
new file mode 100644
index 0000000000..c64d511697
--- /dev/null
+++
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3InclusionIndexFilterOperator.java
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.filter;
+
+import it.unimi.dsi.fastutil.longs.LongIterator;
+import it.unimi.dsi.fastutil.longs.LongSet;
+import java.util.Collections;
+import java.util.List;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.predicate.EqPredicate;
+import org.apache.pinot.common.request.context.predicate.Predicate;
+import org.apache.pinot.core.common.Operator;
+import org.apache.pinot.core.operator.blocks.FilterBlock;
+import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator;
+import org.apache.pinot.core.operator.docidsets.BitmapDocIdSet;
+import org.apache.pinot.segment.local.utils.GeometrySerializer;
+import org.apache.pinot.segment.local.utils.H3Utils;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.index.reader.H3IndexReader;
+import org.apache.pinot.spi.utils.BooleanUtils;
+import org.apache.pinot.spi.utils.BytesUtils;
+import org.locationtech.jts.geom.Geometry;
+import org.roaringbitmap.buffer.BufferFastAggregation;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+
+
+/**
+ * A filter operator that uses H3 index for geospatial data inclusion
+ */
+public class H3InclusionIndexFilterOperator extends BaseFilterOperator {
+
+ private static final String EXPLAIN_NAME = "INCLUSION_FILTER_H3_INDEX";
+
+ private final IndexSegment _segment;
+ private final Predicate _predicate;
+ private final int _numDocs;
+ private final H3IndexReader _h3IndexReader;
+ private final Geometry _geometry;
+ private final boolean _isPositiveCheck;
+
+ public H3InclusionIndexFilterOperator(IndexSegment segment, Predicate
predicate, int numDocs) {
+ _segment = segment;
+ _predicate = predicate;
+ _numDocs = numDocs;
+
+ List<ExpressionContext> arguments =
predicate.getLhs().getFunction().getArguments();
+ EqPredicate eqPredicate = (EqPredicate) predicate;
+ _isPositiveCheck = BooleanUtils.toBoolean(eqPredicate.getValue());
+
+ if (arguments.get(0).getType() == ExpressionContext.Type.IDENTIFIER) {
+ _h3IndexReader =
segment.getDataSource(arguments.get(0).getIdentifier()).getH3Index();
+ _geometry =
GeometrySerializer.deserialize(BytesUtils.toBytes(arguments.get(1).getLiteral()));
+ } else {
+ _h3IndexReader =
segment.getDataSource(arguments.get(1).getIdentifier()).getH3Index();
+ _geometry =
GeometrySerializer.deserialize(BytesUtils.toBytes(arguments.get(0).getLiteral()));
+ }
+ // must be some h3 index
+ assert _h3IndexReader != null : "the column must have H3 index setup.";
+ }
+
+ @Override
+ protected FilterBlock getNextBlock() {
+ // get the set of H3 cells at the specified resolution which completely
cover the input shape and potential cover.
+ Pair<LongSet, LongSet> fullCoverAndPotentialCoverCells =
+ H3Utils.coverGeometryInH3(_geometry,
_h3IndexReader.getH3IndexResolution().getLowestResolution());
+ LongSet fullyCoverH3Cells = fullCoverAndPotentialCoverCells.getLeft();
+ LongSet potentialCoverH3Cells = fullCoverAndPotentialCoverCells.getRight();
+
+ // have list of h3 cell ids for polygon provided
+ // return filtered num_docs
+ ImmutableRoaringBitmap[] potentialMatchDocIds = new
ImmutableRoaringBitmap[potentialCoverH3Cells.size()];
+ int i = 0;
+ LongIterator potentialCoverH3CellsIterator =
potentialCoverH3Cells.iterator();
+ while (potentialCoverH3CellsIterator.hasNext()) {
+ potentialMatchDocIds[i++] =
_h3IndexReader.getDocIds(potentialCoverH3CellsIterator.nextLong());
+ }
+ MutableRoaringBitmap potentialMatchMutableRoaringBitmap =
BufferFastAggregation.or(potentialMatchDocIds);
+ if (_isPositiveCheck) {
+ ImmutableRoaringBitmap[] fullMatchDocIds = new
ImmutableRoaringBitmap[fullyCoverH3Cells.size()];
+ i = 0;
+ LongIterator fullyCoverH3CellsIterator = fullyCoverH3Cells.iterator();
+ while (fullyCoverH3CellsIterator.hasNext()) {
+ fullMatchDocIds[i++] =
_h3IndexReader.getDocIds(fullyCoverH3CellsIterator.nextLong());
+ }
+ MutableRoaringBitmap fullMatchMutableRoaringBitmap =
BufferFastAggregation.or(fullMatchDocIds);
+ return getFilterBlock(fullMatchMutableRoaringBitmap,
potentialMatchMutableRoaringBitmap);
+ } else {
+ i = 0;
+ // remove full match from potential match to get potential not match
cells.
+ potentialCoverH3Cells.removeAll(fullyCoverH3Cells);
+ ImmutableRoaringBitmap[] potentialNotMatchMutableRoaringBitmap =
+ new ImmutableRoaringBitmap[potentialCoverH3Cells.size()];
+ LongIterator potentialNotMatchH3CellsIterator =
potentialCoverH3Cells.iterator();
+ while (potentialNotMatchH3CellsIterator.hasNext()) {
+ potentialNotMatchMutableRoaringBitmap[i++] =
+
_h3IndexReader.getDocIds(potentialNotMatchH3CellsIterator.nextLong());
+ }
+ MutableRoaringBitmap potentialNotMatch =
BufferFastAggregation.or(potentialNotMatchMutableRoaringBitmap);
+ // flip potential match bit map to get exactly not match bitmap.
+ potentialMatchMutableRoaringBitmap.flip(0L, _numDocs);
+ return getFilterBlock(potentialMatchMutableRoaringBitmap,
potentialNotMatch);
+ }
+ }
+
+ /**
+ * Returns the filter block based on the given the partial match doc ids.
+ */
+ private FilterBlock getFilterBlock(MutableRoaringBitmap fullMatchDocIds,
MutableRoaringBitmap partialMatchDocIds) {
+ ExpressionFilterOperator expressionFilterOperator = new
ExpressionFilterOperator(_segment, _predicate, _numDocs);
+ ScanBasedDocIdIterator docIdIterator =
+ (ScanBasedDocIdIterator)
expressionFilterOperator.getNextBlock().getBlockDocIdSet().iterator();
+ MutableRoaringBitmap result = docIdIterator.applyAnd(partialMatchDocIds);
+ result.or(fullMatchDocIds);
+ return new FilterBlock(new BitmapDocIdSet(result, _numDocs) {
+ @Override
+ public long getNumEntriesScannedInFilter() {
+ return docIdIterator.getNumEntriesScanned();
+ }
+ });
+ }
+
+ @Override
+ public List<Operator> getChildOperators() {
+ return Collections.emptyList();
+ }
+
+ @Override
+ public String toExplainString() {
+ StringBuilder stringBuilder = new
StringBuilder(EXPLAIN_NAME).append("(inclusionIndex:h3_index");
+ stringBuilder.append(",operator:").append(_predicate.getType());
+ stringBuilder.append(",predicate:").append(_predicate.toString());
+ return stringBuilder.append(')').toString();
+ }
+}
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java
b/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java
index c43c5b5912..716810e62f 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/plan/FilterPlanNode.java
@@ -38,6 +38,7 @@ import
org.apache.pinot.core.operator.filter.BitmapBasedFilterOperator;
import org.apache.pinot.core.operator.filter.EmptyFilterOperator;
import org.apache.pinot.core.operator.filter.ExpressionFilterOperator;
import org.apache.pinot.core.operator.filter.FilterOperatorUtils;
+import org.apache.pinot.core.operator.filter.H3InclusionIndexFilterOperator;
import org.apache.pinot.core.operator.filter.H3IndexFilterOperator;
import org.apache.pinot.core.operator.filter.JsonMatchFilterOperator;
import org.apache.pinot.core.operator.filter.MatchAllFilterOperator;
@@ -56,6 +57,7 @@ import org.roaringbitmap.buffer.MutableRoaringBitmap;
public class FilterPlanNode implements PlanNode {
+
private final IndexSegment _indexSegment;
private final QueryContext _queryContext;
private final FilterContext _filter;
@@ -106,7 +108,7 @@ public class FilterPlanNode implements PlanNode {
}
/**
- * H3 index can be applied iff:
+ * H3 index can be applied on ST_Distance iff:
* <ul>
* <li>Predicate is of type RANGE</li>
* <li>Left-hand-side of the predicate is an ST_Distance function</li>
@@ -114,7 +116,7 @@ public class FilterPlanNode implements PlanNode {
* <li>The identifier column has H3 index</li>
* </ul>
*/
- private boolean canApplyH3Index(Predicate predicate, FunctionContext
function) {
+ private boolean canApplyH3IndexForDistanceCheck(Predicate predicate,
FunctionContext function) {
if (predicate.getType() != Predicate.Type.RANGE) {
return false;
}
@@ -139,6 +141,46 @@ public class FilterPlanNode implements PlanNode {
return columnName != null &&
_indexSegment.getDataSource(columnName).getH3Index() != null && findLiteral;
}
+ /**
+ * H3 index can be applied for inclusion check iff:
+ * <ul>
+ * <li>Predicate is of type EQ</li>
+ * <li>Left-hand-side of the predicate is an ST_Within or ST_Contains
function</li>
+ * <li>For ST_Within, the first argument is an identifier, the second
argument is literal</li>
+ * <li>For ST_Contains function the first argument is literal, the second
argument is an identifier</li>
+ * <li>The identifier column has H3 index</li>
+ * </ul>
+ */
+ private boolean canApplyH3IndexForInclusionCheck(Predicate predicate,
FunctionContext function) {
+ if (predicate.getType() != Predicate.Type.EQ) {
+ return false;
+ }
+ String functionName = function.getFunctionName();
+ if (!functionName.equals("stwithin") &&
!functionName.equals("stcontains")) {
+ return false;
+ }
+ List<ExpressionContext> arguments = function.getArguments();
+ if (arguments.size() != 2) {
+ throw new BadQueryRequestException("Expect 2 arguments for function: " +
functionName);
+ }
+ // TODO: handle nested geography/geometry conversion functions
+ if (functionName.equals("stwithin")) {
+ if (arguments.get(0).getType() == ExpressionContext.Type.IDENTIFIER
+ && arguments.get(1).getType() == ExpressionContext.Type.LITERAL) {
+ String columnName = arguments.get(0).getIdentifier();
+ return _indexSegment.getDataSource(columnName).getH3Index() != null;
+ }
+ return false;
+ } else {
+ if (arguments.get(1).getType() == ExpressionContext.Type.IDENTIFIER
+ && arguments.get(0).getType() == ExpressionContext.Type.LITERAL) {
+ String columnName = arguments.get(1).getIdentifier();
+ return _indexSegment.getDataSource(columnName).getH3Index() != null;
+ }
+ return false;
+ }
+ }
+
/**
* Helper method to build the operator tree from the filter.
*/
@@ -181,12 +223,15 @@ public class FilterPlanNode implements PlanNode {
Predicate predicate = filter.getPredicate();
ExpressionContext lhs = predicate.getLhs();
if (lhs.getType() == ExpressionContext.Type.FUNCTION) {
- if (canApplyH3Index(predicate, lhs.getFunction())) {
- return new H3IndexFilterOperator(_indexSegment, predicate,
numDocs);
+ if (canApplyH3IndexForDistanceCheck(predicate, lhs.getFunction())) {
+ return new H3IndexFilterOperator(_indexSegment, predicate,
numDocs);
+ } else if (canApplyH3IndexForInclusionCheck(predicate,
lhs.getFunction())) {
+ return new H3InclusionIndexFilterOperator(_indexSegment,
predicate, numDocs);
+ } else {
+ // TODO: ExpressionFilterOperator does not support predicate types
without PredicateEvaluator (IS_NULL,
+ // IS_NOT_NULL, TEXT_MATCH)
+ return new ExpressionFilterOperator(_indexSegment, predicate,
numDocs);
}
- // TODO: ExpressionFilterOperator does not support predicate types
without PredicateEvaluator (IS_NULL,
- // IS_NOT_NULL, TEXT_MATCH)
- return new ExpressionFilterOperator(_indexSegment, predicate,
numDocs);
} else {
String column = lhs.getIdentifier();
DataSource dataSource = _indexSegment.getDataSource(column);
diff --git
a/pinot-core/src/test/java/org/apache/pinot/queries/H3IndexQueriesTest.java
b/pinot-core/src/test/java/org/apache/pinot/queries/H3IndexQueriesTest.java
index 3d39b31495..907f6b9ec6 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/H3IndexQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/H3IndexQueriesTest.java
@@ -18,6 +18,8 @@
*/
package org.apache.pinot.queries;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
@@ -48,7 +50,6 @@ import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
import org.locationtech.jts.geom.Coordinate;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@@ -64,15 +65,21 @@ public class H3IndexQueriesTest extends BaseQueriesTest {
private static final int NUM_RECORDS = 10000;
private static final String H3_INDEX_COLUMN = "h3Column";
+ private static final String H3_INDEX_GEOMETRY_COLUMN = "h3Column_geometry";
private static final String NON_H3_INDEX_COLUMN = "nonH3Column";
+ private static final String NON_H3_INDEX_GEOMETRY_COLUMN =
"nonH3Column_geometry";
private static final Schema SCHEMA =
new
Schema.SchemaBuilder().setSchemaName(RAW_TABLE_NAME).addSingleValueDimension(H3_INDEX_COLUMN,
DataType.BYTES)
- .addSingleValueDimension(NON_H3_INDEX_COLUMN,
DataType.BYTES).build();
+ .addSingleValueDimension(NON_H3_INDEX_COLUMN, DataType.BYTES)
+ .addSingleValueDimension(H3_INDEX_GEOMETRY_COLUMN, DataType.BYTES)
+ .addSingleValueDimension(NON_H3_INDEX_GEOMETRY_COLUMN,
DataType.BYTES).build();
private static final Map<String, String> H3_INDEX_PROPERTIES =
Collections.singletonMap("resolutions", "5");
private static final TableConfig TABLE_CONFIG = new
TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME)
- .setFieldConfigList(Collections.singletonList(
- new FieldConfig(H3_INDEX_COLUMN,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.H3, null,
- H3_INDEX_PROPERTIES))).build();
+ .setFieldConfigList(ImmutableList
+ .of(new FieldConfig(H3_INDEX_COLUMN,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.H3, null,
+ H3_INDEX_PROPERTIES),
+ new FieldConfig(H3_INDEX_GEOMETRY_COLUMN,
FieldConfig.EncodingType.DICTIONARY, FieldConfig.IndexType.H3,
+ null, H3_INDEX_PROPERTIES))).build();
private IndexSegment _indexSegment;
@@ -91,23 +98,10 @@ public class H3IndexQueriesTest extends BaseQueriesTest {
throw new UnsupportedOperationException();
}
- @BeforeClass
- public void setUp()
+ public void setUp(List<GenericRow> records)
throws Exception {
FileUtils.deleteDirectory(INDEX_DIR);
- List<GenericRow> records = new ArrayList<>(NUM_RECORDS);
- for (int i = 0; i < NUM_RECORDS; i++) {
- double longitude = -122.5 + RANDOM.nextDouble();
- double latitude = 37 + RANDOM.nextDouble();
- byte[] value = GeometrySerializer
- .serialize(GeometryUtils.GEOGRAPHY_FACTORY.createPoint(new
Coordinate(longitude, latitude)));
- GenericRow record = new GenericRow();
- record.putValue(H3_INDEX_COLUMN, value);
- record.putValue(NON_H3_INDEX_COLUMN, value);
- records.add(record);
- }
-
SegmentGeneratorConfig segmentGeneratorConfig = new
SegmentGeneratorConfig(TABLE_CONFIG, SCHEMA);
segmentGeneratorConfig.setTableName(RAW_TABLE_NAME);
segmentGeneratorConfig.setSegmentName(SEGMENT_NAME);
@@ -118,14 +112,36 @@ public class H3IndexQueriesTest extends BaseQueriesTest {
driver.build();
IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
- indexLoadingConfig
- .setH3IndexConfigs(Collections.singletonMap(H3_INDEX_COLUMN, new
H3IndexConfig(H3_INDEX_PROPERTIES)));
+ indexLoadingConfig.setH3IndexConfigs(ImmutableMap
+ .of(H3_INDEX_COLUMN, new H3IndexConfig(H3_INDEX_PROPERTIES),
H3_INDEX_GEOMETRY_COLUMN,
+ new H3IndexConfig(H3_INDEX_PROPERTIES)));
_indexSegment = ImmutableSegmentLoader.load(new File(INDEX_DIR,
SEGMENT_NAME), indexLoadingConfig);
}
+ private void addRecord(List<GenericRow> records, double longitude, double
latitude) {
+ byte[] value =
+
GeometrySerializer.serialize(GeometryUtils.GEOGRAPHY_FACTORY.createPoint(new
Coordinate(longitude, latitude)));
+ byte[] geometryValue =
+
GeometrySerializer.serialize(GeometryUtils.GEOMETRY_FACTORY.createPoint(new
Coordinate(longitude, latitude)));
+ GenericRow record = new GenericRow();
+ record.putValue(H3_INDEX_COLUMN, value);
+ record.putValue(NON_H3_INDEX_COLUMN, value);
+ record.putValue(H3_INDEX_GEOMETRY_COLUMN, geometryValue);
+ record.putValue(NON_H3_INDEX_GEOMETRY_COLUMN, geometryValue);
+ records.add(record);
+ }
+
@Test
public void testH3Index()
- throws IOException {
+ throws Exception {
+ List<GenericRow> records = new ArrayList<>(NUM_RECORDS);
+ for (int i = 0; i < NUM_RECORDS; i++) {
+ double longitude = -122.5 + RANDOM.nextDouble();
+ double latitude = 37 + RANDOM.nextDouble();
+ addRecord(records, longitude, latitude);
+ }
+ setUp(records);
+
// Invalid upper bound
{
for (String query : Arrays
@@ -210,11 +226,177 @@ public class H3IndexQueriesTest extends BaseQueriesTest {
Assert.assertNotNull(aggregationResult);
Assert.assertEquals((long) aggregationResult.get(0), NUM_RECORDS);
}
+
+ {
+ // Test st contains in polygon
+ testQueryStContain("SELECT COUNT(*) FROM testTable WHERE
ST_Contains(ST_GeomFromText('POLYGON ((\n"
+ + " -122.0008564 37.5004316, \n"
+ + " -121.9991291 37.5005168, \n"
+ + " -121.9990325 37.4995294, \n"
+ + " -122.0001268 37.4993506, \n"
+ + " -122.0008564 37.5004316))'), %s) = 1");
+
+ // negative test
+ testQueryStContain("SELECT COUNT(*) FROM testTable WHERE
ST_Contains(ST_GeomFromText('POLYGON ((\n"
+ + " -122.0008564 37.5004316, \n"
+ + " -121.9991291 37.5005168, \n"
+ + " -121.9990325 37.4995294, \n"
+ + " -122.0001268 37.4993506, \n"
+ + " -122.0008564 37.5004316))'), %s) = 0");
+ }
+ {
+ // Test st contains in polygon, doesn't have
+ String query = "SELECT COUNT(*) FROM testTable WHERE
ST_Contains(ST_GeomFromText('POLYGON ((\n"
+ + " 122.0008564 -37.5004316, \n"
+ + " 121.9991291 -37.5005168, \n"
+ + " 121.9990325 -37.4995294, \n"
+ + " 122.0001268 -37.4993506, \n"
+ + " 122.0008564 -37.5004316))'), h3Column_geometry) = 1";
+ AggregationOperator aggregationOperator = getOperator(query);
+ IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock();
+ // Expect 0 entries scanned in filter
+ QueriesTestUtils
+
.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(),
0, 0, 0,
+ NUM_RECORDS);
+ List<Object> aggregationResult = resultsBlock.getAggregationResult();
+ Assert.assertNotNull(aggregationResult);
+ Assert.assertEquals((long) aggregationResult.get(0), 0);
+ }
+
+ {
+ // Test st within in polygon
+ testQueryStContain("SELECT COUNT(*) FROM testTable WHERE ST_Within(%s,
ST_GeomFromText('POLYGON ((\n"
+ + " -122.0008564 37.5004316, \n"
+ + " -121.9991291 37.5005168, \n"
+ + " -121.9990325 37.4995294, \n"
+ + " -122.0001268 37.4993506, \n"
+ + " -122.0008564 37.5004316))')) = 1");
+
+ // negative test
+ testQueryStContain("SELECT COUNT(*) FROM testTable WHERE ST_Within(%s,
ST_GeomFromText('POLYGON ((\n"
+ + " -122.0008564 37.5004316, \n"
+ + " -121.9991291 37.5005168, \n"
+ + " -121.9990325 37.4995294, \n"
+ + " -122.0001268 37.4993506, \n"
+ + " -122.0008564 37.5004316))')) = 0");
+ }
+ {
+ // Test st within in polygon, doesn't have
+ String query = "SELECT COUNT(*) FROM testTable WHERE
ST_Within(h3Column_geometry, ST_GeomFromText('POLYGON ((\n"
+ + " 122.0008564 -37.5004316, \n"
+ + " 121.9991291 -37.5005168, \n"
+ + " 121.9990325 -37.4995294, \n"
+ + " 122.0001268 -37.4993506, \n"
+ + " 122.0008564 -37.5004316))')) = 1";
+ AggregationOperator aggregationOperator = getOperator(query);
+ IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock();
+ // Expect 0 entries scanned in filter
+ QueriesTestUtils
+
.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(),
0, 0, 0,
+ NUM_RECORDS);
+ List<Object> aggregationResult = resultsBlock.getAggregationResult();
+ Assert.assertNotNull(aggregationResult);
+ Assert.assertEquals((long) aggregationResult.get(0), 0);
+ }
+ }
+
+ @Test
+ public void stContainPointVeryCloseToBorderTest()
+ throws Exception {
+ List<GenericRow> records = new ArrayList<>(1);
+ addRecord(records, -122.0008081, 37.5004231);
+ setUp(records);
+ // Test point is closed to border of a polygon but inside.
+ String query = "SELECT COUNT(*) FROM testTable WHERE
ST_Contains(ST_GeomFromText('POLYGON ((\n"
+ + " -122.0008564 37.5004316, \n"
+ + " -121.9991291 37.5005168, \n"
+ + " -121.9990325 37.4995294, \n"
+ + " -122.0001268 37.4993506, \n"
+ + " -122.0008564 37.5004316))'), h3Column_geometry) = 1";
+ AggregationOperator aggregationOperator = getOperator(query);
+ IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock();
+
QueriesTestUtils.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(),
1, 1, 0, 1);
+ List<Object> aggregationResult = resultsBlock.getAggregationResult();
+ Assert.assertNotNull(aggregationResult);
+ Assert.assertEquals((long) aggregationResult.get(0), 1);
+ }
+
+ @Test
+ public void stWithinPointVeryCloseToBorderTest()
+ throws Exception {
+ List<GenericRow> records = new ArrayList<>(1);
+ addRecord(records, -122.0008081, 37.5004231);
+ setUp(records);
+ // Test point is closed to border of a polygon but inside.
+ String query = "SELECT COUNT(*) FROM testTable WHERE
ST_Within(h3Column_geometry, ST_GeomFromText('POLYGON ((\n"
+ + " -122.0008564 37.5004316, \n"
+ + " -121.9991291 37.5005168, \n"
+ + " -121.9990325 37.4995294, \n"
+ + " -122.0001268 37.4993506, \n"
+ + " -122.0008564 37.5004316))')) = 1";
+ AggregationOperator aggregationOperator = getOperator(query);
+ IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock();
+
QueriesTestUtils.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(),
1, 1, 0, 1);
+ List<Object> aggregationResult = resultsBlock.getAggregationResult();
+ Assert.assertNotNull(aggregationResult);
+ Assert.assertEquals((long) aggregationResult.get(0), 1);
+ }
+
+ @Test
+ public void stContainPointVeryCloseToBorderButOutsideTest()
+ throws Exception {
+ List<GenericRow> records = new ArrayList<>(1);
+ addRecord(records, -122.0007277, 37.5005785);
+ setUp(records);
+ // Test point is closed to border of a polygon but outside.
+ String query = "SELECT COUNT(*) FROM testTable WHERE
ST_Contains(ST_GeomFromText('POLYGON ((\n"
+ + " -122.0008564 37.5004316, \n"
+ + " -121.9991291 37.5005168, \n"
+ + " -121.9990325 37.4995294, \n"
+ + " -122.0001268 37.4993506, \n"
+ + " -122.0008564 37.5004316))'), h3Column_geometry) = 1";
+ AggregationOperator aggregationOperator = getOperator(query);
+ IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock();
+
QueriesTestUtils.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(),
0, 1, 0, 1);
+ List<Object> aggregationResult = resultsBlock.getAggregationResult();
+ Assert.assertNotNull(aggregationResult);
+ Assert.assertEquals((long) aggregationResult.get(0), 0);
+ }
+
+ @Test
+ public void stWithinPointVeryCloseToBorderButOutsideTest()
+ throws Exception {
+ List<GenericRow> records = new ArrayList<>(1);
+ addRecord(records, -122.0007277, 37.5005785);
+ setUp(records);
+ // Test point is closed to border of a polygon but outside.
+ String query = "SELECT COUNT(*) FROM testTable WHERE
ST_Within(h3Column_geometry, ST_GeomFromText('POLYGON ((\n"
+ + " -122.0008564 37.5004316, \n"
+ + " -121.9991291 37.5005168, \n"
+ + " -121.9990325 37.4995294, \n"
+ + " -122.0001268 37.4993506, \n"
+ + " -122.0008564 37.5004316))')) = 1";
+ AggregationOperator aggregationOperator = getOperator(query);
+ IntermediateResultsBlock resultsBlock = aggregationOperator.nextBlock();
+
QueriesTestUtils.testInnerSegmentExecutionStatistics(aggregationOperator.getExecutionStatistics(),
0, 1, 0, 1);
+ List<Object> aggregationResult = resultsBlock.getAggregationResult();
+ Assert.assertNotNull(aggregationResult);
+ Assert.assertEquals((long) aggregationResult.get(0), 0);
}
private void testQuery(String queryTemplate) {
String h3IndexQuery = String.format(queryTemplate, H3_INDEX_COLUMN);
String nonH3IndexQuery = String.format(queryTemplate, NON_H3_INDEX_COLUMN);
+ validateQueryResult(h3IndexQuery, nonH3IndexQuery);
+ }
+
+ private void testQueryStContain(String queryTemplate) {
+ String h3IndexQuery = String.format(queryTemplate,
H3_INDEX_GEOMETRY_COLUMN);
+ String nonH3IndexQuery = String.format(queryTemplate,
NON_H3_INDEX_GEOMETRY_COLUMN);
+ validateQueryResult(h3IndexQuery, nonH3IndexQuery);
+ }
+
+ private void validateQueryResult(String h3IndexQuery, String
nonH3IndexQuery) {
AggregationOperator h3IndexOperator = getOperator(h3IndexQuery);
AggregationOperator nonH3IndexOperator = getOperator(nonH3IndexQuery);
IntermediateResultsBlock h3IndexResultsBlock = h3IndexOperator.nextBlock();
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java
index 165304e0a2..e971d07b8d 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/H3Utils.java
@@ -19,10 +19,28 @@
package org.apache.pinot.segment.local.utils;
import com.uber.h3core.H3Core;
+import com.uber.h3core.exceptions.LineUndefinedException;
+import com.uber.h3core.util.GeoCoord;
+import it.unimi.dsi.fastutil.longs.LongArrayList;
+import it.unimi.dsi.fastutil.longs.LongList;
+import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
+import it.unimi.dsi.fastutil.longs.LongSet;
import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.apache.commons.lang3.tuple.Pair;
+import org.locationtech.jts.geom.Coordinate;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.geom.GeometryCollection;
+import org.locationtech.jts.geom.LineString;
+import org.locationtech.jts.geom.Point;
+import org.locationtech.jts.geom.Polygon;
public class H3Utils {
+
private H3Utils() {
}
@@ -35,4 +53,78 @@ public class H3Utils {
throw new RuntimeException("Failed to instantiate H3 instance", e);
}
}
+
+ private static LongSet coverLineInH3(LineString lineString, int resolution) {
+ LongSet coveringH3Cells = new LongOpenHashSet();
+ LongList endpointH3Cells = new LongArrayList();
+ for (Coordinate endpoint : lineString.getCoordinates()) {
+ endpointH3Cells.add(H3_CORE.geoToH3(endpoint.y, endpoint.x, resolution));
+ }
+ for (int i = 0; i < endpointH3Cells.size() - 1; i++) {
+ try {
+ coveringH3Cells.addAll(H3_CORE.h3Line(endpointH3Cells.getLong(i),
endpointH3Cells.getLong(i + 1)));
+ } catch (LineUndefinedException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ return coveringH3Cells;
+ }
+
+ private static Pair<LongSet, LongSet> coverPolygonInH3(Polygon polygon, int
resolution) {
+ List<Long> polyfillCells =
H3_CORE.polyfill(Arrays.stream(polygon.getExteriorRing().getCoordinates())
+ .map(coordinate -> new GeoCoord(coordinate.y,
coordinate.x)).collect(Collectors.toList()),
+ Collections.emptyList(), resolution);
+ // TODO: this can be further optimized to use native H3 implementation.
They have plan to support natively.
+ // https://github.com/apache/pinot/issues/8547
+ LongSet potentialH3Cells = new LongOpenHashSet();
+ if (polyfillCells.isEmpty()) {
+ // If the polyfill cells are empty, meaning the polygon might be smaller
than a single cell in the H3 system.
+ // So just get whatever one. here choose the first one. the follow up
kRing(firstCell, 1) will cover the whole
+ // polygon if there is potential not covered by the first point's
belonging cell.
+ // ref: https://github.com/uber/h3/issues/456#issuecomment-827760163
+ Coordinate represent = polygon.getCoordinate();
+ potentialH3Cells.add(H3_CORE.geoToH3(represent.getY(), represent.getX(),
resolution));
+ } else {
+ potentialH3Cells.addAll(polyfillCells);
+ }
+ potentialH3Cells
+ .addAll(potentialH3Cells.stream().flatMap(cell -> H3_CORE.kRing(cell,
1).stream()).collect(Collectors.toSet()));
+ LongSet fullyContainedCell = new LongOpenHashSet(
+ potentialH3Cells.stream().filter(h3Cell ->
polygon.contains(createPolygonFromH3Cell(h3Cell)))
+ .collect(Collectors.toSet()));
+ return Pair.of(fullyContainedCell, potentialH3Cells);
+ }
+
+ private static Polygon createPolygonFromH3Cell(long h3Cell) {
+ List<GeoCoord> boundary = H3_CORE.h3ToGeoBoundary(h3Cell);
+ boundary.add(boundary.get(0));
+ return GeometryUtils.GEOMETRY_FACTORY.createPolygon(
+ boundary.stream().map(geoCoord -> new Coordinate(geoCoord.lng,
geoCoord.lat)).toArray(Coordinate[]::new));
+ }
+
+ // Return a pair of cell ids: The first fully contain, the second is
potential contain.
+ // potential contains contain the fully contain.
+ public static Pair<LongSet, LongSet> coverGeometryInH3(Geometry geometry,
int resolution) {
+ if (geometry instanceof Point) {
+ LongSet potentialCover = new LongOpenHashSet();
+ potentialCover.add(H3_CORE.geoToH3(geometry.getCoordinate().y,
geometry.getCoordinate().x, resolution));
+ return Pair.of(new LongOpenHashSet(), potentialCover);
+ } else if (geometry instanceof LineString) {
+ LongSet potentialCover = new LongOpenHashSet();
+ potentialCover.addAll(coverLineInH3(((LineString) geometry),
resolution));
+ return Pair.of(new LongOpenHashSet(), potentialCover);
+ } else if (geometry instanceof Polygon) {
+ return coverPolygonInH3(((Polygon) geometry), resolution);
+ } else if (geometry instanceof GeometryCollection) {
+ LongOpenHashSet fullCover = new LongOpenHashSet();
+ LongOpenHashSet potentialCover = new LongOpenHashSet();
+ for (int i = 0; i < geometry.getNumGeometries(); i++) {
+ fullCover.addAll(coverGeometryInH3(geometry.getGeometryN(i),
resolution).getLeft());
+ potentialCover.addAll(coverGeometryInH3(geometry.getGeometryN(i),
resolution).getRight());
+ }
+ return Pair.of(fullCover, potentialCover);
+ } else {
+ throw new UnsupportedOperationException("Unexpected type: " +
geometry.getGeometryType());
+ }
+ }
}
diff --git a/pom.xml b/pom.xml
index 647dd5f049..81440daec1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -151,7 +151,7 @@
<netty.version>4.1.74.Final</netty.version>
<reactivestreams.version>1.0.3</reactivestreams.version>
<jts.version>1.16.1</jts.version>
- <h3.version>3.0.3</h3.version>
+ <h3.version>3.7.0</h3.version>
<jmh.version>1.26</jmh.version>
<audienceannotations.version>0.13.0</audienceannotations.version>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]