Jackie-Jiang commented on a change in pull request #7916:
URL: https://github.com/apache/pinot/pull/7916#discussion_r787207706
##########
File path:
pinot-core/src/main/java/org/apache/pinot/core/operator/blocks/ProjectionBlock.java
##########
@@ -52,7 +52,7 @@ public BlockValSet getBlockValueSet(String column) {
@Override
public BlockDocIdSet getBlockDocIdSet() {
- throw new UnsupportedOperationException();
+ return null;
Review comment:
Revert this since it is no longer relevant
##########
File path:
pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java
##########
@@ -154,4 +129,168 @@ private static boolean
isFitForDictionaryBasedPlan(AggregationFunction[] aggrega
}
return true;
}
+
+ /**
+ * Build a FilteredAggregationOperator given the parameters.
+ * @param mainPredicateFilterOperator Filter operator corresponding to the
main predicate
+ * @param mainTransformOperator Transform operator corresponding to the main
predicate
+ * @param aggregationFunctions Aggregation functions in the query
+ * @param numTotalDocs Number of total docs
+ */
+ private BaseOperator<IntermediateResultsBlock>
buildOperatorForFilteredAggregations(
+ BaseFilterOperator mainPredicateFilterOperator,
Review comment:
(code format) Can you apply the latest [code
format](https://docs.pinot.apache.org/developers/developers-and-contributors/code-setup#intellij)
and reformat this file? Several places do not follow the code format. Also,
can we reduce some empty lines in this method?
##########
File path:
pinot-core/src/test/java/org/apache/pinot/queries/FilteredAggregationsTest.java
##########
@@ -0,0 +1,512 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.queries;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.common.response.broker.BrokerResponseNative;
+import org.apache.pinot.common.response.broker.ResultTable;
+import org.apache.pinot.common.utils.DataSchema;
+import
org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
+import
org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl;
+import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
+import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader;
+import org.apache.pinot.segment.spi.ImmutableSegment;
+import org.apache.pinot.segment.spi.IndexSegment;
+import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.data.readers.GenericRow;
+import org.apache.pinot.spi.data.readers.RecordReader;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
+import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+
+public class FilteredAggregationsTest extends BaseQueriesTest {
+ private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(),
"FilteredAggregationsTest");
+ private static final String TABLE_NAME = "MyTable";
+ private static final String FIRST_SEGMENT_NAME = "firstTestSegment";
+ private static final String SECOND_SEGMENT_NAME = "secondTestSegment";
+ private static final String INT_COL_NAME = "INT_COL";
+ private static final String NO_INDEX_INT_COL_NAME = "NO_INDEX_COL";
+ private static final String STATIC_INT_COL_NAME = "STATIC_INT_COL";
+ private static final Integer INT_BASE_VALUE = 0;
+ private static final Integer NUM_ROWS = 30000;
+
+
+ private IndexSegment _indexSegment;
+ private List<IndexSegment> _indexSegments;
+
+ @Override
+ protected String getFilter() {
+ return "";
+ }
+
+ @Override
+ protected IndexSegment getIndexSegment() {
+ return _indexSegment;
+ }
+
+ @Override
+ protected List<IndexSegment> getIndexSegments() {
+ return _indexSegments;
+ }
+
+ @BeforeClass
+ public void setUp()
+ throws Exception {
+ FileUtils.deleteQuietly(INDEX_DIR);
+
+ buildSegment(FIRST_SEGMENT_NAME);
+ buildSegment(SECOND_SEGMENT_NAME);
+ IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig();
+
+ Set<String> invertedIndexCols = new HashSet<>();
+ invertedIndexCols.add(INT_COL_NAME);
+
+ indexLoadingConfig.setInvertedIndexColumns(invertedIndexCols);
+ ImmutableSegment firstImmutableSegment =
+ ImmutableSegmentLoader.load(new File(INDEX_DIR, FIRST_SEGMENT_NAME),
indexLoadingConfig);
+ ImmutableSegment secondImmutableSegment =
+ ImmutableSegmentLoader.load(new File(INDEX_DIR, SECOND_SEGMENT_NAME),
indexLoadingConfig);
+ _indexSegment = firstImmutableSegment;
+ _indexSegments = Arrays.asList(firstImmutableSegment,
secondImmutableSegment);
+ }
+
+ @AfterClass
+ public void tearDown() {
+ _indexSegment.destroy();
+ FileUtils.deleteQuietly(INDEX_DIR);
+ }
+
+ private List<GenericRow> createTestData(int numRows) {
+ List<GenericRow> rows = new ArrayList<>();
+
+ for (int i = 0; i < numRows; i++) {
+ GenericRow row = new GenericRow();
+ row.putField(INT_COL_NAME, INT_BASE_VALUE + i);
+ row.putField(NO_INDEX_INT_COL_NAME, i);
+ row.putField(STATIC_INT_COL_NAME, 10);
+
+ rows.add(row);
+ }
+ return rows;
+ }
+
+ private void buildSegment(String segmentName)
+ throws Exception {
+ List<GenericRow> rows = createTestData(NUM_ROWS);
+ List<FieldConfig> fieldConfigs = new ArrayList<>();
+
+ TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+
.setInvertedIndexColumns(Arrays.asList(INT_COL_NAME)).setFieldConfigList(fieldConfigs).build();
+ Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+ .addSingleValueDimension(NO_INDEX_INT_COL_NAME, FieldSpec.DataType.INT)
+ .addSingleValueDimension(STATIC_INT_COL_NAME, FieldSpec.DataType.INT)
+ .addMetric(INT_COL_NAME, FieldSpec.DataType.INT).build();
+ SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig,
schema);
+ config.setOutDir(INDEX_DIR.getPath());
+ config.setTableName(TABLE_NAME);
+ config.setSegmentName(segmentName);
+
+ SegmentIndexCreationDriverImpl driver = new
SegmentIndexCreationDriverImpl();
+ try (RecordReader recordReader = new GenericRowRecordReader(rows)) {
+ driver.init(config, recordReader);
+ driver.build();
+ }
+ }
+
+ private void testInterSegmentAggregationQueryHelper(String firstQuery,
String secondQuery) {
+ // SQL
+ BrokerResponseNative firstBrokerResponseNative =
getBrokerResponseForSqlQuery(firstQuery);
+ BrokerResponseNative secondBrokerResponseNative =
getBrokerResponseForSqlQuery(secondQuery);
+ ResultTable firstResultTable = firstBrokerResponseNative.getResultTable();
+ ResultTable secondResultTable =
secondBrokerResponseNative.getResultTable();
+ DataSchema firstDataSchema = firstResultTable.getDataSchema();
+ DataSchema secondDataSchema = secondResultTable.getDataSchema();
+
+ Assert.assertEquals(firstDataSchema.size(), secondDataSchema.size());
+
+ List<Object[]> firstSetOfRows = firstResultTable.getRows();
+ List<Object[]> secondSetOfRows = secondResultTable.getRows();
+
+ Assert.assertEquals(firstSetOfRows.size(), secondSetOfRows.size());
+
+ for (int i = 0; i < firstSetOfRows.size(); i++) {
+ Object[] firstSetRow = firstSetOfRows.get(i);
+ Object[] secondSetRow = secondSetOfRows.get(i);
+
+ Assert.assertEquals(firstSetRow.length, secondSetRow.length);
+
+ for (int j = 0; j < firstSetRow.length; j++) {
+ //System.out.println("FIRST " + firstSetRow[j] + " SECOND " +
secondSetRow[j] + " j " + j);
+ Assert.assertEquals(firstSetRow[j], secondSetRow[j]);
+ }
+ }
+ }
+
+ @Test
+ public void testInterSegment() {
+
+ String query =
+ "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 9999)"
+ + "FROM MyTable WHERE INT_COL < 1000000";
+
+ String nonFilterQuery =
+ "SELECT SUM(INT_COL)"
+ + "FROM MyTable WHERE INT_COL > 9999 AND INT_COL < 1000000";
+
+ testInterSegmentAggregationQueryHelper(query, nonFilterQuery);
+
+ query = "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 1234 AND INT_COL <
22000)"
+ + "FROM MyTable";
+
+ nonFilterQuery = "SELECT SUM("
Review comment:
(code style) Suggest reformatting the queries in this test to be more
compact
##########
File path:
pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java
##########
@@ -62,57 +69,25 @@ public AggregationPlanNode(IndexSegment indexSegment,
QueryContext queryContext)
public Operator<IntermediateResultsBlock> run() {
assert _queryContext.getAggregationFunctions() != null;
- int numTotalDocs = _indexSegment.getSegmentMetadata().getTotalDocs();
- AggregationFunction[] aggregationFunctions =
_queryContext.getAggregationFunctions();
+ boolean hasFilteredPredicates = _queryContext.isHasFilteredAggregations();
- FilterPlanNode filterPlanNode = new FilterPlanNode(_indexSegment,
_queryContext);
- BaseFilterOperator filterOperator = filterPlanNode.run();
+ Pair<FilterPlanNode, BaseFilterOperator> filterOperatorPair =
Review comment:
What I meant is that we can branch these 2 cases earlier because the
optimizations for regular aggregation don't apply to filtered aggregation (e.g.
extra check on line 246, also star-tree should not be used for filtered
aggregation which is not checked properly in the current code). It is more
readable if we totally split these 2 cases:
```
if (hasFilteredPredicates) {
return buildOperatorForFilteredAggregations();
} else {
return buildOperatorForNonFilteredAggregations();
}
```
##########
File path:
pinot-core/src/main/java/org/apache/pinot/core/operator/blocks/TransformBlock.java
##########
@@ -43,6 +43,11 @@ public TransformBlock(ProjectionBlock projectionBlock,
_transformFunctionMap = transformFunctionMap;
}
+ protected TransformBlock(TransformBlock transformBlock) {
Review comment:
Revert this file
##########
File path:
pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java
##########
@@ -90,9 +92,11 @@
// Pre-calculate the aggregation functions and columns for the query so that
it can be shared across all the segments
private AggregationFunction[] _aggregationFunctions;
- private List<Pair<AggregationFunction, FilterContext>>
_filteredAggregationFunctions;
+
Review comment:
I see your point, but my concern is that an aggregation function and a filter
are logically two independent concepts, and embedding a filter into an aggregation
function could cause confusion. If we need to associate some extra attributes
with an aggregation function, I'd suggest adding a wrapper class instead of
implementing a special `AggregationFunction`.
Based on the current implementation, I feel `Pair` itself should be enough
(only need to associate the `FilterContext` with the `AggregationFunction`).
To maintain the order of the aggregations, we may add pairs with `null`
`FilterContext`
##########
File path:
pinot-core/src/main/java/org/apache/pinot/core/query/request/context/QueryContext.java
##########
@@ -441,34 +471,54 @@ public QueryContext build() {
*/
private void generateAggregationFunctions(QueryContext queryContext) {
List<AggregationFunction> aggregationFunctions = new ArrayList<>();
- List<Pair<AggregationFunction, FilterContext>>
filteredAggregationFunctions = new ArrayList<>();
Map<FunctionContext, Integer> aggregationFunctionIndexMap = new
HashMap<>();
+ Map<Pair<FunctionContext, FilterContext>, Integer>
filterExpressionIndexMap = new HashMap<>();
// Add aggregation functions in the SELECT clause
// NOTE: DO NOT deduplicate the aggregation functions in the SELECT
clause because that involves protocol change.
- List<FunctionContext> aggregationsInSelect = new ArrayList<>();
- List<Pair<FunctionContext, FilterContext>> filteredAggregations = new
ArrayList<>();
+ List<Pair<Pair<FilterContext, ExpressionContext>, FunctionContext>>
aggregationsInSelect = new ArrayList<>();
for (ExpressionContext selectExpression :
queryContext._selectExpressions) {
- getAggregations(selectExpression, aggregationsInSelect,
filteredAggregations);
+ getAggregations(selectExpression, aggregationsInSelect);
}
- for (FunctionContext function : aggregationsInSelect) {
+ for (Pair<Pair<FilterContext, ExpressionContext>, FunctionContext> pair
: aggregationsInSelect) {
+ FunctionContext function = pair.getRight();
int functionIndex = aggregationFunctions.size();
AggregationFunction aggregationFunction =
AggregationFunctionFactory.getAggregationFunction(function,
queryContext);
+
+ // Hack: If the left pair is not null, implies a filtered aggregation
Review comment:
Revise this comment? We should not have hacks in production code
##########
File path:
pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java
##########
@@ -154,4 +129,168 @@ private static boolean
isFitForDictionaryBasedPlan(AggregationFunction[] aggrega
}
return true;
}
+
+ /**
+ * Build a FilteredAggregationOperator given the parameters.
+ * @param mainPredicateFilterOperator Filter operator corresponding to the
main predicate
+ * @param mainTransformOperator Transform operator corresponding to the main
predicate
+ * @param aggregationFunctions Aggregation functions in the query
+ * @param numTotalDocs Number of total docs
+ */
+ private BaseOperator<IntermediateResultsBlock>
buildOperatorForFilteredAggregations(
+ BaseFilterOperator mainPredicateFilterOperator,
+ TransformOperator mainTransformOperator,
+ AggregationFunction[] aggregationFunctions, int numTotalDocs) {
+ Map<ExpressionContext, Pair<List<AggregationFunction>, TransformOperator>>
expressionContextToAggFuncsMap =
+ new HashMap<>();
+ List<AggregationFunction> nonFilteredAggregationFunctions = new
ArrayList<>();
+
+ // For each aggregation function, check if the aggregation function is a
filtered agg.
+ // If it is, populate the corresponding filter operator and corresponding
transform operator
+ for (AggregationFunction aggregationFunction : aggregationFunctions) {
+ if (aggregationFunction instanceof FilterableAggregationFunction) {
+ FilterableAggregationFunction filterableAggregationFunction =
+ (FilterableAggregationFunction) aggregationFunction;
+
+ ExpressionContext currentFilterExpression =
filterableAggregationFunction
Review comment:
The `currentFilterExpression` seems redundant. You may directly use
`filterContext` as the key
##########
File path:
pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/FilterableAggregationFunction.java
##########
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.FilterContext;
+import org.apache.pinot.common.utils.DataSchema;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+
+/**
+ * Represents a filtered aggregation
+ */
+public class FilterableAggregationFunction implements
+ AggregationFunction<Object,
Comparable> {
+ private AggregationFunction<Object, Comparable> _innerAggregationFunction;
+ private ExpressionContext _associatedExpressionContext;
+ private FilterContext _filterContext;
+
+ public FilterableAggregationFunction(AggregationFunction aggregationFunction,
+ ExpressionContext associatedExpressionContext, FilterContext
filterContext) {
+ _innerAggregationFunction = aggregationFunction;
+ _associatedExpressionContext = associatedExpressionContext;
+ _filterContext = filterContext;
+ }
+
+ @Override
+ public AggregationFunctionType getType() {
+ return _innerAggregationFunction.getType();
+ }
+
+ @Override
+ public String getColumnName() {
+ return _innerAggregationFunction.getColumnName();
+ }
+
+ @Override
+ public String getResultColumnName() {
+ return _innerAggregationFunction.getResultColumnName();
+ }
+
+ @Override
+ public List<ExpressionContext> getInputExpressions() {
+ return _innerAggregationFunction.getInputExpressions();
+ }
+
+ @Override
+ public AggregationResultHolder createAggregationResultHolder() {
+ return _innerAggregationFunction.createAggregationResultHolder();
+ }
+
+ @Override
+ public GroupByResultHolder createGroupByResultHolder(int initialCapacity,
int maxCapacity) {
+ return
_innerAggregationFunction.createGroupByResultHolder(initialCapacity,
maxCapacity);
+ }
+
+ @Override
+ public void aggregate(int length, AggregationResultHolder
aggregationResultHolder,
+ Map<ExpressionContext, BlockValSet> blockValSetMap) {
+ _innerAggregationFunction.aggregate(length, aggregationResultHolder,
blockValSetMap);
+ }
+
+ @Override
+ public void aggregateGroupBySV(int length, int[] groupKeyArray,
GroupByResultHolder groupByResultHolder,
+ Map<ExpressionContext, BlockValSet> blockValSetMap) {
+ _innerAggregationFunction.aggregateGroupBySV(length, groupKeyArray,
groupByResultHolder,
+ blockValSetMap);
+ }
+
+ @Override
+ public void aggregateGroupByMV(int length, int[][] groupKeysArray,
GroupByResultHolder groupByResultHolder,
+ Map<ExpressionContext, BlockValSet> blockValSetMap) {
+ _innerAggregationFunction.aggregateGroupByMV(length, groupKeysArray,
groupByResultHolder,
+ blockValSetMap);
+ }
+
+ @Override
+ public Object extractAggregationResult(AggregationResultHolder
aggregationResultHolder) {
+ return
_innerAggregationFunction.extractAggregationResult(aggregationResultHolder);
+ }
+
+ @Override
+ public Object extractGroupByResult(GroupByResultHolder groupByResultHolder,
int groupKey) {
+ return _innerAggregationFunction.extractGroupByResult(groupByResultHolder,
groupKey);
+ }
+
+ @Override
+ public Object merge(Object intermediateResult1, Object intermediateResult2) {
+ return _innerAggregationFunction.merge(intermediateResult1,
intermediateResult2);
+ }
+
+ @Override
+ public DataSchema.ColumnDataType getIntermediateResultColumnType() {
+ return _innerAggregationFunction.getIntermediateResultColumnType();
+ }
+
+ @Override
+ public DataSchema.ColumnDataType getFinalResultColumnType() {
+ return _innerAggregationFunction.getFinalResultColumnType();
+ }
+
+ @Override
+ public Comparable extractFinalResult(Object o) {
+ return _innerAggregationFunction.extractFinalResult(o);
+ }
+
+ @Override
+ public String toExplainString() {
+ return null;
Review comment:
^^
##########
File path:
pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java
##########
@@ -154,4 +129,168 @@ private static boolean
isFitForDictionaryBasedPlan(AggregationFunction[] aggrega
}
return true;
}
+
+ /**
+ * Build a FilteredAggregationOperator given the parameters.
+ * @param mainPredicateFilterOperator Filter operator corresponding to the
main predicate
+ * @param mainTransformOperator Transform operator corresponding to the main
predicate
+ * @param aggregationFunctions Aggregation functions in the query
+ * @param numTotalDocs Number of total docs
+ */
+ private BaseOperator<IntermediateResultsBlock>
buildOperatorForFilteredAggregations(
+ BaseFilterOperator mainPredicateFilterOperator,
+ TransformOperator mainTransformOperator,
+ AggregationFunction[] aggregationFunctions, int numTotalDocs) {
+ Map<ExpressionContext, Pair<List<AggregationFunction>, TransformOperator>>
expressionContextToAggFuncsMap =
+ new HashMap<>();
+ List<AggregationFunction> nonFilteredAggregationFunctions = new
ArrayList<>();
+
+ // For each aggregation function, check if the aggregation function is a
filtered agg.
+ // If it is, populate the corresponding filter operator and corresponding
transform operator
+ for (AggregationFunction aggregationFunction : aggregationFunctions) {
+ if (aggregationFunction instanceof FilterableAggregationFunction) {
+ FilterableAggregationFunction filterableAggregationFunction =
+ (FilterableAggregationFunction) aggregationFunction;
+
+ ExpressionContext currentFilterExpression =
filterableAggregationFunction
+ .getAssociatedExpressionContext();
+
+ if (expressionContextToAggFuncsMap.get(currentFilterExpression) !=
null) {
+
expressionContextToAggFuncsMap.get(currentFilterExpression).getLeft().add(aggregationFunction);
Review comment:
(Major) I think the `TransformOperator` cannot be shared among multiple
aggregations. Once it iterates over a block, it won't process the same block
again for the next aggregation. Let's add some test queries with multiple
aggregations on the same filter.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]