This is an automated email from the ASF dual-hosted git repository. tingchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new dd8be2a229 add TextMatchFilterOptimizer to maximally push down `text_match` filters to Lucene (#12339) dd8be2a229 is described below commit dd8be2a2298a3cdc091e342004e6eaca9d399ed9 Author: Christopher Peck <27231838+itschrisp...@users.noreply.github.com> AuthorDate: Wed Jan 31 13:40:37 2024 -0800 add TextMatchFilterOptimizer to maximally push down `text_match` filters to Lucene (#12339) * add TextMatchFilterOptimizer * fix equivalence for all not --- .../pinot/core/query/optimizer/QueryOptimizer.java | 8 +- .../optimizer/filter/TextMatchFilterOptimizer.java | 186 +++++++++++++++++++++ .../core/query/optimizer/QueryOptimizerTest.java | 55 ++++++ 3 files changed, 246 insertions(+), 3 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/QueryOptimizer.java b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/QueryOptimizer.java index 0b98cdb88f..dbd9b5c70c 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/QueryOptimizer.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/QueryOptimizer.java @@ -30,6 +30,7 @@ import org.apache.pinot.core.query.optimizer.filter.IdenticalPredicateFilterOpti import org.apache.pinot.core.query.optimizer.filter.MergeEqInFilterOptimizer; import org.apache.pinot.core.query.optimizer.filter.MergeRangeFilterOptimizer; import org.apache.pinot.core.query.optimizer.filter.NumericalFilterOptimizer; +import org.apache.pinot.core.query.optimizer.filter.TextMatchFilterOptimizer; import org.apache.pinot.core.query.optimizer.filter.TimePredicateFilterOptimizer; import org.apache.pinot.core.query.optimizer.statement.StatementOptimizer; import org.apache.pinot.core.query.optimizer.statement.StringPredicateFilterOptimizer; @@ -39,14 +40,15 @@ import org.apache.pinot.spi.data.Schema; public class QueryOptimizer { // DO NOT change the order of these optimizers. - // - MergeEqInFilterOptimizer and MergeRangeFilterOptimizer relies on FlattenAndOrFilterOptimizer to flatten the - // AND/OR predicate so that the children are on the same level to be merged + // - MergeEqInFilterOptimizer, MergeRangeFilterOptimizer, and TextMatchFilterOptimizer each rely on + // FlattenAndOrFilterOptimizer to flatten the AND/OR predicate so that the children are on the same level to + // be merged // - TimePredicateFilterOptimizer and MergeRangeFilterOptimizer relies on NumericalFilterOptimizer to convert the // values to the proper format so that they can be properly parsed private static final List<FilterOptimizer> FILTER_OPTIMIZERS = Arrays.asList(new FlattenAndOrFilterOptimizer(), new IdenticalPredicateFilterOptimizer(), new MergeEqInFilterOptimizer(), new NumericalFilterOptimizer(), new TimePredicateFilterOptimizer(), - new MergeRangeFilterOptimizer()); + new MergeRangeFilterOptimizer(), new TextMatchFilterOptimizer()); private static final List<StatementOptimizer> STATEMENT_OPTIMIZERS = Collections.singletonList(new StringPredicateFilterOptimizer()); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java new file mode 100644 index 0000000000..aca4e2d5cc --- /dev/null +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java @@ -0,0 +1,186 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.optimizer.filter; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import javax.annotation.Nullable; +import org.apache.pinot.common.request.Expression; +import org.apache.pinot.common.request.ExpressionType; +import org.apache.pinot.common.request.Function; +import org.apache.pinot.common.utils.request.RequestUtils; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.sql.FilterKind; + + +/** + * The {@code TextMatchFilterOptimizer} merges `TEXT_MATCH` predicates on the same column within an `OR` or `AND`, + * maximizing the amount of the query that can be pushed down to Lucene + * + * NOTE: This optimizer follows the {@link FlattenAndOrFilterOptimizer}, so all the AND/OR filters are already + * flattened. + */ +public class TextMatchFilterOptimizer implements FilterOptimizer { + private static final String SPACE = " "; + + @Override + public Expression optimize(Expression filterExpression, @Nullable Schema schema) { + return filterExpression.getType() == ExpressionType.FUNCTION ? optimize(filterExpression) : filterExpression; + } + + private Expression optimize(Expression filterExpression) { + Function function = filterExpression.getFunctionCall(); + if (function == null) { + return filterExpression; + } + + // no optimization can be performed unless the function is an OR, AND, or NOT + String operator = function.getOperator(); + if (!operator.equals(FilterKind.OR.name()) && !operator.equals(FilterKind.AND.name()) && !operator.equals( + FilterKind.NOT.name())) { + return filterExpression; + } + + List<Expression> children = function.getOperands(); + children.replaceAll(this::optimize); + + List<Expression> newChildren = new ArrayList<>(); + Map<Expression, List<Expression>> textMatchMap = new HashMap<>(); + boolean recreateFilter = false; + + // iterate over all child expressions to collect TEXT_MATCH filters for each identifier + for (Expression child : children) { + Function childFunction = child.getFunctionCall(); + if (childFunction == null) { + newChildren.add(child); + } else { + String childOperator = childFunction.getOperator(); + if (childOperator.equals(FilterKind.TEXT_MATCH.name())) { + List<Expression> operands = childFunction.getOperands(); + Expression identifier = operands.get(0); + textMatchMap.computeIfAbsent(identifier, k -> new ArrayList<>()).add(child); + } else if (childOperator.equals(FilterKind.NOT.name())) { + assert childFunction.getOperands().size() == 1; + Expression operand = childFunction.getOperands().get(0); + Function notChildFunction = operand.getFunctionCall(); + if (notChildFunction == null) { + newChildren.add(child); + continue; + } + if (notChildFunction.getOperator().equals(FilterKind.TEXT_MATCH.name())) { + List<Expression> operands = notChildFunction.getOperands(); + Expression identifier = operands.get(0); + textMatchMap.computeIfAbsent(identifier, k -> new ArrayList<>()).add(child); + continue; + } + newChildren.add(child); + } else { + Expression newChild = optimize(child); + if (!newChild.equals(child)) { + recreateFilter = true; + } + newChildren.add(optimize(child)); + } + } + } + + for (List<Expression> values : textMatchMap.values()) { + if (values.size() > 1) { + recreateFilter = true; + break; + } + } + if (recreateFilter) { + return getNewFilter(operator, newChildren, textMatchMap); + } + return filterExpression; + } + + private Expression getNewFilter(String operator, List<Expression> newChildren, + Map<Expression, List<Expression>> textMatchMap) { + // for each key in textMatchMap, build a TEXT_MATCH expression (merge list of filters) + for (Map.Entry<Expression, List<Expression>> entry : textMatchMap.entrySet()) { + // special case: if all expressions are NOT, then wrap the merged text match inside a NOT. otherwise, push the + // NOT down into the text match expression + boolean allNot = true; + for (Expression expression : entry.getValue()) { + if (!expression.getFunctionCall().getOperator().equals(FilterKind.NOT.name())) { + allNot = false; + break; + } + } + + List<String> literals = new ArrayList<>(); + if (allNot) { + for (Expression expression : entry.getValue()) { + Expression operand = expression.getFunctionCall().getOperands().get(0); + literals.add(operand.getFunctionCall().getOperands().get(1).getLiteral().getStringValue()); + } + } else { + for (Expression expression : entry.getValue()) { + if (expression.getFunctionCall().getOperator().equals(FilterKind.NOT.name())) { + Expression operand = expression.getFunctionCall().getOperands().get(0); + literals.add(FilterKind.NOT.name() + SPACE + operand.getFunctionCall().getOperands().get(1).getLiteral() + .getStringValue()); + continue; + } + assert expression.getFunctionCall().getOperator().equals(FilterKind.TEXT_MATCH.name()); + literals.add(expression.getFunctionCall().getOperands().get(1).getLiteral().getStringValue()); + } + } + + // build the merged TEXT_MATCH expression + String mergedTextMatchFilter; + if (allNot) { + assert operator.equals(FilterKind.AND.name()) || operator.equals(FilterKind.OR.name()); + if (operator.equals(FilterKind.AND.name())) { + mergedTextMatchFilter = String.join(SPACE + FilterKind.OR.name() + SPACE, literals); + } else { + mergedTextMatchFilter = String.join(SPACE + FilterKind.AND.name() + SPACE, literals); + } + } else { + mergedTextMatchFilter = String.join(SPACE + operator + SPACE, literals); + } + Expression mergedTextMatchExpression = RequestUtils.getFunctionExpression(FilterKind.TEXT_MATCH.name()); + Expression mergedTextMatchFilterExpression = RequestUtils.getLiteralExpression(mergedTextMatchFilter); + mergedTextMatchExpression.getFunctionCall() + .setOperands(Arrays.asList(entry.getKey(), mergedTextMatchFilterExpression)); + + if (allNot) { + Expression notExpression = RequestUtils.getFunctionExpression(FilterKind.NOT.name()); + notExpression.getFunctionCall().setOperands(Collections.singletonList(mergedTextMatchExpression)); + newChildren.add(notExpression); + continue; + } + newChildren.add(mergedTextMatchExpression); + } + + if (newChildren.size() == 1) { + return newChildren.get(0); + } + assert operator.equals(FilterKind.OR.name()) || operator.equals(FilterKind.AND.name()); + Expression newExpression = RequestUtils.getFunctionExpression(operator); + newExpression.getFunctionCall().setOperands(newChildren); + return newExpression; + } +} diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java index 631b4ee9d2..337075f974 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java @@ -166,6 +166,21 @@ public class QueryOptimizerTest { assertEquals(fourthChildChildren.get(1).getFunctionCall().getOperator(), FilterKind.LESS_THAN.name()); } + @Test + public void testMergeTextMatchFilter() { + String query = + "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar') OR TEXT_MATCH(string, " + + "'baz')"; + PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + OPTIMIZER.optimize(pinotQuery, SCHEMA); + Function filterFunction = pinotQuery.getFilterExpression().getFunctionCall(); + assertEquals(filterFunction.getOperator(), FilterKind.TEXT_MATCH.name()); + List<Expression> operands = filterFunction.getOperands(); + assertEquals(operands.size(), 2); + assertEquals(operands.get(0), RequestUtils.getIdentifierExpression("string")); + assertEquals(operands.get(1), RequestUtils.getLiteralExpression("foo AND bar OR baz")); + } + private static Expression getRangeFilterExpression(String column, String rangeString) { Expression rangeFilterExpression = RequestUtils.getFunctionExpression(FilterKind.RANGE.name()); rangeFilterExpression.getFunctionCall().setOperands( @@ -250,6 +265,39 @@ public class QueryOptimizerTest { testQuery("SELECT * FROM testTable WHERE 1=1 AND true", "SELECT * FROM testTable WHERE true"); testQuery("SELECT * FROM testTable WHERE \"a\"=\"a\" AND true", "SELECT * FROM testTable WHERE true"); + + // TextMatchFilterOptimizer + testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar')", + "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo AND bar')"); + testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, '\"foo bar\"') AND TEXT_MATCH(string, 'baz')", + "SELECT * FROM testTable WHERE TEXT_MATCH(string, '\"foo bar\" AND baz')"); + testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, '\"foo bar\"') AND TEXT_MATCH(string, '/.*ooba.*/')", + "SELECT * FROM testTable WHERE TEXT_MATCH(string, '\"foo bar\" AND /.*ooba.*/')"); + testQuery("SELECT * FROM testTable WHERE int = 1 AND TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar')", + "SELECT * FROM testTable WHERE int = 1 AND TEXT_MATCH(string, 'foo AND bar')"); + testQuery("SELECT * FROM testTable WHERE int = 1 OR TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar')", + "SELECT * FROM testTable WHERE int = 1 OR TEXT_MATCH(string, 'foo AND bar')"); + testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND NOT TEXT_MATCH(string, 'bar')", + "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo AND NOT bar')"); + testQuery("SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar')", + "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'NOT foo AND bar')"); + testQuery("SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo') AND NOT TEXT_MATCH(string, 'bar')", + "SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo OR bar')"); + testQuery("SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo') OR NOT TEXT_MATCH(string, 'bar')", + "SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo AND bar')"); + testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar') OR " + + "TEXT_MATCH(string, 'baz')", "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo AND bar OR baz')"); + testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo1') AND TEXT_MATCH(string1, 'bar1') OR " + + "TEXT_MATCH(string1, 'baz1') AND TEXT_MATCH(string2, 'foo')", + "SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo1 AND bar1') OR TEXT_MATCH(string1, 'baz1') AND " + + "TEXT_MATCH(string2, 'foo')"); + testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo1') AND TEXT_MATCH(string1, 'bar1')" + + "AND TEXT_MATCH(string2, 'foo2') AND TEXT_MATCH(string2, 'bar2')", + "SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo1 AND bar1') AND TEXT_MATCH(string2, 'foo2 AND bar2')"); + testCannotOptimizeQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo') OR TEXT_MATCH(string2, 'bar')"); + testCannotOptimizeQuery( + "SELECT * FROM testTable WHERE int = 1 AND TEXT_MATCH(string, 'foo') OR TEXT_MATCH(string, 'bar')"); + testCannotOptimizeQuery("SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo')"); } private static void testQuery(String actual, String expected) { @@ -262,6 +310,13 @@ public class QueryOptimizerTest { comparePinotQuery(actualPinotQuery, expectedPinotQuery); } + private static void testCannotOptimizeQuery(String query) { + PinotQuery actualPinotQuery = CalciteSqlParser.compileToPinotQuery(query); + OPTIMIZER.optimize(actualPinotQuery, SCHEMA); + PinotQuery expectedPinotQuery = CalciteSqlParser.compileToPinotQuery(query); + comparePinotQuery(actualPinotQuery, expectedPinotQuery); + } + private static void comparePinotQuery(PinotQuery actual, PinotQuery expected) { if (expected.getFilterExpression() == null) { assertNull(actual.getFilterExpression()); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org