Berkof commented on a change in pull request #8255: URL: https://github.com/apache/ignite/pull/8255#discussion_r493576142
########## File path: modules/indexing/src/main/java/org/apache/ignite/internal/processors/query/h2/GridSubqueryJoinOptimizer.java ########## @@ -0,0 +1,869 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.processors.query.h2; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.function.BiPredicate; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import org.apache.ignite.IgniteSystemProperties; +import org.apache.ignite.internal.processors.query.h2.opt.GridH2ProxyIndex; +import org.apache.ignite.internal.processors.query.h2.sql.GridSqlAggregateFunction; +import org.apache.ignite.internal.processors.query.h2.sql.GridSqlAlias; +import org.apache.ignite.internal.processors.query.h2.sql.GridSqlArray; +import org.apache.ignite.internal.processors.query.h2.sql.GridSqlAst; +import org.apache.ignite.internal.processors.query.h2.sql.GridSqlColumn; +import org.apache.ignite.internal.processors.query.h2.sql.GridSqlElement; +import org.apache.ignite.internal.processors.query.h2.sql.GridSqlFunction; +import 
org.apache.ignite.internal.processors.query.h2.sql.GridSqlJoin; +import org.apache.ignite.internal.processors.query.h2.sql.GridSqlOperation; +import org.apache.ignite.internal.processors.query.h2.sql.GridSqlQuery; +import org.apache.ignite.internal.processors.query.h2.sql.GridSqlSelect; +import org.apache.ignite.internal.processors.query.h2.sql.GridSqlSubquery; +import org.apache.ignite.internal.processors.query.h2.sql.GridSqlTable; +import org.apache.ignite.internal.processors.query.h2.sql.GridSqlUnion; +import org.apache.ignite.internal.util.typedef.F; +import org.h2.index.Index; +import org.h2.table.Column; +import org.jetbrains.annotations.Nullable; + +import static org.apache.ignite.internal.processors.query.h2.sql.GridSqlOperationType.AND; +import static org.apache.ignite.internal.processors.query.h2.sql.GridSqlOperationType.EQUAL; +import static org.apache.ignite.internal.processors.query.h2.sql.GridSqlOperationType.EQUAL_NULL_SAFE; +import static org.apache.ignite.internal.processors.query.h2.sql.GridSqlOperationType.EXISTS; +import static org.apache.ignite.internal.processors.query.h2.sql.GridSqlOperationType.IN; +import static org.apache.ignite.internal.processors.query.h2.sql.GridSqlOperationType.NEGATE; + +/** Pulls subqueries out of a parent query where possible. */ +public class GridSubqueryJoinOptimizer { + /** Predicate returns {@code true} if the tested child is a subquery. */ + private static final BiPredicate<GridSqlAst, GridSqlAst> ELEMENT_WITH_SUBQUERY + = (parent, child) -> child instanceof GridSqlSubquery; + + /** + * Predicate returns {@code true} if the tested child is an IN operation + * whose child at index 1 is a subquery. 
+ */ + private static final BiPredicate<GridSqlAst, GridSqlAst> ELEMENT_WITH_SUBQUERY_WITHIN_IN_EXPRESSION + = (parent, child) -> child instanceof GridSqlOperation && ((GridSqlOperation)child).operationType() == IN + && child.child(1) instanceof GridSqlSubquery; + + /** + * Predicate returns {@code true} if the tested child is an EXISTS operation + * whose {@code child()} is a subquery. + */ + private static final BiPredicate<GridSqlAst, GridSqlAst> ELEMENT_WITH_SUBQUERY_WITHIN_EXISTS_EXPRESSION + = (parent, child) -> child instanceof GridSqlOperation && ((GridSqlOperation)child).operationType() == EXISTS + && child.child() instanceof GridSqlSubquery; + + /** Predicate returns {@code true} if current element is an AND operation. */ + private static final Predicate<GridSqlAst> ELEMENT_IS_AND_OPERATION + = elem -> elem instanceof GridSqlOperation && ((GridSqlOperation)elem).operationType() == AND; + + /** + * Predicate returns {@code true} if the tested child is an alias whose {@code child()} is a subquery. + */ + private static final BiPredicate<GridSqlAst, GridSqlAst> ELEMENT_WITH_ALIAS_WITH_SUBQUERY + = (parent, child) -> child instanceof GridSqlAlias && child.child() instanceof GridSqlSubquery; + + /** Predicate returns {@code true} if current element is a JOIN. */ + private static final Predicate<GridSqlAst> ELEMENT_IS_JOIN + = elem -> elem instanceof GridSqlJoin; + + /** Predicate returns {@code true} if current element is an EQUAL or EQUAL_NULL_SAFE operation. */ + private static final Predicate<GridSqlAst> ELEMENT_IS_EQ + = elem -> elem instanceof GridSqlOperation && (EQUAL == ((GridSqlOperation)elem).operationType() + || EQUAL_NULL_SAFE == ((GridSqlOperation)elem).operationType()); + + /** + * Whether to apply optimization or not; {@code null} until it is first resolved. + */ + private static volatile Boolean optimizationEnabled; + + /** + * @return {@code true} if optimization should be applied. 
+ */ + @SuppressWarnings("NonThreadSafeLazyInitialization") + private static boolean optimizationEnabled() { + if (optimizationEnabled == null) { // it's OK if this will be initialized several times in case of races + optimizationEnabled = Boolean.parseBoolean( + System.getProperty(IgniteSystemProperties.IGNITE_ENABLE_SUBQUERY_REWRITE_OPTIMIZATION, "true") + ); + } + + return optimizationEnabled; + } + + /** + * Pulls out subquery from parent query where possible. + * + * @param parent Parent query where to find and pull out subqueries. + */ + public static void pullOutSubQueries(GridSqlQuery parent) { + if (!optimizationEnabled()) + return; + + if (parent instanceof GridSqlUnion) { + GridSqlUnion union = (GridSqlUnion)parent; + + pullOutSubQueries(union.left()); + pullOutSubQueries(union.right()); + } + + GridSqlSelect select = (GridSqlSelect)parent; + + pullOutSubQryFromSelectExpr(select); + pullOutSubQryFromInClause(select); + pullOutSubQryFromExistsClause(select); + pullOutSubQryFromTableList(select); + } + + /** + * Pulls out subquery from select expression. + * + * @param select Parent query where to find and pull out subqueries. + */ + private static void pullOutSubQryFromSelectExpr(GridSqlSelect select) { + for (int i = 0; i < select.allColumns(); i++) { + boolean wasPulledOut = false; + GridSqlAst col = select.columns(false).get(i); + + if (col instanceof GridSqlSubquery) + wasPulledOut = pullOutSubQryFromSelectExpr(select, null, i); + else { + ASTNodeFinder finder = new ASTNodeFinder( + col, + ELEMENT_WITH_SUBQUERY + ); + + ASTNodeFinder.Result res; + while ((res = finder.findNext()) != null) + wasPulledOut |= pullOutSubQryFromSelectExpr(select, res.getEl(), res.getIdx()); + } + + if (wasPulledOut) // we have to analyze just pulled out element as well + i--; + } + } + + /** + * Pulls out subquery from table list. + * + * @param select Parent query where to find and pull out subqueries. 
+ */ + private static void pullOutSubQryFromTableList(GridSqlSelect select) { + boolean wasPulledOut; + do { + wasPulledOut = false; + + // The root of the FROM clause is re-checked on every iteration + // to handle simple hierarchical queries like this: + // select * from (select * from (select id, name from emp)) + if (ELEMENT_WITH_ALIAS_WITH_SUBQUERY.test(null, select.from())) + wasPulledOut = pullOutSubQryFromTableList(select, null, -1); + else if (ELEMENT_IS_JOIN.test(select.from())) { + ASTNodeFinder finder = new ASTNodeFinder( + select.from(), + ELEMENT_WITH_ALIAS_WITH_SUBQUERY, + ELEMENT_IS_JOIN + ); + + ASTNodeFinder.Result res; + while ((res = finder.findNext()) != null) + wasPulledOut |= pullOutSubQryFromTableList(select, res.getEl(), res.getIdx()); + } + } + while (wasPulledOut); + } + + /** + * Pulls out subqueries from IN expressions. + * <p> + * The root of the WHERE clause is checked once; then, if the root is an AND operation, + * the clause is searched repeatedly until a pass pulls out nothing. + * + * @param select Parent query where to find and pull out subqueries. + */ + private static void pullOutSubQryFromInClause(GridSqlSelect select) { + // For now a subquery cannot end up at the root of the WHERE clause tree after rewriting, + // so it is safe to process the root only once, outside the loop. + if (ELEMENT_WITH_SUBQUERY_WITHIN_IN_EXPRESSION.test(null, select.where())) + pullOutSubQryFromInClause(select, null, -1); + + if (!ELEMENT_IS_AND_OPERATION.test(select.where())) + return; + + boolean wasPulledOut; + do { + wasPulledOut = false; + + ASTNodeFinder finder = new ASTNodeFinder( + select.where(), + ELEMENT_WITH_SUBQUERY_WITHIN_IN_EXPRESSION, + ELEMENT_IS_AND_OPERATION + ); + + ASTNodeFinder.Result res; + while ((res = finder.findNext()) != null) + wasPulledOut |= pullOutSubQryFromInClause(select, res.getEl(), res.getIdx()); + } + while (wasPulledOut); + } + + /** + * Pulls out subqueries from EXISTS expressions. + * + * @param select Parent query where to find and pull out subqueries. 
+ */ + private static void pullOutSubQryFromExistsClause(GridSqlSelect select) { + // For now a subquery cannot end up at the root of the WHERE clause tree after rewriting, + // so it is safe to process the root only once, outside the loop. + if (ELEMENT_WITH_SUBQUERY_WITHIN_EXISTS_EXPRESSION.test(null, select.where())) + pullOutSubQryFromExistsClause(select, null, -1); + + if (!ELEMENT_IS_AND_OPERATION.test(select.where())) + return; + + boolean wasPulledOut; + do { + wasPulledOut = false; + + ASTNodeFinder finder = new ASTNodeFinder( + select.where(), + ELEMENT_WITH_SUBQUERY_WITHIN_EXISTS_EXPRESSION, + ELEMENT_IS_AND_OPERATION + ); + + ASTNodeFinder.Result res; + while ((res = finder.findNext()) != null) + wasPulledOut |= pullOutSubQryFromExistsClause(select, res.getEl(), res.getIdx()); + } + while (wasPulledOut); + } + + /** + * Checks whether the given query is a simple select. + * <p> + * A query is called simple if it is a select query (not a union), has neither HAVING nor GROUP BY, + * has no DISTINCT clause, no aggregations, no limit, no sorting and no offset clause. + * It also must not be a SELECT FOR UPDATE. + * + * @param subQry Subquery to check. + * @return {@code true} if it is a simple query. 
+ */ + private static boolean isSimpleSelect(GridSqlQuery subQry) { + if (subQry instanceof GridSqlUnion) + return false; + + GridSqlSelect select = (GridSqlSelect)subQry; + + boolean simple = F.isEmpty(select.sort()) + && select.offset() == null + && select.limit() == null + && !select.isForUpdate() + && !select.distinct() + && select.havingColumn() < 0 + && F.isEmpty(select.groupColumns()); + + if (!simple) + return false; + + for (GridSqlAst col : select.columns(true)) { + if (!(col instanceof GridSqlElement)) + continue; + + // We have to traverse the whole expression tree because an aggregate may be nested, + // e.g. (MAX(col) - MIN(col)) / COUNT(col) + ASTNodeFinder aggFinder = new ASTNodeFinder( + col, + (p, c) -> p instanceof GridSqlAggregateFunction + ); + + if (aggFinder.findNext() != null) + return false; + } + + return true; + } + + /** + * Check whether table have unique index that can be built with provided column set. Review comment: table HAS uniq ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected]
