This is an automated email from the ASF dual-hosted git repository. dkuzmenko pushed a commit to branch branch-4.0 in repository https://gitbox.apache.org/repos/asf/hive.git
commit 92a4b29a10a8498c4f4ea463e02872ddd9c72956 Author: Krisztian Kasa <[email protected]> AuthorDate: Mon Jan 8 14:07:57 2024 +0100 HIVE-27948: Wrong results when using materialized views with non-deterministic/dynamic functions (Krisztian Kasa, reviewed by Stamatis Zampetakis) (cherry picked from commit 24fffdc508f9402ad7145b59b50de738b27c92b4) --- .../AlterMaterializedViewRewriteOperation.java | 13 +++-- .../show/ShowMaterializedViewsFormatter.java | 2 +- .../org/apache/hadoop/hive/ql/metadata/Hive.java | 6 +-- .../ql/metadata/HiveMaterializedViewsRegistry.java | 14 ++--- .../ql/metadata/HiveRelOptMaterialization.java | 31 ++--------- .../metadata/MaterializationValidationResult.java | 41 +++++++++++++++ .../hadoop/hive/ql/metadata/RewriteAlgorithm.java | 44 ++++++++++++++++ ...eMaterializedViewASTSubQueryRewriteShuttle.java | 6 +-- .../HiveRelOptMaterializationValidator.java | 61 ++++++++++++---------- .../org/apache/hadoop/hive/ql/parse/CBOPlan.java | 13 +++-- .../hadoop/hive/ql/parse/CalcitePlanner.java | 6 +-- .../apache/hadoop/hive/ql/parse/ParseUtils.java | 3 +- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 28 +++++----- .../ql/metadata/TestMaterializedViewsCache.java | 2 +- .../materialized_view_no_cbo_rewrite.q} | 0 .../materialized_view_rewrite_by_text_10.q | 11 ++++ .../materialized_view_rewrite_by_text_11.q} | 0 .../llap/materialized_view_no_cbo_rewrite.q.out} | 6 ++- .../materialized_view_rewrite_by_text_10.q.out | 40 ++++++++++++++ .../materialized_view_rewrite_by_text_11.q.out} | 6 ++- .../llap/materialized_view_rewrite_by_text_8.q.out | 4 +- 21 files changed, 235 insertions(+), 102 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rewrite/AlterMaterializedViewRewriteOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rewrite/AlterMaterializedViewRewriteOperation.java index 4f2b6cccc6e..f4ada77ba3c 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rewrite/AlterMaterializedViewRewriteOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/alter/rewrite/AlterMaterializedViewRewriteOperation.java @@ -25,11 +25,15 @@ import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.ddl.DDLOperation; import org.apache.hadoop.hive.ql.ddl.DDLOperationContext; +import org.apache.hadoop.hive.ql.metadata.MaterializationValidationResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.CalcitePlanner; import org.apache.hadoop.hive.ql.parse.ParseUtils; +import static org.apache.commons.lang3.StringUtils.isNotBlank; +import static org.apache.hadoop.hive.ql.processors.CompileProcessor.console; + /** * Operation process of enabling/disabling materialized view rewrite. */ @@ -64,9 +68,12 @@ public class AlterMaterializedViewRewriteOperation extends DDLOperation<AlterMat } throw new HiveException(msg); } - if (!planner.isValidAutomaticRewritingMaterialization()) { - throw new HiveException("Cannot enable rewriting for materialized view. 
" + - planner.getInvalidAutomaticRewritingMaterializationReason()); + MaterializationValidationResult validationResult = planner.getMaterializationValidationResult(); + String validationErrorMessage = validationResult.getErrorMessage(); + if (validationResult.getSupportedRewriteAlgorithms().isEmpty()) { + throw new HiveException(validationErrorMessage); + } else if (isNotBlank(validationErrorMessage)) { + console.printError(validationErrorMessage); } } catch (Exception e) { throw new HiveException(e); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/show/ShowMaterializedViewsFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/show/ShowMaterializedViewsFormatter.java index 7423f9067c0..c8c43af74c4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/show/ShowMaterializedViewsFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/materialized/show/ShowMaterializedViewsFormatter.java @@ -41,7 +41,7 @@ import java.util.concurrent.TimeUnit; import static org.apache.hadoop.hive.conf.Constants.MATERIALIZED_VIEW_REWRITING_TIME_WINDOW; import static org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization.IncrementalRebuildMode.UNKNOWN; -import static org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization.RewriteAlgorithm.ALL; +import static org.apache.hadoop.hive.ql.metadata.RewriteAlgorithm.ALL; /** * Formats SHOW MATERIALIZED VIEWS results. 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 192fa13ffa1..0ca644745a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -40,8 +40,8 @@ import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCa import static org.apache.hadoop.hive.ql.ddl.DDLUtils.isIcebergStatsSource; import static org.apache.hadoop.hive.ql.ddl.DDLUtils.isIcebergTable; import static org.apache.hadoop.hive.ql.io.AcidUtils.getFullTableName; -import static org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization.RewriteAlgorithm.CALCITE; -import static org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization.RewriteAlgorithm.ALL; +import static org.apache.hadoop.hive.ql.metadata.RewriteAlgorithm.CALCITE; +import static org.apache.hadoop.hive.ql.metadata.RewriteAlgorithm.ALL; import static org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils.extractTable; import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT; import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME; @@ -2236,7 +2236,7 @@ public class Hive { private List<HiveRelOptMaterialization> getValidMaterializedViews(List<Table> materializedViewTables, Set<TableName> tablesUsed, boolean forceMVContentsUpToDate, boolean expandGroupingSets, - HiveTxnManager txnMgr, EnumSet<HiveRelOptMaterialization.RewriteAlgorithm> scope) + HiveTxnManager txnMgr, EnumSet<RewriteAlgorithm> scope) throws HiveException { final String validTxnsList = conf.get(ValidTxnList.VALID_TXNS_KEY); final boolean tryIncrementalRewriting = diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java index ca11fcccffa..9c5bdfe18af 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java @@ -83,9 +83,7 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.common.collect.ImmutableList; import static java.util.stream.Collectors.toList; -import static org.apache.commons.lang3.StringUtils.isBlank; -import static org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization.RewriteAlgorithm.ALL; -import static org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization.RewriteAlgorithm.TEXT; +import static org.apache.hadoop.hive.ql.metadata.RewriteAlgorithm.ALL; /** * Registry for materialized views. The goal of this cache is to avoid parsing and creating @@ -236,9 +234,7 @@ public final class HiveMaterializedViewsRegistry { } return new HiveRelOptMaterialization(viewScan, plan.getPlan(), - null, viewScan.getTable().getQualifiedName(), - isBlank(plan.getInvalidAutomaticRewritingMaterializationReason()) ? 
- EnumSet.allOf(HiveRelOptMaterialization.RewriteAlgorithm.class) : EnumSet.of(TEXT), + null, viewScan.getTable().getQualifiedName(), plan.getSupportedRewriteAlgorithms(), determineIncrementalRebuildMode(plan.getPlan()), plan.getAst()); } @@ -273,7 +269,7 @@ public final class HiveMaterializedViewsRegistry { } HiveRelOptMaterialization materialization = createMaterialization(conf, materializedViewTable); - if (materialization == null) { + if (materialization == null || materialization.getScope().isEmpty()) { return; } @@ -348,7 +344,7 @@ public final class HiveMaterializedViewsRegistry { */ List<HiveRelOptMaterialization> getRewritingMaterializedViews() { return materializedViewsCache.values().stream() - .filter(materialization -> materialization.getScope().contains(HiveRelOptMaterialization.RewriteAlgorithm.CALCITE)) + .filter(materialization -> materialization.getScope().contains(RewriteAlgorithm.CALCITE)) .collect(toList()); } @@ -358,7 +354,7 @@ public final class HiveMaterializedViewsRegistry { * @return the collection of materialized views, or the empty collection if none */ public HiveRelOptMaterialization getRewritingMaterializedView(String dbName, String viewName, - EnumSet<HiveRelOptMaterialization.RewriteAlgorithm> scope) { + EnumSet<RewriteAlgorithm> scope) { HiveRelOptMaterialization materialization = materializedViewsCache.get(dbName, viewName); if (materialization == null) { return null; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveRelOptMaterialization.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveRelOptMaterialization.java index b4a20cea0fb..04548bfb801 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveRelOptMaterialization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveRelOptMaterialization.java @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.ql.parse.ASTNode; import java.util.EnumSet; import java.util.List; -import java.util.function.Predicate; +import java.util.Set; import static 
org.apache.commons.collections.CollectionUtils.intersection; @@ -37,27 +37,6 @@ import static org.apache.commons.collections.CollectionUtils.intersection; */ public class HiveRelOptMaterialization extends RelOptMaterialization { - /** - * Enumeration of Materialized view query rewrite algorithms. - */ - public enum RewriteAlgorithm { - /** - * Query sql text is compared to stored materialized view definition sql texts. - */ - TEXT, - /** - * Use rewriting algorithm provided by Calcite. - */ - CALCITE; - - public static final EnumSet<RewriteAlgorithm> ALL = EnumSet.allOf(RewriteAlgorithm.class); - - public static final Predicate<EnumSet<RewriteAlgorithm>> ANY = - rewriteAlgorithms -> true; - public static final Predicate<EnumSet<RewriteAlgorithm>> NON_CALCITE = - rewriteAlgorithms -> !rewriteAlgorithms.contains(HiveRelOptMaterialization.RewriteAlgorithm.CALCITE); - } - public enum IncrementalRebuildMode { AVAILABLE, INSERT_ONLY, @@ -65,7 +44,7 @@ public class HiveRelOptMaterialization extends RelOptMaterialization { UNKNOWN } - private final EnumSet<RewriteAlgorithm> scope; + private final Set<RewriteAlgorithm> scope; private final boolean sourceTablesUpdateDeleteModified; private final boolean sourceTablesCompacted; private final IncrementalRebuildMode rebuildMode; @@ -73,13 +52,13 @@ public class HiveRelOptMaterialization extends RelOptMaterialization { public HiveRelOptMaterialization(RelNode tableRel, RelNode queryRel, RelOptTable starRelOptTable, List<String> qualifiedTableName, - EnumSet<RewriteAlgorithm> scope, IncrementalRebuildMode rebuildMode, ASTNode ast) { + Set<RewriteAlgorithm> scope, IncrementalRebuildMode rebuildMode, ASTNode ast) { this(tableRel, queryRel, starRelOptTable, qualifiedTableName, scope, false, false, rebuildMode, ast); } private HiveRelOptMaterialization(RelNode tableRel, RelNode queryRel, RelOptTable starRelOptTable, List<String> qualifiedTableName, - EnumSet<RewriteAlgorithm> scope, + Set<RewriteAlgorithm> scope, boolean 
sourceTablesUpdateDeleteModified, boolean sourceTablesCompacted, IncrementalRebuildMode rebuildMode, ASTNode ast) { super(tableRel, queryRel, starRelOptTable, qualifiedTableName); this.scope = scope; @@ -89,7 +68,7 @@ public class HiveRelOptMaterialization extends RelOptMaterialization { this.ast = ast; } - public EnumSet<RewriteAlgorithm> getScope() { + public Set<RewriteAlgorithm> getScope() { return scope; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializationValidationResult.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializationValidationResult.java new file mode 100644 index 00000000000..3f3372e9c6d --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializationValidationResult.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.metadata; + +import java.util.EnumSet; +import java.util.Set; + +public class MaterializationValidationResult { + private final EnumSet<RewriteAlgorithm> supportedRewriteAlgorithms; + private final String errorMessage; + + public MaterializationValidationResult( + EnumSet<RewriteAlgorithm> supportedRewriteAlgorithms, String errorMessage) { + this.supportedRewriteAlgorithms = supportedRewriteAlgorithms; + this.errorMessage = errorMessage; + } + + public Set<RewriteAlgorithm> getSupportedRewriteAlgorithms() { + return supportedRewriteAlgorithms; + } + + public String getErrorMessage() { + return errorMessage; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/RewriteAlgorithm.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/RewriteAlgorithm.java new file mode 100644 index 00000000000..308df729937 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/RewriteAlgorithm.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.metadata; + +import java.util.EnumSet; +import java.util.Set; +import java.util.function.Predicate; + +/** + * Enumeration of Materialized view query rewrite algorithms. 
+ */ +public enum RewriteAlgorithm { + /** + * Query sql text is compared to stored materialized view definition sql texts. + */ + TEXT, + /** + * Use rewriting algorithm provided by Calcite. + */ + CALCITE; + + public static final EnumSet<RewriteAlgorithm> ALL = EnumSet.allOf(RewriteAlgorithm.class); + + public static final Predicate<Set<RewriteAlgorithm>> ANY = + rewriteAlgorithms -> true; + public static final Predicate<Set<RewriteAlgorithm>> NON_CALCITE = + rewriteAlgorithms -> !rewriteAlgorithms.contains(CALCITE); +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveMaterializedViewASTSubQueryRewriteShuttle.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveMaterializedViewASTSubQueryRewriteShuttle.java index 151a047c18e..d24ad12331b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveMaterializedViewASTSubQueryRewriteShuttle.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveMaterializedViewASTSubQueryRewriteShuttle.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization; +import org.apache.hadoop.hive.ql.metadata.RewriteAlgorithm; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; @@ -36,7 +37,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayDeque; -import java.util.EnumSet; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -46,7 +46,7 @@ import java.util.function.Predicate; import static java.util.Collections.singletonList; import static java.util.Collections.unmodifiableMap; import static java.util.Collections.unmodifiableSet; -import static 
org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization.RewriteAlgorithm.NON_CALCITE; +import static org.apache.hadoop.hive.ql.metadata.RewriteAlgorithm.NON_CALCITE; import static org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils.extractTable; /** @@ -140,7 +140,7 @@ public class HiveMaterializedViewASTSubQueryRewriteShuttle extends HiveRelShuttl public static RelNode getMaterializedViewByAST( ASTNode expandedAST, RelOptCluster optCluster, - Predicate<EnumSet<HiveRelOptMaterialization.RewriteAlgorithm>> filter, + Predicate<Set<RewriteAlgorithm>> filter, Hive db, Set<TableName> tablesUsedByOriginalPlan, HiveTxnManager txnManager) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java index cf419b170c1..4f20609cff9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptMaterializationValidator.java @@ -38,7 +38,8 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; import org.apache.calcite.util.Util; import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.metadata.MaterializationValidationResult; +import org.apache.hadoop.hive.ql.metadata.RewriteAlgorithm; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin; @@ -52,8 +53,10 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import java.util.EnumSet; + +import static org.apache.hadoop.hive.ql.metadata.RewriteAlgorithm.TEXT; /** * Checks the query plan for conditions that would make the plan unsuitable for @@ -62,13 +65,16 @@ import org.slf4j.LoggerFactory; * - References to non-deterministic functions. */ public class HiveRelOptMaterializationValidator extends HiveRelShuttleImpl { - static final Logger LOG = LoggerFactory.getLogger(HiveRelOptMaterializationValidator.class); + private static final String UNSUPPORTED_BY_CALCITE_FORMAT = + "Only query text based automatic rewriting is available for materialized view. " + + "Statement has unsupported %s: %s."; protected String resultCacheInvalidReason; - protected String automaticRewritingInvalidReason; + protected MaterializationValidationResult materializationValidationResult; public void validate(RelNode relNode) { try { + materializationValidationResult = new MaterializationValidationResult(RewriteAlgorithm.ALL, ""); relNode.accept(this); } catch (Util.FoundOne e) { // Can ignore - the check failed. 
@@ -113,7 +119,7 @@ public class HiveRelOptMaterializationValidator extends HiveRelShuttleImpl { @Override public RelNode visit(HiveJoin join) { if (join.getJoinType() != JoinRelType.INNER) { - setAutomaticRewritingInvalidReason(join.getJoinType() + " join type is not supported by rewriting algorithm."); + unsupportedByCalciteRewrite("join type", join.getJoinType().toString()); } checkExpr(join.getCondition()); return super.visit(join); @@ -152,7 +158,7 @@ public class HiveRelOptMaterializationValidator extends HiveRelShuttleImpl { @Override public RelNode visit(TableFunctionScan scan) { - setAutomaticRewritingInvalidReason(scan); + unsupportedByCalciteRewrite("expression", "window function"); checkExpr(scan.getCall()); return super.visit(scan); } @@ -231,13 +237,13 @@ public class HiveRelOptMaterializationValidator extends HiveRelShuttleImpl { // Note: Not currently part of the HiveRelNode interface private RelNode visit(HiveUnion union) { - setAutomaticRewritingInvalidReason("Statement has unsupported operator: union."); + unsupportedByCalciteRewrite("operator", "union"); return visitChildren(union); } @Override public RelNode visit(HiveSortLimit sort) { - setAutomaticRewritingInvalidReason("Statement has unsupported clause: order by."); + unsupportedByCalciteRewrite("clause","order by"); checkExpr(sort.getFetchExpr()); checkExpr(sort.getOffsetExpr()); return visitChildren(sort); @@ -245,20 +251,20 @@ public class HiveRelOptMaterializationValidator extends HiveRelShuttleImpl { // Note: Not currently part of the HiveRelNode interface private RelNode visit(HiveSortExchange sort) { - setAutomaticRewritingInvalidReason("Statement has unsupported clause: sort by."); + unsupportedByCalciteRewrite("clause", "sort by"); return visitChildren(sort); } // Note: Not currently part of the HiveRelNode interface private RelNode visit(HiveSemiJoin semiJoin) { - setAutomaticRewritingInvalidReason("Statement has unsupported join type: semi join."); + 
unsupportedByCalciteRewrite("join type", "semi join"); checkExpr(semiJoin.getCondition()); checkExpr(semiJoin.getJoinFilter()); return visitChildren(semiJoin); } private RelNode visit(HiveAntiJoin antiJoin) { - setAutomaticRewritingInvalidReason("Statement has unsupported join type: anti join."); + unsupportedByCalciteRewrite("join type", "anti join"); checkExpr(antiJoin.getCondition()); checkExpr(antiJoin.getJoinFilter()); return visitChildren(antiJoin); @@ -266,26 +272,30 @@ public class HiveRelOptMaterializationValidator extends HiveRelShuttleImpl { // Note: Not currently part of the HiveRelNode interface private RelNode visit(HiveExcept except) { - setAutomaticRewritingInvalidReason("Statement has unsupported operator: except."); + unsupportedByCalciteRewrite("operator", "except"); return visitChildren(except); } // Note: Not currently part of the HiveRelNode interface private RelNode visit(HiveIntersect intersect) { - setAutomaticRewritingInvalidReason("Statement has unsupported operator: intersect."); + unsupportedByCalciteRewrite("operator", "intersect"); return visitChildren(intersect); } private void fail(String reason) { setResultCacheInvalidReason(reason); - setAutomaticRewritingInvalidReason(reason); + this.materializationValidationResult = new MaterializationValidationResult( + EnumSet.noneOf(RewriteAlgorithm.class), "Cannot enable automatic rewriting for materialized view. " + reason); throw Util.FoundOne.NULL; } private RelNode fail(RelNode node) { setResultCacheInvalidReason("Unsupported RelNode type " + node.getRelTypeName() + " encountered in the query plan"); - setAutomaticRewritingInvalidReason(node); + this.materializationValidationResult = + new MaterializationValidationResult(EnumSet.noneOf(RewriteAlgorithm.class), + String.format("Cannot enable automatic rewriting for materialized view. 
" + + "Unsupported RelNode type %s encountered in the query plan", node.getRelTypeName())); throw Util.FoundOne.NULL; } @@ -308,24 +318,19 @@ public class HiveRelOptMaterializationValidator extends HiveRelShuttleImpl { return resultCacheInvalidReason == null; } - public String getAutomaticRewritingInvalidReason() { - return automaticRewritingInvalidReason; - } - - public void setAutomaticRewritingInvalidReason(String automaticRewritingInvalidReason) { - if (isValidForAutomaticRewriting()) { - this.automaticRewritingInvalidReason = automaticRewritingInvalidReason; - } + public MaterializationValidationResult getAutomaticRewritingValidationResult() { + return materializationValidationResult; } - public void setAutomaticRewritingInvalidReason(RelNode node) { + public void unsupportedByCalciteRewrite(String sqlPartType, String sqlPart) { if (isValidForAutomaticRewriting()) { - this.automaticRewritingInvalidReason = "Unsupported RelNode type " + node.getRelTypeName() + - " encountered in the query plan"; + String errorMessage = String.format(UNSUPPORTED_BY_CALCITE_FORMAT, sqlPartType, sqlPart); + this.materializationValidationResult = + new MaterializationValidationResult(EnumSet.of(TEXT), errorMessage); } } public boolean isValidForAutomaticRewriting() { - return automaticRewritingInvalidReason == null; + return RewriteAlgorithm.ALL.equals(materializationValidationResult.getSupportedRewriteAlgorithms()); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CBOPlan.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CBOPlan.java index 4230ef07b4a..53beeeb6df7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CBOPlan.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CBOPlan.java @@ -19,6 +19,9 @@ package org.apache.hadoop.hive.ql.parse; import org.apache.calcite.rel.RelNode; +import org.apache.hadoop.hive.ql.metadata.RewriteAlgorithm; + +import java.util.Set; /** * Wrapper of Calcite plan. 
@@ -26,12 +29,12 @@ import org.apache.calcite.rel.RelNode; public class CBOPlan { private final ASTNode ast; private final RelNode plan; - private final String invalidAutomaticRewritingMaterializationReason; + private final Set<RewriteAlgorithm> supportedRewriteAlgorithms; - public CBOPlan(ASTNode ast, RelNode plan, String invalidAutomaticRewritingMaterializationReason) { + public CBOPlan(ASTNode ast, RelNode plan, Set<RewriteAlgorithm> supportedRewriteAlgorithms) { this.ast = ast; this.plan = plan; - this.invalidAutomaticRewritingMaterializationReason = invalidAutomaticRewritingMaterializationReason; + this.supportedRewriteAlgorithms = supportedRewriteAlgorithms; } public ASTNode getAst() { @@ -52,7 +55,7 @@ public class CBOPlan { * Null or empty string otherwise. * @return String contains error message or null. */ - public String getInvalidAutomaticRewritingMaterializationReason() { - return invalidAutomaticRewritingMaterializationReason; + public Set<RewriteAlgorithm> getSupportedRewriteAlgorithms() { + return supportedRewriteAlgorithms; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index b105de8174e..7293c1822ae 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -355,7 +355,7 @@ import javax.sql.DataSource; import static org.apache.commons.lang3.StringUtils.isBlank; import static org.apache.hadoop.hive.ql.optimizer.calcite.HiveMaterializedViewASTSubQueryRewriteShuttle.getMaterializedViewByAST; -import static org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization.RewriteAlgorithm.ANY; +import static org.apache.hadoop.hive.ql.metadata.RewriteAlgorithm.ANY; public class CalcitePlanner extends SemanticAnalyzer { @@ -1681,8 +1681,8 @@ public class CalcitePlanner extends SemanticAnalyzer { materializationValidator.validate(calcitePlan); setInvalidResultCacheReason( 
materializationValidator.getResultCacheInvalidReason()); - setInvalidAutomaticRewritingMaterializationReason( - materializationValidator.getAutomaticRewritingInvalidReason()); + setMaterializationValidationResult( + materializationValidator.getAutomaticRewritingValidationResult()); // 2. Apply pre-join order optimizations calcitePlan = applyPreJoinOrderingTransforms(calcitePlan, mdProvider.getMetadataProvider(), executorProvider); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index db959192db7..29bc7aea404 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -541,7 +541,8 @@ public final class ParseUtils { final ASTNode ast = parse(viewQuery, ctx); final CalcitePlanner analyzer = getAnalyzer(conf, ctx); RelNode logicalPlan = analyzer.genLogicalPlan(ast); - return new CBOPlan(ast, logicalPlan, analyzer.getInvalidAutomaticRewritingMaterializationReason()); + return new CBOPlan( + ast, logicalPlan, analyzer.getMaterializationValidationResult().getSupportedRewriteAlgorithms()); } public static List<FieldSchema> parseQueryAndGetSchema(HiveConf conf, String viewQuery) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 769ab25e43e..e6a9a66dcd5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -181,6 +181,7 @@ import org.apache.hadoop.hive.ql.lib.SemanticGraphWalker; import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; import org.apache.hadoop.hive.ql.lockmgr.LockException; +import org.apache.hadoop.hive.ql.metadata.MaterializationValidationResult; import org.apache.hadoop.hive.ql.metadata.DefaultConstraint; import 
org.apache.hadoop.hive.ql.metadata.DummyPartition; import org.apache.hadoop.hive.ql.metadata.Hive; @@ -456,7 +457,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { HiveParser.TOK_DISTRIBUTEBY, HiveParser.TOK_SORTBY); private String invalidResultCacheReason; - private String invalidAutomaticRewritingMaterializationReason; + private MaterializationValidationResult materializationValidationResult; private final NullOrdering defaultNullOrder; @@ -14670,11 +14671,12 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } throw new SemanticException(msg); } - if (!isValidAutomaticRewritingMaterialization()) { - String errorMessage = "Only query text based automatic rewriting is available for materialized view. " + - getInvalidAutomaticRewritingMaterializationReason(); + if (materializationValidationResult.getSupportedRewriteAlgorithms().isEmpty()) { + createVwDesc.setRewriteEnabled(false); + } + String errorMessage = materializationValidationResult.getErrorMessage(); + if (isNotBlank(errorMessage)) { console.printError(errorMessage); - LOG.warn(errorMessage); } } } catch (HiveException e) { @@ -15931,18 +15933,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { public String colTypes; } - public String getInvalidAutomaticRewritingMaterializationReason() { - return invalidAutomaticRewritingMaterializationReason; - } - - public void setInvalidAutomaticRewritingMaterializationReason( - String invalidAutomaticRewritingMaterializationReason) { - this.invalidAutomaticRewritingMaterializationReason = - invalidAutomaticRewritingMaterializationReason; + public MaterializationValidationResult getMaterializationValidationResult() { + return materializationValidationResult; } - public boolean isValidAutomaticRewritingMaterialization() { - return (invalidAutomaticRewritingMaterializationReason == null); + public void setMaterializationValidationResult( + MaterializationValidationResult materializationValidationResult) { + 
this.materializationValidationResult = + materializationValidationResult; } public String getInvalidResultCacheReason() { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestMaterializedViewsCache.java b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestMaterializedViewsCache.java index 34e85b47195..6978fd10004 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestMaterializedViewsCache.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestMaterializedViewsCache.java @@ -157,7 +157,7 @@ class TestMaterializedViewsCache { private static HiveRelOptMaterialization createMaterialization(Table table) throws ParseException { return new HiveRelOptMaterialization( new DummyRel(table), new DummyRel(table), null, asList(table.getDbName(), table.getTableName()), - EnumSet.allOf(HiveRelOptMaterialization.RewriteAlgorithm.class), + RewriteAlgorithm.ALL, HiveRelOptMaterialization.IncrementalRebuildMode.AVAILABLE, ParseUtils.parse(table.getViewExpandedText(), null)); } diff --git a/ql/src/test/queries/clientnegative/materialized_view_no_cbo_rewrite_2.q b/ql/src/test/queries/clientpositive/materialized_view_no_cbo_rewrite.q similarity index 100% rename from ql/src/test/queries/clientnegative/materialized_view_no_cbo_rewrite_2.q rename to ql/src/test/queries/clientpositive/materialized_view_no_cbo_rewrite.q diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_10.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_10.q new file mode 100644 index 00000000000..43deec4a814 --- /dev/null +++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_10.q @@ -0,0 +1,11 @@ +-- Materialized view definition has non-deterministic function +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +CREATE TABLE EMPS (ENAME STRING, BIRTH_EPOCH_SECS INT) STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +CREATE MATERIALIZED VIEW v_emp AS SELECT * 
FROM EMPS WHERE BIRTH_EPOCH_SECS <= UNIX_TIMESTAMP(); + +-- View can not be used +explain cbo +SELECT * FROM EMPS WHERE BIRTH_EPOCH_SECS <= UNIX_TIMESTAMP(); diff --git a/ql/src/test/queries/clientnegative/materialized_view_no_supported_op_rewrite_2.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_11.q similarity index 100% rename from ql/src/test/queries/clientnegative/materialized_view_no_supported_op_rewrite_2.q rename to ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_11.q diff --git a/ql/src/test/results/clientnegative/materialized_view_no_cbo_rewrite_2.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_no_cbo_rewrite.q.out similarity index 87% rename from ql/src/test/results/clientnegative/materialized_view_no_cbo_rewrite_2.q.out rename to ql/src/test/results/clientpositive/llap/materialized_view_no_cbo_rewrite.q.out index 9083c736d64..4cba8933ab5 100644 --- a/ql/src/test/results/clientnegative/materialized_view_no_cbo_rewrite_2.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_no_cbo_rewrite.q.out @@ -36,4 +36,8 @@ PREHOOK: query: alter materialized view cmv_mat_view enable rewrite PREHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE PREHOOK: Input: default@cmv_mat_view PREHOOK: Output: default@cmv_mat_view -FAILED: Execution Error, return code 40000 from org.apache.hadoop.hive.ql.ddl.DDLTask. org.apache.hadoop.hive.ql.metadata.HiveException: Cannot enable rewriting for materialized view. Statement has unsupported clause: sort by. +Only query text based automatic rewriting is available for materialized view. Statement has unsupported clause: sort by. 
+POSTHOOK: query: alter materialized view cmv_mat_view enable rewrite +POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE +POSTHOOK: Input: default@cmv_mat_view +POSTHOOK: Output: default@cmv_mat_view diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_10.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_10.q.out new file mode 100644 index 00000000000..3455a1fe9ee --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_10.q.out @@ -0,0 +1,40 @@ +PREHOOK: query: CREATE TABLE EMPS (ENAME STRING, BIRTH_EPOCH_SECS INT) STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@EMPS +POSTHOOK: query: CREATE TABLE EMPS (ENAME STRING, BIRTH_EPOCH_SECS INT) STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@EMPS +unix_timestamp(void) is deprecated. Use current_timestamp instead. +unix_timestamp(void) is deprecated. Use current_timestamp instead. +Cannot enable automatic rewriting for materialized view. 
UNIX_TIMESTAMP is not a deterministic function +PREHOOK: query: CREATE MATERIALIZED VIEW v_emp AS SELECT * FROM EMPS WHERE BIRTH_EPOCH_SECS <= UNIX_TIMESTAMP() +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@emps +PREHOOK: Output: database:default +PREHOOK: Output: default@v_emp +POSTHOOK: query: CREATE MATERIALIZED VIEW v_emp AS SELECT * FROM EMPS WHERE BIRTH_EPOCH_SECS <= UNIX_TIMESTAMP() +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@emps +POSTHOOK: Output: database:default +POSTHOOK: Output: default@v_emp +POSTHOOK: Lineage: v_emp.birth_epoch_secs SIMPLE [(emps)emps.FieldSchema(name:birth_epoch_secs, type:int, comment:null), ] +POSTHOOK: Lineage: v_emp.ename SIMPLE [(emps)emps.FieldSchema(name:ename, type:string, comment:null), ] +unix_timestamp(void) is deprecated. Use current_timestamp instead. +unix_timestamp(void) is deprecated. Use current_timestamp instead. +PREHOOK: query: explain cbo +SELECT * FROM EMPS WHERE BIRTH_EPOCH_SECS <= UNIX_TIMESTAMP() +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +SELECT * FROM EMPS WHERE BIRTH_EPOCH_SECS <= UNIX_TIMESTAMP() +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +CBO PLAN: +HiveProject(ename=[$0], birth_epoch_secs=[$1]) + HiveFilter(condition=[<=(CAST($1):BIGINT, UNIX_TIMESTAMP())]) + HiveTableScan(table=[[default, emps]], table:alias=[emps]) + diff --git a/ql/src/test/results/clientnegative/materialized_view_no_supported_op_rewrite_2.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_11.q.out similarity index 87% rename from ql/src/test/results/clientnegative/materialized_view_no_supported_op_rewrite_2.q.out rename to ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_11.q.out index bb771614eec..bd12d44dc3b 100644 --- a/ql/src/test/results/clientnegative/materialized_view_no_supported_op_rewrite_2.q.out +++ 
b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_11.q.out @@ -35,4 +35,8 @@ PREHOOK: query: alter materialized view cmv_mat_view enable rewrite PREHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE PREHOOK: Input: default@cmv_mat_view PREHOOK: Output: default@cmv_mat_view -FAILED: Execution Error, return code 40000 from org.apache.hadoop.hive.ql.ddl.DDLTask. org.apache.hadoop.hive.ql.metadata.HiveException: Cannot enable rewriting for materialized view. LEFT join type is not supported by rewriting algorithm. +Only query text based automatic rewriting is available for materialized view. Statement has unsupported join type: LEFT. +POSTHOOK: query: alter materialized view cmv_mat_view enable rewrite +POSTHOOK: type: ALTER_MATERIALIZED_VIEW_REWRITE +POSTHOOK: Input: default@cmv_mat_view +POSTHOOK: Output: default@cmv_mat_view diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_8.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_8.q.out index fa31a16a265..e003990db64 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_8.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_8.q.out @@ -8,7 +8,7 @@ POSTHOOK: query: create table t1(col0 int) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t1 -Only query text based automatic rewriting is available for materialized view. LEFT join type is not supported by rewriting algorithm. +Only query text based automatic rewriting is available for materialized view. Statement has unsupported join type: LEFT. 
Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: create materialized view mat1 as select l.col0 from t1 l left outer join t1 r on (l.col0 = r.col0) where l.col0 = 20 @@ -23,7 +23,7 @@ POSTHOOK: Input: default@t1 POSTHOOK: Output: database:default POSTHOOK: Output: default@mat1 POSTHOOK: Lineage: mat1.col0 SIMPLE [] -Only query text based automatic rewriting is available for materialized view. LEFT join type is not supported by rewriting algorithm. +Only query text based automatic rewriting is available for materialized view. Statement has unsupported join type: LEFT. Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: create materialized view mat2 as select col0 from
