HIVE-11789: Better support for functions recognition in CBO (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7201c264 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7201c264 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7201c264 Branch: refs/heads/beeline-cli Commit: 7201c264a1fe8347fd87fc8c1bb835083e9aac75 Parents: 79244ab Author: Jesus Camacho Rodriguez <[email protected]> Authored: Thu Sep 17 17:48:01 2015 +0100 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Thu Sep 17 17:48:01 2015 +0100 ---------------------------------------------------------------------- .../calcite/reloperators/HiveBetween.java | 75 ++++++++++++++++++++ .../optimizer/calcite/reloperators/HiveIn.java | 41 +++++++++++ .../calcite/rules/HivePreFilteringRule.java | 37 +++------- .../translator/SqlFunctionConverter.java | 16 ++++- 4 files changed, 142 insertions(+), 27 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/7201c264/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveBetween.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveBetween.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveBetween.java new file mode 100644 index 0000000..2388939 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveBetween.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.sql.SqlCallBinding; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlSpecialOperator; +import org.apache.calcite.sql.type.ReturnTypes; +import org.apache.calcite.sql.type.SqlOperandTypeInference; +import org.apache.calcite.sql.type.SqlTypeName; + +public class HiveBetween extends SqlSpecialOperator { + + /** + * Operand type-inference strategy: operand 0 is always typed BOOLEAN, and every later operand receives the first known type derived among operands 1..n (the unknown type if none of them is known). + * Declared before INSTANCE on purpose: static fields are initialized in textual order, so the constructor would otherwise read this field while it is still null. 
+ */ + public static final SqlOperandTypeInference FIRST_BOOLEAN_THEN_FIRST_KNOWN = + new SqlOperandTypeInference() { + public void inferOperandTypes( + SqlCallBinding callBinding, + RelDataType returnType, + RelDataType[] operandTypes) { + final RelDataType unknownType = + callBinding.getValidator().getUnknownType(); + RelDataType knownType = unknownType; + for (int i = 1; i < callBinding.getCall().getOperandList().size(); i++) { + SqlNode operand = callBinding.getCall().getOperandList().get(i); + knownType = callBinding.getValidator().deriveType( + callBinding.getScope(), operand); + if (!knownType.equals(unknownType)) { + break; + } + } + + RelDataTypeFactory typeFactory = callBinding.getTypeFactory(); + operandTypes[0] = typeFactory.createSqlType(SqlTypeName.BOOLEAN); + for (int i = 1; i < operandTypes.length; ++i) { + operandTypes[i] = knownType; + } + } + }; + + public static final SqlSpecialOperator INSTANCE = + new HiveBetween(); + + private HiveBetween() { + super( + "BETWEEN", + SqlKind.BETWEEN, + 30, + true, + ReturnTypes.BOOLEAN_NULLABLE, + FIRST_BOOLEAN_THEN_FIRST_KNOWN, + null); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/7201c264/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIn.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIn.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIn.java new file mode 100644 index 0000000..6d87003 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIn.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlSpecialOperator; +import org.apache.calcite.sql.type.InferTypes; +import org.apache.calcite.sql.type.ReturnTypes; + +public class HiveIn extends SqlSpecialOperator { /* Singleton operator named "IN" with kind SqlKind.IN; the constructor is private, so INSTANCE is the only instance. */ + + public static final SqlSpecialOperator INSTANCE = + new HiveIn(); + + private HiveIn() { + super( + "IN", /* operator name */ + SqlKind.IN, /* operator kind */ + 30, /* NOTE(review): presumably the operator precedence - confirm against the SqlSpecialOperator constructor */ + true, /* NOTE(review): presumably left-associativity - confirm */ + ReturnTypes.BOOLEAN_NULLABLE, + InferTypes.FIRST_KNOWN, + null); /* NOTE(review): presumably "no operand type checker" - confirm */ + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/7201c264/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java index dde6288..3e2311c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; import java.util.ArrayList; import java.util.Collection; +import java.util.EnumSet; import java.util.List; import java.util.Map.Entry; import java.util.Set; @@ -41,22 +42,11 @@ import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import 
org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; import com.google.common.collect.ImmutableList; import com.google.common.collect.LinkedHashMultimap; import com.google.common.collect.Multimap; -import com.google.common.collect.Sets; public class HivePreFilteringRule extends RelOptRule { @@ -71,18 +61,13 @@ public class HivePreFilteringRule extends RelOptRule { private final FilterFactory filterFactory; - private static final Set<String> COMPARISON_UDFS = Sets.newHashSet( - GenericUDFOPEqual.class.getAnnotation(Description.class).name(), - GenericUDFOPEqualNS.class.getAnnotation(Description.class).name(), - GenericUDFOPEqualOrGreaterThan.class.getAnnotation(Description.class).name(), - GenericUDFOPEqualOrLessThan.class.getAnnotation(Description.class).name(), - GenericUDFOPGreaterThan.class.getAnnotation(Description.class).name(), - GenericUDFOPLessThan.class.getAnnotation(Description.class).name(), - GenericUDFOPNotEqual.class.getAnnotation(Description.class).name()); - private static final String IN_UDF = - GenericUDFIn.class.getAnnotation(Description.class).name(); - private static final String BETWEEN_UDF = - GenericUDFBetween.class.getAnnotation(Description.class).name(); + private static final Set<SqlKind> COMPARISON = EnumSet.of( + SqlKind.EQUALS, + SqlKind.GREATER_THAN_OR_EQUAL, + 
SqlKind.LESS_THAN_OR_EQUAL, + SqlKind.GREATER_THAN, + SqlKind.LESS_THAN, + SqlKind.NOT_EQUALS); private HivePreFilteringRule() { @@ -176,7 +161,7 @@ public class HivePreFilteringRule extends RelOptRule { continue; } RexCall conjCall = (RexCall) conjunction; - if(COMPARISON_UDFS.contains(conjCall.getOperator().getName())) { + if(COMPARISON.contains(conjCall.getOperator().getKind())) { if (conjCall.operands.get(0) instanceof RexInputRef && conjCall.operands.get(1) instanceof RexLiteral) { reductionCondition.put(conjCall.operands.get(0).toString(), @@ -188,11 +173,11 @@ public class HivePreFilteringRule extends RelOptRule { conjCall); addedToReductionCondition = true; } - } else if(conjCall.getOperator().getName().equals(IN_UDF)) { + } else if(conjCall.getOperator().getKind().equals(SqlKind.IN)) { reductionCondition.put(conjCall.operands.get(0).toString(), conjCall); addedToReductionCondition = true; - } else if(conjCall.getOperator().getName().equals(BETWEEN_UDF)) { + } else if(conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) { reductionCondition.put(conjCall.operands.get(1).toString(), conjCall); addedToReductionCondition = true; http://git-wip-us.apache.org/repos/asf/hive/blob/7201c264/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index 219289c..fd78824 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -45,6 +45,8 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import 
org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveBetween; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.ParseDriver; @@ -193,7 +195,16 @@ public class SqlFunctionConverter { HiveToken hToken = calciteToHiveToken.get(op); ASTNode node; if (hToken != null) { - node = (ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text); + switch (op.kind) { + case IN: + case BETWEEN: + case ROW: + node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION"); + node.addChild((ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text)); + break; + default: + node = (ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text); + } } else { node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION"); if (op.kind != SqlKind.CAST) { @@ -296,6 +307,9 @@ public class SqlFunctionConverter { hToken(HiveParser.GREATERTHANOREQUALTO, ">=")); registerFunction("!", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not")); registerFunction("<>", SqlStdOperatorTable.NOT_EQUALS, hToken(HiveParser.NOTEQUAL, "<>")); + registerFunction("in", HiveIn.INSTANCE, hToken(HiveParser.Identifier, "in")); + registerFunction("between", HiveBetween.INSTANCE, hToken(HiveParser.Identifier, "between")); + registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct")); } private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) {
