kasakrisz commented on code in PR #4344: URL: https://github.com/apache/hive/pull/4344#discussion_r1214091188
########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/PartitionFilter.g4: ########## @@ -0,0 +1,149 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +grammar PartitionFilter; + +filter + : filterExpression + ; + +filterExpression + : conditionExpression #singleCondition + | LPAREN filterExpression RPAREN #parenedFilter + | left=filterExpression logicOperator right=filterExpression #binaryFilter + ; + +conditionExpression + : key=identifier comparisonOperator value=constant #comparison + | value=constant comparisonOperator key=identifier #reverseComparison + | key=identifier NOT? BETWEEN lower=constant AND upper=constant #betweenCondition + | key=identifier NOT? IN LPAREN values=constantSeq RPAREN #inCondition + | LPAREN STRUCT identifierList RPAREN NOT? IN constStructList #multiColInExpression + ; + +logicOperator + : AND | OR + ; + +comparisonOperator + : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | NSEQ | LIKE + ; + +identifier + : IDENTIFIER #unquotedIdentifer + ; + +identifierList + : LPAREN ident+=identifier (COMMA ident+=identifier)* RPAREN + ; + +constant + : STRING+ #stringLiteral + | number #numericLiteral + | date #dateLiteral + | timestamp #timestampLiteral + ; + +constantSeq + : values+=constant (COMMA values+=constant)* + ; + +constStruct + : CONST STRUCT LPAREN constantSeq RPAREN + ; + +constStructList + : LPAREN structs+=constStruct (COMMA structs+=constStruct) RPAREN + ; + +number + : MINUS? INTEGER_VALUE #integerLiteral + ; + +date + : DATE value=STRING Review Comment: Is it possible to make this rule stronger? I mean the keyword `DATE` is always followed by a quoted `DATE_VALUE` not just any string ########## ql/src/test/results/clientpositive/llap/temp_table_partition_type_in_plan.q.out: ########## @@ -32,11 +32,13 @@ PREHOOK: query: SELECT * FROM datePartTbl_temp WHERE date_prt IN (CAST('2014-08- PREHOOK: type: QUERY PREHOOK: Input: default@dateparttbl_temp PREHOOK: Input: default@dateparttbl_temp@date_prt=2014-08-09 +PREHOOK: Input: default@dateparttbl_temp@date_prt=2014-08-10 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM datePartTbl_temp WHERE date_prt IN (CAST('2014-08-09' AS DATE), CAST('2014-08-08' AS DATE)) POSTHOOK: type: QUERY POSTHOOK: Input: default@dateparttbl_temp POSTHOOK: Input: default@dateparttbl_temp@date_prt=2014-08-09 +POSTHOOK: Input: default@dateparttbl_temp@date_prt=2014-08-10 Review Comment: Partition `date_prt=2014-08-10` is loaded, is this expected? From the filter predicate ``` date_prt IN (CAST('2014-08-09' AS DATE), CAST('2014-08-08' AS DATE)) ``` I would expect only `date_prt=2014-08-09` since `date_prt=2014-08-08` does not exists. ########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/AstBuilder.java: ########## @@ -0,0 +1,266 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.parser; + +import java.sql.Date; +import java.sql.Timestamp; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.List; +import java.util.Locale; +import java.util.TimeZone; +import java.util.stream.Collectors; + +import org.antlr.v4.runtime.misc.ParseCancellationException; +import org.antlr.v4.runtime.tree.RuleNode; +import org.antlr.v4.runtime.tree.TerminalNode; + +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.LeafNode; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.LogicalOperator; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode; + +public class AstBuilder extends PartitionFilterBaseVisitor<Object> { + private final DateTimeFormatter dateFormat = + DateTimeFormatter.ofPattern("yyyy-MM-dd") + .withZone(TimeZone.getTimeZone("UTC").toZoneId()); + private final DateTimeFormatter timestampFormat = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss") + .withZone(TimeZone.getTimeZone("UTC").toZoneId()); + + /** + * Override the default behavior for all visit methods. This will only return a non-null result + * when the context has only one child. This is done because there is no generic method to + * combine the results of the context children. In all other cases null is returned. + */ + @Override + public Object visitChildren(RuleNode node) { + if (node.getChildCount() == 1) { Review Comment: Is it possible that we end up with more that one children? Please share an example input and write a test. ########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/AstBuilder.java: ########## @@ -0,0 +1,266 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.parser; + +import java.sql.Date; +import java.sql.Timestamp; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.List; +import java.util.Locale; +import java.util.TimeZone; +import java.util.stream.Collectors; + +import org.antlr.v4.runtime.misc.ParseCancellationException; +import org.antlr.v4.runtime.tree.RuleNode; +import org.antlr.v4.runtime.tree.TerminalNode; + +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.LeafNode; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.LogicalOperator; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode; + +public class AstBuilder extends PartitionFilterBaseVisitor<Object> { + private final DateTimeFormatter dateFormat = + DateTimeFormatter.ofPattern("yyyy-MM-dd") + .withZone(TimeZone.getTimeZone("UTC").toZoneId()); + private final DateTimeFormatter timestampFormat = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss") + .withZone(TimeZone.getTimeZone("UTC").toZoneId()); + + /** + * Override the default behavior for all visit methods. This will only return a non-null result + * when the context has only one child. This is done because there is no generic method to + * combine the results of the context children. In all other cases null is returned. + */ + @Override + public Object visitChildren(RuleNode node) { + if (node.getChildCount() == 1) { + return node.getChild(0).accept(this); + } + return null; + } + + @Override + public ExpressionTree visitFilter(PartitionFilterParser.FilterContext ctx) { + ExpressionTree tree = new ExpressionTree(); + TreeNode treeNode = (TreeNode) visit(ctx.filterExpression()); + tree.setRoot(treeNode); + return tree; + } + + @Override + public TreeNode visitSingleCondition(PartitionFilterParser.SingleConditionContext ctx) { + return (TreeNode) visit(ctx.conditionExpression()); + } + + @Override + public TreeNode visitParenedFilter(PartitionFilterParser.ParenedFilterContext ctx) { + return (TreeNode) visit(ctx.filterExpression()); + } + + @Override + public TreeNode visitBinaryFilter(PartitionFilterParser.BinaryFilterContext ctx) { + TreeNode left = (TreeNode) visit(ctx.left); + TreeNode right = (TreeNode) visit(ctx.right); + LogicalOperator operator = visitLogicOperator(ctx.logicOperator()); + return new TreeNode(left, operator, right); + } + + @Override + public TreeNode visitComparison(PartitionFilterParser.ComparisonContext ctx) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = ctx.key.getText(); + leafNode.value = visit(ctx.value); + leafNode.operator = visitComparisonOperator(ctx.comparisonOperator()); + return leafNode; + } + + @Override + public Object visitReverseComparison(PartitionFilterParser.ReverseComparisonContext ctx) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = ctx.key.getText(); + leafNode.value = visit(ctx.value); + leafNode.operator = visitComparisonOperator(ctx.comparisonOperator()); + leafNode.isReverseOrder = true; + return leafNode; + } + + @Override + public TreeNode visitBetweenCondition(PartitionFilterParser.BetweenConditionContext ctx) { + LeafNode left = new LeafNode(); + LeafNode right = new LeafNode(); + left.keyName = right.keyName = ctx.key.getText(); + left.value = visit(ctx.lower); + right.value = visit(ctx.upper); + + boolean isPositive = ctx.NOT() == null; + left.operator = isPositive ? Operator.GREATERTHANOREQUALTO : Operator.LESSTHAN; + right.operator = isPositive ? Operator.LESSTHANOREQUALTO : Operator.GREATERTHAN; + LogicalOperator rootOperator = isPositive ? LogicalOperator.AND : LogicalOperator.OR; + + return new TreeNode(left, rootOperator, right); + } + + @Override + public TreeNode visitInCondition(PartitionFilterParser.InConditionContext ctx) { + List<Object> values = visitConstantSeq(ctx.constantSeq()); + boolean isPositive = ctx.NOT() == null; + String keyName = ctx.key.getText(); + + TreeNode root = null; + for (int i = 0; i < values.size(); ++i) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = keyName; + leafNode.value = values.get(i); + leafNode.operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2; + + if (i == 0) { + root = leafNode; + } else { + root = new TreeNode(root, isPositive ? LogicalOperator.OR : LogicalOperator.AND, leafNode); + } + } + + return root; + } + + @Override + public TreeNode visitMultiColInExpression(PartitionFilterParser.MultiColInExpressionContext ctx) { + TreeNode root = new TreeNode(); + List<String> keyNames = visitIdentifierList(ctx.identifierList()); + List<List<Object>> structs = visitConstStructList(ctx.constStructList()); + boolean isPositive = ctx.NOT() == null; + for (int i = 0; i < structs.size(); ++i) { + List<Object> struct = structs.get(i); + if (keyNames.size() != struct.size()) { + throw new ParseCancellationException("Struct key " + keyNames + " does not match value " + struct); + } + TreeNode node = new TreeNode(); + for (int j = 0; j < struct.size(); ++j) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = keyNames.get(j); + leafNode.value = struct.get(j); + leafNode.operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2; + if (j == 0) { + node = leafNode; + } else { + node = new TreeNode(node, isPositive ? LogicalOperator.AND : LogicalOperator.OR, leafNode); + } + } + if (i == 0) { + root = node; + } else { + root = new TreeNode(root, isPositive ? LogicalOperator.OR : LogicalOperator.AND, node); + } + } + + return root; + } + + @Override + public LogicalOperator visitLogicOperator(PartitionFilterParser.LogicOperatorContext ctx) { + switch (ctx.getText().toUpperCase(Locale.ROOT)) { + case "AND": return LogicalOperator.AND; + case "OR": return LogicalOperator.OR; + default: return null; + } + } + + @Override + public Operator visitComparisonOperator(PartitionFilterParser.ComparisonOperatorContext ctx) { + TerminalNode node = (TerminalNode) ctx.getChild(0); + switch (node.getSymbol().getType()) { + case PartitionFilterParser.EQ: return Operator.EQUALS; + case PartitionFilterParser.NEQ: return Operator.NOTEQUALS; + case PartitionFilterParser.NEQJ: return Operator.NOTEQUALS2; + case PartitionFilterParser.LT: return Operator.LESSTHAN; + case PartitionFilterParser.LTE: return Operator.LESSTHANOREQUALTO; + case PartitionFilterParser.GT: return Operator.GREATERTHAN; + case PartitionFilterParser.GTE: return Operator.GREATERTHANOREQUALTO; + case PartitionFilterParser.LIKE: return Operator.LIKE; + } + return null; + } + + @Override + public List<Object> visitConstantSeq(PartitionFilterParser.ConstantSeqContext ctx) { + return ctx.values.stream().map(this::visit).collect(Collectors.toList()); + } + + @Override + public List<Object> visitConstStruct(PartitionFilterParser.ConstStructContext ctx) { + return visitConstantSeq(ctx.constantSeq()); + } + + @Override + public List<List<Object>> visitConstStructList(PartitionFilterParser.ConstStructListContext ctx) { + return ctx.structs.stream().map(this::visitConstStruct).collect(Collectors.toList()); + } + + @Override + public List<String> visitIdentifierList(PartitionFilterParser.IdentifierListContext ctx) { + return ctx.ident.stream().map(i -> i.getText()).collect(Collectors.toList()); + } + + @Override + public String visitStringLiteral(PartitionFilterParser.StringLiteralContext ctx) { + return unquoteString(ctx.getText()); + } + + private String unquoteString(String string) { + if (string.length() > 1) { + if ((string.charAt(0) == '\'' && string.charAt(string.length() -1 ) == '\'') + || (string.charAt(0) == '"' && string.charAt(string.length() - 1) == '"')) { + return string.substring(1, string.length() - 1); + } + } + return string; + } + + @Override + public Long visitIntegerLiteral(PartitionFilterParser.IntegerLiteralContext ctx) { + return Long.parseLong(ctx.getText()); Review Comment: Is is possible to get `NumberFormatException` here? Can `ctx.getText()` return null? ########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/AstBuilder.java: ########## @@ -0,0 +1,266 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.parser; + +import java.sql.Date; +import java.sql.Timestamp; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.List; +import java.util.Locale; +import java.util.TimeZone; +import java.util.stream.Collectors; + +import org.antlr.v4.runtime.misc.ParseCancellationException; +import org.antlr.v4.runtime.tree.RuleNode; +import org.antlr.v4.runtime.tree.TerminalNode; + +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.LeafNode; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.LogicalOperator; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode; + +public class AstBuilder extends PartitionFilterBaseVisitor<Object> { + private final DateTimeFormatter dateFormat = + DateTimeFormatter.ofPattern("yyyy-MM-dd") + .withZone(TimeZone.getTimeZone("UTC").toZoneId()); + private final DateTimeFormatter timestampFormat = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss") + .withZone(TimeZone.getTimeZone("UTC").toZoneId()); + + /** + * Override the default behavior for all visit methods. This will only return a non-null result + * when the context has only one child. This is done because there is no generic method to + * combine the results of the context children. In all other cases null is returned. + */ + @Override + public Object visitChildren(RuleNode node) { + if (node.getChildCount() == 1) { + return node.getChild(0).accept(this); + } + return null; + } + + @Override + public ExpressionTree visitFilter(PartitionFilterParser.FilterContext ctx) { + ExpressionTree tree = new ExpressionTree(); + TreeNode treeNode = (TreeNode) visit(ctx.filterExpression()); + tree.setRoot(treeNode); + return tree; + } + + @Override + public TreeNode visitSingleCondition(PartitionFilterParser.SingleConditionContext ctx) { + return (TreeNode) visit(ctx.conditionExpression()); + } + + @Override + public TreeNode visitParenedFilter(PartitionFilterParser.ParenedFilterContext ctx) { + return (TreeNode) visit(ctx.filterExpression()); + } + + @Override + public TreeNode visitBinaryFilter(PartitionFilterParser.BinaryFilterContext ctx) { + TreeNode left = (TreeNode) visit(ctx.left); + TreeNode right = (TreeNode) visit(ctx.right); + LogicalOperator operator = visitLogicOperator(ctx.logicOperator()); + return new TreeNode(left, operator, right); + } + + @Override + public TreeNode visitComparison(PartitionFilterParser.ComparisonContext ctx) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = ctx.key.getText(); + leafNode.value = visit(ctx.value); + leafNode.operator = visitComparisonOperator(ctx.comparisonOperator()); + return leafNode; + } + + @Override + public Object visitReverseComparison(PartitionFilterParser.ReverseComparisonContext ctx) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = ctx.key.getText(); + leafNode.value = visit(ctx.value); + leafNode.operator = visitComparisonOperator(ctx.comparisonOperator()); + leafNode.isReverseOrder = true; + return leafNode; + } + + @Override + public TreeNode visitBetweenCondition(PartitionFilterParser.BetweenConditionContext ctx) { + LeafNode left = new LeafNode(); + LeafNode right = new LeafNode(); + left.keyName = right.keyName = ctx.key.getText(); + left.value = visit(ctx.lower); + right.value = visit(ctx.upper); + + boolean isPositive = ctx.NOT() == null; + left.operator = isPositive ? Operator.GREATERTHANOREQUALTO : Operator.LESSTHAN; + right.operator = isPositive ? Operator.LESSTHANOREQUALTO : Operator.GREATERTHAN; + LogicalOperator rootOperator = isPositive ? LogicalOperator.AND : LogicalOperator.OR; + + return new TreeNode(left, rootOperator, right); + } + + @Override + public TreeNode visitInCondition(PartitionFilterParser.InConditionContext ctx) { + List<Object> values = visitConstantSeq(ctx.constantSeq()); + boolean isPositive = ctx.NOT() == null; + String keyName = ctx.key.getText(); + + TreeNode root = null; + for (int i = 0; i < values.size(); ++i) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = keyName; + leafNode.value = values.get(i); + leafNode.operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2; + + if (i == 0) { + root = leafNode; + } else { + root = new TreeNode(root, isPositive ? LogicalOperator.OR : LogicalOperator.AND, leafNode); + } + } + + return root; + } + + @Override + public TreeNode visitMultiColInExpression(PartitionFilterParser.MultiColInExpressionContext ctx) { + TreeNode root = new TreeNode(); + List<String> keyNames = visitIdentifierList(ctx.identifierList()); + List<List<Object>> structs = visitConstStructList(ctx.constStructList()); + boolean isPositive = ctx.NOT() == null; + for (int i = 0; i < structs.size(); ++i) { + List<Object> struct = structs.get(i); + if (keyNames.size() != struct.size()) { + throw new ParseCancellationException("Struct key " + keyNames + " does not match value " + struct); + } + TreeNode node = new TreeNode(); + for (int j = 0; j < struct.size(); ++j) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = keyNames.get(j); + leafNode.value = struct.get(j); + leafNode.operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2; + if (j == 0) { + node = leafNode; + } else { + node = new TreeNode(node, isPositive ? LogicalOperator.AND : LogicalOperator.OR, leafNode); + } + } + if (i == 0) { + root = node; + } else { + root = new TreeNode(root, isPositive ? LogicalOperator.OR : LogicalOperator.AND, node); + } + } + + return root; + } + + @Override + public LogicalOperator visitLogicOperator(PartitionFilterParser.LogicOperatorContext ctx) { + switch (ctx.getText().toUpperCase(Locale.ROOT)) { + case "AND": return LogicalOperator.AND; + case "OR": return LogicalOperator.OR; + default: return null; + } + } + + @Override + public Operator visitComparisonOperator(PartitionFilterParser.ComparisonOperatorContext ctx) { + TerminalNode node = (TerminalNode) ctx.getChild(0); + switch (node.getSymbol().getType()) { + case PartitionFilterParser.EQ: return Operator.EQUALS; + case PartitionFilterParser.NEQ: return Operator.NOTEQUALS; + case PartitionFilterParser.NEQJ: return Operator.NOTEQUALS2; + case PartitionFilterParser.LT: return Operator.LESSTHAN; + case PartitionFilterParser.LTE: return Operator.LESSTHANOREQUALTO; + case PartitionFilterParser.GT: return Operator.GREATERTHAN; + case PartitionFilterParser.GTE: return Operator.GREATERTHANOREQUALTO; + case PartitionFilterParser.LIKE: return Operator.LIKE; + } + return null; + } + + @Override + public List<Object> visitConstantSeq(PartitionFilterParser.ConstantSeqContext ctx) { + return ctx.values.stream().map(this::visit).collect(Collectors.toList()); + } + + @Override + public List<Object> visitConstStruct(PartitionFilterParser.ConstStructContext ctx) { + return visitConstantSeq(ctx.constantSeq()); + } + + @Override + public List<List<Object>> visitConstStructList(PartitionFilterParser.ConstStructListContext ctx) { + return ctx.structs.stream().map(this::visitConstStruct).collect(Collectors.toList()); + } + + @Override + public List<String> visitIdentifierList(PartitionFilterParser.IdentifierListContext ctx) { + return ctx.ident.stream().map(i -> i.getText()).collect(Collectors.toList()); + } + + @Override + public String visitStringLiteral(PartitionFilterParser.StringLiteralContext ctx) { + return unquoteString(ctx.getText()); + } + + private String unquoteString(String string) { + if (string.length() > 1) { Review Comment: Can `string` be null? ########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/PartFilterParser.java: ########## @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.parser; + +import java.util.function.Function; + +import org.antlr.v4.runtime.*; +import org.antlr.v4.runtime.misc.Interval; +import org.antlr.v4.runtime.misc.ParseCancellationException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class PartFilterParser { + private static final Logger LOG = LoggerFactory.getLogger(PartFilterParser.class); + private static final ThreadLocal<AstBuilder> astBuilder = ThreadLocal.withInitial(AstBuilder::new); Review Comment: Does it has to be ThreadLocal? Does `AstBuilder` or any of its ancestors has state? ########## standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java: ########## @@ -2446,6 +2446,16 @@ public void testPartitionFilter() throws Exception { "p3 between 31 and 32 and p1 between \"p12\" and \"p14\"", 3); checkFilter(client, dbName, tblName, "p4 between \"p41\" and \"p44\"", 5); + // Test in + checkFilter(client, dbName, tblName, "p1 in (\"p11\", \"p12\")", 4); + checkFilter(client, dbName, tblName, "p2 in (\"p21\", \"p25\")", 3); + checkFilter(client, dbName, tblName, "p3 not in (31, 33)", 3); + checkFilter(client, dbName, tblName, "p4 not in ('p43', 'p44')", 3); + + // Test multi-in + checkFilter(client, dbName, tblName, "(struct (p1, p2)) in (const struct ('p11', 'p22'), const struct ('p12', 'p22'))", 1); Review Comment: The currently supported multi column in expression format on master is different: ``` (struct(ds1,ds2)) IN (struct(2000-05-08, 2001-04-08), struct(2000-05-09, 2001-04-09)) ``` Please run [pointlookup4.q](https://github.com/apache/hive/blob/master/ql/src/test/queries/clientpositive/pointlookup4.q) and attach a debugger. The q.out file is not changed by your patch but I suspect that some query passes filters in the format I mentioned and it worth checking whether parsing was successful with the new parser. ########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/AstBuilder.java: ########## @@ -0,0 +1,266 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.parser; + +import java.sql.Date; +import java.sql.Timestamp; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.List; +import java.util.Locale; +import java.util.TimeZone; +import java.util.stream.Collectors; + +import org.antlr.v4.runtime.misc.ParseCancellationException; +import org.antlr.v4.runtime.tree.RuleNode; +import org.antlr.v4.runtime.tree.TerminalNode; + +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.LeafNode; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.LogicalOperator; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode; + +public class AstBuilder extends PartitionFilterBaseVisitor<Object> { Review Comment: `AstBuilder` is too general and IIUC we are not building an AST but an `ExpressionTree` here. Could you please rename this class? ########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/AstBuilder.java: ########## @@ -0,0 +1,266 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.parser; + +import java.sql.Date; +import java.sql.Timestamp; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.List; +import java.util.Locale; +import java.util.TimeZone; +import java.util.stream.Collectors; + +import org.antlr.v4.runtime.misc.ParseCancellationException; +import org.antlr.v4.runtime.tree.RuleNode; +import org.antlr.v4.runtime.tree.TerminalNode; + +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.LeafNode; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.LogicalOperator; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode; + +public class AstBuilder extends PartitionFilterBaseVisitor<Object> { + private final DateTimeFormatter dateFormat = + DateTimeFormatter.ofPattern("yyyy-MM-dd") + .withZone(TimeZone.getTimeZone("UTC").toZoneId()); + private final DateTimeFormatter timestampFormat = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss") + .withZone(TimeZone.getTimeZone("UTC").toZoneId()); + + /** + * Override the default behavior for all visit methods. This will only return a non-null result + * when the context has only one child. This is done because there is no generic method to + * combine the results of the context children. In all other cases null is returned. + */ + @Override + public Object visitChildren(RuleNode node) { + if (node.getChildCount() == 1) { + return node.getChild(0).accept(this); + } + return null; + } + + @Override + public ExpressionTree visitFilter(PartitionFilterParser.FilterContext ctx) { + ExpressionTree tree = new ExpressionTree(); + TreeNode treeNode = (TreeNode) visit(ctx.filterExpression()); + tree.setRoot(treeNode); + return tree; + } + + @Override + public TreeNode visitSingleCondition(PartitionFilterParser.SingleConditionContext ctx) { + return (TreeNode) visit(ctx.conditionExpression()); + } + + @Override + public TreeNode visitParenedFilter(PartitionFilterParser.ParenedFilterContext ctx) { + return (TreeNode) visit(ctx.filterExpression()); + } + + @Override + public TreeNode visitBinaryFilter(PartitionFilterParser.BinaryFilterContext ctx) { + TreeNode left = (TreeNode) visit(ctx.left); + TreeNode right = (TreeNode) visit(ctx.right); + LogicalOperator operator = visitLogicOperator(ctx.logicOperator()); + return new TreeNode(left, operator, right); + } + + @Override + public TreeNode visitComparison(PartitionFilterParser.ComparisonContext ctx) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = ctx.key.getText(); + leafNode.value = visit(ctx.value); + leafNode.operator = visitComparisonOperator(ctx.comparisonOperator()); + return leafNode; + } + + @Override + public Object visitReverseComparison(PartitionFilterParser.ReverseComparisonContext ctx) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = ctx.key.getText(); + leafNode.value = visit(ctx.value); + leafNode.operator = visitComparisonOperator(ctx.comparisonOperator()); + leafNode.isReverseOrder = true; + return leafNode; + } + + @Override + public TreeNode visitBetweenCondition(PartitionFilterParser.BetweenConditionContext ctx) { + LeafNode left = new LeafNode(); + LeafNode right = new LeafNode(); + left.keyName = right.keyName = ctx.key.getText(); + left.value = visit(ctx.lower); + right.value = visit(ctx.upper); + + boolean isPositive = ctx.NOT() == null; + left.operator = isPositive ? Operator.GREATERTHANOREQUALTO : Operator.LESSTHAN; + right.operator = isPositive ? Operator.LESSTHANOREQUALTO : Operator.GREATERTHAN; + LogicalOperator rootOperator = isPositive ? LogicalOperator.AND : LogicalOperator.OR; + + return new TreeNode(left, rootOperator, right); + } + + @Override + public TreeNode visitInCondition(PartitionFilterParser.InConditionContext ctx) { + List<Object> values = visitConstantSeq(ctx.constantSeq()); + boolean isPositive = ctx.NOT() == null; + String keyName = ctx.key.getText(); + + TreeNode root = null; + for (int i = 0; i < values.size(); ++i) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = keyName; + leafNode.value = values.get(i); + leafNode.operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2; + + if (i == 0) { + root = leafNode; + } else { + root = new TreeNode(root, isPositive ? LogicalOperator.OR : LogicalOperator.AND, leafNode); + } + } + + return root; + } + + @Override + public TreeNode visitMultiColInExpression(PartitionFilterParser.MultiColInExpressionContext ctx) { + TreeNode root = new TreeNode(); + List<String> keyNames = visitIdentifierList(ctx.identifierList()); + List<List<Object>> structs = visitConstStructList(ctx.constStructList()); + boolean isPositive = ctx.NOT() == null; + for (int i = 0; i < structs.size(); ++i) { + List<Object> struct = structs.get(i); + if (keyNames.size() != struct.size()) { + throw new ParseCancellationException("Struct key " + keyNames + " does not match value " + struct); + } Review Comment: Can this check be moved before the loop? Also, please rephrase the error message to highlight that the number of elements are not match. ########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/AstBuilder.java: ########## @@ -0,0 +1,266 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.parser; + +import java.sql.Date; +import java.sql.Timestamp; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.List; +import java.util.Locale; +import java.util.TimeZone; +import java.util.stream.Collectors; + +import org.antlr.v4.runtime.misc.ParseCancellationException; +import org.antlr.v4.runtime.tree.RuleNode; +import org.antlr.v4.runtime.tree.TerminalNode; + +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.LeafNode; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.LogicalOperator; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode; + +public class AstBuilder extends PartitionFilterBaseVisitor<Object> { + private final DateTimeFormatter dateFormat = + DateTimeFormatter.ofPattern("yyyy-MM-dd") + .withZone(TimeZone.getTimeZone("UTC").toZoneId()); + private final DateTimeFormatter timestampFormat = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss") + .withZone(TimeZone.getTimeZone("UTC").toZoneId()); + + /** + * Override the default behavior for all visit methods. This will only return a non-null result + * when the context has only one child. This is done because there is no generic method to + * combine the results of the context children. In all other cases null is returned. + */ + @Override + public Object visitChildren(RuleNode node) { + if (node.getChildCount() == 1) { + return node.getChild(0).accept(this); + } + return null; + } + + @Override + public ExpressionTree visitFilter(PartitionFilterParser.FilterContext ctx) { + ExpressionTree tree = new ExpressionTree(); + TreeNode treeNode = (TreeNode) visit(ctx.filterExpression()); + tree.setRoot(treeNode); + return tree; + } + + @Override + public TreeNode visitSingleCondition(PartitionFilterParser.SingleConditionContext ctx) { + return (TreeNode) visit(ctx.conditionExpression()); + } + + @Override + public TreeNode visitParenedFilter(PartitionFilterParser.ParenedFilterContext ctx) { + return (TreeNode) visit(ctx.filterExpression()); + } + + @Override + public TreeNode visitBinaryFilter(PartitionFilterParser.BinaryFilterContext ctx) { + TreeNode left = (TreeNode) visit(ctx.left); + TreeNode right = (TreeNode) visit(ctx.right); + LogicalOperator operator = visitLogicOperator(ctx.logicOperator()); + return new TreeNode(left, operator, right); + } + + @Override + public TreeNode visitComparison(PartitionFilterParser.ComparisonContext ctx) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = ctx.key.getText(); + leafNode.value = visit(ctx.value); + leafNode.operator = visitComparisonOperator(ctx.comparisonOperator()); + return leafNode; + } + + @Override + public Object visitReverseComparison(PartitionFilterParser.ReverseComparisonContext ctx) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = ctx.key.getText(); + leafNode.value = visit(ctx.value); + leafNode.operator = visitComparisonOperator(ctx.comparisonOperator()); + leafNode.isReverseOrder = true; + return leafNode; + } + + @Override + public TreeNode visitBetweenCondition(PartitionFilterParser.BetweenConditionContext ctx) { + LeafNode left = new LeafNode(); + LeafNode right = new LeafNode(); + left.keyName = right.keyName = ctx.key.getText(); + left.value = visit(ctx.lower); + right.value = visit(ctx.upper); + + boolean isPositive = ctx.NOT() == null; + left.operator = isPositive ? Operator.GREATERTHANOREQUALTO : Operator.LESSTHAN; + right.operator = isPositive ? Operator.LESSTHANOREQUALTO : Operator.GREATERTHAN; + LogicalOperator rootOperator = isPositive ? LogicalOperator.AND : LogicalOperator.OR; + + return new TreeNode(left, rootOperator, right); + } + + @Override + public TreeNode visitInCondition(PartitionFilterParser.InConditionContext ctx) { + List<Object> values = visitConstantSeq(ctx.constantSeq()); + boolean isPositive = ctx.NOT() == null; + String keyName = ctx.key.getText(); + + TreeNode root = null; + for (int i = 0; i < values.size(); ++i) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = keyName; + leafNode.value = values.get(i); + leafNode.operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2; + + if (i == 0) { + root = leafNode; + } else { + root = new TreeNode(root, isPositive ? LogicalOperator.OR : LogicalOperator.AND, leafNode); + } + } + + return root; + } + + @Override + public TreeNode visitMultiColInExpression(PartitionFilterParser.MultiColInExpressionContext ctx) { + TreeNode root = new TreeNode(); + List<String> keyNames = visitIdentifierList(ctx.identifierList()); + List<List<Object>> structs = visitConstStructList(ctx.constStructList()); + boolean isPositive = ctx.NOT() == null; + for (int i = 0; i < structs.size(); ++i) { + List<Object> struct = structs.get(i); + if (keyNames.size() != struct.size()) { + throw new ParseCancellationException("Struct key " + keyNames + " does not match value " + struct); + } + TreeNode node = new TreeNode(); + for (int j = 0; j < struct.size(); ++j) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = keyNames.get(j); + leafNode.value = struct.get(j); + leafNode.operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2; + if (j == 0) { + node = leafNode; + } else { + node = new TreeNode(node, isPositive ? LogicalOperator.AND : LogicalOperator.OR, leafNode); + } + } + if (i == 0) { + root = node; + } else { + root = new TreeNode(root, isPositive ? LogicalOperator.OR : LogicalOperator.AND, node); + } + } + + return root; + } + + @Override + public LogicalOperator visitLogicOperator(PartitionFilterParser.LogicOperatorContext ctx) { + switch (ctx.getText().toUpperCase(Locale.ROOT)) { + case "AND": return LogicalOperator.AND; + case "OR": return LogicalOperator.OR; + default: return null; Review Comment: Please throw a `ParseCancellationException` with a meaningful error message like ``` "Unsupported operator: " + ctx.getText() ``` ########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/AstBuilder.java: ########## @@ -0,0 +1,266 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.parser; + +import java.sql.Date; +import java.sql.Timestamp; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.List; +import java.util.Locale; +import java.util.TimeZone; +import java.util.stream.Collectors; + +import org.antlr.v4.runtime.misc.ParseCancellationException; +import org.antlr.v4.runtime.tree.RuleNode; +import org.antlr.v4.runtime.tree.TerminalNode; + +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.LeafNode; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.LogicalOperator; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator; +import static org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode; + +public class AstBuilder extends PartitionFilterBaseVisitor<Object> { + private final DateTimeFormatter dateFormat = + DateTimeFormatter.ofPattern("yyyy-MM-dd") + .withZone(TimeZone.getTimeZone("UTC").toZoneId()); + private final DateTimeFormatter timestampFormat = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss") + .withZone(TimeZone.getTimeZone("UTC").toZoneId()); + + /** + * Override the default behavior for all visit methods. This will only return a non-null result + * when the context has only one child. This is done because there is no generic method to + * combine the results of the context children. In all other cases null is returned. + */ + @Override + public Object visitChildren(RuleNode node) { + if (node.getChildCount() == 1) { + return node.getChild(0).accept(this); + } + return null; + } + + @Override + public ExpressionTree visitFilter(PartitionFilterParser.FilterContext ctx) { + ExpressionTree tree = new ExpressionTree(); + TreeNode treeNode = (TreeNode) visit(ctx.filterExpression()); + tree.setRoot(treeNode); + return tree; + } + + @Override + public TreeNode visitSingleCondition(PartitionFilterParser.SingleConditionContext ctx) { + return (TreeNode) visit(ctx.conditionExpression()); + } + + @Override + public TreeNode visitParenedFilter(PartitionFilterParser.ParenedFilterContext ctx) { + return (TreeNode) visit(ctx.filterExpression()); + } + + @Override + public TreeNode visitBinaryFilter(PartitionFilterParser.BinaryFilterContext ctx) { + TreeNode left = (TreeNode) visit(ctx.left); + TreeNode right = (TreeNode) visit(ctx.right); + LogicalOperator operator = visitLogicOperator(ctx.logicOperator()); + return new TreeNode(left, operator, right); + } + + @Override + public TreeNode visitComparison(PartitionFilterParser.ComparisonContext ctx) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = ctx.key.getText(); + leafNode.value = visit(ctx.value); + leafNode.operator = visitComparisonOperator(ctx.comparisonOperator()); + return leafNode; + } + + @Override + public Object visitReverseComparison(PartitionFilterParser.ReverseComparisonContext ctx) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = ctx.key.getText(); + leafNode.value = visit(ctx.value); + leafNode.operator = visitComparisonOperator(ctx.comparisonOperator()); + leafNode.isReverseOrder = true; + return leafNode; + } + + @Override + public TreeNode visitBetweenCondition(PartitionFilterParser.BetweenConditionContext ctx) { + LeafNode left = new LeafNode(); + LeafNode right = new LeafNode(); + left.keyName = right.keyName = ctx.key.getText(); + left.value = visit(ctx.lower); + right.value = visit(ctx.upper); + + boolean isPositive = ctx.NOT() == null; + left.operator = isPositive ? Operator.GREATERTHANOREQUALTO : Operator.LESSTHAN; + right.operator = isPositive ? Operator.LESSTHANOREQUALTO : Operator.GREATERTHAN; + LogicalOperator rootOperator = isPositive ? LogicalOperator.AND : LogicalOperator.OR; + + return new TreeNode(left, rootOperator, right); + } + + @Override + public TreeNode visitInCondition(PartitionFilterParser.InConditionContext ctx) { + List<Object> values = visitConstantSeq(ctx.constantSeq()); + boolean isPositive = ctx.NOT() == null; + String keyName = ctx.key.getText(); + + TreeNode root = null; + for (int i = 0; i < values.size(); ++i) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = keyName; + leafNode.value = values.get(i); + leafNode.operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2; + + if (i == 0) { + root = leafNode; + } else { + root = new TreeNode(root, isPositive ? LogicalOperator.OR : LogicalOperator.AND, leafNode); + } + } + + return root; + } + + @Override + public TreeNode visitMultiColInExpression(PartitionFilterParser.MultiColInExpressionContext ctx) { + TreeNode root = new TreeNode(); + List<String> keyNames = visitIdentifierList(ctx.identifierList()); + List<List<Object>> structs = visitConstStructList(ctx.constStructList()); + boolean isPositive = ctx.NOT() == null; + for (int i = 0; i < structs.size(); ++i) { + List<Object> struct = structs.get(i); + if (keyNames.size() != struct.size()) { + throw new ParseCancellationException("Struct key " + keyNames + " does not match value " + struct); + } + TreeNode node = new TreeNode(); + for (int j = 0; j < struct.size(); ++j) { + LeafNode leafNode = new LeafNode(); + leafNode.keyName = keyNames.get(j); + leafNode.value = struct.get(j); + leafNode.operator = isPositive ? Operator.EQUALS : Operator.NOTEQUALS2; + if (j == 0) { + node = leafNode; + } else { + node = new TreeNode(node, isPositive ? LogicalOperator.AND : LogicalOperator.OR, leafNode); + } + } + if (i == 0) { + root = node; + } else { + root = new TreeNode(root, isPositive ? LogicalOperator.OR : LogicalOperator.AND, node); + } + } + + return root; + } + + @Override + public LogicalOperator visitLogicOperator(PartitionFilterParser.LogicOperatorContext ctx) { + switch (ctx.getText().toUpperCase(Locale.ROOT)) { + case "AND": return LogicalOperator.AND; + case "OR": return LogicalOperator.OR; + default: return null; + } + } + + @Override + public Operator visitComparisonOperator(PartitionFilterParser.ComparisonOperatorContext ctx) { + TerminalNode node = (TerminalNode) ctx.getChild(0); + switch (node.getSymbol().getType()) { + case PartitionFilterParser.EQ: return Operator.EQUALS; + case PartitionFilterParser.NEQ: return Operator.NOTEQUALS; + case PartitionFilterParser.NEQJ: return Operator.NOTEQUALS2; + case PartitionFilterParser.LT: return Operator.LESSTHAN; + case PartitionFilterParser.LTE: return Operator.LESSTHANOREQUALTO; + case PartitionFilterParser.GT: return Operator.GREATERTHAN; + case PartitionFilterParser.GTE: return Operator.GREATERTHANOREQUALTO; + case PartitionFilterParser.LIKE: return Operator.LIKE; + } + return null; Review Comment: Please throw a ParseCancellationException with a meaningful error message ########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/PartitionFilter.g4: ########## @@ -0,0 +1,149 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +grammar PartitionFilter; + +filter + : filterExpression + ; + +filterExpression + : conditionExpression #singleCondition + | LPAREN filterExpression RPAREN #parenedFilter + | left=filterExpression logicOperator right=filterExpression #binaryFilter Review Comment: How precedence of `OR` and `AND` operators ensured? ########## standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/PartFilterParser.java: ########## @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.parser; + +import java.util.function.Function; + +import org.antlr.v4.runtime.*; +import org.antlr.v4.runtime.misc.Interval; +import org.antlr.v4.runtime.misc.ParseCancellationException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class PartFilterParser { + private static final Logger LOG = LoggerFactory.getLogger(PartFilterParser.class); + private static final ThreadLocal<AstBuilder> astBuilder = ThreadLocal.withInitial(AstBuilder::new); + + public static ExpressionTree parseFilter(String filter) throws MetaException { + return parse(filter, parser -> astBuilder.get().visitFilter(parser.filter())); + } + + private static <T> T parse(String filter, Function<PartitionFilterParser, T> toResult) + throws MetaException { + LOG.debug("Parsing filter: " + filter); + CharStream upperCaseCharStream = new UpperCaseCharStream(CharStreams.fromString(filter)); + PartitionFilterLexer lexer = new PartitionFilterLexer(upperCaseCharStream); + lexer.removeErrorListeners(); + lexer.addErrorListener(ParseErrorListener.INSTANCE); + + CommonTokenStream tokenStream = new CommonTokenStream(lexer); + PartitionFilterParser parser = new PartitionFilterParser(tokenStream); + parser.removeErrorListeners(); + parser.addErrorListener(ParseErrorListener.INSTANCE); + + try { + return toResult.apply(parser); + } catch (ParseCancellationException e) { + throw new MetaException("Error parsing partition filter: " + e.getMessage()); + } + } + + /** Case-insensitive ANTLR string stream */ + private static class UpperCaseCharStream implements CharStream { Review Comment: How about extending `CodePointCharStream` instead of wrapping it? In that case only `int LA(int i)` would be overriden. It is done for antlr 3 https://github.com/apache/hive/blob/7cd3107a76d633ef5fae2ffb8ec16953ac968092/parser/src/java/org/apache/hadoop/hive/ql/parse/ANTLRNoCaseStringStream.java#L37-L51 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
