This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new d6a04694bb8 [feat](Nereids) support nereids hint position detaction
(#39113) (#39416)
d6a04694bb8 is described below
commit d6a04694bb8a67f5731527c4a50ab81aa4aa055a
Author: LiBinfeng <[email protected]>
AuthorDate: Fri Aug 16 10:56:53 2024 +0800
[feat](Nereids) support nereids hint position detaction (#39113) (#39416)
cherry-pick from master #39113
When use hint in wrong position or use unsupport hint, use channel(2) to
filter it out
---
.../antlr4/org/apache/doris/nereids/DorisLexer.g4 | 18 +++++
.../doris/nereids/parser/LogicalPlanBuilder.java | 90 +++++++++++++---------
.../apache/doris/nereids/parser/NereidsParser.java | 34 +++++++-
.../doris/nereids/parser/NereidsParserTest.java | 8 --
regression-test/data/nereids_p0/hint/test_hint.out | 81 +++++++++++++++++++
.../data/nereids_p0/hint/test_leading.out | 4 +-
.../suites/nereids_p0/hint/test_hint.groovy | 60 +++++++++++++++
7 files changed, 248 insertions(+), 47 deletions(-)
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
index bddf221f107..64d2e9ce8ca 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
@@ -72,6 +72,19 @@ lexer grammar DorisLexer;
public void markUnclosedComment() {
has_unclosed_bracketed_comment = true;
}
+
+ // This variable will hold the external state
+ private boolean channel2;
+
+ // Method to set the external state
+ public void setChannel2(boolean value) {
+ this.channel2 = value;
+ }
+
+ // Method to decide the channel based on external state
+ private boolean isChannel2() {
+ return this.channel2;
+ }
}
SEMICOLON: ';';
@@ -654,6 +667,11 @@ BRACKETED_COMMENT
: '/*' {!isHint()}? ( BRACKETED_COMMENT | . )*? ('*/' |
{markUnclosedComment();} EOF) -> channel(HIDDEN)
;
+HINT_WITH_CHANNEL
+ : {isChannel2()}? HINT_START .*? HINT_END -> channel(2)
+ ;
+
+
FROM_DUAL
: 'FROM' WS+ 'DUAL' -> channel(HIDDEN);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index 6e4f888c390..7d9f36750d5 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -296,6 +296,12 @@ import java.util.stream.Collectors;
@SuppressWarnings({"OptionalUsedAsFieldOrParameterType",
"OptionalGetWithoutIsPresent"})
public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
+ private final Map<Integer, ParserRuleContext> selectHintMap;
+
+ public LogicalPlanBuilder(Map<Integer, ParserRuleContext> selectHintMap) {
+ this.selectHintMap = selectHintMap;
+ }
+
@SuppressWarnings("unchecked")
protected <T> T typedVisit(ParseTree ctx) {
return (T) ctx.accept(this);
@@ -604,7 +610,16 @@ public class LogicalPlanBuilder extends
DorisParserBaseVisitor<Object> {
Optional.ofNullable(ctx.aggClause()),
Optional.ofNullable(ctx.havingClause()));
selectPlan = withQueryOrganization(selectPlan,
ctx.queryOrganization());
- return withSelectHint(selectPlan, selectCtx.selectHint());
+ if ((selectHintMap == null) || selectHintMap.isEmpty()) {
+ return selectPlan;
+ }
+ List<ParserRuleContext> selectHintContexts = Lists.newArrayList();
+ for (Integer key : selectHintMap.keySet()) {
+ if (key > selectCtx.getStart().getStopIndex() && key <
selectCtx.getStop().getStartIndex()) {
+ selectHintContexts.add(selectHintMap.get(key));
+ }
+ }
+ return withSelectHint(selectPlan, selectHintContexts);
});
}
@@ -1785,47 +1800,50 @@ public class LogicalPlanBuilder extends
DorisParserBaseVisitor<Object> {
return last;
}
- private LogicalPlan withSelectHint(LogicalPlan logicalPlan,
SelectHintContext hintContext) {
- if (hintContext == null) {
+ private LogicalPlan withSelectHint(LogicalPlan logicalPlan,
List<ParserRuleContext> hintContexts) {
+ if (hintContexts.isEmpty()) {
return logicalPlan;
}
Map<String, SelectHint> hints = Maps.newLinkedHashMap();
- for (HintStatementContext hintStatement : hintContext.hintStatements) {
- String hintName =
hintStatement.hintName.getText().toLowerCase(Locale.ROOT);
- switch (hintName) {
- case "set_var":
- Map<String, Optional<String>> parameters =
Maps.newLinkedHashMap();
- for (HintAssignmentContext kv : hintStatement.parameters) {
- if (kv.key != null) {
- String parameterName =
visitIdentifierOrText(kv.key);
- Optional<String> value = Optional.empty();
- if (kv.constantValue != null) {
- Literal literal = (Literal)
visit(kv.constantValue);
- value =
Optional.ofNullable(literal.toLegacyLiteral().getStringValue());
- } else if (kv.identifierValue != null) {
- // maybe we should throw exception when the
identifierValue is quoted identifier
- value =
Optional.ofNullable(kv.identifierValue.getText());
+ for (ParserRuleContext hintContext : hintContexts) {
+ SelectHintContext selectHintContext = (SelectHintContext)
hintContext;
+ for (HintStatementContext hintStatement :
selectHintContext.hintStatements) {
+ String hintName =
hintStatement.hintName.getText().toLowerCase(Locale.ROOT);
+ switch (hintName) {
+ case "set_var":
+ Map<String, Optional<String>> parameters =
Maps.newLinkedHashMap();
+ for (HintAssignmentContext kv :
hintStatement.parameters) {
+ if (kv.key != null) {
+ String parameterName =
visitIdentifierOrText(kv.key);
+ Optional<String> value = Optional.empty();
+ if (kv.constantValue != null) {
+ Literal literal = (Literal)
visit(kv.constantValue);
+ value =
Optional.ofNullable(literal.toLegacyLiteral().getStringValue());
+ } else if (kv.identifierValue != null) {
+ // maybe we should throw exception when
the identifierValue is quoted identifier
+ value =
Optional.ofNullable(kv.identifierValue.getText());
+ }
+ parameters.put(parameterName, value);
}
- parameters.put(parameterName, value);
}
- }
- hints.put(hintName, new SelectHintSetVar(hintName,
parameters));
- break;
- case "leading":
- List<String> leadingParameters = new ArrayList<String>();
- for (HintAssignmentContext kv : hintStatement.parameters) {
- if (kv.key != null) {
- String parameterName =
visitIdentifierOrText(kv.key);
- leadingParameters.add(parameterName);
+ hints.put(hintName, new SelectHintSetVar(hintName,
parameters));
+ break;
+ case "leading":
+ List<String> leadingParameters = new
ArrayList<String>();
+ for (HintAssignmentContext kv :
hintStatement.parameters) {
+ if (kv.key != null) {
+ String parameterName =
visitIdentifierOrText(kv.key);
+ leadingParameters.add(parameterName);
+ }
}
- }
- hints.put(hintName, new SelectHintLeading(hintName,
leadingParameters));
- break;
- case "ordered":
- hints.put(hintName, new SelectHintOrdered(hintName));
- break;
- default:
- break;
+ hints.put(hintName, new SelectHintLeading(hintName,
leadingParameters));
+ break;
+ case "ordered":
+ hints.put(hintName, new SelectHintOrdered(hintName));
+ break;
+ default:
+ break;
+ }
}
}
return new LogicalSelectHint<>(hints, logicalPlan);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java
index 70065c7edae..7bf7c7737bf 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java
@@ -27,13 +27,16 @@ import
org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.ParserRuleContext;
+import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.atn.PredictionMode;
import org.antlr.v4.runtime.misc.ParseCancellationException;
import java.util.List;
+import java.util.Map;
import java.util.function.Function;
/**
@@ -80,12 +83,41 @@ public class NereidsParser {
private <T> T parse(String sql, Function<DorisParser, ParserRuleContext>
parseFunction) {
ParserRuleContext tree = toAst(sql, parseFunction);
- LogicalPlanBuilder logicalPlanBuilder = new LogicalPlanBuilder();
+ LogicalPlanBuilder logicalPlanBuilder = new
LogicalPlanBuilder(getHintMap(sql, DorisParser::selectHint));
return (T) logicalPlanBuilder.visit(tree);
}
+ /** get hint map */
+ public static Map<Integer, ParserRuleContext> getHintMap(String sql,
+
Function<DorisParser, ParserRuleContext> parseFunction) {
+ // parse hint first round
+ DorisLexer hintLexer = new DorisLexer(new
CaseInsensitiveStream(CharStreams.fromString(sql)));
+ hintLexer.setChannel2(true);
+ CommonTokenStream hintTokenStream = new CommonTokenStream(hintLexer);
+
+ Map<Integer, ParserRuleContext> selectHintMap = Maps.newHashMap();
+
+ Token hintToken = hintTokenStream.getTokenSource().nextToken();
+ while (hintToken != null && hintToken.getType() != DorisLexer.EOF) {
+ int tokenType = hintToken.getType();
+ if (tokenType == DorisLexer.HINT_WITH_CHANNEL) {
+ String hintSql = sql.substring(hintToken.getStartIndex(),
hintToken.getStopIndex() + 1);
+ DorisLexer newHintLexer = new DorisLexer(new
CaseInsensitiveStream(CharStreams.fromString(hintSql)));
+ newHintLexer.setChannel2(false);
+ CommonTokenStream newHintTokenStream = new
CommonTokenStream(newHintLexer);
+ DorisParser hintParser = new DorisParser(newHintTokenStream);
+ ParserRuleContext hintContext =
parseFunction.apply(hintParser);
+ selectHintMap.put(hintToken.getStartIndex(), hintContext);
+ }
+ hintToken = hintTokenStream.getTokenSource().nextToken();
+ }
+ return selectHintMap;
+ }
+
+ /** toAst */
private ParserRuleContext toAst(String sql, Function<DorisParser,
ParserRuleContext> parseFunction) {
DorisLexer lexer = new DorisLexer(new
CaseInsensitiveStream(CharStreams.fromString(sql)));
+ lexer.setChannel2(true);
CommonTokenStream tokenStream = new CommonTokenStream(lexer);
DorisParser parser = new DorisParser(tokenStream);
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java
index 69ba9d85db9..f834efde505 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/parser/NereidsParserTest.java
@@ -341,16 +341,10 @@ public class NereidsParserTest extends ParserTestBase {
parsePlan("select * from t1 join [broadcast] t2 on t1.key1=t2.key1")
.matches(logicalJoin().when(j -> j.getHint() ==
JoinHint.BROADCAST_RIGHT));
- parsePlan("select * from t1 join /*+ broadcast */ t2 on
t1.key1=t2.key1")
- .matches(logicalJoin().when(j -> j.getHint() ==
JoinHint.BROADCAST_RIGHT));
-
// invalid hint position
parsePlan("select * from [shuffle] t1 join t2 on t1.key1=t2.key1")
.assertThrowsExactly(ParseException.class);
- parsePlan("select * from /*+ shuffle */ t1 join t2 on t1.key1=t2.key1")
- .assertThrowsExactly(ParseException.class);
-
// invalid hint content
parsePlan("select * from t1 join [bucket] t2 on t1.key1=t2.key1")
.assertThrowsExactly(ParseException.class)
@@ -361,8 +355,6 @@ public class NereidsParserTest extends ParserTestBase {
+ "----------------------^^^");
// invalid multiple hints
- parsePlan("select * from t1 join /*+ shuffle , broadcast */ t2 on
t1.key1=t2.key1")
- .assertThrowsExactly(ParseException.class);
parsePlan("select * from t1 join [shuffle,broadcast] t2 on
t1.key1=t2.key1")
.assertThrowsExactly(ParseException.class);
diff --git a/regression-test/data/nereids_p0/hint/test_hint.out
b/regression-test/data/nereids_p0/hint/test_hint.out
new file mode 100644
index 00000000000..3edf5bf51c8
--- /dev/null
+++ b/regression-test/data/nereids_p0/hint/test_hint.out
@@ -0,0 +1,81 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select1_1 --
+PhysicalResultSink
+--hashAgg[GLOBAL]
+----PhysicalDistribute
+------hashAgg[LOCAL]
+--------hashJoin[INNER_JOIN](t1.c1 = t2.c2)
+----------PhysicalOlapScan[t2]
+----------PhysicalDistribute
+------------PhysicalOlapScan[t1]
+
+Hint log:
+Used: leading(t2 broadcast t1 )
+UnUsed:
+SyntaxError:
+
+-- !select1_2 --
+PhysicalResultSink
+--hashAgg[GLOBAL]
+----PhysicalDistribute
+------hashAgg[LOCAL]
+--------PhysicalStorageLayerAggregate
+
+-- !select1_3 --
+PhysicalResultSink
+--hashAgg[GLOBAL]
+----PhysicalDistribute
+------hashAgg[LOCAL]
+--------PhysicalStorageLayerAggregate
+
+-- !select1_4 --
+PhysicalResultSink
+--hashAgg[GLOBAL]
+----PhysicalDistribute
+------hashAgg[LOCAL]
+--------PhysicalStorageLayerAggregate
+
+-- !select1_5 --
+PhysicalResultSink
+--hashAgg[GLOBAL]
+----PhysicalDistribute
+------hashAgg[LOCAL]
+--------hashJoin[INNER_JOIN](t1.c1 = t2.c2)
+----------PhysicalOlapScan[t2]
+----------PhysicalDistribute
+------------PhysicalOlapScan[t1]
+
+Hint log:
+Used: leading(t2 broadcast t1 )
+UnUsed:
+SyntaxError:
+
+-- !select1_6 --
+PhysicalResultSink
+--hashAgg[GLOBAL]
+----PhysicalDistribute
+------hashAgg[LOCAL]
+--------hashJoin[INNER_JOIN](t1.c1 = t2.c2)
+----------PhysicalOlapScan[t2]
+----------PhysicalDistribute
+------------PhysicalOlapScan[t1]
+
+Hint log:
+Used: leading(t2 broadcast t1 )
+UnUsed:
+SyntaxError:
+
+-- !select1_7 --
+PhysicalResultSink
+--hashAgg[GLOBAL]
+----PhysicalDistribute
+------hashAgg[LOCAL]
+--------PhysicalStorageLayerAggregate
+
+-- !select1_8 --
+PhysicalResultSink
+--hashAgg[GLOBAL]
+----PhysicalDistribute
+------hashAgg[LOCAL]
+--------PhysicalStorageLayerAggregate
+
diff --git a/regression-test/data/nereids_p0/hint/test_leading.out
b/regression-test/data/nereids_p0/hint/test_leading.out
index 470dc086812..07cc7c16284 100644
--- a/regression-test/data/nereids_p0/hint/test_leading.out
+++ b/regression-test/data/nereids_p0/hint/test_leading.out
@@ -2538,8 +2538,8 @@ PhysicalResultSink
------------PhysicalOlapScan[t3]
Hint log:
-Used: leading(t1 broadcast t2 t3 )
-UnUsed:
+Used: leading(t1 broadcast t2 broadcast t3 )
+UnUsed:
SyntaxError:
-- !select95_4 --
diff --git a/regression-test/suites/nereids_p0/hint/test_hint.groovy
b/regression-test/suites/nereids_p0/hint/test_hint.groovy
new file mode 100644
index 00000000000..d847f17fe64
--- /dev/null
+++ b/regression-test/suites/nereids_p0/hint/test_hint.groovy
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+suite("test_hint") {
+ // create database and tables
+ sql 'DROP DATABASE IF EXISTS test_hint'
+ sql 'CREATE DATABASE IF NOT EXISTS test_hint'
+ sql 'use test_hint'
+
+ // setting planner to nereids
+ sql 'set exec_mem_limit=21G'
+ sql 'set be_number_for_test=1'
+ sql 'set parallel_pipeline_task_num=1'
+ sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION"
+ sql 'set enable_nereids_planner=true'
+ sql "set ignore_shape_nodes='PhysicalProject'"
+ sql 'set enable_fallback_to_original_planner=false'
+ sql 'set runtime_filter_mode=OFF'
+
+ // create tables
+ sql """drop table if exists t1;"""
+ sql """drop table if exists t2;"""
+
+ sql """create table t1 (c1 int, c11 int) distributed by hash(c1) buckets 3
properties('replication_num' = '1');"""
+ sql """create table t2 (c2 int, c22 int) distributed by hash(c2) buckets 3
properties('replication_num' = '1');"""
+
+// test hint positions, remove join in order to make sure shape stable when no
use hint
+ qt_select1_1 """explain shape plan select /*+ leading(t2 broadcast t1) */
count(*) from t1 join t2 on c1 = c2;"""
+
+ qt_select1_2 """explain shape plan /*+ leading(t2 broadcast t1) */ select
count(*) from t1;"""
+
+ qt_select1_3 """explain shape plan select /*+DBP: ROUTE={GROUP_ID(zjaq)}*/
count(*) from t1;"""
+
+ qt_select1_4 """explain shape plan/*+DBP: ROUTE={GROUP_ID(zjaq)}*/ select
count(*) from t1;"""
+
+ qt_select1_5 """explain shape plan /*+ leading(t2 broadcast t1) */ select
/*+ leading(t2 broadcast t1) */ count(*) from t1 join t2 on c1 = c2;"""
+
+ qt_select1_6 """explain shape plan/*+DBP: ROUTE={GROUP_ID(zjaq)}*/ select
/*+ leading(t2 broadcast t1) */ count(*) from t1 join t2 on c1 = c2;"""
+
+ qt_select1_7 """explain shape plan /*+ leading(t2 broadcast t1) */ select
/*+DBP: ROUTE={GROUP_ID(zjaq)}*/ count(*) from t1;"""
+
+ qt_select1_8 """explain shape plan /*+DBP: ROUTE={GROUP_ID(zjaq)}*/ select
/*+DBP: ROUTE={GROUP_ID(zjaq)}*/ count(*) from t1;"""
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]