This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new b4272eb8fec [fix](Nereids) fix lexer Backtracking or Ambiguity cause
of key word duplicate (#39590) (#41033)
b4272eb8fec is described below
commit b4272eb8fece8b69a735440d65d6a795bb5d0bf2
Author: LiBinfeng <[email protected]>
AuthorDate: Fri Sep 20 11:27:00 2024 +0800
[fix](Nereids) fix lexer Backtracking or Ambiguity cause of key word
duplicate (#39590) (#41033)
cherry-pick: #39590
Fixes Nereids timeouts caused by the parser taking too long; the regression was introduced by:
https://github.com/apache/doris/pull/39113
---
.../antlr4/org/apache/doris/nereids/DorisLexer.g4 | 36 ++--------------------
.../antlr4/org/apache/doris/nereids/DorisParser.g4 | 5 ++-
.../apache/doris/nereids/parser/NereidsParser.java | 8 ++---
3 files changed, 8 insertions(+), 41 deletions(-)
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
index dc5d6998838..abc52a812f9 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
@@ -47,22 +47,6 @@ lexer grammar DorisLexer;
}
}
- /**
- * This method will be called when we see '/*' and try to match it as a bracketed comment.
- * If the next character is '+', it should be parsed as hint later, and we cannot match
- * it as a bracketed comment.
- *
- * Returns true if the next character is '+'.
- */
- public boolean isHint() {
- int nextChar = _input.LA(1);
- if (nextChar == '+') {
- return true;
- } else {
- return false;
- }
- }
-
/**
* This method will be called when the character stream ends and try to find
out the
* unclosed bracketed comment.
@@ -72,19 +56,6 @@ lexer grammar DorisLexer;
public void markUnclosedComment() {
has_unclosed_bracketed_comment = true;
}
-
- // This variable will hold the external state
- private boolean channel2;
-
- // Method to set the external state
- public void setChannel2(boolean value) {
- this.channel2 = value;
- }
-
- // Method to decide the channel based on external state
- private boolean isChannel2() {
- return this.channel2;
- }
}
SEMICOLON: ';';
@@ -627,6 +598,7 @@ COLON: ':';
ARROW: '->';
HINT_START: '/*+';
HINT_END: '*/';
+COMMENT_START: '/*';
ATSIGN: '@';
DOUBLEATSIGN: '@@';
@@ -706,11 +678,7 @@ SIMPLE_COMMENT
;
BRACKETED_COMMENT
- : '/*' {!isHint()}? ( BRACKETED_COMMENT | . )*? ('*/' | {markUnclosedComment();} EOF) -> channel(HIDDEN)
- ;
-
-HINT_WITH_CHANNEL
- : {isChannel2()}? HINT_START .*? HINT_END -> channel(2)
+ : COMMENT_START ( BRACKETED_COMMENT | . )*? ('*/' | {markUnclosedComment();} EOF) -> channel(2)
;
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
index 957a10c3545..f449da72716 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
@@ -1192,7 +1192,7 @@ havingClause
: HAVING booleanExpression
;
-selectHint: HINT_START hintStatements+=hintStatement (COMMA? hintStatements+=hintStatement)* HINT_END;
+selectHint: hintStatements+=hintStatement (COMMA? hintStatements+=hintStatement)* HINT_END;
hintStatement
: hintName=identifier (LEFT_PAREN parameters+=hintAssignment (COMMA?
parameters+=hintAssignment)* RIGHT_PAREN)?
@@ -1809,6 +1809,7 @@ nonReserved
| COLOCATE
| COLUMNS
| COMMENT
+ | COMMENT_START
| COMMIT
| COMMITTED
| COMPACT
@@ -1894,6 +1895,8 @@ nonReserved
| HASH
| HDFS
| HELP
+ | HINT_END
+ | HINT_START
| HISTOGRAM
| HLL_UNION
| HOSTNAME
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java
index 3ba1e0e5431..f6f05392cb8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/NereidsParser.java
@@ -298,18 +298,15 @@ public class NereidsParser {
Function<DorisParser, ParserRuleContext> parseFunction) {
// parse hint first round
DorisLexer hintLexer = new DorisLexer(new
CaseInsensitiveStream(CharStreams.fromString(sql)));
- hintLexer.setChannel2(true);
CommonTokenStream hintTokenStream = new CommonTokenStream(hintLexer);
Map<Integer, ParserRuleContext> selectHintMap = Maps.newHashMap();
Token hintToken = hintTokenStream.getTokenSource().nextToken();
while (hintToken != null && hintToken.getType() != DorisLexer.EOF) {
- int tokenType = hintToken.getType();
- if (tokenType == DorisLexer.HINT_WITH_CHANNEL) {
- String hintSql = sql.substring(hintToken.getStartIndex(),
hintToken.getStopIndex() + 1);
+ if (hintToken.getChannel() == 2 && sql.charAt(hintToken.getStartIndex() + 2) == '+') {
+ String hintSql = sql.substring(hintToken.getStartIndex() + 3, hintToken.getStopIndex() + 1);
DorisLexer newHintLexer = new DorisLexer(new
CaseInsensitiveStream(CharStreams.fromString(hintSql)));
- newHintLexer.setChannel2(false);
CommonTokenStream newHintTokenStream = new
CommonTokenStream(newHintLexer);
DorisParser hintParser = new DorisParser(newHintTokenStream);
ParserRuleContext hintContext =
parseFunction.apply(hintParser);
@@ -323,7 +320,6 @@ public class NereidsParser {
/** toAst */
public static ParserRuleContext toAst(String sql, Function<DorisParser,
ParserRuleContext> parseFunction) {
DorisLexer lexer = new DorisLexer(new
CaseInsensitiveStream(CharStreams.fromString(sql)));
- lexer.setChannel2(true);
CommonTokenStream tokenStream = new CommonTokenStream(lexer);
DorisParser parser = new DorisParser(tokenStream);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]