This is an automated email from the ASF dual-hosted git repository. qiaojialin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/master by this push: new 88feb511cb5 Move path related antlr defined files to tsfile module (#11813) 88feb511cb5 is described below commit 88feb511cb562c12d3a37c84d1c27f99282bd36c Author: Jackie Tien <jackietie...@gmail.com> AuthorDate: Tue Jan 2 20:00:11 2024 +0800 Move path related antlr defined files to tsfile module (#11813) --- .gitignore | 2 + .../db/queryengine/plan/parser/ASTVisitor.java | 7 +- iotdb-core/tsfile/pom.xml | 56 +++++- .../antlr4/org/apache/tsfile/parser/PathLexer.g4 | 212 +++++++++++++++++++++ .../antlr4/org/apache/tsfile/parser}/PathParser.g4 | 9 +- .../tsfile/common/constant/TsFileConstant.java | 4 - .../read/common/parser/PathNodesGenerator.java | 8 +- .../tsfile/read/common/parser/PathVisitor.java | 6 +- 8 files changed, 285 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index b33d12e672b..2ec38495e28 100644 --- a/.gitignore +++ b/.gitignore @@ -124,6 +124,8 @@ iotdb-client/client-py/LICENSE iotdb-core/antlr/gen/ iotdb-core/antlr/src/main/antlr4/org/apache/iotdb/db/qp/sql/gen/ iotdb-core/antlr/src/main/antlr4/org/apache/iotdb/db/qp/sql/IoTDBSqlLexer.tokens +iotdb-core/tsfile/gen/ +iotdb-core/tsfile/src/main/antlr4/org/apache/tsfile/parser/gen/ # Develocity .mvn/.gradle-enterprise/ diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/parser/ASTVisitor.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/parser/ASTVisitor.java index 3f12fe84b30..2d31694eab6 100644 --- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/parser/ASTVisitor.java +++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/plan/parser/ASTVisitor.java @@ -223,6 +223,7 @@ import java.util.Map; import java.util.Set; import java.util.function.BiConsumer; import java.util.function.Consumer; +import java.util.regex.Pattern; import java.util.stream.Collectors; import static org.apache.iotdb.commons.schema.SchemaConstant.ALL_RESULT_NODES; @@ -259,6 +260,10 @@ public class ASTVisitor extends IoTDBSqlParserBaseVisitor<Statement> { private static final String LIMIT_CONFIGURATION_ENABLED_ERROR_MSG = "Limit configuration is not enabled, please enable it first."; + private static final String NODE_NAME_IN_INTO_PATH_MATCHER = "([a-zA-Z0-9_${}\\u2E80-\\u9FFF]+)"; + private static final Pattern NODE_NAME_IN_INTO_PATH_PATTERN = + Pattern.compile(NODE_NAME_IN_INTO_PATH_MATCHER); + private static final String IGNORENULL = "IgnoreNull"; private ZoneId zoneId; @@ -2032,7 +2037,7 @@ public class ASTVisitor extends IoTDBSqlParserBaseVisitor<Statement> { private static void checkNodeNameInIntoPath(String src) { // ${} are allowed - if (!TsFileConstant.NODE_NAME_IN_INTO_PATH_PATTERN.matcher(src).matches()) { + if (!NODE_NAME_IN_INTO_PATH_PATTERN.matcher(src).matches()) { throw new SemanticException( String.format( "%s is illegal, unquoted node name in select into clause can only consist of digits, characters, $, { and }", diff --git a/iotdb-core/tsfile/pom.xml b/iotdb-core/tsfile/pom.xml index 07db3bbc312..4849955a60b 100644 --- a/iotdb-core/tsfile/pom.xml +++ b/iotdb-core/tsfile/pom.xml @@ -40,11 +40,6 @@ <artifactId>common-api</artifactId> <version>${project.version}</version> </dependency> - <dependency> - <groupId>org.apache.iotdb</groupId> - <artifactId>iotdb-antlr</artifactId> - <version>1.3.1-SNAPSHOT</version> - </dependency> <dependency> <groupId>com.github.luben</groupId> <artifactId>zstd-jni</artifactId> @@ -113,6 +108,57 @@ </dependencies> <build> <plugins> + <plugin> + <groupId>org.antlr</groupId> + <artifactId>antlr4-maven-plugin</artifactId> + <executions> + <execution> + <goals> + <goal>antlr4</goal> + </goals> + <configuration> + <listener>false</listener> + <visitor>true</visitor> + <libDirectory>src/main/antlr4/org/apache/tsfile/parser</libDirectory> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>build-helper-maven-plugin</artifactId> + <executions> + <execution> + <id>add-source</id> + <goals> + <goal>add-source</goal> + </goals> + <phase>generate-sources</phase> + <configuration> + <sources> + <source>${project.build.directory}/generated-sources/antlr4</source> + </sources> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-clean-plugin</artifactId> + <configuration> + <filesets> + <fileset> + <directory>${basedir}/gen</directory> + </fileset> + <fileset> + <directory>${basedir}/src</directory> + <includes> + <include>**/*.tokens</include> + </includes> + </fileset> + </filesets> + </configuration> + </plugin> <!-- Generate an OSGI compatible MANIFEST file. --> diff --git a/iotdb-core/tsfile/src/main/antlr4/org/apache/tsfile/parser/PathLexer.g4 b/iotdb-core/tsfile/src/main/antlr4/org/apache/tsfile/parser/PathLexer.g4 new file mode 100644 index 00000000000..0f682f4ea04 --- /dev/null +++ b/iotdb-core/tsfile/src/main/antlr4/org/apache/tsfile/parser/PathLexer.g4 @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +lexer grammar PathLexer; + +ROOT + : R O O T + ; + +/** + * 1. Whitespace + */ + +// Instead of discarding whitespace completely, send them to a channel invisable to the parser, so +// that the lexer could still produce WS tokens for the CLI's highlighter. +WS + : + [ \u000B\t\r\n]+ -> channel(HIDDEN) + ; + +/** + * 2. Keywords, new keywords should be added into IdentifierParser.g4 + */ + +// Common Keywords + +TIME + : T I M E + ; + +TIMESTAMP + : T I M E S T A M P + ; + +/** + * 3. Operators + */ + +// Operators. Arithmetics + +MINUS : '-'; +PLUS : '+'; +DIV : '/'; +MOD : '%'; + + +// Operators. Comparation + +OPERATOR_DEQ : '=='; +OPERATOR_SEQ : '='; +OPERATOR_GT : '>'; +OPERATOR_GTE : '>='; +OPERATOR_LT : '<'; +OPERATOR_LTE : '<='; +OPERATOR_NEQ : '!=' | '<>'; + +OPERATOR_BITWISE_AND : '&'; + +OPERATOR_LOGICAL_AND : '&&'; + +OPERATOR_BITWISE_OR : '|'; + +OPERATOR_LOGICAL_OR : '||'; + +OPERATOR_NOT : '!'; + +/** + * 4. Constructors Symbols + */ + +DOT : '.'; +COMMA : ','; +SEMI: ';'; +STAR: '*'; +DOUBLE_STAR: '**'; +LR_BRACKET : '('; +RR_BRACKET : ')'; +LS_BRACKET : '['; +RS_BRACKET : ']'; +DOUBLE_COLON: '::'; + +/** + * 5. Literals + */ + +// String Literal + +STRING_LITERAL + : DQUOTA_STRING + | SQUOTA_STRING + ; + + +// Date & Time Literal + +DURATION_LITERAL + : (INTEGER_LITERAL+ (Y|M O|W|D|H|M|S|M S|U S|N S))+ + ; + +DATETIME_LITERAL + : DATE_LITERAL ((T | WS) TIME_LITERAL (('+' | '-') INTEGER_LITERAL ':' INTEGER_LITERAL)?)? + ; + +fragment DATE_LITERAL + : INTEGER_LITERAL '-' INTEGER_LITERAL '-' INTEGER_LITERAL + | INTEGER_LITERAL '/' INTEGER_LITERAL '/' INTEGER_LITERAL + | INTEGER_LITERAL '.' INTEGER_LITERAL '.' INTEGER_LITERAL + ; + +fragment TIME_LITERAL + : INTEGER_LITERAL ':' INTEGER_LITERAL ':' INTEGER_LITERAL (DOT INTEGER_LITERAL)? + ; + +// Number Literal + +INTEGER_LITERAL + : DEC_DIGIT+ + ; + +EXPONENT_NUM_PART + : DEC_DIGIT+ ('e'|'E') ('+'|'-')? DEC_DIGIT+ + ; + +fragment DEC_DIGIT + : [0-9] + ; + + +ID + : NAME_CHAR+ + ; + +QUOTED_ID + : BQUOTA_STRING + ; + + + +fragment NAME_CHAR + : 'A'..'Z' + | 'a'..'z' + | '0'..'9' + | '_' + | ':' + | '@' + | '#' + | '$' + | '{' + | '}' + | CN_CHAR + ; + +fragment CN_CHAR + : '\u2E80'..'\u9FFF' + ; + +fragment DQUOTA_STRING + : '"' ( '""' | ~('"') )* '"' + ; + +fragment SQUOTA_STRING + : '\'' ( '\'\'' | ~('\'') )* '\'' + ; + +fragment BQUOTA_STRING + : '`' ( '``' | ~('`') )* '`' + ; + +// Characters and write it this way for case sensitivity + +fragment A: [aA]; +fragment B: [bB]; +fragment C: [cC]; +fragment D: [dD]; +fragment E: [eE]; +fragment F: [fF]; +fragment G: [gG]; +fragment H: [hH]; +fragment I: [iI]; +fragment J: [jJ]; +fragment K: [kK]; +fragment L: [lL]; +fragment M: [mM]; +fragment N: [nN]; +fragment O: [oO]; +fragment P: [pP]; +fragment Q: [qQ]; +fragment R: [rR]; +fragment S: [sS]; +fragment T: [tT]; +fragment U: [uU]; +fragment V: [vV]; +fragment W: [wW]; +fragment X: [xX]; +fragment Y: [yY]; +fragment Z: [zZ]; \ No newline at end of file diff --git a/iotdb-core/antlr/src/main/antlr4/org/apache/iotdb/db/qp/sql/PathParser.g4 b/iotdb-core/tsfile/src/main/antlr4/org/apache/tsfile/parser/PathParser.g4 similarity index 92% rename from iotdb-core/antlr/src/main/antlr4/org/apache/iotdb/db/qp/sql/PathParser.g4 rename to iotdb-core/tsfile/src/main/antlr4/org/apache/tsfile/parser/PathParser.g4 index eaf38e28edc..91acbed70ac 100644 --- a/iotdb-core/antlr/src/main/antlr4/org/apache/iotdb/db/qp/sql/PathParser.g4 +++ b/iotdb-core/tsfile/src/main/antlr4/org/apache/tsfile/parser/PathParser.g4 @@ -19,9 +19,8 @@ parser grammar PathParser; -options { tokenVocab=SqlLexer; } +options { tokenVocab=PathLexer; } -import IdentifierParser; /** * PartialPath and Path used by Session API and TsFile API should be parsed by Antlr4. @@ -56,6 +55,12 @@ nodeNameSlice | INTEGER_LITERAL ; +identifier + : DURATION_LITERAL + | ID + | QUOTED_ID + ; + wildcard : STAR | DOUBLE_STAR diff --git a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/common/constant/TsFileConstant.java b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/common/constant/TsFileConstant.java index b7fd8ccbe15..ae3dc1bcd04 100644 --- a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/common/constant/TsFileConstant.java +++ b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/common/constant/TsFileConstant.java @@ -48,9 +48,5 @@ public class TsFileConstant { private static final String NODE_NAME_MATCHER = "(\\*{0,2}[a-zA-Z0-9_\\u2E80-\\u9FFF]+\\*{0,2})"; public static final Pattern NODE_NAME_PATTERN = Pattern.compile(NODE_NAME_MATCHER); - private static final String NODE_NAME_IN_INTO_PATH_MATCHER = "([a-zA-Z0-9_${}\\u2E80-\\u9FFF]+)"; - public static final Pattern NODE_NAME_IN_INTO_PATH_PATTERN = - Pattern.compile(NODE_NAME_IN_INTO_PATH_MATCHER); - private TsFileConstant() {} } diff --git a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/common/parser/PathNodesGenerator.java b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/common/parser/PathNodesGenerator.java index 5aa49cffbd2..7d62a62ea92 100644 --- a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/common/parser/PathNodesGenerator.java +++ b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/common/parser/PathNodesGenerator.java @@ -19,8 +19,6 @@ package org.apache.iotdb.tsfile.read.common.parser; -import org.apache.iotdb.db.qp.sql.PathParser; -import org.apache.iotdb.db.qp.sql.SqlLexer; import org.apache.iotdb.tsfile.exception.PathParseException; import org.antlr.v4.runtime.CharStream; @@ -29,6 +27,8 @@ import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.atn.PredictionMode; import org.antlr.v4.runtime.misc.ParseCancellationException; import org.antlr.v4.runtime.tree.ParseTree; +import org.apache.tsfile.parser.PathLexer; +import org.apache.tsfile.parser.PathParser; /** convert String path to String[] nodes * */ public class PathNodesGenerator { @@ -60,7 +60,7 @@ public class PathNodesGenerator { CharStream charStream1 = CharStreams.fromString(path); - SqlLexer lexer1 = new SqlLexer(charStream1); + PathLexer lexer1 = new PathLexer(charStream1); lexer1.removeErrorListeners(); lexer1.addErrorListener(PathParseError.INSTANCE); @@ -80,7 +80,7 @@ public class PathNodesGenerator { } catch (Exception ex) { CharStream charStream2 = CharStreams.fromString(path); - SqlLexer lexer2 = new SqlLexer(charStream2); + PathLexer lexer2 = new PathLexer(charStream2); lexer2.removeErrorListeners(); lexer2.addErrorListener(PathParseError.INSTANCE); diff --git a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/common/parser/PathVisitor.java b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/common/parser/PathVisitor.java index d45dd93c1aa..0c36f7a0d72 100644 --- a/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/common/parser/PathVisitor.java +++ b/iotdb-core/tsfile/src/main/java/org/apache/iotdb/tsfile/read/common/parser/PathVisitor.java @@ -19,12 +19,12 @@ package org.apache.iotdb.tsfile.read.common.parser; -import org.apache.iotdb.db.qp.sql.PathParser; -import org.apache.iotdb.db.qp.sql.PathParser.NodeNameContext; -import org.apache.iotdb.db.qp.sql.PathParserBaseVisitor; import org.apache.iotdb.tsfile.common.constant.TsFileConstant; import org.apache.commons.lang3.math.NumberUtils; +import org.apache.tsfile.parser.PathParser; +import org.apache.tsfile.parser.PathParser.NodeNameContext; +import org.apache.tsfile.parser.PathParserBaseVisitor; import java.util.List;