This is an automated email from the ASF dual-hosted git repository.
VGalaxies pushed a commit to branch feature/subscription-column-filter
in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 6a626bfd2069a92f15d436ff68db8ad0402f1503
Author: Codex <codex@localhost>
AuthorDate: Fri Jun 12 15:53:23 2026 +0000
Use generated grammar for column filter parsing
---
.../columnfilter/ColumnFilterParser.java | 455 ++++++++-------------
1 file changed, 175 insertions(+), 280 deletions(-)
diff --git a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/subscription/columnfilter/ColumnFilterParser.java b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/subscription/columnfilter/ColumnFilterParser.java
index 94e573bbd7d..8b785988b25 100644
--- a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/subscription/columnfilter/ColumnFilterParser.java
+++ b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/subscription/columnfilter/ColumnFilterParser.java
@@ -33,15 +33,50 @@ import org.apache.iotdb.commons.queryengine.plan.relational.sql.ast.NotExpressio
import org.apache.iotdb.commons.queryengine.plan.relational.sql.ast.QualifiedName;
import org.apache.iotdb.commons.queryengine.plan.relational.sql.ast.StringLiteral;
import org.apache.iotdb.commons.queryengine.plan.relational.sql.parser.ParsingException;
+import org.apache.iotdb.db.relational.grammar.sql.ColumnFilterBaseVisitor;
+import org.apache.iotdb.db.relational.grammar.sql.ColumnFilterLexer;
import org.apache.iotdb.rpc.subscription.exception.SubscriptionException;
+import org.antlr.v4.runtime.BaseErrorListener;
+import org.antlr.v4.runtime.CharStreams;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.DefaultErrorStrategy;
+import org.antlr.v4.runtime.InputMismatchException;
+import org.antlr.v4.runtime.Parser;
+import org.antlr.v4.runtime.RecognitionException;
+import org.antlr.v4.runtime.Recognizer;
+import org.antlr.v4.runtime.Token;
+import org.antlr.v4.runtime.atn.PredictionMode;
+import org.antlr.v4.runtime.tree.TerminalNode;
+
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.List;
-import java.util.Locale;
+import java.util.Objects;
+import java.util.regex.Pattern;
public class ColumnFilterParser {
+ private static final Pattern SINGLE_FIELD_PATTERN =
+ Pattern.compile("\\s*(?:[A-Za-z_][A-Za-z_0-9]*|\"(?:\"\"|[^\"])*\")\\s*");
+ private static final Pattern FUNCTION_CALL_START_PATTERN =
+ Pattern.compile("\\s*(?:[A-Za-z_][A-Za-z_0-9]*|\"(?:\"\"|[^\"])*\")\\s*\\(.*");
+ private static final Pattern UNQUOTED_COMPARISON_RIGHT_PATTERN =
+ Pattern.compile("(?is).*(?:!=|<>|=)\\s*[A-Za-z_][A-Za-z_0-9]*\\s*");
+
+ private static final BaseErrorListener ERROR_LISTENER =
+ new BaseErrorListener() {
+ @Override
+ public void syntaxError(
+ final Recognizer<?, ?> recognizer,
+ final Object offendingSymbol,
+ final int line,
+ final int charPositionInLine,
+ final String message,
+ final RecognitionException e) {
+ throw new ParsingException(message, e, line, charPositionInLine + 1);
+ }
+ };
+
public Expression parseAndValidate(final String rawColumnFilter) throws SubscriptionException {
try {
final Expression expression = parse(rawColumnFilter);
@@ -55,325 +90,185 @@ public class ColumnFilterParser {
Expression parse(final String rawColumnFilter) {
if (rawColumnFilter == null || rawColumnFilter.trim().isEmpty()) {
- throw parsingException("column-filter should not be empty", 0);
+ throw new ParsingException("column-filter should not be empty", null, 1, 1);
}
- return new InternalParser(tokenize(rawColumnFilter)).parse();
- }
-
- private static List<Token> tokenize(final String expression) {
- final List<Token> tokens = new ArrayList<>();
- int offset = 0;
- while (offset < expression.length()) {
- final char ch = expression.charAt(offset);
- if (Character.isWhitespace(ch)) {
- offset++;
- continue;
- }
-
- switch (ch) {
- case '(':
- tokens.add(new Token(TokenType.LEFT_PAREN, "(", offset));
- offset++;
- continue;
- case ')':
- tokens.add(new Token(TokenType.RIGHT_PAREN, ")", offset));
- offset++;
- continue;
- case ',':
- tokens.add(new Token(TokenType.COMMA, ",", offset));
- offset++;
- continue;
- case '=':
- tokens.add(new Token(TokenType.EQ, "=", offset));
- offset++;
- continue;
- case '!':
- if (offset + 1 < expression.length() && expression.charAt(offset + 1) == '=') {
- tokens.add(new Token(TokenType.NEQ, "!=", offset));
- offset += 2;
- continue;
- }
- throw parsingException("unexpected character '!'", offset);
- case '<':
- if (offset + 1 < expression.length() && expression.charAt(offset + 1) == '>') {
- tokens.add(new Token(TokenType.NEQ, "<>", offset));
- offset += 2;
- continue;
- }
- throw parsingException("unsupported comparison operator '<'", offset);
- case '>':
- throw parsingException("unsupported comparison operator '>'", offset);
- case '"':
- final int start = offset;
- final StringBuilder builder = new StringBuilder();
- offset++;
- while (offset < expression.length()) {
- final char current = expression.charAt(offset);
- if (current == '"') {
- if (offset + 1 < expression.length() && expression.charAt(offset + 1) == '"') {
- builder.append('"');
- offset += 2;
- continue;
- }
- offset++;
- tokens.add(new Token(TokenType.QUOTED, builder.toString(), start));
- break;
- }
- builder.append(current);
- offset++;
- }
- if (tokens.isEmpty() || tokens.get(tokens.size() - 1).position != start) {
- throw parsingException("unterminated quoted literal", start);
- }
- continue;
- default:
- if (isIdentifierStart(ch)) {
- final int startPosition = offset;
- offset++;
- while (offset < expression.length() && isIdentifierPart(expression.charAt(offset))) {
- offset++;
+ validateUnsupportedSyntax(rawColumnFilter);
+
+ final ColumnFilterLexer lexer = new ColumnFilterLexer(CharStreams.fromString(rawColumnFilter));
+ final CommonTokenStream tokenStream = new CommonTokenStream(lexer);
+ final org.apache.iotdb.db.relational.grammar.sql.ColumnFilterParser parser =
+ new org.apache.iotdb.db.relational.grammar.sql.ColumnFilterParser(tokenStream);
+
+ lexer.removeErrorListeners();
+ lexer.addErrorListener(ERROR_LISTENER);
+ parser.removeErrorListeners();
+ parser.addErrorListener(ERROR_LISTENER);
+ parser.setErrorHandler(
+ new DefaultErrorStrategy() {
+ @Override
+ public Token recoverInline(final Parser recognizer) throws RecognitionException {
+ if (nextTokensContext == null) {
+ throw new InputMismatchException(recognizer);
}
- final String text = expression.substring(startPosition, offset);
- tokens.add(new Token(keywordType(text), text, startPosition));
- continue;
+ throw new InputMismatchException(recognizer, nextTokensState, nextTokensContext);
}
- throw parsingException(String.format("unexpected character '%s'", ch), offset);
- }
- }
- tokens.add(new Token(TokenType.EOF, "", expression.length()));
- return tokens;
- }
-
- private static boolean isIdentifierStart(final char ch) {
- return Character.isLetter(ch) || ch == '_';
- }
-
- private static boolean isIdentifierPart(final char ch) {
- return Character.isLetterOrDigit(ch) || ch == '_';
- }
+ });
- private static TokenType keywordType(final String text) {
- switch (text.toUpperCase(Locale.ROOT)) {
- case "TRUE":
- return TokenType.TRUE;
- case "FALSE":
- return TokenType.FALSE;
- case "AND":
- return TokenType.AND;
- case "OR":
- return TokenType.OR;
- case "NOT":
- return TokenType.NOT;
- case "IN":
- return TokenType.IN;
- case "LIKE":
- return TokenType.LIKE;
- case "REGEXP":
- return TokenType.REGEXP;
- case "IS":
- return TokenType.IS;
- case "NULL":
- return TokenType.NULL;
- case "ESCAPE":
- return TokenType.ESCAPE;
- default:
- return TokenType.IDENTIFIER;
+ try {
+ parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
+ return new AstBuilder().visit(parser.columnFilter());
+ } catch (final ParsingException e) {
+ tokenStream.seek(0);
+ parser.reset();
+ parser.getInterpreter().setPredictionMode(PredictionMode.LL);
+ return new AstBuilder().visit(parser.columnFilter());
}
}
- private static ParsingException parsingException(final String message, final int position) {
- return new ParsingException(message, null, 1, position + 1);
- }
-
- private enum TokenType {
- IDENTIFIER,
- QUOTED,
- TRUE,
- FALSE,
- AND,
- OR,
- NOT,
- IN,
- LIKE,
- REGEXP,
- IS,
- NULL,
- ESCAPE,
- EQ,
- NEQ,
- LEFT_PAREN,
- RIGHT_PAREN,
- COMMA,
- EOF
- }
-
- private static class Token {
- private final TokenType type;
- private final String text;
- private final int position;
-
- private Token(final TokenType type, final String text, final int position) {
- this.type = type;
- this.text = text;
- this.position = position;
+ private static void validateUnsupportedSyntax(final String rawColumnFilter) {
+ final String trimmedColumnFilter = rawColumnFilter.trim();
+ if ((SINGLE_FIELD_PATTERN.matcher(rawColumnFilter).matches()
+ && !"true".equalsIgnoreCase(trimmedColumnFilter)
+ && !"false".equalsIgnoreCase(trimmedColumnFilter))
+ || FUNCTION_CALL_START_PATTERN.matcher(rawColumnFilter).matches()) {
+ throw new ParsingException("expected column predicate operator", null, 1, 1);
+ }
+ if (UNQUOTED_COMPARISON_RIGHT_PATTERN.matcher(rawColumnFilter).matches()) {
+ throw new ParsingException("expected string literal", null, 1, 1);
+ }
+ for (int i = 0; i < rawColumnFilter.length(); i++) {
+ final char ch = rawColumnFilter.charAt(i);
+ if (ch == '<') {
+ if (i + 1 < rawColumnFilter.length() && rawColumnFilter.charAt(i + 1) == '>') {
+ i++;
+ continue;
+ }
+ throw new ParsingException("unsupported comparison operator '<'", null, 1, i + 1);
+ }
+ if (ch == '>') {
+ throw new ParsingException("unsupported comparison operator '>'", null, 1, i + 1);
+ }
+ if (ch == '+') {
+ throw new ParsingException("unexpected character '+'", null, 1, i + 1);
+ }
}
}
- private static class InternalParser {
-
- private final List<Token> tokens;
- private int cursor;
-
- private InternalParser(final List<Token> tokens) {
- this.tokens = tokens;
- }
+ private static class AstBuilder extends ColumnFilterBaseVisitor<Expression> {
- private Expression parse() {
- final Expression expression = parseOr();
- expect(TokenType.EOF);
- return expression;
+ @Override
+ public Expression visitColumnFilter(
+ final org.apache.iotdb.db.relational.grammar.sql.ColumnFilterParser.ColumnFilterContext
+ context) {
+ return visit(context.booleanExpression());
}
- private Expression parseOr() {
- Expression result = parseAnd();
- while (match(TokenType.OR)) {
- result = LogicalExpression.or(result, parseAnd());
- }
- return result;
+ @Override
+ public Expression visitPredicateExpression(
+ final org.apache.iotdb.db.relational.grammar.sql.ColumnFilterParser
+ .PredicateExpressionContext
+ context) {
+ return visit(context.predicate());
}
- private Expression parseAnd() {
- Expression result = parseNot();
- while (match(TokenType.AND)) {
- result = LogicalExpression.and(result, parseNot());
- }
- return result;
+ @Override
+ public Expression visitLogicalNot(
+ final org.apache.iotdb.db.relational.grammar.sql.ColumnFilterParser.LogicalNotContext
+ context) {
+ return new NotExpression(visit(context.booleanExpression()));
}
- private Expression parseNot() {
- if (match(TokenType.NOT)) {
- return new NotExpression(parseNot());
- }
- return parsePredicate();
+ @Override
+ public Expression visitLogicalBinary(
+ final org.apache.iotdb.db.relational.grammar.sql.ColumnFilterParser.LogicalBinaryContext
+ context) {
+ final Expression left = visit(context.booleanExpression(0));
+ final Expression right = visit(context.booleanExpression(1));
+ return Objects.nonNull(context.AND())
+ ? LogicalExpression.and(left, right)
+ : LogicalExpression.or(left, right);
}
- private Expression parsePredicate() {
- if (match(TokenType.LEFT_PAREN)) {
- final Expression expression = parseOr();
- expect(TokenType.RIGHT_PAREN);
- return expression;
+ @Override
+ public Expression visitPredicate(
+ final org.apache.iotdb.db.relational.grammar.sql.ColumnFilterParser.PredicateContext
+ context) {
+ if (Objects.nonNull(context.booleanValue())) {
+ return visit(context.booleanValue());
}
- if (match(TokenType.TRUE)) {
- return new BooleanLiteral("true");
- }
- if (match(TokenType.FALSE)) {
- return new BooleanLiteral("false");
+ if (Objects.nonNull(context.booleanExpression())) {
+ return visit(context.booleanExpression());
}
- final Identifier field = parseField();
- if (match(TokenType.EQ)) {
- return new ComparisonExpression(
- ComparisonExpression.Operator.EQUAL, field, parseStringLiteral());
- }
- if (match(TokenType.NEQ)) {
+ final Identifier field = toIdentifier(context.field());
+ if (Objects.nonNull(context.comparisonOperator())) {
return new ComparisonExpression(
- ComparisonExpression.Operator.NOT_EQUAL, field, parseStringLiteral());
+ Objects.nonNull(context.comparisonOperator().EQ())
+ ? ComparisonExpression.Operator.EQUAL
+ : ComparisonExpression.Operator.NOT_EQUAL,
+ field,
+ toStringLiteral(context.string(0)));
}
- if (match(TokenType.NOT)) {
- if (match(TokenType.IN)) {
- return new NotExpression(parseInPredicate(field));
- }
- if (match(TokenType.LIKE)) {
- return new NotExpression(parseLikePredicate(field));
+ if (Objects.nonNull(context.IN())) {
+ final List<Expression> values = new ArrayList<>();
+ for (final org.apache.iotdb.db.relational.grammar.sql.ColumnFilterParser.StringContext
+ string : context.string()) {
+ values.add(toStringLiteral(string));
}
- if (match(TokenType.REGEXP)) {
- return new NotExpression(parseRegexpFunction(field));
- }
- throw parsingException("expected IN, LIKE, or REGEXP after NOT", previous().position);
- }
- if (match(TokenType.IN)) {
- return parseInPredicate(field);
- }
- if (match(TokenType.LIKE)) {
- return parseLikePredicate(field);
- }
- if (match(TokenType.REGEXP)) {
- return parseRegexpFunction(field);
+ return maybeNegate(
+ new InPredicate(field, new InListExpression(values)), Objects.nonNull(context.NOT()));
}
- if (match(TokenType.IS)) {
- final boolean isNot = match(TokenType.NOT);
- expect(TokenType.NULL);
- final Expression isNull = new IsNullPredicate(field);
- return isNot ? new NotExpression(isNull) : isNull;
+ if (Objects.nonNull(context.LIKE())) {
+ final Expression like =
+ context.string().size() > 1
+ ? new LikePredicate(
+ field, toStringLiteral(context.string(0)), toStringLiteral(context.string(1)))
+ : new LikePredicate(field, toStringLiteral(context.string(0)));
+ return maybeNegate(like, Objects.nonNull(context.NOT()));
}
-
- throw parsingException("expected column predicate operator", peek().position);
- }
-
- private InPredicate parseInPredicate(final Identifier field) {
- expect(TokenType.LEFT_PAREN);
- final List<Expression> values = new ArrayList<>();
- values.add(parseStringLiteral());
- while (match(TokenType.COMMA)) {
- values.add(parseStringLiteral());
+ if (Objects.nonNull(context.REGEXP())) {
+ final Expression regexp =
+ new FunctionCall(
+ QualifiedName.of("regexp_like"),
+ List.of(field, toStringLiteral(context.string(0))));
+ return maybeNegate(regexp, Objects.nonNull(context.NOT()));
}
- expect(TokenType.RIGHT_PAREN);
- return new InPredicate(field, new InListExpression(values));
- }
-
- private LikePredicate parseLikePredicate(final Identifier field) {
- final StringLiteral pattern = parseStringLiteral();
- if (match(TokenType.ESCAPE)) {
- return new LikePredicate(field, pattern, parseStringLiteral());
+ if (Objects.nonNull(context.IS())) {
+ return maybeNegate(new IsNullPredicate(field), Objects.nonNull(context.NOT()));
}
- return new LikePredicate(field, pattern);
- }
-
- private FunctionCall parseRegexpFunction(final Identifier field) {
- return new FunctionCall(
- QualifiedName.of("regexp_like"), Arrays.asList(field, parseStringLiteral()));
- }
- private Identifier parseField() {
- if (match(TokenType.IDENTIFIER)) {
- return new Identifier(previous().text, false);
- }
- if (match(TokenType.QUOTED)) {
- return new Identifier(previous().text, true);
- }
- throw parsingException("expected column metadata field", peek().position);
+ throw new IllegalArgumentException("unsupported column-filter predicate");
}
- private StringLiteral parseStringLiteral() {
- if (match(TokenType.QUOTED)) {
- return new StringLiteral(previous().text);
- }
- throw parsingException("expected string literal", peek().position);
+ @Override
+ public Expression visitBooleanValue(
+ final org.apache.iotdb.db.relational.grammar.sql.ColumnFilterParser.BooleanValueContext
+ context) {
+ return Objects.nonNull(context.TRUE())
+ ? BooleanLiteral.TRUE_LITERAL
+ : BooleanLiteral.FALSE_LITERAL;
}
- private boolean match(final TokenType type) {
- if (peek().type != type) {
- return false;
- }
- cursor++;
- return true;
+ private static Expression maybeNegate(final Expression expression, final boolean negated) {
+ return negated ? new NotExpression(expression) : expression;
}
- private Token expect(final TokenType type) {
- if (peek().type == type) {
- cursor++;
- return previous();
+ private static Identifier toIdentifier(
+ final org.apache.iotdb.db.relational.grammar.sql.ColumnFilterParser.FieldContext context) {
+ final TerminalNode quoted = context.QUOTED_IDENTIFIER();
+ if (Objects.nonNull(quoted)) {
+ return new Identifier(unquote(quoted.getText()), true);
}
- throw parsingException(
- String.format("expected %s but found '%s'", type, peek().text), peek().position);
+ return new Identifier(context.IDENTIFIER().getText());
}
- private Token peek() {
- return tokens.get(cursor);
+ private static StringLiteral toStringLiteral(
+ final org.apache.iotdb.db.relational.grammar.sql.ColumnFilterParser.StringContext context) {
+ return new StringLiteral(unquote(context.QUOTED_IDENTIFIER().getText()));
}
- private Token previous() {
- return tokens.get(cursor - 1);
+ private static String unquote(final String text) {
+ return text.substring(1, text.length() - 1).replace("\"\"", "\"");
}
}
}