http://git-wip-us.apache.org/repos/asf/groovy/blob/2c01e99f/subprojects/parser-antlr4/src/main/antlr4/org/apache/groovy/parser/antlr4/GroovyLexer.g4 ---------------------------------------------------------------------- diff --git a/subprojects/parser-antlr4/src/main/antlr4/org/apache/groovy/parser/antlr4/GroovyLexer.g4 b/subprojects/parser-antlr4/src/main/antlr4/org/apache/groovy/parser/antlr4/GroovyLexer.g4 new file mode 100644 index 0000000..7bcec42 --- /dev/null +++ b/subprojects/parser-antlr4/src/main/antlr4/org/apache/groovy/parser/antlr4/GroovyLexer.g4 @@ -0,0 +1,882 @@ +/* + * This file is adapted from the Antlr4 Java grammar which has the following license + * + * Copyright (c) 2013 Terence Parr, Sam Harwell + * All rights reserved. + * [The "BSD licence"] + * + * http://www.opensource.org/licenses/bsd-license.php + * + * Subsequent modifications by the Groovy community have been done under the Apache License v2: + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +/** + * The Groovy grammar is based on the official grammar for Java: + * https://github.com/antlr/grammars-v4/blob/master/java/Java.g4 + */ +lexer grammar GroovyLexer; + +options { + superClass = AbstractLexer; +} + +@header { + import static org.apache.groovy.parser.antlr4.SemanticPredicates.*; + import java.util.Deque; + import java.util.ArrayDeque; + import java.util.Map; + import java.util.HashMap; + import java.util.Set; + import java.util.HashSet; + import java.util.Collections; + import java.util.Arrays; +} + +@members { + /** Number of tokens emitted so far; the SH_COMMENT predicate requires it to still be 0. */ + private long tokenIndex = 0; + /** Type of the last token emitted on the default channel (0 until one has been emitted). */ + private int lastTokenType = 0; + + /** + * Record the index and token type of the current token while emitting tokens. + * Only tokens on the default channel update lastTokenType. A RollBackOne token + * additionally triggers rollbackOneChar() before it is handed to the superclass. + */ + @Override + public void emit(Token token) { + this.tokenIndex++; + + int tokenType = token.getType(); + if (Token.DEFAULT_CHANNEL == token.getChannel()) { + this.lastTokenType = tokenType; + } + + if (RollBackOne == tokenType) { + this.rollbackOneChar(); + } + + super.emit(token); + } + + /** + * Token types after which a '/' is not allowed to start a slashy string + * (consulted by isRegexAllowed()). + */ + private static final Set<Integer> REGEX_CHECK_SET = + Collections.unmodifiableSet( + new HashSet<>(Arrays.asList(Identifier, CapitalizedIdentifier, NullLiteral, BooleanLiteral, THIS, RPAREN, RBRACK, RBRACE, IntegerLiteral, FloatingPointLiteral, StringLiteral, GStringEnd, INC, DEC))); + + /** + * Used by the predicates of the slashy alternatives of StringLiteral and SlashyGStringBegin. + * + * @return false when the last default-channel token is in REGEX_CHECK_SET, true otherwise + */ + private boolean isRegexAllowed() { + return !REGEX_CHECK_SET.contains(this.lastTokenType); + } + + /** + * Just a hook, called from emit() for RollBackOne tokens; it does nothing here + * and will be overridden by GroovyLangLexer. + */ + protected void rollbackOneChar() {} + + /** + * An opening bracket recorded by enterParen(): its text, the lastTokenType value + * at the time it was recorded, and its line and column. + */ + private static class Paren { + private final String text; + private final int lastTokenType; + private final int line; + private final int column; + + public Paren(String text, int lastTokenType, int line, int column) { + this.text = text; + this.lastTokenType = lastTokenType; + this.line = line; + this.column = column; + } + + public String getText() { + return this.text; + } + + public int getLastTokenType() { + return this.lastTokenType; + } + + public int getLine() { + return line; + } + 
public int getColumn() { + return column; + } + + /** Hash computed from text, line and column, the same fields that equals() compares. */ + @Override + public int hashCode() { + return (int) (text.hashCode() * line + column); + } + + /** Two Parens are equal when text, line and column are equal; lastTokenType is not compared. */ + @Override + public boolean equals(Object obj) { + if (!(obj instanceof Paren)) { + return false; + } + + Paren other = (Paren) obj; + + return this.text.equals(other.text) && (this.line == other.line && this.column == other.column); + } + } + + /** Maps each opening bracket to the closing bracket that has to match it. */ + private static final Map<String, String> PAREN_MAP = Collections.unmodifiableMap(new HashMap<String, String>() { + { + put("(", ")"); + put("[", "]"); + put("{", "}"); + } + }); + + /** Brackets that have been opened and not yet closed; the innermost one is on top. */ + private final Deque<Paren> parenStack = new ArrayDeque<>(32); + /** + * Pushes the text of the current token onto parenStack, together with lastTokenType + * and the current position (line, char position in line + 1). + * Called from the actions of the opening-bracket rules. + */ + private void enterParen() { + parenStack.push(new Paren(getText(), this.lastTokenType, getLine(), getCharPositionInLine() + 1)); + } + /** + * Checks the current closing bracket against the bracket on top of parenStack and pops it. + * The first require(...) covers a closing bracket while nothing is open, the second one + * a closing bracket that is not the PAREN_MAP counterpart of the innermost open bracket. + * NOTE(review): require(...) is not defined in this file; it is presumably inherited from + * AbstractLexer and reports an error when the condition is false - confirm. + */ + private void exitParen() { + Paren paren = parenStack.peek(); + String text = getText(); + + require(null != paren, "Too many '" + text + "'"); + require(text.equals(PAREN_MAP.get(paren.getText())), + "'" + paren.getText() + "'" + new PositionInfo(paren.getLine(), paren.getColumn()) + " can not match '" + text + "'"); + + parenStack.pop(); + } + /** + * @return true when the innermost open bracket is "[" or a "(" that was not recorded + * right after a TRY token; false when no bracket is open or the innermost is "{" + */ + private boolean isInsideParens() { + Paren paren = parenStack.peek(); + + // We just care about "(" and "[", inside which the new lines will be ignored. + // Notice: the new lines between "{" and "}" can not be ignored. + if (null == paren) { + return false; + } + return ("(".equals(paren.getText()) && TRY != paren.getLastTokenType()) // we don't treat try-paren(i.e. 
try (....)) as parenthesis + || "[".equals(paren.getText()); + } + /** + * Moves the current token to the hidden channel when isInsideParens() is true + * and leaves it untouched otherwise. Used by the NL and SL_COMMENT rules. + */ + private void ignoreTokenInsideParens() { + if (!this.isInsideParens()) { + return; + } + + this.setChannel(Token.HIDDEN_CHANNEL); + } + /** + * Leaves the current token on its channel only when it is outside parens and + * isFollowedByWhiteSpaces(_input) holds; in every other case the token is hidden. + * Used by the ML_COMMENT rule. isFollowedByWhiteSpaces is statically imported from + * SemanticPredicates and is not shown in this file. + */ + private void ignoreMultiLineCommentConditionally() { + if (!this.isInsideParens() && isFollowedByWhiteSpaces(_input)) { + return; + } + + this.setChannel(Token.HIDDEN_CHANNEL); + } + + /** @return GroovySyntaxError.LEXER, marking errors reported here as lexer errors */ + @Override + public int getSyntaxErrorSource() { + return GroovySyntaxError.LEXER; + } + + /** @return the lexer's current line, as given by getLine() */ + @Override + public int getErrorLine() { + return getLine(); + } + + /** @return the lexer's current char position in line plus one */ + @Override + public int getErrorColumn() { + return getCharPositionInLine() + 1; + } +} + + +// §3.10.5 String Literals + +StringLiteral + : '"' DqStringCharacter*? '"' + | '\'' SqStringCharacter*? '\'' + + | '/' { this.isRegexAllowed() && _input.LA(1) != '*' }? + SlashyStringCharacter+? '/' + + | '"""' TdqStringCharacter*? '"""' + | '\'\'\'' TsqStringCharacter*? '\'\'\'' + | '$/' DollarSlashyStringCharacter+? '/$' + ; + +// Groovy gstring +GStringBegin + : '"' DqStringCharacter*? DOLLAR -> pushMode(DQ_GSTRING_MODE), pushMode(GSTRING_TYPE_SELECTOR_MODE) + ; +TdqGStringBegin + : '"""' TdqStringCharacter*? DOLLAR -> type(GStringBegin), pushMode(TDQ_GSTRING_MODE), pushMode(GSTRING_TYPE_SELECTOR_MODE) + ; +SlashyGStringBegin + : '/' { this.isRegexAllowed() && _input.LA(1) != '*' }? SlashyStringCharacter*? DOLLAR { isFollowedByJavaLetterInGString(_input) }? -> type(GStringBegin), pushMode(SLASHY_GSTRING_MODE), pushMode(GSTRING_TYPE_SELECTOR_MODE) + ; +DollarSlashyGStringBegin + : '$/' DollarSlashyStringCharacter*? DOLLAR { isFollowedByJavaLetterInGString(_input) }? 
-> type(GStringBegin), pushMode(DOLLAR_SLASHY_GSTRING_MODE), pushMode(GSTRING_TYPE_SELECTOR_MODE) + ; + +mode DQ_GSTRING_MODE; +GStringEnd + : '"' -> popMode + ; +GStringPart + : DOLLAR -> pushMode(GSTRING_TYPE_SELECTOR_MODE) + ; +GStringCharacter + : DqStringCharacter -> more + ; + +mode TDQ_GSTRING_MODE; +TdqGStringEnd + : '"""' -> type(GStringEnd), popMode + ; +TdqGStringPart + : DOLLAR -> type(GStringPart), pushMode(GSTRING_TYPE_SELECTOR_MODE) + ; +TdqGStringCharacter + : TdqStringCharacter -> more + ; + +mode SLASHY_GSTRING_MODE; +SlashyGStringEnd + : '$'? '/' -> type(GStringEnd), popMode + ; +SlashyGStringPart + : DOLLAR { isFollowedByJavaLetterInGString(_input) }? -> type(GStringPart), pushMode(GSTRING_TYPE_SELECTOR_MODE) + ; +SlashyGStringCharacter + : SlashyStringCharacter -> more + ; + +mode DOLLAR_SLASHY_GSTRING_MODE; +DollarSlashyGStringEnd + : '/$' -> type(GStringEnd), popMode + ; +DollarSlashyGStringPart + : DOLLAR { isFollowedByJavaLetterInGString(_input) }? -> type(GStringPart), pushMode(GSTRING_TYPE_SELECTOR_MODE) + ; +DollarSlashyGStringCharacter + : DollarSlashyStringCharacter -> more + ; + +mode GSTRING_TYPE_SELECTOR_MODE; +GStringLBrace + : '{' { this.enterParen(); } -> type(LBRACE), popMode, pushMode(DEFAULT_MODE) + ; +GStringIdentifier + : IdentifierInGString -> type(Identifier), popMode, pushMode(GSTRING_PATH_MODE) + ; + + +mode GSTRING_PATH_MODE; +GStringPathPart + : '.' IdentifierInGString + ; +RollBackOne + : . { + // a trick to handle GStrings followed by EOF properly + if (EOF == _input.LA(1) && ('"' == _input.LA(-1) || '/' == _input.LA(-1))) { + setType(GStringEnd); + } else { + setChannel(HIDDEN); + } + } -> popMode + ; + + +mode DEFAULT_MODE; +// character in the double quotation string. e.g. "a" +fragment +DqStringCharacter + : ~["\\$] + | EscapeSequence + ; + +// character in the single quotation string. e.g. 'a' +fragment +SqStringCharacter + : ~['\\] + | EscapeSequence + ; + +// character in the triple double quotation string. 
e.g. """a""" +fragment TdqStringCharacter + : ~["\\$] + | '"' { !(_input.LA(1) == '"' && _input.LA(2) == '"') }? + | EscapeSequence + ; + +// character in the triple single quotation string. e.g. '''a''' +fragment TsqStringCharacter + : ~['\\] + | '\'' { !(_input.LA(1) == '\'' && _input.LA(2) == '\'') }? + | EscapeSequence + ; + +// character in the slashy string. e.g. /a/ +fragment SlashyStringCharacter + : SlashEscape + | '$' { !isFollowedByJavaLetterInGString(_input) }? + | ~[/$\u0000] + ; + +// character in the dollar slashy string. e.g. $/a/$ +fragment DollarSlashyStringCharacter + : SlashEscape | DollarSlashEscape | DollarDollarEscape + | '/' { _input.LA(1) != '$' }? + | '$' { !isFollowedByJavaLetterInGString(_input) }? + | ~[/$\u0000] + ; + +// Groovy keywords +AS : 'as'; +DEF : 'def'; +IN : 'in'; +TRAIT : 'trait'; + + +// §3.9 Keywords +BuiltInPrimitiveType + : BOOLEAN + | CHAR + | BYTE + | SHORT + | INT + | LONG + | FLOAT + | DOUBLE + ; + +ABSTRACT : 'abstract'; +ASSERT : 'assert'; + +fragment +BOOLEAN : 'boolean'; + +BREAK : 'break'; + +fragment +BYTE : 'byte'; + +CASE : 'case'; +CATCH : 'catch'; + +fragment +CHAR : 'char'; + +CLASS : 'class'; +CONST : 'const'; +CONTINUE : 'continue'; +DEFAULT : 'default'; +DO : 'do'; + +fragment +DOUBLE : 'double'; + +ELSE : 'else'; +ENUM : 'enum'; +EXTENDS : 'extends'; +FINAL : 'final'; +FINALLY : 'finally'; + +fragment +FLOAT : 'float'; + + +FOR : 'for'; +IF : 'if'; +GOTO : 'goto'; +IMPLEMENTS : 'implements'; +IMPORT : 'import'; +INSTANCEOF : 'instanceof'; + +fragment +INT : 'int'; + +INTERFACE : 'interface'; + +fragment +LONG : 'long'; + +NATIVE : 'native'; +NEW : 'new'; +PACKAGE : 'package'; +PRIVATE : 'private'; +PROTECTED : 'protected'; +PUBLIC : 'public'; +RETURN : 'return'; + +fragment +SHORT : 'short'; + + +STATIC : 'static'; +STRICTFP : 'strictfp'; +SUPER : 'super'; +SWITCH : 'switch'; +SYNCHRONIZED : 'synchronized'; +THIS : 'this'; +THROW : 'throw'; +THROWS : 'throws'; +TRANSIENT : 'transient'; +TRY : 'try'; 
+VOID : 'void'; +VOLATILE : 'volatile'; +WHILE : 'while'; + + +// §3.10.1 Integer Literals + +IntegerLiteral + : DecimalIntegerLiteral + | HexIntegerLiteral + | OctalIntegerLiteral + | BinaryIntegerLiteral + ; + +fragment +DecimalIntegerLiteral + : DecimalNumeral IntegerTypeSuffix? + ; + +fragment +HexIntegerLiteral + : HexNumeral IntegerTypeSuffix? + ; + +fragment +OctalIntegerLiteral + : OctalNumeral IntegerTypeSuffix? + ; + +fragment +BinaryIntegerLiteral + : BinaryNumeral IntegerTypeSuffix? + ; + +fragment +IntegerTypeSuffix + : [lLiIgG] + ; + +fragment +DecimalNumeral + : '0' + | NonZeroDigit (Digits? | Underscores Digits) + ; + +fragment +Digits + : Digit (DigitOrUnderscore* Digit)? + ; + +fragment +Digit + : '0' + | NonZeroDigit + ; + +fragment +NonZeroDigit + : [1-9] + ; + +fragment +DigitOrUnderscore + : Digit + | '_' + ; + +fragment +Underscores + : '_'+ + ; + +fragment +HexNumeral + : '0' [xX] HexDigits + ; + +fragment +HexDigits + : HexDigit (HexDigitOrUnderscore* HexDigit)? + ; + +fragment +HexDigit + : [0-9a-fA-F] + ; + +fragment +HexDigitOrUnderscore + : HexDigit + | '_' + ; + +fragment +OctalNumeral + : '0' Underscores? OctalDigits + ; + +fragment +OctalDigits + : OctalDigit (OctalDigitOrUnderscore* OctalDigit)? + ; + +fragment +OctalDigit + : [0-7] + ; + +fragment +OctalDigitOrUnderscore + : OctalDigit + | '_' + ; + +fragment +BinaryNumeral + : '0' [bB] BinaryDigits + ; + +fragment +BinaryDigits + : BinaryDigit (BinaryDigitOrUnderscore* BinaryDigit)? + ; + +fragment +BinaryDigit + : [01] + ; + +fragment +BinaryDigitOrUnderscore + : BinaryDigit + | '_' + ; + +// §3.10.2 Floating-Point Literals + +FloatingPointLiteral + : DecimalFloatingPointLiteral + | HexadecimalFloatingPointLiteral + ; + +fragment +DecimalFloatingPointLiteral + : Digits '.' Digits ExponentPart? FloatTypeSuffix? + | Digits ExponentPart FloatTypeSuffix? 
+ | Digits FloatTypeSuffix + ; + +fragment +ExponentPart + : ExponentIndicator SignedInteger + ; + +fragment +ExponentIndicator + : [eE] + ; + +fragment +SignedInteger + : Sign? Digits + ; + +fragment +Sign + : [+-] + ; + +fragment +FloatTypeSuffix + : [fFdDgG] + ; + +fragment +HexadecimalFloatingPointLiteral + : HexSignificand BinaryExponent FloatTypeSuffix? + ; + +fragment +HexSignificand + : HexNumeral '.'? + | '0' [xX] HexDigits? '.' HexDigits + ; + +fragment +BinaryExponent + : BinaryExponentIndicator SignedInteger + ; + +fragment +BinaryExponentIndicator + : [pP] + ; + +// §3.10.3 Boolean Literals + +BooleanLiteral + : 'true' + | 'false' + ; + + +// §3.10.6 Escape Sequences for Character and String Literals + +fragment +EscapeSequence + : '\\' [btnfr"'\\] + | OctalEscape + | UnicodeEscape + | DollarEscape + | LineEscape + ; + + +fragment +OctalEscape + : '\\' OctalDigit + | '\\' OctalDigit OctalDigit + | '\\' ZeroToThree OctalDigit OctalDigit + ; + +// Groovy allows 1 or more u's after the backslash +fragment +UnicodeEscape + : '\\' 'u'+ HexDigit HexDigit HexDigit HexDigit + ; + +fragment +ZeroToThree + : [0-3] + ; + +// Groovy Escape Sequences + +fragment +DollarEscape + : '\\' DOLLAR + ; + +fragment +LineEscape + : '\\' '\r'? 
'\n' + ; + +fragment +SlashEscape + : '\\' '/' + ; + +fragment +DollarSlashEscape + : '$/$' + ; + +fragment +DollarDollarEscape + : '$$' + ; +// §3.10.7 The Null Literal + +NullLiteral + : 'null' + ; + +// Groovy Operators + +RANGE_INCLUSIVE : '..'; +RANGE_EXCLUSIVE : '..<'; +SPREAD_DOT : '*.'; +SAFE_DOT : '?.'; +ELVIS : '?:'; +METHOD_POINTER : '.&'; +METHOD_REFERENCE : '::'; +REGEX_FIND : '=~'; +REGEX_MATCH : '==~'; +POWER : '**'; +POWER_ASSIGN : '**='; +SPACESHIP : '<=>'; +IDENTICAL : '==='; +NOT_IDENTICAL : '!=='; +ARROW : '->'; + +// !internalPromise will be parsed as !in ternalPromise, so semantic predicates are necessary +NOT_INSTANCEOF : '!instanceof' { isFollowedBy(_input, ' ', '\t', '\r', '\n') }?; +NOT_IN : '!in' { isFollowedBy(_input, ' ', '\t', '\r', '\n', '[', '(', '{') }?; + +fragment +DOLLAR : '$'; + + +// §3.11 Separators + +LPAREN : '(' { this.enterParen(); } -> pushMode(DEFAULT_MODE); +RPAREN : ')' { this.exitParen(); } -> popMode; +LBRACE : '{' { this.enterParen(); } -> pushMode(DEFAULT_MODE); +RBRACE : '}' { this.exitParen(); } -> popMode; +LBRACK : '[' { this.enterParen(); } -> pushMode(DEFAULT_MODE); +RBRACK : ']' { this.exitParen(); } -> popMode; + +SEMI : ';'; +COMMA : ','; +DOT : '.'; + +// §3.12 Operators + +ASSIGN : '='; +GT : '>'; +LT : '<'; +NOT : '!'; +BITNOT : '~'; +QUESTION : '?'; +COLON : ':'; +EQUAL : '=='; +LE : '<='; +GE : '>='; +NOTEQUAL : '!='; +AND : '&&'; +OR : '||'; +INC : '++'; +DEC : '--'; +ADD : '+'; +SUB : '-'; +MUL : '*'; +DIV : '/'; +BITAND : '&'; +BITOR : '|'; +XOR : '^'; +MOD : '%'; + + +ADD_ASSIGN : '+='; +SUB_ASSIGN : '-='; +MUL_ASSIGN : '*='; +DIV_ASSIGN : '/='; +AND_ASSIGN : '&='; +OR_ASSIGN : '|='; +XOR_ASSIGN : '^='; +MOD_ASSIGN : '%='; +LSHIFT_ASSIGN : '<<='; +RSHIFT_ASSIGN : '>>='; +URSHIFT_ASSIGN : '>>>='; +ELVIS_ASSIGN : '?='; + + +// §3.8 Identifiers (must appear after all keywords in the grammar) +CapitalizedIdentifier + : [A-Z] JavaLetterOrDigit* + + // FIXME REMOVE THE FOLLOWING ALTERNATIVE. 
Groovy's identifier can be unicode escape(e.g. def \u4e00\u9fa5 = '123'), which will impact the performance and is pointless to support IMO + | [A-Z] (JavaLetterOrDigit | UnicodeEscape)* + ; + +Identifier + : JavaLetter JavaLetterOrDigit* + + // FIXME REMOVE THE FOLLOWING ALTERNATIVE. Groovy's identifier can be unicode escape(e.g. def \u4e00\u9fa5 = '123'), which will impact the performance and is pointless to support IMO + | (JavaLetter | UnicodeEscape) (JavaLetterOrDigit | UnicodeEscape)* + ; + +fragment +IdentifierInGString + : JavaLetterInGString JavaLetterOrDigitInGString* + ; + +fragment +JavaLetterInGString + : [a-zA-Z_] // these are the "java letters" below 0x7F, except for $ + | // covers all characters above 0x7F which are not a surrogate + ~[\u0000-\u007F\uD800-\uDBFF] + {Character.isJavaIdentifierStart(_input.LA(-1))}? + | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF + [\uD800-\uDBFF] [\uDC00-\uDFFF] + {Character.isJavaIdentifierStart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? + ; + +fragment +JavaLetterOrDigitInGString + : [a-zA-Z0-9_] // these are the "java letters or digits" below 0x7F, except for $ + | // covers all characters above 0x7F which are not a surrogate + ~[\u0000-\u007F\uD800-\uDBFF] + {Character.isJavaIdentifierPart(_input.LA(-1))}? + | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF + [\uD800-\uDBFF] [\uDC00-\uDFFF] + {Character.isJavaIdentifierPart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? + ; + + +fragment +JavaLetter + : [a-zA-Z$_] // these are the "java letters" below 0x7F + | // covers all characters above 0x7F which are not a surrogate + ~[\u0000-\u007F\uD800-\uDBFF] + {Character.isJavaIdentifierStart(_input.LA(-1))}? + | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF + [\uD800-\uDBFF] [\uDC00-\uDFFF] + {Character.isJavaIdentifierStart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? 
+ ; + +fragment +JavaLetterOrDigit + : [a-zA-Z0-9$_] // these are the "java letters or digits" below 0x7F + | // covers all characters above 0x7F which are not a surrogate + ~[\u0000-\u007F\uD800-\uDBFF] + {Character.isJavaIdentifierPart(_input.LA(-1))}? + | // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF + [\uD800-\uDBFF] [\uDC00-\uDFFF] + {Character.isJavaIdentifierPart(Character.toCodePoint((char)_input.LA(-2), (char)_input.LA(-1)))}? + ; + +// +// Additional symbols not defined in the lexical specification +// + +AT : '@'; +ELLIPSIS : '...'; + +// +// Whitespace, line escape and comments +// +WS : ([ \t\u000C]+ | LineEscape+) -> skip + ; + + +// Inside (...) and [...] but not {...}, ignore newlines. +NL : '\r'? '\n' { this.ignoreTokenInsideParens(); } + ; + +// Multiple-line comments(including groovydoc comments) +ML_COMMENT + : '/*' .*? '*/' { this.ignoreMultiLineCommentConditionally(); } -> type(NL) + ; + +// Single-line comments +SL_COMMENT + : '//' ~[\r\n\uFFFF]* { this.ignoreTokenInsideParens(); } -> type(NL) + ; + +// Script-header comments. +// The very first characters of the file may be "#!". If so, ignore the first line. +SH_COMMENT + : '#!' { 0 == this.tokenIndex }?<fail={"Shebang comment should appear at the first line"}> ~[\r\n\uFFFF]* -> skip + ; + +// Unexpected characters will be handled by groovy parser later. +UNEXPECTED_CHAR + : . + ;
http://git-wip-us.apache.org/repos/asf/groovy/blob/2c01e99f/subprojects/parser-antlr4/src/main/antlr4/org/apache/groovy/parser/antlr4/GroovyParser.g4 ---------------------------------------------------------------------- diff --git a/subprojects/parser-antlr4/src/main/antlr4/org/apache/groovy/parser/antlr4/GroovyParser.g4 b/subprojects/parser-antlr4/src/main/antlr4/org/apache/groovy/parser/antlr4/GroovyParser.g4 new file mode 100644 index 0000000..6ddf17f --- /dev/null +++ b/subprojects/parser-antlr4/src/main/antlr4/org/apache/groovy/parser/antlr4/GroovyParser.g4 @@ -0,0 +1,1270 @@ +/* + * This file is adapted from the Antlr4 Java grammar which has the following license + * + * Copyright (c) 2013 Terence Parr, Sam Harwell + * All rights reserved. + * [The "BSD licence"] + * + * http://www.opensource.org/licenses/bsd-license.php + * + * Subsequent modifications by the Groovy community have been done under the Apache License v2: + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +/** + * The Groovy grammar is based on the official grammar for Java: + * https://github.com/antlr/grammars-v4/blob/master/java/Java.g4 + */ +parser grammar GroovyParser; + +options { + tokenVocab = GroovyLexer; + contextSuperClass = GroovyParserRuleContext; + superClass = AbstractParser; +} + +@header { + import java.util.Map; + import org.codehaus.groovy.util.ListHashMap; + import org.apache.groovy.parser.antlr4.SemanticPredicates; + import org.codehaus.groovy.GroovyBugError; +} + +@members { + + /** + * Rule context base class (configured through the contextSuperClass option) that + * adds a map of node meta data, created on the first write. + */ + public static class GroovyParserRuleContext extends ParserRuleContext { + private Map metaDataMap = null; + + public GroovyParserRuleContext() {} + + public GroovyParserRuleContext(ParserRuleContext parent, int invokingStateNumber) { + super(parent, invokingStateNumber); + } + + /** + * Gets the node meta data. + * + * @param key - the meta data key + * @return the node meta data value for this key, or null when no meta data + * has been stored yet or nothing is stored under that key + */ + public <T> T getNodeMetaData(Object key) { + if (metaDataMap == null) { + return (T) null; + } + return (T) metaDataMap.get(key); + } + + /** + * Sets the node meta data. + * Note that the value is put into the map before the overwrite check, so when + * the error is thrown the new value has already replaced the old one. + * + * @param key - the meta data key + * @param value - the meta data value + * @throws GroovyBugError if key is null or there is already meta + * data under that key + */ + public void setNodeMetaData(Object key, Object value) { + if (key==null) throw new GroovyBugError("Tried to set meta data with null key on "+this+"."); + if (metaDataMap == null) { + metaDataMap = new ListHashMap(); + } + Object old = metaDataMap.put(key,value); + if (old!=null) throw new GroovyBugError("Tried to overwrite existing meta data "+this+"."); + } + + /** + * Sets the node meta data but allows overwriting values. 
+ * + * @param key - the meta data key + * @param value - the meta data value + * @return the old node meta data value for this key + * @throws GroovyBugError if key is null + */ + public Object putNodeMetaData(Object key, Object value) { + if (key == null) throw new GroovyBugError("Tried to set meta data with null key on " + this + "."); + if (metaDataMap == null) { + metaDataMap = new ListHashMap(); + } + return metaDataMap.put(key, value); + } + } + + /** @return GroovySyntaxError.PARSER, marking errors reported here as parser errors */ + @Override + public int getSyntaxErrorSource() { + return GroovySyntaxError.PARSER; + } + + /** + * @return the line of the token returned by _input.LT(-1), or -1 when there is no such token + */ + @Override + public int getErrorLine() { + Token token = _input.LT(-1); + + if (null == token) { + return -1; + } + + return token.getLine(); + } + + /** + * @return for the token returned by _input.LT(-1), its char position in line plus one + * plus the length of its text, i.e. the column right behind that token; + * -1 when there is no such token + */ + @Override + public int getErrorColumn() { + Token token = _input.LT(-1); + + if (null == token) { + return -1; + } + + return token.getCharPositionInLine() + 1 + token.getText().length(); + } +} + +// starting point for parsing a groovy file +compilationUnit + : nls + (packageDeclaration (sep | EOF))? (statement (sep | EOF))* EOF + ; + +packageDeclaration + : annotationsOpt PACKAGE qualifiedName + ; + +importDeclaration + : annotationsOpt IMPORT STATIC? qualifiedName (DOT MUL | AS alias=identifier)? + ; + + +typeDeclaration + : classOrInterfaceModifiersOpt classDeclaration + ; + +modifier + : classOrInterfaceModifier + | m=( NATIVE + | SYNCHRONIZED + | TRANSIENT + | VOLATILE + | DEF + ) + ; + +modifiersOpt + : modifiers? + ; + +modifiers + : (modifier nls)+ + ; + +classOrInterfaceModifiersOpt + : classOrInterfaceModifiers? 
+ ; + +classOrInterfaceModifiers + : (classOrInterfaceModifier nls)+ + ; + +classOrInterfaceModifier + : annotation // class or interface + | m=( PUBLIC // class or interface + | PROTECTED // class or interface + | PRIVATE // class or interface + | STATIC // class or interface + | ABSTRACT // class or interface + | FINAL // class only -- does not apply to interfaces + | STRICTFP // class or interface + | DEFAULT // interface only -- does not apply to classes + ) + ; + +variableModifier + : annotation + | m=( FINAL + | DEF + // Groovy supports declaring local variables as instance/class fields, + // e.g. import groovy.transform.*; @Field static List awe = [1, 2, 3] + // e.g. import groovy.transform.*; def a = { @Field public List awe = [1, 2, 3] } + // Notice: Groovy 2.4.7 just allows to declare local variables with the following modifiers when using annotations(e.g. @Field) + // TODO check whether the following modifiers accompany annotations or not. Because the legacy codes(e.g. benchmark/bench/heapsort.groovy) allow to declare the special instance/class fields without annotations, we leave it as it is for the time being + | PUBLIC + | PROTECTED + | PRIVATE + | STATIC + | ABSTRACT + | STRICTFP + ) + ; + +variableModifiersOpt + : variableModifiers? + ; + +variableModifiers + : (variableModifier nls)+ + ; + +typeParameters + : LT nls typeParameter (COMMA nls typeParameter)* nls GT + ; + +typeParameter + : className (EXTENDS nls typeBound)? + ; + +typeBound + : type (BITAND nls type)* + ; + +typeList + : type (COMMA nls type)* + ; + + +/** + * t 0: class; 1: interface; 2: enum; 3: annotation; 4: trait + */ +classDeclaration +locals[ int t ] + : ( CLASS { $t = 0; } + | INTERFACE { $t = 1; } + | ENUM { $t = 2; } + | AT INTERFACE { $t = 3; } + | TRAIT { $t = 4; } + ) + identifier nls + + ( + { 3 != $t }? + typeParameters? nls + ( + { 2 != $t }? + (EXTENDS nls + ( + // Only interface can extend more than one super class + {1 == $t}? scs=typeList + | + sc=type + ) + nls)? 
+ | + /* enum should not have type parameters and extends */ + ) + + ( + {1 != $t}? + (IMPLEMENTS nls is=typeList nls)? + | + /* interface should not implement other interfaces */ + ) + | + /* annotation should not have implements and extends*/ + ) + + classBody[$t] + ; + +// t see the comment of classDeclaration +classBody[int t] + : LBRACE nls + ( + /* Only enum can have enum constants */ + { 2 == $t }? + enumConstants? nls + | + + ) + classBodyDeclaration[$t]? (sep classBodyDeclaration[$t])* sep? RBRACE + ; + +enumConstants + : enumConstant (nls COMMA nls enumConstant)* (nls COMMA)? + ; + +enumConstant + : annotationsOpt identifier arguments? anonymousInnerClassDeclaration[1]? + ; + +classBodyDeclaration[int t] + : SEMI + | (STATIC nls)? block + | memberDeclaration[$t] + ; + +memberDeclaration[int t] + : methodDeclaration[0, $t] + | fieldDeclaration + | modifiersOpt classDeclaration + ; + +/** + * t 0: *class member* all kinds of method declaration AND constructor declaration, + * 1: normal method declaration, 2: abstract method declaration + * 3: normal method declaration OR abstract method declaration + * ct 9: script, other see the comment of classDeclaration + */ +methodDeclaration[int t, int ct] + : { 3 == $ct }? + returnType[$ct] methodName LPAREN rparen (DEFAULT nls elementValue)? + | + ( { 0 == $t }? + modifiersOpt typeParameters? + | modifiersOpt typeParameters? returnType[$ct] + | modifiers typeParameters? returnType[$ct]? + ) + methodName formalParameters (nls THROWS nls qualifiedClassNameList)? + ( + { 0 == $t || 3 == $t || 1 == $t}? + nls methodBody + | + { 0 == $t || 3 == $t || 2 == $t }? + /* no method body */ + ) + ; + +methodName + : identifier + | stringLiteral + ; + +returnType[int ct] + : + standardType + | + // annotation method can not have void return type + { 3 != $ct }? 
VOID + ; + +fieldDeclaration + : variableDeclaration[1] + ; + +variableDeclarators + : variableDeclarator (COMMA nls variableDeclarator)* + ; + +variableDeclarator + : variableDeclaratorId (nls ASSIGN nls variableInitializer)? + ; + +variableDeclaratorId + : identifier + ; + +variableInitializer + : statementExpression + | standardLambda + ; + +variableInitializers + : variableInitializer nls (COMMA nls variableInitializer nls)* nls COMMA? + ; + +standardType +options { baseContext = type; } + : primitiveType (LBRACK RBRACK)* + | standardClassOrInterfaceType (LBRACK RBRACK)* + ; + +type + : ( primitiveType + | + // !!! ERROR ALTERNATIVE !!! + VOID { require(false, "void is not allowed here", -4); } + ) (LBRACK RBRACK)* + | generalClassOrInterfaceType (LBRACK RBRACK)* + ; + +classOrInterfaceType + : ( qualifiedClassName + | qualifiedStandardClassName + ) typeArguments? + ; + +generalClassOrInterfaceType +options { baseContext = classOrInterfaceType; } + : qualifiedClassName typeArguments? + ; + +standardClassOrInterfaceType +options { baseContext = classOrInterfaceType; } + : qualifiedStandardClassName typeArguments? + ; + +primitiveType + : BuiltInPrimitiveType + ; + +typeArguments + : LT nls typeArgument (COMMA nls typeArgument)* nls GT + ; + +typeArgument + : type + | QUESTION ((EXTENDS | SUPER) nls type)? + ; + +qualifiedClassNameList + : qualifiedClassName (COMMA nls qualifiedClassName)* + ; + +formalParameters + : LPAREN formalParameterList? rparen + ; + +formalParameterList + : formalParameter (COMMA nls formalParameter)* (COMMA nls lastFormalParameter)? + | lastFormalParameter + ; + +formalParameter + : variableModifiersOpt type? variableDeclaratorId (nls ASSIGN nls expression)? + ; + +lastFormalParameter + : variableModifiersOpt type? ELLIPSIS variableDeclaratorId (nls ASSIGN nls expression)? 
+ ; + +methodBody + : block + ; + +qualifiedName + : qualifiedNameElement (DOT qualifiedNameElement)* + ; + +/** + * Java doesn't have the keywords 'as', 'in', 'def', 'trait' so we make some allowances + * for them in package names for better integration with existing Java packages + */ +qualifiedNameElement + : identifier + | DEF + | IN + | AS + | TRAIT + ; + +qualifiedClassName + : (qualifiedNameElement DOT)* identifier + ; + +qualifiedStandardClassName + : (qualifiedNameElement DOT)* (className DOT)* className + ; + +literal + : IntegerLiteral #integerLiteralAlt + | FloatingPointLiteral #floatingPointLiteralAlt + | stringLiteral #stringLiteralAlt + | BooleanLiteral #booleanLiteralAlt + | NullLiteral #nullLiteralAlt + ; + +// GSTRING + +gstring + : GStringBegin gstringValue (GStringPart gstringValue)* GStringEnd + ; + +gstringValue + : gstringPath + | LBRACE statementExpression? RBRACE + | closure + ; + +gstringPath + : identifier GStringPathPart* + ; + + +// LAMBDA EXPRESSION +lambda +options { baseContext = standardLambda; } + : lambdaParameters nls ARROW nls lambdaBody + ; + +standardLambda + : standardLambdaParameters nls ARROW nls lambdaBody + ; + +lambdaParameters +options { baseContext = standardLambdaParameters; } + : formalParameters + + // { a -> a * 2 } can be parsed as a lambda expression in a block, but we expect a closure. + // So it is better to put parameters in the parentheses and the following single parameter without parentheses is limited +// | variableDeclaratorId + ; + +standardLambdaParameters + : formalParameters + | variableDeclaratorId + ; + +lambdaBody + : block + | statementExpression + ; + + +// CLOSURE +closure +locals[ String footprint = "" ] + : LBRACE nls (formalParameterList? nls ARROW nls)? blockStatementsOpt RBRACE + ; + +blockStatementsOpt + : blockStatements? + ; + +blockStatements + : blockStatement (sep blockStatement)* sep? 
+ ; + +// ANNOTATIONS + +annotationsOpt + : (annotation nls)* + ; + +annotation + : AT annotationName ( LPAREN elementValues? rparen )? + ; + +elementValues + : elementValuePairs + | elementValue + ; + +annotationName : qualifiedClassName ; + +elementValuePairs + : elementValuePair (COMMA elementValuePair)* + ; + +elementValuePair + : elementValuePairName nls ASSIGN nls elementValue + ; + +elementValuePairName + : identifier + | keywords + ; + +// TODO verify the potential performance issue because rule expression contains sub-rule assignments(https://github.com/antlr/grammars-v4/issues/215) +elementValue + : elementValueArrayInitializer + | annotation + | expression + ; + +elementValueArrayInitializer + : LBRACK (elementValue (COMMA elementValue)*)? (COMMA)? RBRACK + ; + +// STATEMENTS / BLOCKS + +block + : LBRACE nls blockStatementsOpt RBRACE + ; + +blockStatement + : localVariableDeclaration + | statement + ; + +localVariableDeclaration + : { !SemanticPredicates.isInvalidLocalVariableDeclaration(_input) }? + variableDeclaration[0] + ; + +/** + * t 0: local variable declaration; 1: field declaration + */ +variableDeclaration[int t] + : ( { 0 == $t }? variableModifiers + | { 1 == $t }? modifiers + ) + type? variableDeclarators + | + ( { 0 == $t }? variableModifiersOpt + | { 1 == $t }? modifiersOpt + ) + type variableDeclarators + | + ( { 0 == $t }? variableModifiers + | { 1 == $t }? modifiers + ) + typeNamePairs nls ASSIGN nls variableInitializer + ; + +typeNamePairs + : LPAREN typeNamePair (COMMA typeNamePair)* rparen + ; + +typeNamePair + : type? 
variableDeclaratorId + ; + +variableNames + : LPAREN variableDeclaratorId (COMMA variableDeclaratorId)+ rparen + ; + +switchStatement +locals[ String footprint = "" ] + : SWITCH parExpression nls LBRACE nls switchBlockStatementGroup* nls RBRACE + ; + +loopStatement +locals[ String footprint = "" ] + : FOR LPAREN forControl rparen nls statement #forStmtAlt + | WHILE parExpression nls statement #whileStmtAlt + | DO nls statement nls WHILE parExpression #doWhileStmtAlt + ; + +/* + * isInsideLoop is computed by resolving $loopStatement::footprint, the footprint local + * declared on loopStatement. + * NOTE(review): the lookup presumably throws a NullPointerException when no loopStatement + * encloses this rule, which is why it is wrapped in try/catch - confirm against the generated parser. + */ +continueStatement +locals[ boolean isInsideLoop ] +@init { + try { + $isInsideLoop = null != $loopStatement::footprint; + } catch(NullPointerException e) { + $isInsideLoop = false; + } +} + : CONTINUE + { require($isInsideLoop, "the continue statement is only allowed inside loops", -8); } + identifier? + ; + +/* + * Same check as in continueStatement, repeated for $switchStatement::footprint so that + * break is accepted inside loops as well as inside switches. + */ +breakStatement +locals[ boolean isInsideLoop, boolean isInsideSwitch ] +@init { + try { + $isInsideLoop = null != $loopStatement::footprint; + } catch(NullPointerException e) { + $isInsideLoop = false; + } + + try { + $isInsideSwitch = null != $switchStatement::footprint; + } catch(NullPointerException e) { + $isInsideSwitch = false; + } +} + : BREAK + { require($isInsideLoop || $isInsideSwitch, "the break statement is only allowed inside loops or switches", -5); } + identifier? + ; + +tryCatchStatement +locals[boolean resourcesExists = false] + : TRY (resources { $resourcesExists = true; })? nls + block + ( + (nls catchClause)+ + (nls finallyBlock)? + | + nls finallyBlock + | + // try-with-resources may omit both the catch and the finally clauses + { $resourcesExists }?<fail={"catch or finally clauses are required for try-catch statement"}> + ) + ; + +assertStatement +locals[ String footprint = "" ] + : ASSERT ce=expression ((COLON | COMMA) nls me=expression)? + ; + +statement + : block #blockStmtAlt + | IF parExpression nls tb=statement ((nls | sep) ELSE nls fb=statement)? 
#ifElseStmtAlt + | loopStatement #loopStmtAlt + + | tryCatchStatement #tryCatchStmtAlt + + | switchStatement #switchStmtAlt + | SYNCHRONIZED parExpression nls block #synchronizedStmtAlt + | RETURN expression? #returnStmtAlt + | THROW expression #throwStmtAlt + + | breakStatement #breakStmtAlt + | continueStatement #continueStmtAlt + + | identifier COLON nls statement #labeledStmtAlt + + // Import statement. Can be used in any scope. Has "import x as y" also. + | importDeclaration #importStmtAlt + + | assertStatement #assertStmtAlt + + | typeDeclaration #typeDeclarationStmtAlt + | localVariableDeclaration #localVariableDeclarationStmtAlt + + // validate the method in the AstBuilder#visitMethodDeclaration, e.g. method without method body is not allowed + | { !SemanticPredicates.isInvalidMethodDeclaration(_input) }? + methodDeclaration[3, 9] #methodDeclarationStmtAlt + + | statementExpression #expressionStmtAlt + + | SEMI #emptyStmtAlt + ; + +catchClause + : CATCH LPAREN variableModifiersOpt catchType? identifier rparen nls block + ; + +catchType + : qualifiedClassName (BITOR qualifiedClassName)* + ; + +finallyBlock + : FINALLY nls block + ; + + +resources + : LPAREN nls resourceList sep? rparen + ; + +resourceList + : resource (sep resource)* + ; + +resource + : localVariableDeclaration + | expression + ; + + +/** Matches cases then statements, both of which are mandatory. + * To handle empty cases at the end, we add switchLabel* to statement. + */ +switchBlockStatementGroup + : (switchLabel nls)+ blockStatements + ; + +switchLabel + : CASE expression COLON + | DEFAULT COLON + ; + +forControl + : enhancedForControl + | classicalForControl + ; + +enhancedForControl + : variableModifiersOpt type? variableDeclaratorId (COLON | IN) expression + ; + +classicalForControl + : forInit? SEMI expression? SEMI forUpdate? 
+ ; + +forInit + : localVariableDeclaration + | expressionList[false] + ; + +forUpdate + : expressionList[false] + ; + + +// EXPRESSIONS + +castParExpression + : LPAREN type rparen + ; + +parExpression + : LPAREN (statementExpression | standardLambda) rparen + ; + +expressionList[boolean canSpread] + : expressionListElement[$canSpread] (COMMA expressionListElement[$canSpread])* + ; + +expressionListElement[boolean canSpread] + : ( MUL { require($canSpread, "spread operator is not allowed here", -1); } + | + ) expression + ; + +/** + * In order to resolve the syntactic ambiguities, e.g. (String)'abc' can be parsed as a cast expression or a parentheses-less method call(method name: (String), arguments: 'abc') + * try to match expression first. + * If it is not a normal expression, then try to match the command expression + */ +statementExpression + : expression #normalExprAlt + | commandExpression #commandExprAlt + ; + +postfixExpression +locals[ boolean isInsideAssert ] +@init { + try { + $isInsideAssert = null != $assertStatement::footprint; + } catch(NullPointerException e) { + $isInsideAssert = false; + } +} + : pathExpression op=(INC | DEC)? + ; + +expression + // qualified names, array expressions, method invocation, post inc/dec, type casting (level 1) + // The cast expression must be put before pathExpression to resolve the ambiguities between type casting and call on parentheses expression, e.g. 
(int)(1 / 2) + : castParExpression expression #castExprAlt + | postfixExpression #postfixExprAlt + + // ~(BNOT)/!(LNOT) (level 1) + | (BITNOT | NOT) nls expression #unaryNotExprAlt + + // math power operator (**) (level 2) + | left=expression op=POWER nls right=expression #powerExprAlt + + // ++(prefix)/--(prefix)/+(unary)/-(unary) (level 3) + | op=(INC | DEC | ADD | SUB) expression #unaryAddExprAlt + + // multiplication/division/modulo (level 4) + | left=expression nls op=(MUL | DIV | MOD) nls right=expression #multiplicativeExprAlt + + // binary addition/subtraction (level 5) + | left=expression op=(ADD | SUB) nls right=expression #additiveExprAlt + + // bit shift expressions (level 6) + | left=expression nls + ( ( dlOp=LT LT + | tgOp=GT GT GT + | dgOp=GT GT + ) + | rangeOp=( RANGE_INCLUSIVE + | RANGE_EXCLUSIVE + ) + ) nls + right=expression #shiftExprAlt + + // boolean relational expressions (level 7) + | left=expression nls op=(AS | INSTANCEOF | NOT_INSTANCEOF) nls type #relationalExprAlt + | left=expression nls op=(LE | GE | GT | LT | IN | NOT_IN) nls right=expression #relationalExprAlt + + // equality/inequality (==/!=) (level 8) + | left=expression nls + op=( IDENTICAL + | NOT_IDENTICAL + | EQUAL + | NOTEQUAL + | SPACESHIP + ) nls + right=expression #equalityExprAlt + + // regex find and match (=~ and ==~) (level 8.5) + // jez: moved =~ closer to precedence of == etc, as... 
+ // 'if (foo =~ "a.c")' is very close in intent to 'if (foo == "abc")' + | left=expression nls op=(REGEX_FIND | REGEX_MATCH) nls right=expression #regexExprAlt + + // bitwise or non-short-circuiting and (&) (level 9) + | left=expression nls op=BITAND nls right=expression #andExprAlt + + // exclusive or (^) (level 10) + | left=expression nls op=XOR nls right=expression #exclusiveOrExprAlt + + // bitwise or non-short-circuiting or (|) (level 11) + | left=expression nls op=BITOR nls right=expression #inclusiveOrExprAlt + + // logical and (&&) (level 12) + | left=expression nls op=AND nls right=expression #logicalAndExprAlt + + // logical or (||) (level 13) + | left=expression nls op=OR nls right=expression #logicalOrExprAlt + + // conditional test (level 14) + | <assoc=right> con=expression nls + ( QUESTION nls tb=expression nls COLON nls + | ELVIS nls + ) + fb=expression #conditionalExprAlt + + // assignment expression (level 15) + // "(a) = [1]" is a special case of multipleAssignmentExprAlt, it will be handled by assignmentExprAlt + | <assoc=right> left=variableNames nls op=ASSIGN nls right=statementExpression #multipleAssignmentExprAlt + | <assoc=right> left=expression nls + op=( ASSIGN + | ADD_ASSIGN + | SUB_ASSIGN + | MUL_ASSIGN + | DIV_ASSIGN + | AND_ASSIGN + | OR_ASSIGN + | XOR_ASSIGN + | RSHIFT_ASSIGN + | URSHIFT_ASSIGN + | LSHIFT_ASSIGN + | MOD_ASSIGN + | POWER_ASSIGN + | ELVIS_ASSIGN + ) nls + (statementExpression | standardLambda) #assignmentExprAlt + ; + +commandExpression + : pathExpression + ( + { SemanticPredicates.isFollowingMethodName($pathExpression.t) }? 
+ argumentList + | + /* if pathExpression is a method call, no need to have any more arguments */ + ) + + commandArgument* + ; + +commandArgument + : primary + // what follows is either a normal argument, parens, + // an appended block, an index operation, or nothing + // parens (a b already processed): + // a b c() d e -> a(b).c().d(e) + // a b c()() d e -> a(b).c().call().d(e) + // index (a b already processed): + // a b c[x] d e -> a(b).c[x].d(e) + // a b c[x][y] d e -> a(b).c[x][y].d(e) + // block (a b already processed): + // a b c {x} d e -> a(b).c({x}).d(e) + // + // parens/block completes method call + // index makes method call to property get with index + // + ( pathElement+ + | argumentList + )? + ; + +/** + * A "path expression" is a name or other primary, possibly qualified by various + * forms of dot, and/or followed by various kinds of brackets. + * It can be used for value or assigned to, or else further qualified, indexed, or called. + * It is called a "path" because it looks like a linear path through a data structure. + * Examples: x.y, x?.y, x*.y, x.@y; x[], x[y], x[y,z]; x(), x(y), x(y,z); x{s}; a.b[n].c(x).d{s} + * (Compare to a C lvalue, or LeftHandSide in the JLS section 15.26.) + * General expressions are built up from path expressions, using operators like '+' and '='. + * + * t 0: primary, 1: namePart, 2: arguments, 3: closure, 4: indexPropertyArgs, 5: namedPropertyArgs + */ +pathExpression returns [int t] + : primary (pathElement { $t = $pathElement.t; })* + ; + +pathElement returns [int t] +locals[ boolean isInsideClosure ] +@init { + try { + $isInsideClosure = null != $closure::footprint; + } catch(NullPointerException e) { + $isInsideClosure = false; + } +} + : nls + + // AT: foo.@bar selects the field (or attribute), not property + ( SPREAD_DOT nls (AT | nonWildcardTypeArguments)? // Spread operator: x*.y === x?.collect{it.y} + | SAFE_DOT nls (AT | nonWildcardTypeArguments)? 
// Optional-null operator: x?.y === (x==null)?null:x.y + | METHOD_POINTER nls // Method pointer operator: foo.&y == foo.metaClass.getMethodPointer(foo, "y") + | METHOD_REFERENCE nls // Method reference: System.out::println + | DOT nls (AT | nonWildcardTypeArguments)? // The all-powerful dot. + ) + namePart + { $t = 1; } + + | arguments + { $t = 2; } + + // Can always append a block, as foo{bar} + | nls closure + { $t = 3; } + + // Element selection is always an option, too. + // In Groovy, the stuff between brackets is a general argument list, + // since the bracket operator is transformed into a method call. + | indexPropertyArgs + { $t = 4; } + + | namedPropertyArgs + { $t = 5; } + ; + +/** + * This is the grammar for what can follow a dot: x.a, x.@a, x.&a, x.'a', etc. + */ +namePart + : + ( identifier + + // foo.'bar' is in all ways same as foo.bar, except that bar can have an arbitrary spelling + | stringLiteral + + | dynamicMemberName + + /* just a PROPOSAL, which has not been implemented yet! + // PROPOSAL, DECIDE: Is this inline form of the 'with' statement useful? + // Definition: a.{foo} === {with(a) {foo}} + // May cover some path expression use-cases previously handled by dynamic scoping (closure delegates). + | block + */ + + // let's allow common keywords as property names + | keywords + ) + ; + +/** + * If a dot is followed by a parenthesized or quoted expression, the member is computed dynamically, + * and the member selection is done only at runtime. This forces a statically unchecked member access. + */ +dynamicMemberName + : parExpression + | gstring + ; + +/** An expression may be followed by [...]. + * Unlike Java, these brackets may contain a general argument list, + * which is passed to the array element operator, which can make of it what it wants. + * The brackets may also be empty, as in T[]. This is how Groovy names array types. + */ +indexPropertyArgs + : QUESTION? LBRACK expressionList[true]? 
RBRACK + ; + +namedPropertyArgs + : LBRACK mapEntryList RBRACK + ; + +primary + : identifier #identifierPrmrAlt + | literal #literalPrmrAlt + | gstring #gstringPrmrAlt + | NEW nls creator #newPrmrAlt + | THIS #thisPrmrAlt + | SUPER #superPrmrAlt + | parExpression #parenPrmrAlt + | closure #closurePrmrAlt + | lambda #lambdaPrmrAlt + | list #listPrmrAlt + | map #mapPrmrAlt + | builtInType #typePrmrAlt + ; + +list +locals[boolean empty = true] + : LBRACK + ( + expressionList[true] + { $empty = false; } + )? + ( + COMMA + { require(!$empty, "Empty list constructor should not contain any comma(,)", -1); } + )? + RBRACK + ; + +map + : LBRACK + ( mapEntryList COMMA? + | COLON + ) + RBRACK + ; + +mapEntryList + : mapEntry (COMMA mapEntry)* + ; + +mapEntry + : mapEntryLabel COLON nls expression + | MUL COLON nls expression + ; + +mapEntryLabel + : keywords + | primary + ; + +creator + : createdName + ( nls arguments anonymousInnerClassDeclaration[0]? + | (LBRACK expression RBRACK)+ (b+=LBRACK RBRACK)* + | (b+=LBRACK RBRACK)+ nls arrayInitializer + ) + ; + +arrayInitializer + : LBRACE nls variableInitializers? nls RBRACE + ; + +/** + * t 0: anonymous inner class; 1: anonymous enum + */ +anonymousInnerClassDeclaration[int t] + : classBody[0] + ; + +createdName + : primitiveType + | qualifiedClassName typeArgumentsOrDiamond? + ; + +nonWildcardTypeArguments + : LT nls typeList nls GT + ; + +typeArgumentsOrDiamond + : LT GT + | typeArguments + ; + +arguments + : LPAREN + ( enhancedArgumentList? 
+ | enhancedArgumentList COMMA + ) + rparen + ; + +argumentList +options { baseContext = enhancedArgumentList; } + : argumentListElement + ( COMMA nls + argumentListElement + )* + ; + +enhancedArgumentList + : enhancedArgumentListElement + ( COMMA nls + enhancedArgumentListElement + )* + ; + +argumentListElement +options { baseContext = enhancedArgumentListElement; } + : expressionListElement[true] + | mapEntry + ; + +enhancedArgumentListElement + : expressionListElement[true] + | standardLambda + | mapEntry + ; + +stringLiteral + : StringLiteral + ; + +className + : CapitalizedIdentifier + ; + +identifier + : Identifier + | CapitalizedIdentifier + + | + // if 'static' followed by DOT, we can treat them as identifiers, e.g. static.unused = { -> } + { DOT == _input.LT(2).getType() }? + STATIC + ; + +builtInType + : BuiltInPrimitiveType + | VOID + ; + +keywords + : ABSTRACT + | AS + | ASSERT + | BREAK + | CASE + | CATCH + | CLASS + | CONST + | CONTINUE + | DEF + | DEFAULT + | DO + | ELSE + | ENUM + | EXTENDS + | FINAL + | FINALLY + | FOR + | GOTO + | IF + | IMPLEMENTS + | IMPORT + | IN + | INSTANCEOF + | INTERFACE + | NATIVE + | NEW + | PACKAGE + | RETURN + | STATIC + | STRICTFP + | SUPER + | SWITCH + | SYNCHRONIZED + | THIS + | THROW + | THROWS + | TRANSIENT + | TRAIT + | TRY + | VOLATILE + | WHILE + + | NullLiteral + | BooleanLiteral + + | BuiltInPrimitiveType + | VOID + + | PUBLIC + | PROTECTED + | PRIVATE + ; + +rparen + : RPAREN + | + // !!!Error Alternatives, impact the performance of parsing!!! 
+ { require(false, "Missing ')'"); } + ; + +nls + : NL* + ; + +sep : SEMI NL* + | NL+ (SEMI NL*)* + ; + + http://git-wip-us.apache.org/repos/asf/groovy/blob/2c01e99f/subprojects/parser-antlr4/src/main/groovy/org/apache/groovy/parser/antlr4/util/GroovyTestRig.groovy ---------------------------------------------------------------------- diff --git a/subprojects/parser-antlr4/src/main/groovy/org/apache/groovy/parser/antlr4/util/GroovyTestRig.groovy b/subprojects/parser-antlr4/src/main/groovy/org/apache/groovy/parser/antlr4/util/GroovyTestRig.groovy new file mode 100644 index 0000000..798ed5f --- /dev/null +++ b/subprojects/parser-antlr4/src/main/groovy/org/apache/groovy/parser/antlr4/util/GroovyTestRig.groovy @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.groovy.parser.antlr4.util + +import groovy.util.logging.Log +import org.antlr.v4.gui.TestRig +import org.antlr.v4.runtime.ANTLRInputStream +import org.antlr.v4.runtime.CommonTokenStream +import org.apache.groovy.parser.antlr4.GroovyLangLexer +import org.apache.groovy.parser.antlr4.GroovyLangParser + +/** + * A basic debug tool for investigating the parse trees and tokens of Groovy source code + * + * @author <a href="mailto:realblue...@hotmail.com">Daniel.Sun</a> + * Created on 2016/08/14 + */ +@Log +public class GroovyTestRig extends TestRig { + public GroovyTestRig(String[] args) throws Exception { + super(['Groovy', args.contains('-lexer') ? 'tokens' : 'compilationUnit', *args] as String[]); + } + + public void inspectParseTree() { + def inputFile = new File(this.inputFiles[0]); + + if (!(inputFile.exists() && inputFile.isFile())) { + log.info "Input file[${inputFile.absolutePath}] does not exist." + return; + } + + byte[] content = inputFile.bytes; + String text = new String(content, this.encoding ?: 'UTF-8'); + + GroovyLangLexer lexer = new GroovyLangLexer(new ANTLRInputStream(text)); + CommonTokenStream tokens = new CommonTokenStream(lexer); + GroovyLangParser parser = new GroovyLangParser(tokens); + + this.process(lexer, GroovyLangParser.class, parser, new ByteArrayInputStream(content), new StringReader(text)); + } + + public static void main(String[] args) { + if (args.length == 0) { + log.info "Usage: [-tokens] [-lexer] [-tree] [-gui] [-ps file.ps] [-encoding encodingname] [-trace] [-diagnostics] [-SLL] input-filename"; + return; + } + + if (args.every { it.startsWith('-') }) { + log.info "input-filename is required!" 
+ return; + } + + GroovyTestRig groovyTestRig = new GroovyTestRig(args); + + groovyTestRig.inspectParseTree(); + } +} + http://git-wip-us.apache.org/repos/asf/groovy/blob/2c01e99f/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/AbstractParser.java ---------------------------------------------------------------------- diff --git a/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/AbstractParser.java b/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/AbstractParser.java new file mode 100644 index 0000000..f216797 --- /dev/null +++ b/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/AbstractParser.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.groovy.parser; + +import groovy.lang.GroovyClassLoader; +import org.codehaus.groovy.ast.ModuleNode; +import org.codehaus.groovy.control.CompilerConfiguration; +import org.codehaus.groovy.control.ErrorCollector; +import org.codehaus.groovy.control.SourceUnit; + +import java.io.File; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * The base parser for creating a module node. 
+ * The concrete compiler configuration can be specified by the sub-classes of the base parser(e.g. Antlr2Parser, Antlr4Parser) + * + * @author <a href="mailto:realblue...@hotmail.com">Daniel.Sun</a> + * Created on 2016/08/14 + */ +public abstract class AbstractParser { + private static final Logger LOGGER = Logger.getLogger(AbstractParser.class.getName()); + + public ModuleNode parse(File file) { + if (null == file || !file.exists()) { + throw new IllegalArgumentException(file + " does not exist."); + } + + CompilerConfiguration configuration = this.getCompilerConfiguration(); + SourceUnit sourceUnit = new SourceUnit(file, configuration, new GroovyClassLoader(), new ErrorCollector(configuration)); + + return this.parse(sourceUnit); + } + + public ModuleNode parse(String name, String text) { + if (null == name) { + throw new IllegalArgumentException("name should not be null"); + } + + if (null == text) { + throw new IllegalArgumentException("text should not be null"); + } + + CompilerConfiguration configuration = this.getCompilerConfiguration(); + SourceUnit sourceUnit = new SourceUnit(name, text, configuration, new GroovyClassLoader(), new ErrorCollector(configuration)); + + return this.parse(sourceUnit); + } + + public ModuleNode parse(SourceUnit sourceUnit) { + try { + sourceUnit.parse(); + sourceUnit.completePhase(); + sourceUnit.nextPhase(); + sourceUnit.convert(); + + return sourceUnit.getAST(); + } catch (Exception e) { + LOGGER.log(Level.SEVERE, "Failed to parse " + sourceUnit.getName(), e); + + return null; + } + } + + protected abstract CompilerConfiguration getCompilerConfiguration(); +} http://git-wip-us.apache.org/repos/asf/groovy/blob/2c01e99f/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/Antlr2Parser.java ---------------------------------------------------------------------- diff --git a/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/Antlr2Parser.java 
b/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/Antlr2Parser.java new file mode 100644 index 0000000..6567896 --- /dev/null +++ b/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/Antlr2Parser.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.groovy.parser; + +import org.codehaus.groovy.control.CompilerConfiguration; + +/** + * The Antlr2 parser for creating a module node. 
+ * + * @author <a href="mailto:realblue...@hotmail.com">Daniel.Sun</a> + * Created on 2016/08/14 + */ +public class Antlr2Parser extends AbstractParser { + @Override + protected CompilerConfiguration getCompilerConfiguration() { + return CompilerConfiguration.DEFAULT; + } +} http://git-wip-us.apache.org/repos/asf/groovy/blob/2c01e99f/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/Antlr4Parser.java ---------------------------------------------------------------------- diff --git a/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/Antlr4Parser.java b/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/Antlr4Parser.java new file mode 100644 index 0000000..ebe569c --- /dev/null +++ b/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/Antlr4Parser.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.groovy.parser; + +import org.apache.groovy.parser.antlr4.Antlr4PluginFactory; +import org.codehaus.groovy.control.CompilerConfiguration; + +/** + * The Antlr4 parser for creating a module node. 
+ * + * @author <a href="mailto:realblue...@hotmail.com">Daniel.Sun</a> + * Created on 2016/08/14 + */ +public class Antlr4Parser extends AbstractParser { + + @Override + protected CompilerConfiguration getCompilerConfiguration() { + CompilerConfiguration configuration = new CompilerConfiguration(CompilerConfiguration.DEFAULT); + configuration.setPluginFactory(new Antlr4PluginFactory()); + + return configuration; + } +} http://git-wip-us.apache.org/repos/asf/groovy/blob/2c01e99f/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/antlr4/AbstractLexer.java ---------------------------------------------------------------------- diff --git a/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/antlr4/AbstractLexer.java b/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/antlr4/AbstractLexer.java new file mode 100644 index 0000000..b3781b4 --- /dev/null +++ b/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/antlr4/AbstractLexer.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.groovy.parser.antlr4; + +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.Lexer; + +/** + * Because antlr4 does not support generating lexer with specified interface, + * we have to create a super class for it and implement the interface. + */ +public abstract class AbstractLexer extends Lexer implements SyntaxErrorReportable { + public AbstractLexer(CharStream input) { + super(input); + } +} http://git-wip-us.apache.org/repos/asf/groovy/blob/2c01e99f/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/antlr4/AbstractParser.java ---------------------------------------------------------------------- diff --git a/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/antlr4/AbstractParser.java b/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/antlr4/AbstractParser.java new file mode 100644 index 0000000..95f493d --- /dev/null +++ b/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/antlr4/AbstractParser.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.groovy.parser.antlr4; + +import org.antlr.v4.runtime.Parser; +import org.antlr.v4.runtime.TokenStream; + +/** + * Because antlr4 does not support generating parser with specified interface, + * we have to create a super class for it and implement the interface. + */ +public abstract class AbstractParser extends Parser implements SyntaxErrorReportable { + public AbstractParser(TokenStream input) { + super(input); + } +} http://git-wip-us.apache.org/repos/asf/groovy/blob/2c01e99f/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/antlr4/Antlr4ParserPlugin.java ---------------------------------------------------------------------- diff --git a/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/antlr4/Antlr4ParserPlugin.java b/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/antlr4/Antlr4ParserPlugin.java new file mode 100644 index 0000000..4d91d29 --- /dev/null +++ b/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/antlr4/Antlr4ParserPlugin.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.groovy.parser.antlr4; + +import org.codehaus.groovy.ast.ModuleNode; +import org.codehaus.groovy.control.CompilationFailedException; +import org.codehaus.groovy.control.ParserPlugin; +import org.codehaus.groovy.control.SourceUnit; +import org.codehaus.groovy.syntax.ParserException; +import org.codehaus.groovy.syntax.Reduction; + +/** + * A parser plugin for the new parser + * + * @author <a href="mailto:realblue...@hotmail.com">Daniel.Sun</a> + * Created on 2016/08/14 + */ +public class Antlr4ParserPlugin implements ParserPlugin { + @Override + public Reduction parseCST(SourceUnit sourceUnit, java.io.Reader reader) throws CompilationFailedException { + return null; + } + + @Override + public ModuleNode buildAST(SourceUnit sourceUnit, java.lang.ClassLoader classLoader, Reduction cst) throws ParserException { + AstBuilder builder = new AstBuilder(sourceUnit, classLoader); + return builder.buildAST(); + } +} http://git-wip-us.apache.org/repos/asf/groovy/blob/2c01e99f/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/antlr4/Antlr4PluginFactory.java ---------------------------------------------------------------------- diff --git a/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/antlr4/Antlr4PluginFactory.java b/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/antlr4/Antlr4PluginFactory.java new file mode 100644 index 0000000..10309d0 --- /dev/null +++ b/subprojects/parser-antlr4/src/main/java/org/apache/groovy/parser/antlr4/Antlr4PluginFactory.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.groovy.parser.antlr4; + +import org.codehaus.groovy.control.ParserPlugin; +import org.codehaus.groovy.control.ParserPluginFactory; + +/** + * A parser plugin factory for the new parser + * + * @author <a href="mailto:realblue...@hotmail.com">Daniel.Sun</a> + * Created on 2016/08/14 + */ +public class Antlr4PluginFactory extends ParserPluginFactory { + @Override + public ParserPlugin createParserPlugin() { + return new Antlr4ParserPlugin(); + } +}