This is an automated email from the ASF dual-hosted git repository.

emilles pushed a commit to branch GroovyLexer4
in repository https://gitbox.apache.org/repos/asf/groovy.git

commit 87bf88b640c17d3247bfa796b1192c3a96b14ad0
Author: Eric Milles <[email protected]>
AuthorDate: Fri Mar 20 10:01:41 2020 -0500

    GroovyLexer: parse error for control and format characters
    
    Inspired by: https://stackoverflow.com/a/59984875
    
    See also: https://github.com/groovy/groovy-eclipse/issues/1060
---
 src/antlr/GroovyLexer.g4                           |  4 +--
 .../groovy/parser/antlr4/SyntaxErrorTest.groovy    | 32 ++++++++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/src/antlr/GroovyLexer.g4 b/src/antlr/GroovyLexer.g4
index fd930cb..e238082 100644
--- a/src/antlr/GroovyLexer.g4
+++ b/src/antlr/GroovyLexer.g4
@@ -891,7 +891,7 @@ JavaLetter
     :   [a-zA-Z$_] // these are the "java letters" below 0x7F
     |   // covers all characters above 0x7F which are not a surrogate
         ~[\u0000-\u007F\uD800-\uDBFF]
-        { Character.isJavaIdentifierStart(_input.LA(-1)) }?
+        { Character.isJavaIdentifierStart(_input.LA(-1)) && !Character.isIdentifierIgnorable(_input.LA(-1)) }?
     |   // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
         [\uD800-\uDBFF] [\uDC00-\uDFFF]
         { Character.isJavaIdentifierStart(Character.toCodePoint((char) _input.LA(-2), (char) _input.LA(-1))) }?
@@ -907,7 +907,7 @@ JavaLetterOrDigit
     :   [a-zA-Z0-9$_] // these are the "java letters or digits" below 0x7F
     |   // covers all characters above 0x7F which are not a surrogate
         ~[\u0000-\u007F\uD800-\uDBFF]
-        { Character.isJavaIdentifierPart(_input.LA(-1)) }?
+        { Character.isJavaIdentifierPart(_input.LA(-1)) && !Character.isIdentifierIgnorable(_input.LA(-1)) }?
     |   // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
         [\uD800-\uDBFF] [\uDC00-\uDFFF]
         { Character.isJavaIdentifierPart(Character.toCodePoint((char) _input.LA(-2), (char) _input.LA(-1))) }?
diff --git a/subprojects/parser-antlr4/src/test/groovy/org/apache/groovy/parser/antlr4/SyntaxErrorTest.groovy b/subprojects/parser-antlr4/src/test/groovy/org/apache/groovy/parser/antlr4/SyntaxErrorTest.groovy
index 6d15e98..eae298a 100644
--- a/subprojects/parser-antlr4/src/test/groovy/org/apache/groovy/parser/antlr4/SyntaxErrorTest.groovy
+++ b/subprojects/parser-antlr4/src/test/groovy/org/apache/groovy/parser/antlr4/SyntaxErrorTest.groovy
@@ -75,6 +75,38 @@ final class SyntaxErrorTest extends GroovyTestCase {
         TestUtils.doRunAndShouldFail('fail/UnexpectedCharacter_01x.groovy')
     }
 
+    void 'test groovy core - UnexpectedCharacter 2'() {
+        def err = expectParseError '''\
+            |def \u200Bname = null
+            |'''.stripMargin()
+
+        // TODO: Could the character be escaped in the error message?
+        assert err == '''\
+            |startup failed:
+            |test.groovy: 1: Unexpected input: 'def \u200B' @ line 1, column 5.
+            |   def \u200Bname = null
+            |       ^
+            |
+            |1 error
+            |'''.stripMargin()
+
+        //
+
+        err = expectParseError '''\
+            |def na\u200Bme = null
+            |'''.stripMargin()
+
+        // TODO: Could the character be escaped in the error message?
+        assert err == '''\
+            |startup failed:
+            |test.groovy: 1: Unexpected input: '\u200B'; Expecting <EOF> @ line 1, column 7.
+            |   def na\u200Bme = null
+            |         ^
+            |
+            |1 error
+            |'''.stripMargin()
+    }
+
     void 'test groovy core - ParExpression'() {
         TestUtils.doRunAndShouldFail('fail/ParExpression_01x.groovy')
         TestUtils.doRunAndShouldFail('fail/ParExpression_02x.groovy')

Reply via email to