This is an automated email from the ASF dual-hosted git repository.

lkishalmi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/netbeans.git


The following commit(s) were added to refs/heads/master by this push:
     new 595660f081 Made ANTLR Token LexerInputCharStream Friendly
595660f081 is described below

commit 595660f0811b0d24846ea91bd0a41060dcd652d7
Author: Laszlo Kishalmi <laszlo.kisha...@gmail.com>
AuthorDate: Fri Jan 13 10:31:19 2023 -0800

    Made ANTLR Token LexerInputCharStream Friendly
---
 .../netbeans/modules/languages/toml/TomlLexer.java |  4 +-
 .../spi/lexer/antlr4/AbstractAntlrLexerBridge.java | 74 ++++++++++++++++++++--
 .../modules/languages/antlr/v3/Antlr3Lexer.java    |  4 +-
 .../modules/languages/antlr/v4/Antlr4Lexer.java    |  4 +-
 4 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/ide/languages.toml/src/org/netbeans/modules/languages/toml/TomlLexer.java b/ide/languages.toml/src/org/netbeans/modules/languages/toml/TomlLexer.java
index 451242b8f0..e4fc2f6b2e 100644
--- a/ide/languages.toml/src/org/netbeans/modules/languages/toml/TomlLexer.java
+++ b/ide/languages.toml/src/org/netbeans/modules/languages/toml/TomlLexer.java
@@ -37,8 +37,8 @@ public final class TomlLexer extends AbstractAntlrLexerBridge<org.tomlj.internal
     }
 
     @Override
-    protected Token<TomlTokenId> mapToken(int antlrTokenType) {
-        switch (antlrTokenType) {
+    protected Token<TomlTokenId> mapToken(org.antlr.v4.runtime.Token antlrToken) {
+        switch (antlrToken.getType()) {
             case EOF:
                 return null;
 
diff --git a/ide/lexer.antlr4/src/org/netbeans/spi/lexer/antlr4/AbstractAntlrLexerBridge.java b/ide/lexer.antlr4/src/org/netbeans/spi/lexer/antlr4/AbstractAntlrLexerBridge.java
index 45ba32978d..1fdf66ab5c 100644
--- a/ide/lexer.antlr4/src/org/netbeans/spi/lexer/antlr4/AbstractAntlrLexerBridge.java
+++ b/ide/lexer.antlr4/src/org/netbeans/spi/lexer/antlr4/AbstractAntlrLexerBridge.java
@@ -20,10 +20,14 @@ package org.netbeans.spi.lexer.antlr4;
 
 import java.util.function.Function;
 import org.antlr.v4.runtime.CharStream;
+import org.antlr.v4.runtime.CommonToken;
 import org.antlr.v4.runtime.misc.IntegerList;
 import org.netbeans.api.lexer.Token;
 import org.antlr.v4.runtime.Lexer;
 import static org.antlr.v4.runtime.Recognizer.EOF;
+import org.antlr.v4.runtime.TokenSource;
+import org.antlr.v4.runtime.misc.Interval;
+import org.antlr.v4.runtime.misc.Pair;
 import org.netbeans.api.lexer.TokenId;
 import org.netbeans.spi.lexer.LexerRestartInfo;
 import org.netbeans.spi.lexer.TokenFactory;
@@ -58,6 +62,8 @@ public abstract class AbstractAntlrLexerBridge<L extends Lexer, T extends TokenI
         this.tokenFactory = info.tokenFactory();
         this.input = new LexerInputCharStream(info.input());
         this.lexer = lexerCreator.apply(input);
+        lexer.setTokenFactory(FIXED_TOKEN_FACTORY);
+
         if (info.state() != null) {
             ((LexerState<L>) info.state()).restore(lexer);
         }
@@ -77,15 +83,15 @@ public abstract class AbstractAntlrLexerBridge<L extends Lexer, T extends TokenI
         } else {
             nextToken = nextRealToken();
         }
-        return nextToken.getType() != EOF ? mapToken(nextToken.getType()) : null;
+        return nextToken.getType() != EOF ? mapToken(nextToken) : null;
     }
 
     /**
      * Implementations shall provide a suitable mapping between ANTLR lexer
-     * token types and NetBeans lexer tokens. The mapping is usually many to one,
+     * tokens and NetBeans lexer tokens. The mapping is usually many to one,
      * could be implemented as:
      * <pre>{@code
-     * switch (antlrTokenType) {
+     * switch (antlrToken.getType()) {
      *      case DOC_COMMENT:
      *      case BLOCK_COMMENT:
      *      case LINE_COMMENT:
@@ -97,11 +103,11 @@ public abstract class AbstractAntlrLexerBridge<L extends Lexer, T extends TokenI
      *          return token(SomeTokenId.ERROR);
      *  }
      * }</pre>
-     * @param antlrTokenType the token type from the ANTLR Lexer
+     * @param antlrToken the token from the ANTLR Lexer
      *
      * @return a NetBeans lexer token.
      */
-    protected abstract Token<T> mapToken(int antlrTokenType);
+    protected abstract Token<T> mapToken(org.antlr.v4.runtime.Token antlrToken);
 
     @Override
     /**
@@ -182,4 +188,62 @@ public abstract class AbstractAntlrLexerBridge<L extends Lexer, T extends TokenI
         }
     }
 
+    private static final org.antlr.v4.runtime.TokenFactory<FixedToken> FIXED_TOKEN_FACTORY = new org.antlr.v4.runtime.TokenFactory<FixedToken>() {
+        @Override
+        public FixedToken create(Pair<TokenSource, CharStream> source, int type, String text, 
+                int channel, int start, int stop, int line, int charPositionInLine) {
+
+            FixedToken token = new FixedToken(source, type, channel, start, stop);
+            token.setLine(line);
+            token.setCharPositionInLine(charPositionInLine);
+            token.setText(text);
+            return token;
+        }
+
+        @Override
+        public FixedToken create(int type, String text) {
+            return new FixedToken(type, text);
+        }
+
+    };
+
+    private static final class FixedToken extends CommonToken {
+
+        public FixedToken(Pair<TokenSource, CharStream> source, int type, int channel, int start, int stop) {
+            super(source, type, channel, start, stop);
+        }
+
+        public FixedToken(int type, String text) {
+            super(type, text);
+        }
+
+       @Override
+        public String getText() {
+            if (text != null ) {
+                return text;
+            }
+
+            CharStream input = getInputStream();
+            if (input != null ) {
+                // The original implementation in CommonToken does not honor the
+                // contract with UnsupportedOperationException on CharStream.size()
+                // and CharStream.getText which renders CommonToken broken on
+                // getText() calls. That makes toString() unusable when using
+                // LexerInputCharStream as well.
+                //
+                // While the stream size is unknown, and the getText() is somewhat
+                // limited in the LexerInputCharStream implementation. There is
+                // a good chance that the following call would go through.
+                try {
+                    return input.getText(Interval.of(start, stop));
+                } catch (UnsupportedOperationException ex) {
+                    // The original implementation returns "<EOF>" when EOF
+                    // is reached. As the situation here is not really known
+                    // returning an "<N/A>" looks as good as "<EOF>"
+                    return "<N/A>";
+                }
+            }
+            return null;
+        }
+    }
 }
diff --git a/java/languages.antlr/src/org/netbeans/modules/languages/antlr/v3/Antlr3Lexer.java b/java/languages.antlr/src/org/netbeans/modules/languages/antlr/v3/Antlr3Lexer.java
index 876c59f57c..6b84425a58 100644
--- a/java/languages.antlr/src/org/netbeans/modules/languages/antlr/v3/Antlr3Lexer.java
+++ b/java/languages.antlr/src/org/netbeans/modules/languages/antlr/v3/Antlr3Lexer.java
@@ -44,8 +44,8 @@ public final class Antlr3Lexer extends AbstractAntlrLexerBridge<ANTLRv3Lexer, An
     }
 
     @Override
-    protected Token<AntlrTokenId> mapToken(int antlrTokenType) {
-        switch (antlrTokenType) {
+    protected Token<AntlrTokenId> mapToken(org.antlr.v4.runtime.Token antlrToken) {
+        switch (antlrToken.getType()) {
             case TOKEN_REF:
                 return token(AntlrTokenId.TOKEN);
             case RULE_REF:
diff --git a/java/languages.antlr/src/org/netbeans/modules/languages/antlr/v4/Antlr4Lexer.java b/java/languages.antlr/src/org/netbeans/modules/languages/antlr/v4/Antlr4Lexer.java
index 0dabcb6a71..ac46026c72 100644
--- a/java/languages.antlr/src/org/netbeans/modules/languages/antlr/v4/Antlr4Lexer.java
+++ b/java/languages.antlr/src/org/netbeans/modules/languages/antlr/v4/Antlr4Lexer.java
@@ -44,8 +44,8 @@ public final class Antlr4Lexer extends AbstractAntlrLexerBridge<ANTLRv4Lexer, An
     }
 
     @Override
-    protected Token<AntlrTokenId> mapToken(int antlrTokenType) {
-        switch (antlrTokenType) {
+    protected Token<AntlrTokenId> mapToken(org.antlr.v4.runtime.Token antlrToken) {
+        switch (antlrToken.getType()) {
             case TOKEN_REF:
                 return token(TOKEN);
             case RULE_REF:


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@netbeans.apache.org
For additional commands, e-mail: commits-h...@netbeans.apache.org

For further information about the NetBeans mailing lists, visit:
https://cwiki.apache.org/confluence/display/NETBEANS/Mailing+lists

Reply via email to