This is an automated email from the ASF dual-hosted git repository.
matthiasblaesing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/netbeans.git
The following commit(s) were added to refs/heads/master by this push:
new 18c2e51daf Stabilize html lexer in broken cases
new 5fc81d9194 Merge pull request #5756 from
matthiasblaesing/stabilize_html_lexer
18c2e51daf is described below
commit 18c2e51daf88fe1cbc881aebbb601f938c8f3bea
Author: Matthias Bläsing <[email protected]>
AuthorDate: Fri Mar 31 21:16:59 2023 +0200
Stabilize html lexer in broken cases
Faced with a structure like this:
"<Jm/ a=q</>>V />G>"
The HTML lexer hit an assertion as it tried to lex "a=q" as an
attribute value. The sequence "<Jm/" at first looks like the start of
a tag, but that is discarded once the "/" is read. Yet the lexer state
is not reset to INIT, but is set to ISP_TAG_X instead.
Attribute lexing assumes that a tag was lexed first and that this
information is still present, which is not the case here.
---
.../src/org/netbeans/lib/html/lexer/HtmlLexer.java | 6 +-
.../org/netbeans/lib/html/lexer/HtmlLexerTest.java | 121 +++++++++++++--------
2 files changed, 77 insertions(+), 50 deletions(-)
diff --git a/ide/html.lexer/src/org/netbeans/lib/html/lexer/HtmlLexer.java
b/ide/html.lexer/src/org/netbeans/lib/html/lexer/HtmlLexer.java
index 63b49d336a..5d14036d95 100644
--- a/ide/html.lexer/src/org/netbeans/lib/html/lexer/HtmlLexer.java
+++ b/ide/html.lexer/src/org/netbeans/lib/html/lexer/HtmlLexer.java
@@ -792,14 +792,14 @@ public final class HtmlLexer implements
Lexer<HTMLTokenId> {
case ISP_TAG_X_ERROR:
if(isWS(actChar)) {
- lexerState = ISP_TAG_X;
+ lexerState = tag == null ? INIT : ISP_TAG_X;
input.backup(1); //backup the WS
return token(HTMLTokenId.ERROR);
}
switch(actChar) {
case '/':
case '>':
- lexerState = ISP_TAG_X;
+ lexerState = tag == null ? INIT : ISP_TAG_X;
input.backup(1); //lets reread the token again
return token(HTMLTokenId.ERROR);
}
@@ -822,7 +822,7 @@ public final class HtmlLexer implements Lexer<HTMLTokenId> {
lexerState = INIT;
return token(HTMLTokenId.TAG_CLOSE_SYMBOL);
default:
- lexerState = ISP_TAG_X;
+ lexerState = tag == null ? INIT : ISP_TAG_X;
input.backup(1);
return token(HTMLTokenId.ERROR);
}
diff --git
a/ide/html.lexer/test/unit/src/org/netbeans/lib/html/lexer/HtmlLexerTest.java
b/ide/html.lexer/test/unit/src/org/netbeans/lib/html/lexer/HtmlLexerTest.java
index 23de053f36..22dca42754 100644
---
a/ide/html.lexer/test/unit/src/org/netbeans/lib/html/lexer/HtmlLexerTest.java
+++
b/ide/html.lexer/test/unit/src/org/netbeans/lib/html/lexer/HtmlLexerTest.java
@@ -375,52 +375,6 @@ public class HtmlLexerTest extends NbTestCase {
"=|OPERATOR", "'test'|VALUE", " |WS", "/>|TAG_CLOSE_SYMBOL");
}
-
-
//--------------------------------------------------------------------------
-
- public static void checkTokens(String text, String... descriptions) {
- TokenHierarchy<String> th = TokenHierarchy.create(text,
HTMLTokenId.language());
- TokenSequence<HTMLTokenId> ts =
th.tokenSequence(HTMLTokenId.language());
-// System.out.println(ts);
- checkTokens(ts, descriptions);
- }
-
- public static void checkTokens(TokenSequence<HTMLTokenId> ts, String...
descriptions) {
- ts.moveStart();
- for(String descr : descriptions) {
- //parse description
- int slashIndex = descr.indexOf('|');
- assert slashIndex >= 0;
-
- String image = descr.substring(0, slashIndex);
- String id = descr.substring(slashIndex + 1);
-
- assertTrue(ts.moveNext());
- Token t = ts.token();
- assertNotNull(t);
-
- if(image.length() > 0) {
- assertEquals(image, t.text().toString());
- }
-
- if(id.length() > 0) {
- assertEquals(id, t.id().name());
- }
- }
-
- StringBuilder b = new StringBuilder();
- while(ts.moveNext()) {
- Token t = ts.token();
- b.append("\"");
- b.append(t.text());
- b.append('|');
- b.append(t.id().name());
- b.append("\"");
- b.append(", ");
- }
- assertTrue("There are some tokens left: " + b.toString(), b.length()
== 0);
- }
-
public void testScriptType_value() {
TokenHierarchy th = TokenHierarchy.create("<script
type=\"text/plain\">plain</script>", HTMLTokenId.language());
TokenSequence ts = th.tokenSequence();
@@ -473,5 +427,78 @@ public class HtmlLexerTest extends NbTestCase {
assertTrue("Couldn't find any SCRIPT token!", false);
}
-
+
+ public void testBrokenCases() {
+ // Two cases that were caught by randomness test and added here as
+ // regression check
+ assertLexesWithoutAssertion("u >z//<=>=>\n"
+ + "= \n"
+ + ">>>/>>//w>\n"
+ + " yl>be<<=k<uA> F > Y <<<\n"
+ + " >Jj/>k >> ==a <z<=z ><=r>>> =N///>>>/><< \n"
+ + "EN>/>r> >p<<<L = > =<g=<C /=/ \n"
+ + " q=R> >B >=>z= />> J o </>/>zn/><<> z/>/>G=>nm< ");
+ assertLexesWithoutAssertion("<Jm/ a=q</>>V />G>< qw\n"
+ + " ></> /F>=< g>ggh i <>PoU =< =eB< < / <<= >><==<>/>
Q></>N=>/>=<>w/>=p //>>sNk <<>=</> >\n"
+ + "== <>Np >= <<</>U yZT />=<e=< >");
+ }
+
+
//--------------------------------------------------------------------------
+
+ public static void checkTokens(String text, String... descriptions) {
+ TokenHierarchy<String> th = TokenHierarchy.create(text,
HTMLTokenId.language());
+ TokenSequence<HTMLTokenId> ts =
th.tokenSequence(HTMLTokenId.language());
+// System.out.println(ts);
+ checkTokens(ts, descriptions);
+ }
+
+ public static void checkTokens(TokenSequence<HTMLTokenId> ts, String...
descriptions) {
+ ts.moveStart();
+ for(String descr : descriptions) {
+ //parse description
+ int slashIndex = descr.indexOf('|');
+ assert slashIndex >= 0;
+
+ String image = descr.substring(0, slashIndex);
+ String id = descr.substring(slashIndex + 1);
+
+ assertTrue(ts.moveNext());
+ Token t = ts.token();
+ assertNotNull(t);
+
+ if(image.length() > 0) {
+ assertEquals(image, t.text().toString());
+ }
+
+ if(id.length() > 0) {
+ assertEquals(id, t.id().name());
+ }
+ }
+
+ StringBuilder b = new StringBuilder();
+ while(ts.moveNext()) {
+ Token t = ts.token();
+ b.append("\"");
+ b.append(t.text());
+ b.append('|');
+ b.append(t.id().name());
+ b.append("\"");
+ b.append(", ");
+ }
+ assertTrue("There are some tokens left: " + b.toString(), b.length()
== 0);
+ }
+
+ @SuppressWarnings({"AssertWithSideEffects", "NestedAssignment"})
+ public void assertLexesWithoutAssertion(String input) {
+ boolean assertionsEnabled = false;
+ assert assertionsEnabled = true;
+ assertTrue("Test must be run with assertions enabled",
assertionsEnabled);
+ TokenHierarchy<String> th = TokenHierarchy.create(input,
HTMLTokenId.language());
+ TokenSequence<HTMLTokenId> ts =
th.tokenSequence(HTMLTokenId.language());
+ // if there is no token sequence, lexing is obviously broken
+ assertNotNull(ts);
+ // iterate the full token sequence, lexing happens on demand, so the
+ // whole sequence needs to be read
+ while(ts.moveNext()) {};
}
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
For further information about the NetBeans mailing lists, visit:
https://cwiki.apache.org/confluence/display/NETBEANS/Mailing+lists