This enhances the HTML parser and makes it more fault tolerant. Until
now, HTML like the following would lead to a broken element structure
and/or exceptions being thrown:
<ul>
<li>adsa</li>
</li></li>
</ul>
(too many </li> tags).
This patch makes the parser more resilient to such code and yields
the correct structure:
<ul>
<li>adsa</li>
</ul>
... Make the planet renderable again.
2006-11-15 Roman Kennke <[EMAIL PROTECTED]>
* javax/swing/text/html/HTMLDocument.java
(HTMLReader.ParagraphAction.end): Call super instead of blockClose()
directly.
(HTMLReader.ParagraphAction.start): Call super instead of blockOpen()
directly.
(HTMLReader.parseStack): Removed.
(HTMLReader.blockClose): Simply call addContent() with ' '
instead of doing more complicated stuff. Removed parseStack
handling.
(HTMLReader.blockOpen): Removed parseStack handling.
(getInsertingReader): Removed parseStack init.
* gnu/javax/swing/text/html/parser/htmlValidator.java
(closeTag): Return true only when the tag actually should be
closed.
* gnu/javax/swing/text/html/parser/support/Parser.java
(_handleEndTag): Only actually close the tag when the validator
allows it.
/Roman
Index: javax/swing/text/html/HTMLDocument.java
===================================================================
RCS file: /cvsroot/classpath/classpath/javax/swing/text/html/HTMLDocument.java,v
retrieving revision 1.49
diff -u -1 -5 -r1.49 HTMLDocument.java
--- javax/swing/text/html/HTMLDocument.java 11 Nov 2006 11:02:07 -0000 1.49
+++ javax/swing/text/html/HTMLDocument.java 15 Nov 2006 13:31:01 -0000
@@ -511,36 +511,30 @@
* @author Anthony Balkissoon abalkiss at redhat dot com
*/
public class HTMLReader extends HTMLEditorKit.ParserCallback
{
/**
* Holds the current character attribute set *
*/
protected MutableAttributeSet charAttr = new SimpleAttributeSet();
protected Vector parseBuffer = new Vector();
/**
* A stack for character attribute sets *
*/
Stack charAttrStack = new Stack();
-
- /**
- * The parse stack. This stack holds HTML.Tag objects that reflect the
- * current position in the parsing process.
- */
- Stack parseStack = new Stack();
/** A mapping between HTML.Tag objects and the actions that handle them **/
HashMap tagToAction;
/** Tells us whether we've received the '</html>' tag yet **/
boolean endHTMLEncountered = false;
/**
* Related to the constructor with explicit insertTag
*/
int popDepth;
/**
* Related to the constructor with explicit insertTag
*/
@@ -804,41 +798,41 @@
{
blockOpen(HTML.Tag.IMPLIED, new SimpleAttributeSet());
addSpecialElement(t, a);
blockClose(HTML.Tag.IMPLIED);
}
}
public class ParagraphAction extends BlockAction
{
/**
* This method is called when a start tag is seen for one of the types
* of tags associated with this Action.
*/
public void start(HTML.Tag t, MutableAttributeSet a)
{
- blockOpen(t, a);
+ super.start(t, a);
inParagraph = true;
}
/**
* Called when an end tag is seen for one of the types of tags associated
* with this Action.
*/
public void end(HTML.Tag t)
{
- blockClose(t);
+ super.end(t);
inParagraph = false;
}
}
/**
* This action is performed when a <pre> tag is parsed.
*/
public class PreAction extends BlockAction
{
/**
* This method is called when a start tag is seen for one of the types
* of tags associated with this Action.
*/
public void start(HTML.Tag t, MutableAttributeSet a)
{
@@ -1500,31 +1494,30 @@
/**
* Instructs the parse buffer to create a block element with the given
* attributes.
*
* @param t the tag that requires opening a new block
* @param attr the attribute set for the new block
*/
protected void blockOpen(HTML.Tag t, MutableAttributeSet attr)
{
if (inImpliedParagraph)
blockClose(HTML.Tag.IMPLIED);
DefaultStyledDocument.ElementSpec element;
- parseStack.push(t);
AbstractDocument.AttributeContext ctx = getAttributeContext();
AttributeSet copy = attr.copyAttributes();
copy = ctx.addAttribute(copy, StyleConstants.NameAttribute, t);
element = new DefaultStyledDocument.ElementSpec(copy,
DefaultStyledDocument.ElementSpec.StartTagType);
parseBuffer.addElement(element);
}
/**
* Instructs the parse buffer to close the block element associated with
* the given HTML.Tag
*
* @param t the HTML.Tag that is closing its block
*/
protected void blockClose(HTML.Tag t)
@@ -1534,45 +1527,36 @@
if (inImpliedParagraph)
{
inImpliedParagraph = false;
inParagraph = false;
if (t != HTML.Tag.IMPLIED)
blockClose(HTML.Tag.IMPLIED);
}
// If the previous tag is a start tag then we insert a synthetic
// content tag.
DefaultStyledDocument.ElementSpec prev;
prev = (DefaultStyledDocument.ElementSpec)
parseBuffer.get(parseBuffer.size() - 1);
if (prev.getType() == DefaultStyledDocument.ElementSpec.StartTagType)
{
- AbstractDocument.AttributeContext ctx = getAttributeContext();
- AttributeSet attributes = ctx.getEmptySet();
- attributes = ctx.addAttribute(attributes, StyleConstants.NameAttribute,
- HTML.Tag.CONTENT);
- element = new DefaultStyledDocument.ElementSpec(attributes,
- DefaultStyledDocument.ElementSpec.ContentType,
- new char[0], 0, 0);
- parseBuffer.add(element);
+ addContent(new char[]{' '}, 0, 1);
}
element = new DefaultStyledDocument.ElementSpec(null,
DefaultStyledDocument.ElementSpec.EndTagType);
parseBuffer.addElement(element);
- if (parseStack.size() > 0)
- parseStack.pop();
}
/**
* Adds text to the appropriate context using the current character
* attribute set.
*
* @param data the text to add
* @param offs the offset at which to add it
* @param length the length of the text to add
*/
protected void addContent(char[] data, int offs, int length)
{
addContent(data, offs, length, true);
}
@@ -1722,34 +1706,30 @@
{
if (t != HTML.Tag.BODY)
super.handleStartTag(t, a, pos);
}
/**
* Ignore BODY.
*/
public void handleEndTag(HTML.Tag t, int pos)
{
if (t != HTML.Tag.BODY)
super.handleEndTag(t, pos);
}
};
- // Set the parent HTML tag.
- reader.parseStack.push(parent.getAttributes().getAttribute(
- StyleConstants.NameAttribute));
-
return reader;
}
/**
* Gets the child element that contains the attribute with the value or null.
* Not thread-safe.
*
* @param e - the element to begin search at
* @param attribute - the desired attribute
* @param value - the desired value
* @return the element found with the attribute and value specified or null if
* it is not found.
*/
public Element getElement(Element e, Object attribute, Object value)
{
Index: gnu/javax/swing/text/html/parser/htmlValidator.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/javax/swing/text/html/parser/htmlValidator.java,v
retrieving revision 1.5
diff -u -1 -5 -r1.5 htmlValidator.java
--- gnu/javax/swing/text/html/parser/htmlValidator.java 16 Jul 2006 18:25:53 -0000 1.5
+++ gnu/javax/swing/text/html/parser/htmlValidator.java 15 Nov 2006 13:31:02 -0000
@@ -141,31 +141,31 @@
{
h = (hTag) stack.getLast();
if (!h.forcibly_closed && !h.element.omitEnd())
s_error("Unclosed <" + h.tag + ">, closing at the end of stream");
handleSupposedEndTag(h.element);
closeTag(h.tgElement);
}
}
/**
* Remove the given tag from the stack or (if found) from the list
* of the forcibly closed tags.
*/
- public void closeTag(TagElement tElement)
+ public boolean closeTag(TagElement tElement)
{
HTML.Tag tag = tElement.getHTMLTag();
hTag x;
hTag close;
if (!stack.isEmpty())
{
ListIterator iter = stack.listIterator(stack.size());
while (iter.hasPrevious())
{
x = (hTag) iter.previous();
if (tag.equals(x.tag))
{
if (x.forcibly_closed && !x.element.omitEnd())
@@ -179,35 +179,36 @@
closing:
if (x.element.content != null)
{
iter = stack.listIterator(stack.size());
while (iter.hasPrevious())
{
close = (hTag) iter.previous();
if (close == x)
break closing;
handleSupposedEndTag(close.element);
iter.remove();
}
}
stack.remove(x);
- return;
+ return true;
}
}
}
s_error("Closing unopened <" + tag + ">");
+ return false;
}
/**
* Add the given HTML tag to the stack of the opened tags. Forcibly closes
* all tags in the stack that does not allow this tag in they content (error
* is reported).
* @param element
*/
public void openTag(TagElement tElement, htmlAttributeSet parameters)
{
// If this is a fictional call, the message from the parser
// has recursively returned - ignore.
if (tElement.fictional())
return;
Index: gnu/javax/swing/text/html/parser/support/Parser.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/javax/swing/text/html/parser/support/Parser.java,v
retrieving revision 1.10
diff -u -1 -5 -r1.10 Parser.java
--- gnu/javax/swing/text/html/parser/support/Parser.java 15 Nov 2006 10:52:16 -0000 1.10
+++ gnu/javax/swing/text/html/parser/support/Parser.java 15 Nov 2006 13:31:02 -0000
@@ -1188,32 +1188,32 @@
}
catch (ChangedCharSetException ex)
{
error("Changed charset exception:", ex.getMessage());
}
}
/**
* A hooks for operations, preceeding call to handleEndTag().
* The method is called when the HTML closing tag
* is found. Calls handleTitle after closing the 'title' tag.
* @param The tag
*/
private void _handleEndTag(TagElement tag)
{
- validator.closeTag(tag);
- _handleEndTag_remaining(tag);
+ if (validator.closeTag(tag))
+ _handleEndTag_remaining(tag);
}
/**
* Actions that are also required if the closing action was
* initiated by the tag validator.
* Package-private to avoid an accessor method.
*/
void _handleEndTag_remaining(TagElement tag)
{
HTML.Tag h = tag.getHTMLTag();
handleEndTag(tag);
endTag(tag.fictional());
if (h.isPreformatted())
Index: gnu/javax/swing/text/html/parser/support/textPreProcessor.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/javax/swing/text/html/parser/support/textPreProcessor.java,v
retrieving revision 1.3
diff -u -1 -5 -r1.3 textPreProcessor.java
--- gnu/javax/swing/text/html/parser/support/textPreProcessor.java 3 Sep 2006 20:42:43 -0000 1.3
+++ gnu/javax/swing/text/html/parser/support/textPreProcessor.java 15 Nov 2006 13:31:02 -0000
@@ -53,46 +53,38 @@
* consumed. The content of the passed buffer is destroyed.
*
* @param a_text A text to pre-process.
*/
public char[] preprocess(StringBuffer a_text)
{
if (a_text.length() == 0)
return null;
char[] text = toCharArray(a_text);
int a = 0;
int b = text.length - 1;
// Remove leading/trailing whitespace, leaving at most one character
- try
- {
- while (Constants.bWHITESPACE.get(text[a])
- && Constants.bWHITESPACE.get(text[a + 1]))
- a++;
+ int len = text.length;
+ while (a + 1 < len && Constants.bWHITESPACE.get(text[a])
+ && Constants.bWHITESPACE.get(text[a + 1]))
+ a++;
- while (b > a && Constants.bWHITESPACE.get(text[b])
+ while (b > a && Constants.bWHITESPACE.get(text[b])
&& Constants.bWHITESPACE.get(text[b - 1]))
- b--;
- }
- catch (ArrayIndexOutOfBoundsException sx)
- {
- // A text fragment, consisting from spaces and line breaks only,
- // mutates into single space.
- return new char[] { ' ' };
- }
+ b--;
a_text.setLength(0);
boolean spacesWere = false;
boolean spaceNow;
char c;
chars: for (int i = a; i <= b; i++)
{
c = text[i];
spaceNow = Constants.bWHITESPACE.get(c);
if (spacesWere && spaceNow)
continue chars;
if (spaceNow)
a_text.append(' ');