Ok,
even though the patch didn't apply, I applied it line by line for the
parser, as I needed this working right away.
Could you redo the test patch in any case?
regards,
Martin
On 11/1/05, Martin Marinschek <[EMAIL PROTECTED]> wrote:
> Simon,
>
> I don't seem to be able to apply your patch again - an 'unknown line
> type was found in line 12'.
>
> Can you do it again and attach it to our old JIRA issue? I have
> reopened it for this purpose.
>
> regards,
>
> Martin
>
> On 11/1/05, Simon Kitching <[EMAIL PROTECTED]> wrote:
> > Martin Marinschek wrote:
> > > Don't stress yourself - it's just the nightly build, so not too big of a
> > > problem.
> >
> > Thanks, but it's hopefully done anyway.
> >
> > changes:
> > * Handle DOCTYPE and Processing Instruction commands in input HTML
> > * Track line# of input for error messages
> > * Remove some debugging printlns
> >
> > I can also provide a patch soon to format the code to the MyFaces
> > convention rather than the Sun convention if you wish. Sorry, my Eclipse
> > is set up to format stuff that way automatically and I forgot to
> > reformat before posting.
> >
> > Regards,
> >
> > Simon
> >
> >
> > Index: ReducedHTMLParser.java
> > ===================================================================
> > --- ReducedHTMLParser.java (revision 329922)
> > +++ ReducedHTMLParser.java (working copy)
> > @@ -49,6 +49,7 @@
> > private static final int STATE_IN_TAG = 2;
> >
> > private int offset;
> > + private int lineNumber;
> > private CharSequence seq;
> > private CallbackListener listener;
> >
> > @@ -75,15 +76,32 @@
> > return offset >= seq.length();
> > }
> >
> > + int getCurrentLineNumber() {
> > + return lineNumber;
> > + }
> > +
> > /**
> > * Advance the current parse position over any whitespace characters.
> > */
> > void consumeWhitespace() {
> > + boolean crSeen = false;
> > +
> > while (offset < seq.length()) {
> > char c = seq.charAt(offset);
> > if (!Character.isWhitespace(c)) {
> > break;
> > }
> > +
> > + // Track line number for error messages.
> > + if (c == '\r') {
> > + ++lineNumber;
> > + crSeen = true;
> > + } else if ((c == '\n') && !crSeen) {
> > + ++lineNumber;
> > + } else {
> > + crSeen = false;
> > + }
> > +
> > ++offset;
> > }
> > }
> > @@ -193,6 +211,10 @@
> > // TODO: should we consider a string to be terminated by a newline?
> > // that would help with runaway strings but I think that multiline
> > // strings *are* allowed...
> > + //
> > + // TODO: detect newlines within strings and increment lineNumber.
> > + // This isn't so important, though; they aren't common and being a
> > + // few lines out in an error message isn't serious either.
> > StringBuffer stringBuf = new StringBuffer();
> > boolean escaping = false;
> > while (!isFinished()) {
> > @@ -248,6 +270,8 @@
> > * @param s is a set of characters that should not be discarded.
> > */
> > void consumeExcept(String s) {
> > + boolean crSeen = false;
> > +
> > while (offset < seq.length()) {
> > char c = seq.charAt(offset);
> > if (s.indexOf(c) >= 0) {
> > @@ -255,6 +279,16 @@
> > return;
> > }
> >
> > + // Track line number for error messages.
> > + if (c == '\r') {
> > + ++lineNumber;
> > + crSeen = true;
> > + } else if ((c == '\n') && !crSeen) {
> > + ++lineNumber;
> > + } else {
> > + crSeen = false;
> > + }
> > +
> > ++offset;
> > }
> > }
> > @@ -269,6 +303,7 @@
> > int currentTagStart = -1;
> > String currentTagName = null;
> >
> > + lineNumber = 1;
> > offset = 0;
> > while (offset < seq.length())
> > {
> > @@ -282,6 +317,10 @@
> > if (consumeMatch("<!--")) {
> > // VERIFY: can "< ! --" start a comment?
> > state = STATE_IN_COMMENT;
> > + } else if (consumeMatch("<!")) {
> > + // xml processing instruction or <!DOCTYPE> tag
> > + // we don't need to actually do anything here
> > + log.debug("PI found at line " +
> > getCurrentLineNumber());
> > } else if (consumeMatch("</")) {
> > // VERIFY: is "< / foo >" a valid end-tag?
> >
> > @@ -306,10 +345,17 @@
> > // the current info until the end of this tag.
> > currentTagStart = offset - 1;
> > currentTagName = consumeElementName();
> > - state = STATE_IN_TAG;
> > + if (currentTagName == null) {
> > + log.warn("Invalid HTML; bare lessthan sign found
> > at line "
> > + + getCurrentLineNumber());
> > + // remain in STATE_READY; this isn't really the
> > start of
> > + // an xml element.
> > + } else {
> > + state = STATE_IN_TAG;
> > + }
> > } else {
> > // should never get here
> > - throw new Error("Internal error");
> > + throw new Error("Internal error at line " +
> > getCurrentLineNumber());
> > }
> >
> > continue;
> > @@ -378,7 +424,6 @@
> > */
> > void openedTag(int startOffset, int endOffset, String tagName) {
> > log.debug("Found open tag at " + startOffset + ":" + endOffset +
> > ":" + tagName);
> > - System.out.println("Found open tag at " + startOffset + ":" +
> > endOffset + ":" + tagName);
> >
> > if ("head".equalsIgnoreCase(tagName)) {
> > listener.openedStartTag(startOffset, HEAD_TAG);
> > @@ -394,7 +439,6 @@
> >
> > void closedTag(int startOffset, int endOffset, String tagName) {
> > log.debug("Found close tag at " + startOffset + ":" + endOffset +
> > ":" + tagName);
> > - System.out.println("Found close tag at " + startOffset + ":" +
> > endOffset + ":" + tagName);
> >
> > if ("head".equalsIgnoreCase(tagName)) {
> > listener.openedEndTag(startOffset, HEAD_TAG);
> >
> >
> > Index: ReducedHTMLParserTest.java
> > ===================================================================
> > --- ReducedHTMLParserTest.java (revision 329925)
> > +++ ReducedHTMLParserTest.java (working copy)
> > @@ -322,8 +322,19 @@
> > parser.consumeExcept("z");
> > }
> >
> > + // test parsing completes when invalid tag found.
> > + public void testParseBadTag() {
> > + String s = "xxxx \n\n <# \n\n";
> > + CallbackListener listener = new ParseCallbackListener();
> > + ReducedHTMLParser parser = new ReducedHTMLParser(s, listener);
> > +
> > + parser.parse();
> > + assertTrue(parser.isFinished());
> > + }
> > +
> > // test the full parse method
> > public void testParse() {
> > + String s0 = "<!DOCTYPE PUBLIC \"sss\" \"http:foo\">\n";
> > String s1 = "<html><head>";
> > String s2 = "\n<!-- a comment --><title>foo</title>";
> > String s3 = "</head>";
> > @@ -338,6 +349,7 @@
> > String s8 = "</body> </html>";
> >
> > StringBuffer buf = new StringBuffer();
> > + buf.append(s0);
> > buf.append(s1);
> > buf.append(s2);
> > buf.append(s3);
> > @@ -354,13 +366,13 @@
> >
> > // check that listener has correctly computed the offset to the
> > char just
> > // before the </head> tag starts.
> > - int afterHeadPos = s1.length();
> > + int afterHeadPos = s0.length() + s1.length();
> > assertEquals("Pos after <head> tag ", afterHeadPos,
> > listener.headerInsertPosition);
> >
> > - int beforeBodyPos = s1.length() + s2.length() + s3.length();
> > + int beforeBodyPos = afterHeadPos + s2.length() + s3.length();
> > assertEquals("Pos before <body> tag", beforeBodyPos,
> > listener.beforeBodyPosition);
> >
> > - int afterBodyPos = s1.length() + s2.length() + s3.length() +
> > s4.length();
> > + int afterBodyPos = beforeBodyPos + s4.length();
> > assertEquals("Pos after <body> tag", afterBodyPos,
> > listener.bodyInsertPosition);
> > }
> > }
> >
> >
> >
>
>
> --
>
> http://www.irian.at
> Your JSF powerhouse -
> JSF Trainings in English and German
>
--
http://www.irian.at
Your JSF powerhouse -
JSF Trainings in English and German