morten 01/11/22 05:08:23 Modified: java/src/org/apache/xalan/xsltc/compiler Choose.java Parser.java SyntaxTreeNode.java Text.java java/src/org/apache/xalan/xsltc/runtime TextOutput.java Log: Fix for a whole wack of bugs related to text-nodes and handling of whitespace and special characters. PR: bugzilla 1403, 1520, 3005, 3418 and 3690 Obtained from: n/a Submitted by: [EMAIL PROTECTED] Reviewed by: [EMAIL PROTECTED] Revision Changes Path 1.5 +4 -1 xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Choose.java Index: Choose.java =================================================================== RCS file: /home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Choose.java,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- Choose.java 2001/10/29 11:47:25 1.4 +++ Choose.java 2001/11/22 13:08:23 1.5 @@ -1,5 +1,5 @@ /* - * @(#)$Id: Choose.java,v 1.4 2001/10/29 11:47:25 morten Exp $ + * @(#)$Id: Choose.java,v 1.5 2001/11/22 13:08:23 morten Exp $ * * The Apache Software License, Version 1.1 * @@ -112,6 +112,9 @@ error = new ErrorMsg(ErrorMsg.MULTIPLE_OTHERWISE_ERR, this); getParser().reportError(Constants.ERROR, error); } + } + else if (element instanceof Text) { + ((Text)element).ignore(); } // It is an error if we find some other element here else { 1.36 +16 -10 xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Parser.java Index: Parser.java =================================================================== RCS file: /home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Parser.java,v retrieving revision 1.35 retrieving revision 1.36 diff -u -r1.35 -r1.36 --- Parser.java 2001/11/09 15:14:21 1.35 +++ Parser.java 2001/11/22 13:08:23 1.36 @@ -1,5 +1,5 @@ /* - * @(#)$Id: Parser.java,v 1.35 2001/11/09 15:14:21 tmiller Exp $ + * @(#)$Id: Parser.java,v 1.36 2001/11/22 13:08:23 morten Exp $ * * The Apache Software License, Version 1.1 * @@ -1148,23 +1148,29 @@ String string = new String(ch, start, length); SyntaxTreeNode parent = 
(SyntaxTreeNode)_parentStack.peek(); + if (string.length() == 0) return; + // If this text occurs within an <xsl:text> element we append it // as-is to the existing text element if (parent instanceof Text) { - if (string.length() > 0) { - ((Text)parent).setText(string); - } + ((Text)parent).setText(string); + return; } + // Ignore text nodes that occur directly under <xsl:stylesheet> - else if (parent instanceof Stylesheet) { + if (parent instanceof Stylesheet) return; - } - // Add it as a regular text node otherwise - else { - if (string.trim().length() > 0) { - parent.addElement(new Text(string)); + SyntaxTreeNode bro = parent.lastChild(); + if ((bro != null) && (bro instanceof Text)) { + Text text = (Text)bro; + if (!text.isTextElement()) { + text.setText(string); + return; } } + + // Add it as a regular text node otherwise + parent.addElement(new Text(string)); } private String getTokenValue(String token) { 1.16 +10 -1 xml-xalan/java/src/org/apache/xalan/xsltc/compiler/SyntaxTreeNode.java Index: SyntaxTreeNode.java =================================================================== RCS file: /home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/SyntaxTreeNode.java,v retrieving revision 1.15 retrieving revision 1.16 diff -u -r1.15 -r1.16 --- SyntaxTreeNode.java 2001/11/08 10:23:39 1.15 +++ SyntaxTreeNode.java 2001/11/22 13:08:23 1.16 @@ -1,5 +1,5 @@ /* - * @(#)$Id: SyntaxTreeNode.java,v 1.15 2001/11/08 10:23:39 morten Exp $ + * @(#)$Id: SyntaxTreeNode.java,v 1.16 2001/11/22 13:08:23 morten Exp $ * * The Apache Software License, Version 1.1 * @@ -705,6 +705,15 @@ */ protected final Object elementAt(int pos) { return _contents.elementAt(pos); + } + + /** + * Returns this element's last child + * @return The child node. 
+ */ + protected final SyntaxTreeNode lastChild() { + if (_contents.size() == 0) return null; + return (SyntaxTreeNode)_contents.lastElement(); } /** 1.8 +32 -5 xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Text.java Index: Text.java =================================================================== RCS file: /home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Text.java,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- Text.java 2001/11/08 10:23:39 1.7 +++ Text.java 2001/11/22 13:08:23 1.8 @@ -1,5 +1,5 @@ /* - * @(#)$Id: Text.java,v 1.7 2001/11/08 10:23:39 morten Exp $ + * @(#)$Id: Text.java,v 1.8 2001/11/22 13:08:23 morten Exp $ * * The Apache Software License, Version 1.1 * @@ -71,13 +71,17 @@ import org.apache.xalan.xsltc.compiler.util.*; final class Text extends Instruction { + private String _text; private boolean _escaping = true; + private boolean _ignore = false; + private boolean _textElement = false; /** * Create a blank Text syntax tree node. 
*/ public Text() { + _textElement = true; } /** @@ -117,10 +121,33 @@ public void parseContents(Parser parser) { final String str = getAttribute("disable-output-escaping"); - if ((str != null) && (str.equals("yes"))) { - _escaping = false; - } + if ((str != null) && (str.equals("yes"))) _escaping = false; + parseChildren(parser); + + if (_text == null) { + _ignore = true; + } + else if (_textElement) { + if (_text.length() == 0) _ignore = true; + } + else if (getParent() instanceof LiteralElement) { + LiteralElement element = (LiteralElement)getParent(); + String space = element.getAttribute("xml:space"); + if ((space == null) || (!space.equals("preserve"))) + if (_text.trim().length() == 0) _ignore = true; + } + else { + if (_text.trim().length() == 0) _ignore = true; + } + } + + public void ignore() { + _ignore = true; + } + + public boolean isTextElement() { + return _textElement; } protected boolean contextDependent() { @@ -131,7 +158,7 @@ final ConstantPoolGen cpg = classGen.getConstantPool(); final InstructionList il = methodGen.getInstructionList(); - if (_text != null && _text.length() > 0) { + if (!_ignore) { // Turn off character escaping if so is wanted. 
final int esc = cpg.addInterfaceMethodref(OUTPUT_HANDLER, "setEscaping", "(Z)Z"); 1.44 +127 -56 xml-xalan/java/src/org/apache/xalan/xsltc/runtime/TextOutput.java Index: TextOutput.java =================================================================== RCS file: /home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/runtime/TextOutput.java,v retrieving revision 1.43 retrieving revision 1.44 diff -u -r1.43 -r1.44 --- TextOutput.java 2001/11/21 10:54:01 1.43 +++ TextOutput.java 2001/11/22 13:08:23 1.44 @@ -1,5 +1,5 @@ /* - * @(#)$Id: TextOutput.java,v 1.43 2001/11/21 10:54:01 morten Exp $ + * @(#)$Id: TextOutput.java,v 1.44 2001/11/22 13:08:23 morten Exp $ * * The Apache Software License, Version 1.1 * @@ -124,7 +124,12 @@ private static final int BEGCOMM_length = BEGCOMM.length; private static final int ENDCOMM_length = ENDCOMM.length; - private static final String EMPTYSTRING = ""; + private static final String EMPTYSTRING = ""; + private static final String HREF_STR = "href"; + private static final String SRC_STR = "str"; + private static final String CHAR_ESC_START = "&#"; + private static final String CDATA_ESC_START = "]]>&#"; + private static final String CDATA_ESC_END = ";<![CDATA["; private AttributeList _attributes = new AttributeList(); private String _elementName = null; @@ -361,54 +366,6 @@ } /** - * Utility method - escape special characters and pass to SAX handler - */ - private void escapeCharacters(char[] ch, int off, int len) - throws SAXException { - - int limit = off + len; - int offset = off; - - if (limit > ch.length) limit = ch.length;; - - // Step through characters and escape all special characters - for (int i = off; i < limit; i++) { - switch (ch[i]) { - case '&': - _saxHandler.characters(ch, offset, i - offset); - _saxHandler.characters(AMP, 0, AMP_length); - offset = i + 1; - break; - /* Quotes should only be escaped inside attribute values - case '"': - _saxHandler.characters(ch, offset, i - offset); - _saxHandler.characters(QUOTE, 0, 
QUOTE_length); - offset = i + 1; - break; - */ - case '<': - _saxHandler.characters(ch, offset, i - offset); - _saxHandler.characters(LT, 0, LT_length); - offset = i + 1; - break; - case '>': - _saxHandler.characters(ch, offset, i - offset); - _saxHandler.characters(GT, 0, GT_length); - offset = i + 1; - break; - case '\u00a0': - _saxHandler.characters(ch, offset, i - offset); - _saxHandler.characters(NBSP, 0, NBSP_length); - offset = i + 1; - break; - } - // TODO - more characters need escaping!!! - } - // Output remaining characters (that do not need escaping). - if (offset < limit) _saxHandler.characters(ch, offset, limit - offset); - } - - /** * Utility method - pass a whole charactes as CDATA to SAX handler */ private void startCDATA(char[] ch, int off, int len) throws SAXException { @@ -464,7 +421,10 @@ startCDATA(ch, off, len); // Output characters escaped if required. else if (_escapeChars) - escapeCharacters(ch, off, len); + if (_cdataTagOpen) + escapeCDATA(ch, off, len); + else + escapeCharacters(ch, off, len); // Output the chracters as the are if not. 
else _saxHandler.characters(ch, off, len); @@ -582,9 +542,92 @@ } /** + * Utility method - escape special characters and pass to SAX handler + */ + private void escapeCharacters(char[] ch, int off, int len) + throws SAXException { + + int limit = off + len; + int offset = off; + + if (limit > ch.length) limit = ch.length;; + + // Step through characters and escape all special characters + for (int i = off; i < limit; i++) { + switch (ch[i]) { + case '&': + _saxHandler.characters(ch, offset, i - offset); + _saxHandler.characters(AMP, 0, AMP_length); + offset = i + 1; + break; + case '<': + _saxHandler.characters(ch, offset, i - offset); + _saxHandler.characters(LT, 0, LT_length); + offset = i + 1; + break; + case '>': + _saxHandler.characters(ch, offset, i - offset); + _saxHandler.characters(GT, 0, GT_length); + offset = i + 1; + break; + case '\u00a0': + _saxHandler.characters(ch, offset, i - offset); + _saxHandler.characters(NBSP, 0, NBSP_length); + offset = i + 1; + break; + default: + // Escape all characters not in the basic ASCII character set + // to simple (hexadecimal) character references + if (ch[i] > '\u00ff') { + StringBuffer buf = new StringBuffer(CHAR_ESC_START); + buf.append(Integer.toString((int)ch[i])); + buf.append(';'); + final String esc = buf.toString(); + final char[] chars = esc.toCharArray(); + final int strlen = esc.length(); + _saxHandler.characters(ch, offset, i - offset); + _saxHandler.characters(chars, 0, strlen); + offset = i + 1; + } + } + } + // Output remaining characters (that do not need escaping). 
+ if (offset < limit) _saxHandler.characters(ch, offset, limit - offset); + } + + /** + * Utility method - escape special characters and pass to SAX handler + */ + private void escapeCDATA(char[] ch, int off, int len) + throws SAXException { + + int limit = off + len; + int offset = off; + + if (limit > ch.length) limit = ch.length;; + + // Step through characters and escape all special characters + for (int i = off; i < limit; i++) { + if (ch[i] > '\u00ff') { + StringBuffer buf = new StringBuffer(CDATA_ESC_START); + buf.append(Integer.toString((int)ch[i])); + buf.append(CDATA_ESC_END); + final String esc = buf.toString(); + final char[] chars = esc.toCharArray(); + final int strlen = esc.length(); + _saxHandler.characters(ch, offset, i - offset); + _saxHandler.characters(chars, 0, strlen); + offset = i + 1; + } + } + // Output remaining characters (that do not need escaping). + if (offset < limit) _saxHandler.characters(ch, offset, limit - offset); + } + + /** * This method escapes special characters used in attribute values */ - private String escapeChars(String value) { + private String escapeString(String value) { int i; char[] ch = value.toCharArray(); @@ -628,6 +671,33 @@ } /** + * This method escapes special characters used in HTML attribute values + */ + private String escapeAttr(String base) { + + final int len = base.length() - 1; + final String str = "&quot;"; + int pos; + + while ((pos = base.indexOf('"')) > -1) { + if (pos == 0) { + final String after = base.substring(1); + base = str + after; + } + else if (pos == len) { + final String before = base.substring(0, pos); + base = before + str; + } + else { + final String before = base.substring(0, pos); + final String after = base.substring(pos+1); + base = before + str + after; + } + } + return base; + } + + /** * Replaces whitespaces in a URL with '%20' */ private String quickAndDirtyUrlEncode(String base) { @@ -707,7 +777,7 @@ } else { // Output as regular attribute - _attributes.add(expandAttribute(name), 
escapeChars(value)); + _attributes.add(expandAttribute(name), escapeString(value)); } return; case HTML: @@ -722,10 +792,11 @@ // we do not change the meaning of the URL. // URL-encode href attributes in HTML output - if (name.toLowerCase().equals("href")) - _attributes.add(name,quickAndDirtyUrlEncode(escapeChars(value))); + final String tmp = name.toLowerCase(); + if (tmp.equals(HREF_STR) || tmp.equals(SRC_STR)) + _attributes.add(name,quickAndDirtyUrlEncode(escapeAttr(value))); else - + _attributes.add(expandAttribute(name), escapeAttr(value)); return; } }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]