morten 01/11/22 05:08:23
Modified: java/src/org/apache/xalan/xsltc/compiler Choose.java
Parser.java SyntaxTreeNode.java Text.java
java/src/org/apache/xalan/xsltc/runtime TextOutput.java
Log:
Fix for a whole whack of bugs related to text-nodes and handling of whitespace
and special characters.
PR: bugzilla 1403, 1520, 3005, 3418 and 3690
Obtained from: n/a
Submitted by: [EMAIL PROTECTED]
Reviewed by: [EMAIL PROTECTED]
Revision Changes Path
1.5 +4 -1
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Choose.java
Index: Choose.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Choose.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- Choose.java 2001/10/29 11:47:25 1.4
+++ Choose.java 2001/11/22 13:08:23 1.5
@@ -1,5 +1,5 @@
/*
- * @(#)$Id: Choose.java,v 1.4 2001/10/29 11:47:25 morten Exp $
+ * @(#)$Id: Choose.java,v 1.5 2001/11/22 13:08:23 morten Exp $
*
* The Apache Software License, Version 1.1
*
@@ -112,6 +112,9 @@
error = new ErrorMsg(ErrorMsg.MULTIPLE_OTHERWISE_ERR, this);
getParser().reportError(Constants.ERROR, error);
}
+ }
+ else if (element instanceof Text) {
+ ((Text)element).ignore();
}
// It is an error if we find some other element here
else {
1.36 +16 -10
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Parser.java
Index: Parser.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Parser.java,v
retrieving revision 1.35
retrieving revision 1.36
diff -u -r1.35 -r1.36
--- Parser.java 2001/11/09 15:14:21 1.35
+++ Parser.java 2001/11/22 13:08:23 1.36
@@ -1,5 +1,5 @@
/*
- * @(#)$Id: Parser.java,v 1.35 2001/11/09 15:14:21 tmiller Exp $
+ * @(#)$Id: Parser.java,v 1.36 2001/11/22 13:08:23 morten Exp $
*
* The Apache Software License, Version 1.1
*
@@ -1148,23 +1148,29 @@
String string = new String(ch, start, length);
SyntaxTreeNode parent = (SyntaxTreeNode)_parentStack.peek();
+ if (string.length() == 0) return;
+
// If this text occurs within an <xsl:text> element we append it
// as-is to the existing text element
if (parent instanceof Text) {
- if (string.length() > 0) {
- ((Text)parent).setText(string);
- }
+ ((Text)parent).setText(string);
+ return;
}
+
// Ignore text nodes that occur directly under <xsl:stylesheet>
- else if (parent instanceof Stylesheet) {
+ if (parent instanceof Stylesheet) return;
- }
- // Add it as a regular text node otherwise
- else {
- if (string.trim().length() > 0) {
- parent.addElement(new Text(string));
+ SyntaxTreeNode bro = parent.lastChild();
+ if ((bro != null) && (bro instanceof Text)) {
+ Text text = (Text)bro;
+ if (!text.isTextElement()) {
+ text.setText(string);
+ return;
}
}
+
+ // Add it as a regular text node otherwise
+ parent.addElement(new Text(string));
}
private String getTokenValue(String token) {
1.16 +10 -1
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/SyntaxTreeNode.java
Index: SyntaxTreeNode.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/SyntaxTreeNode.java,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -r1.15 -r1.16
--- SyntaxTreeNode.java 2001/11/08 10:23:39 1.15
+++ SyntaxTreeNode.java 2001/11/22 13:08:23 1.16
@@ -1,5 +1,5 @@
/*
- * @(#)$Id: SyntaxTreeNode.java,v 1.15 2001/11/08 10:23:39 morten Exp $
+ * @(#)$Id: SyntaxTreeNode.java,v 1.16 2001/11/22 13:08:23 morten Exp $
*
* The Apache Software License, Version 1.1
*
@@ -705,6 +705,15 @@
*/
protected final Object elementAt(int pos) {
return _contents.elementAt(pos);
+ }
+
+ /**
+ * Returns this element's last child
+ * @return The child node.
+ */
+ protected final SyntaxTreeNode lastChild() {
+ if (_contents.size() == 0) return null;
+ return (SyntaxTreeNode)_contents.lastElement();
}
/**
1.8 +32 -5
xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Text.java
Index: Text.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/compiler/Text.java,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -r1.7 -r1.8
--- Text.java 2001/11/08 10:23:39 1.7
+++ Text.java 2001/11/22 13:08:23 1.8
@@ -1,5 +1,5 @@
/*
- * @(#)$Id: Text.java,v 1.7 2001/11/08 10:23:39 morten Exp $
+ * @(#)$Id: Text.java,v 1.8 2001/11/22 13:08:23 morten Exp $
*
* The Apache Software License, Version 1.1
*
@@ -71,13 +71,17 @@
import org.apache.xalan.xsltc.compiler.util.*;
final class Text extends Instruction {
+
private String _text;
private boolean _escaping = true;
+ private boolean _ignore = false;
+ private boolean _textElement = false;
/**
* Create a blank Text syntax tree node.
*/
public Text() {
+ _textElement = true;
}
/**
@@ -117,10 +121,33 @@
public void parseContents(Parser parser) {
final String str = getAttribute("disable-output-escaping");
- if ((str != null) && (str.equals("yes"))) {
- _escaping = false;
- }
+ if ((str != null) && (str.equals("yes"))) _escaping = false;
+
parseChildren(parser);
+
+ if (_text == null) {
+ _ignore = true;
+ }
+ else if (_textElement) {
+ if (_text.length() == 0) _ignore = true;
+ }
+ else if (getParent() instanceof LiteralElement) {
+ LiteralElement element = (LiteralElement)getParent();
+ String space = element.getAttribute("xml:space");
+ if ((space == null) || (!space.equals("preserve")))
+ if (_text.trim().length() == 0) _ignore = true;
+ }
+ else {
+ if (_text.trim().length() == 0) _ignore = true;
+ }
+ }
+
+ public void ignore() {
+ _ignore = true;
+ }
+
+ public boolean isTextElement() {
+ return _textElement;
}
protected boolean contextDependent() {
@@ -131,7 +158,7 @@
final ConstantPoolGen cpg = classGen.getConstantPool();
final InstructionList il = methodGen.getInstructionList();
- if (_text != null && _text.length() > 0) {
+ if (!_ignore) {
// Turn off character escaping if so is wanted.
final int esc = cpg.addInterfaceMethodref(OUTPUT_HANDLER,
"setEscaping", "(Z)Z");
1.44 +127 -56
xml-xalan/java/src/org/apache/xalan/xsltc/runtime/TextOutput.java
Index: TextOutput.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/xsltc/runtime/TextOutput.java,v
retrieving revision 1.43
retrieving revision 1.44
diff -u -r1.43 -r1.44
--- TextOutput.java 2001/11/21 10:54:01 1.43
+++ TextOutput.java 2001/11/22 13:08:23 1.44
@@ -1,5 +1,5 @@
/*
- * @(#)$Id: TextOutput.java,v 1.43 2001/11/21 10:54:01 morten Exp $
+ * @(#)$Id: TextOutput.java,v 1.44 2001/11/22 13:08:23 morten Exp $
*
* The Apache Software License, Version 1.1
*
@@ -124,7 +124,12 @@
private static final int BEGCOMM_length = BEGCOMM.length;
private static final int ENDCOMM_length = ENDCOMM.length;
- private static final String EMPTYSTRING = "";
+ private static final String EMPTYSTRING = "";
+ private static final String HREF_STR = "href";
+ private static final String SRC_STR = "str";
+ private static final String CHAR_ESC_START = "&#";
+ private static final String CDATA_ESC_START = "]]>&#";
+ private static final String CDATA_ESC_END = ";<![CDATA[";
private AttributeList _attributes = new AttributeList();
private String _elementName = null;
@@ -361,54 +366,6 @@
}
/**
- * Utility method - escape special characters and pass to SAX handler
- */
- private void escapeCharacters(char[] ch, int off, int len)
- throws SAXException {
-
- int limit = off + len;
- int offset = off;
-
- if (limit > ch.length) limit = ch.length;;
-
- // Step through characters and escape all special characters
- for (int i = off; i < limit; i++) {
- switch (ch[i]) {
- case '&':
- _saxHandler.characters(ch, offset, i - offset);
- _saxHandler.characters(AMP, 0, AMP_length);
- offset = i + 1;
- break;
- /* Quotes should only be escaped inside attribute values
- case '"':
- _saxHandler.characters(ch, offset, i - offset);
- _saxHandler.characters(QUOTE, 0, QUOTE_length);
- offset = i + 1;
- break;
- */
- case '<':
- _saxHandler.characters(ch, offset, i - offset);
- _saxHandler.characters(LT, 0, LT_length);
- offset = i + 1;
- break;
- case '>':
- _saxHandler.characters(ch, offset, i - offset);
- _saxHandler.characters(GT, 0, GT_length);
- offset = i + 1;
- break;
- case '\u00a0':
- _saxHandler.characters(ch, offset, i - offset);
- _saxHandler.characters(NBSP, 0, NBSP_length);
- offset = i + 1;
- break;
- }
- // TODO - more characters need escaping!!!
- }
- // Output remaining characters (that do not need escaping).
- if (offset < limit) _saxHandler.characters(ch, offset, limit - offset);
- }
-
- /**
* Utility method - pass a whole charactes as CDATA to SAX handler
*/
private void startCDATA(char[] ch, int off, int len) throws SAXException
{
@@ -464,7 +421,10 @@
startCDATA(ch, off, len);
// Output characters escaped if required.
else if (_escapeChars)
- escapeCharacters(ch, off, len);
+ if (_cdataTagOpen)
+ escapeCDATA(ch, off, len);
+ else
+ escapeCharacters(ch, off, len);
// Output the chracters as the are if not.
else
_saxHandler.characters(ch, off, len);
@@ -582,9 +542,92 @@
}
/**
+ * Utility method - escape special characters and pass to SAX handler
+ */
+ private void escapeCharacters(char[] ch, int off, int len)
+ throws SAXException {
+
+ int limit = off + len;
+ int offset = off;
+
+ if (limit > ch.length) limit = ch.length;;
+
+ // Step through characters and escape all special characters
+ for (int i = off; i < limit; i++) {
+ switch (ch[i]) {
+ case '&':
+ _saxHandler.characters(ch, offset, i - offset);
+ _saxHandler.characters(AMP, 0, AMP_length);
+ offset = i + 1;
+ break;
+ case '<':
+ _saxHandler.characters(ch, offset, i - offset);
+ _saxHandler.characters(LT, 0, LT_length);
+ offset = i + 1;
+ break;
+ case '>':
+ _saxHandler.characters(ch, offset, i - offset);
+ _saxHandler.characters(GT, 0, GT_length);
+ offset = i + 1;
+ break;
+ case '\u00a0':
+ _saxHandler.characters(ch, offset, i - offset);
+ _saxHandler.characters(NBSP, 0, NBSP_length);
+ offset = i + 1;
+ break;
+ default:
+ // Escape all characters above U+00FF (i.e. outside Latin-1)
+ // as decimal numeric character references (&#NNNN;)
+ if (ch[i] > '\u00ff') {
+ StringBuffer buf = new StringBuffer(CHAR_ESC_START);
+ buf.append(Integer.toString((int)ch[i]));
+ buf.append(';');
+ final String esc = buf.toString();
+ final char[] chars = esc.toCharArray();
+ final int strlen = esc.length();
+ _saxHandler.characters(ch, offset, i - offset);
+ _saxHandler.characters(chars, 0, strlen);
+ offset = i + 1;
+ }
+ }
+ }
+ // Output remaining characters (that do not need escaping).
+ if (offset < limit) _saxHandler.characters(ch, offset, limit - offset);
+ }
+
+ /**
+ * Utility method - escape special characters and pass to SAX handler
+ */
+ private void escapeCDATA(char[] ch, int off, int len)
+ throws SAXException {
+
+ int limit = off + len;
+ int offset = off;
+
+ if (limit > ch.length) limit = ch.length;;
+
+ // Step through characters and escape all special characters
+ for (int i = off; i < limit; i++) {
+ if (ch[i] > '\u00ff') {
+ StringBuffer buf = new StringBuffer(CDATA_ESC_START);
+ buf.append(Integer.toString((int)ch[i]));
+ buf.append(CDATA_ESC_END);
+ final String esc = buf.toString();
+ final char[] chars = esc.toCharArray();
+ final int strlen = esc.length();
+ _saxHandler.characters(ch, offset, i - offset);
+ _saxHandler.characters(chars, 0, strlen);
+ offset = i + 1;
+ }
+ }
+ // Output remaining characters (that do not need escaping).
+ if (offset < limit) _saxHandler.characters(ch, offset, limit - offset);
+ }
+
+ /**
* This method escapes special characters used in attribute values
*/
- private String escapeChars(String value) {
+ private String escapeString(String value) {
int i;
char[] ch = value.toCharArray();
@@ -628,6 +671,33 @@
}
/**
+ * This method escapes special characters used in HTML attribute values
+ */
+ private String escapeAttr(String base) {
+
+ final int len = base.length() - 1;
+ final String str = "&quot;";
+ int pos;
+
+ while ((pos = base.indexOf('"')) > -1) {
+ if (pos == 0) {
+ final String after = base.substring(1);
+ base = str + after;
+ }
+ else if (pos == len) {
+ final String before = base.substring(0, pos);
+ base = before + str;
+ }
+ else {
+ final String before = base.substring(0, pos);
+ final String after = base.substring(pos+1);
+ base = before + str + after;
+ }
+ }
+ return base;
+ }
+
+ /**
* Replaces whitespaces in a URL with '%20'
*/
private String quickAndDirtyUrlEncode(String base) {
@@ -707,7 +777,7 @@
}
else {
// Output as regular attribute
- _attributes.add(expandAttribute(name), escapeChars(value));
+ _attributes.add(expandAttribute(name), escapeString(value));
}
return;
case HTML:
@@ -722,10 +792,11 @@
// we do not change the meaning of the URL.
// URL-encode href attributes in HTML output
- if (name.toLowerCase().equals("href"))
-
_attributes.add(name,quickAndDirtyUrlEncode(escapeChars(value)));
+ final String tmp = name.toLowerCase();
+ if (tmp.equals(HREF_STR) || tmp.equals(SRC_STR))
+ _attributes.add(name,quickAndDirtyUrlEncode(escapeAttr(value)));
else
-
+ _attributes.add(expandAttribute(name), escapeAttr(value));
return;
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]