elena 2003/11/18 14:59:50
Modified: java/src/org/apache/xml/serialize BaseMarkupSerializer.java
XML11Serializer.java XMLSerializer.java
Log:
We need to escape CR during text normalization, and TAB, CR, and NL during
attribute value serialization, to allow round-tripping [1], [2]. Similar escaping
is done for XML 1.1 serialization, which additionally escapes NEL (0x85) and
LSEP (0x2028).
Also added a check for "unbound-prefix-in-entity-reference" [2]. Many thanks to
Neil Delima, who helped provide this fix.
[1] http://www.w3.org/TR/xslt-xquery-serialization/#xml-output
[2] http://www.w3.org/TR/2003/CR-DOM-Level-3-LS-20031107/
Revision Changes Path
1.48 +26 -31
xml-xerces/java/src/org/apache/xml/serialize/BaseMarkupSerializer.java
Index: BaseMarkupSerializer.java
===================================================================
RCS file:
/home/cvs/xml-xerces/java/src/org/apache/xml/serialize/BaseMarkupSerializer.java,v
retrieving revision 1.47
retrieving revision 1.48
diff -u -r1.47 -r1.48
--- BaseMarkupSerializer.java 17 Nov 2003 13:47:36 -0000 1.47
+++ BaseMarkupSerializer.java 18 Nov 2003 22:59:50 -0000 1.48
@@ -1048,13 +1048,8 @@
switch (code) {
case NodeFilter.FILTER_REJECT:
case NodeFilter.FILTER_SKIP: {
- // REVISIT: the constant FILTER_SKIP should be changed
when new
- // DOM LS specs gets published
-
- // skip the text node
break;
}
-
default: {
characters(text);
}
@@ -1079,9 +1074,6 @@
switch (code) {
case NodeFilter.FILTER_REJECT:
case NodeFilter.FILTER_SKIP: {
- // REVISIT: the constant FILTER_SKIP should be changed
when new
- // DOM LS specs gets published
-
// skip the CDATA node
return;
}
@@ -1111,9 +1103,6 @@
switch (code) {
case NodeFilter.FILTER_REJECT:
case NodeFilter.FILTER_SKIP: {
- // REVISIT: the constant FILTER_SKIP should be
changed when new
- // DOM LS specs gets published
-
// skip the comment node
return;
}
@@ -1142,9 +1131,6 @@
return; // remove the node
}
case NodeFilter.FILTER_SKIP: {
- // REVISIT: the constant FILTER_SKIP should be changed
when new
- // DOM LS specs gets published
-
child = node.getFirstChild();
while ( child != null ) {
serializeNode( child );
@@ -1154,19 +1140,13 @@
}
default: {
- child = node.getFirstChild();
- if (child !=null) {
- _printer.printText("&");
- _printer.printText(node.getNodeName());
- _printer.printText(";");
- }
- return;
+ // fall through
}
}
- }
-
+ }
child = node.getFirstChild();
if ( child == null || (fFeatures !=null &&
getFeature(Constants.DOM_ENTITIES))){
+ checkUnboundNamespacePrefixedNode(node);
_printer.printText("&");
_printer.printText(node.getNodeName());
_printer.printText(";");
@@ -1540,9 +1520,7 @@
_printer.printText(";<![CDATA[");
}
else {
- _printer.printText("&#x");
- _printer.printText(Integer.toHexString(supplemental));
- _printer.printText(";");
+ printHex(supplemental);
}
}
}
@@ -1690,12 +1668,19 @@
_printer.printText((char)(((ch-0x10000)&0x3ff)+0xdc00));
}
} else {
- // The character is not printable, print as character reference.
- _printer.printText( "&#x" );
- _printer.printText(Integer.toHexString(ch));
- _printer.printText( ';' );
+ printHex(ch);
}
}
+
+ /**
+ * Escapes chars
+ */
+ final void printHex( int ch) throws IOException {
+ _printer.printText( "&#x" );
+ _printer.printText(Integer.toHexString(ch));
+ _printer.printText( ';' );
+
+ }
/**
@@ -1878,4 +1863,14 @@
throw new IOException(message);
}
}
+
+ /**
+ * DOM level 3:
+ * Check a node to determine if it contains unbound namespace prefixes.
+ *
+ * @param node The node to check for unbound namespace prefices
+ */
+ protected void checkUnboundNamespacePrefixedNode (Node node) throws
IOException{
+
+ }
}
1.7 +29 -36
xml-xerces/java/src/org/apache/xml/serialize/XML11Serializer.java
Index: XML11Serializer.java
===================================================================
RCS file:
/home/cvs/xml-xerces/java/src/org/apache/xml/serialize/XML11Serializer.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- XML11Serializer.java 17 Oct 2003 17:41:41 -0000 1.6
+++ XML11Serializer.java 18 Nov 2003 22:59:50 -0000 1.7
@@ -311,7 +311,19 @@
}
continue;
}
- printXMLChar(ch, false);
+ if (ch == '\n' || ch == '\r' || ch == '\t' || ch == 0x0085 || ch ==
0x2028){
+ printHex(ch);
+ } else if (ch == '<') {
+ _printer.printText("&lt;");
+ } else if (ch == '&') {
+ _printer.printText("&amp;");
+ } else if (ch == '"') {
+ _printer.printText("&quot;");
+ } else if ((ch >= ' ' && _encodingInfo.isPrintable((char)
ch))) {
+ _printer.printText((char) ch);
+ } else {
+ printHex(ch);
+ }
}
}
@@ -376,9 +388,11 @@
// note that this "int" should, in all cases, be a char.
// REVISIT: make it a char...
- protected final void printXMLChar( int ch, boolean keepQuot ) throws
IOException {
-
- if ( ch == '<') {
+ protected final void printXMLChar( int ch ) throws IOException {
+
+ if (ch == '\r' || ch == 0x0085 || ch == 0x2028) {
+ printHex(ch);
+ } else if ( ch == '<') {
_printer.printText("&lt;");
} else if (ch == '&') {
_printer.printText("&amp;");
@@ -386,15 +400,10 @@
// character sequence "]]>" can't appear in content, therefore
// we should escape '>'
_printer.printText("&gt;");
- } else if ( ch == '"' && !keepQuot) {
- _printer.printText("&quot;");
} else if ( _encodingInfo.isPrintable((char)ch) &&
XML11Char.isXML11ValidLiteral(ch)) {
_printer.printText((char)ch);
} else {
- // The character is not printable, print as character reference.
- _printer.printText( "&#x" );
- _printer.printText(Integer.toHexString(ch));
- _printer.printText( ';' );
+ printHex(ch);
}
}
@@ -419,9 +428,7 @@
_printer.printText(";<![CDATA[");
}
else {
- _printer.printText("&#x");
- _printer.printText(Integer.toHexString(supplemental));
- _printer.printText(";");
+ printHex(supplemental);
}
}
}
@@ -456,7 +463,7 @@
if ( unescaped && XML11Char.isXML11ValidLiteral(ch)) {
_printer.printText( ch );
} else
- printXMLChar( ch, true );
+ printXMLChar( ch );
}
} else {
// Not preserving spaces: print one part at a time, and
@@ -475,18 +482,11 @@
}
continue;
}
- // Nonterminal S is unchanged in XML 1.1, so NEL (0x85)
- // and LSEP (0x2028) are not space characters.
- //
- // REVISIT: NEL and LSEP need to be escaped in order for
- // them to be roundtripped, otherwise they will be
- // normalized to LF when the document is read back. - mrglavas
- if ( XMLChar.isSpace(ch))
- _printer.printSpace();
- else if ( unescaped && XML11Char.isXML11ValidLiteral(ch) )
+
+ if ( unescaped && XML11Char.isXML11ValidLiteral(ch) )
_printer.printText( ch );
else
- printXMLChar( ch, true);
+ printXMLChar( ch);
}
}
}
@@ -518,7 +518,7 @@
if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
_printer.printText( ch );
else
- printXMLChar( ch, true );
+ printXMLChar( ch );
}
} else {
// Not preserving spaces: print one part at a time, and
@@ -539,18 +539,11 @@
}
continue;
}
- // Nonterminal S is unchanged in XML 1.1, so NEL (0x85)
- // and LSEP (0x2028) are not space characters.
- //
- // REVISIT: NEL and LSEP need to be escaped in order for
- // them to be roundtripped, otherwise they will be
- // normalized to LF when the document is read back. - mrglavas
- if (XMLChar.isSpace(ch))
- _printer.printSpace();
- else if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
+
+ if ( unescaped && XML11Char.isXML11ValidLiteral(ch))
_printer.printText( ch );
else
- printXMLChar( ch, true );
+ printXMLChar( ch );
}
}
}
1.55 +104 -32 xml-xerces/java/src/org/apache/xml/serialize/XMLSerializer.java
Index: XMLSerializer.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xml/serialize/XMLSerializer.java,v
retrieving revision 1.54
retrieving revision 1.55
diff -u -r1.54 -r1.55
--- XMLSerializer.java 23 Sep 2003 21:42:31 -0000 1.54
+++ XMLSerializer.java 18 Nov 2003 22:59:50 -0000 1.55
@@ -1235,30 +1235,42 @@
}
//
- // overwrite printing functions to make sure serializer prints out valid XML
+ // Printing attribute value
//
- protected void printEscaped( String source ) throws IOException {
+ protected void printEscaped(String source) throws IOException {
int length = source.length();
- for ( int i = 0 ; i < length ; ++i ) {
+ for (int i = 0; i < length; ++i) {
int ch = source.charAt(i);
if (!XMLChar.isValid(ch)) {
- if (++i <length) {
+ if (++i < length) {
surrogates(ch, source.charAt(i));
} else {
- fatalError("The character '"+(char)ch+"' is an invalid XML
character");
+ fatalError("The character '" + (char) ch + "' is an invalid XML
character");
}
continue;
}
- printXMLChar(ch, false);
+ // escape NL, CR, TAB
+ if (ch == '\n' || ch == '\r' || ch == '\t') {
+ printHex(ch);
+ } else if (ch == '<') {
+ _printer.printText("&lt;");
+ } else if (ch == '&') {
+ _printer.printText("&amp;");
+ } else if (ch == '"') {
+ _printer.printText("&quot;");
+ } else if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch))) {
+ _printer.printText((char) ch);
+ } else {
+ printHex(ch);
+ }
}
}
- // note that this "int" should, in all cases, be a char.
- // if printing out attribute values, the double quotes mark is converted to
- // quot, otherwise keep quotations as is.
- protected void printXMLChar( int ch, boolean keepQuot) throws IOException {
-
- if ( ch == '<') {
+ /** print text data */
+ protected void printXMLChar( int ch) throws IOException {
+ if (ch == '\r') {
+ printHex(ch);
+ } else if ( ch == '<') {
_printer.printText("&lt;");
} else if (ch == '&') {
_printer.printText("&amp;");
@@ -1266,17 +1278,11 @@
// character sequence "]]>" can't appear in content, therefore
// we should escape '>'
_printer.printText("&gt;");
- } else if ( ch == '"' && ! keepQuot) {
- _printer.printText("&quot;");
- } else if ( ( ch >= ' ' && _encodingInfo.isPrintable((char)ch)) ||
- ch == '\n' || ch == '\r' || ch == '\t' ) {
- // REVISIT: new line characters must be escaped
+ } else if ( ch == '\n' || ch == '\t' ||
+ ( ch >= ' ' && _encodingInfo.isPrintable((char)ch))) {
_printer.printText((char)ch);
} else {
- // The character is not printable, print as character reference.
- _printer.printText( "&#x" );
- _printer.printText(Integer.toHexString(ch));
- _printer.printText( ';' );
+ printHex(ch);
}
}
@@ -1304,7 +1310,7 @@
if ( unescaped ) {
_printer.printText( ch );
} else
- printXMLChar( ch, true );
+ printXMLChar( ch );
}
} else {
// Not preserving spaces: print one part at a time, and
@@ -1323,12 +1329,11 @@
}
continue;
}
- if ( XMLChar.isSpace(ch))
- _printer.printSpace();
- else if ( unescaped )
+
+ if ( unescaped )
_printer.printText( ch );
else
- printXMLChar( ch, true);
+ printXMLChar( ch);
}
}
}
@@ -1360,7 +1365,7 @@
if ( unescaped )
_printer.printText( ch );
else
- printXMLChar( ch, true );
+ printXMLChar( ch );
}
} else {
// Not preserving spaces: print one part at a time, and
@@ -1381,16 +1386,83 @@
}
continue;
}
- if ( XMLChar.isSpace(ch))
- _printer.printSpace();
- else if ( unescaped )
+ if ( unescaped )
_printer.printText( ch );
else
- printXMLChar( ch, true );
+ printXMLChar( ch );
}
}
}
+
+ /**
+ * DOM Level 3:
+ * Check a node to determine if it contains unbound namespace prefixes.
+ *
+ * @param node The node to check for unbound namespace prefices
+ */
+ protected void checkUnboundNamespacePrefixedNode (Node node) throws
IOException{
+
+ if (fNamespaces) {
+
+ if (DEBUG) {
+
System.out.println("==>serializeNode("+node.getNodeName()+") [Entity Reference -
Namespaces on]");
+ System.out.println("==>Declared Prefix Count: " +
fNSBinder.getDeclaredPrefixCount());
+ System.out.println("==>Node Name: " +
node.getNodeName());
+ System.out.println("==>First Child Node Name: " +
node.getFirstChild().getNodeName());
+ System.out.println("==>First Child Node Prefix: " +
node.getFirstChild().getPrefix());
+ System.out.println("==>First Child Node NamespaceURI:
" + node.getFirstChild().getNamespaceURI());
+ }
+
+
+ Node child, next;
+ for (child = node.getFirstChild(); child != null; child = next) {
+ next = child.getNextSibling();
+ if (DEBUG) {
+
System.out.println("==>serializeNode("+child.getNodeName()+") [Child Node]");
+
System.out.println("==>serializeNode("+child.getPrefix()+") [Child Node Prefix]");
+ }
+
+ //If a NamespaceURI is not declared for the current
+ //node's prefix, raise a fatal error.
+ String prefix = child.getPrefix();
+ if (fNSBinder.getURI(prefix) == null && prefix != null) {
+ fatalError("The replacement text of the entity
node '"
+ + node.getNodeName()
+ + "' contains an
element node '"
+ + child.getNodeName()
+ + "' with an
undeclared prefix '"
+ + prefix + "'.");
+ }
+
+ if (child.getNodeType() == Node.ELEMENT_NODE) {
+
+ NamedNodeMap attrs = child.getAttributes();
+
+ for (int i = 0; i< attrs.getLength(); i++ ) {
+
+ String attrPrefix =
attrs.item(i).getPrefix();
+ if (fNSBinder.getURI(attrPrefix) == null
&& attrPrefix != null) {
+ fatalError("The replacement
text of the entity node '"
+ +
node.getNodeName()
+ + "'
contains an element node '"
+ +
child.getNodeName()
+ + "'
with an attribute '"
+ +
attrs.item(i).getNodeName()
+ + "'
an undeclared prefix '"
+ +
attrPrefix + "'.");
+ }
+
+ }
+
+ }
+
+ if (child.hasChildNodes()) {
+ checkUnboundNamespacePrefixedNode(child);
+ }
+ }
+ }
+ }
public boolean reset() {
super.reset();
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]