mrglavas 2003/09/04 21:42:01 Modified: java/samples/sax Writer.java Log: Support printing of XML 1.1 documents, CDATA sections, and print space and end of line characters appearing in content so that they can be roundtripped. Fixed a bug with escaping characters in comments. Revision Changes Path 1.8 +128 -27 xml-xerces/java/samples/sax/Writer.java Index: Writer.java =================================================================== RCS file: /home/cvs/xml-xerces/java/samples/sax/Writer.java,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- Writer.java 7 May 2002 10:47:48 -0000 1.7 +++ Writer.java 5 Sep 2003 04:42:01 -0000 1.8 @@ -2,7 +2,7 @@ * The Apache Software License, Version 1.1 * * - * Copyright (c) 1999-2002 The Apache Software Foundation. All rights + * Copyright (c) 1999-2003 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without @@ -57,6 +57,8 @@ package sax; +import java.lang.reflect.Method; + import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.PrintWriter; @@ -65,6 +67,7 @@ import sax.helpers.AttributesImpl; import org.xml.sax.Attributes; +import org.xml.sax.Locator; import org.xml.sax.Parser; import org.xml.sax.SAXException; import org.xml.sax.SAXNotRecognizedException; @@ -145,6 +148,15 @@ /** Element depth. */ protected int fElementDepth; + + /** Document locator. */ + protected Locator fLocator; + + /** Processing XML 1.1 document. */ + protected boolean fXML11; + + /** In CDATA section. */ + protected boolean fInCDATA; // // Constructors @@ -187,17 +199,19 @@ // // ContentHandler methods // - + + /** Set Document Locator. */ + public void setDocumentLocator(Locator locator) { + fLocator = locator; + } // setDocumentLocator(Locator) + /** Start document. 
*/ public void startDocument() throws SAXException { fElementDepth = 0; - - if (!fCanonical) { - fOut.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); - fOut.flush(); - } - + fXML11 = false; + fInCDATA = false; + } // startDocument() /** Processing instruction. */ @@ -221,6 +235,27 @@ public void startElement(String uri, String local, String raw, Attributes attrs) throws SAXException { + // Root Element + if (fElementDepth == 0) { + if (fLocator != null) { + fXML11 = "1.1".equals(getVersion()); + fLocator = null; + } + + // The XML declaration cannot be printed in startDocument because + // the version reported by the Locator cannot be relied on until after + // the XML declaration in the instance document has been read. + if (!fCanonical) { + if (fXML11) { + fOut.println("<?xml version=\"1.1\" encoding=\"UTF-8\"?>"); + } + else { + fOut.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); + } + fOut.flush(); + } + } + fElementDepth++; fOut.print('<'); fOut.print(raw); @@ -231,7 +266,7 @@ fOut.print(' '); fOut.print(attrs.getQName(i)); fOut.print("=\""); - normalizeAndPrint(attrs.getValue(i)); + normalizeAndPrint(attrs.getValue(i), true); fOut.print('"'); } } @@ -244,7 +279,14 @@ public void characters(char ch[], int start, int length) throws SAXException { - normalizeAndPrint(ch, start, length); + if (!fInCDATA) { + normalizeAndPrint(ch, start, length, false); + } + else { + for (int i = 0; i < length; ++i) { + fOut.print(ch[start+i]); + } + } fOut.flush(); } // characters(char[],int,int); @@ -313,17 +355,27 @@ /** Start CDATA section. */ public void startCDATA() throws SAXException { + if (!fCanonical) { + fOut.print("<![CDATA["); + fInCDATA = true; + } } // startCDATA() /** End CDATA section. */ public void endCDATA() throws SAXException { + if (!fCanonical) { + fInCDATA = false; + fOut.print("]]>"); + } } // endCDATA() /** Comment. 
*/ public void comment(char ch[], int start, int length) throws SAXException { if (!fCanonical && fElementDepth > 0) { fOut.print("<!--"); - normalizeAndPrint(ch, start, length); + for (int i = 0; i < length; ++i) { + fOut.print(ch[start+i]); + } fOut.print("-->"); fOut.flush(); } @@ -358,25 +410,25 @@ } // sortAttributes(AttributeList):AttributeList /** Normalizes and prints the given string. */ - protected void normalizeAndPrint(String s) { + protected void normalizeAndPrint(String s, boolean isAttValue) { int len = (s != null) ? s.length() : 0; for (int i = 0; i < len; i++) { char c = s.charAt(i); - normalizeAndPrint(c); + normalizeAndPrint(c, isAttValue); } - } // normalizeAndPrint(String) + } // normalizeAndPrint(String,boolean) /** Normalizes and prints the given array of characters. */ - protected void normalizeAndPrint(char[] ch, int offset, int length) { + protected void normalizeAndPrint(char[] ch, int offset, int length, boolean isAttValue) { for (int i = 0; i < length; i++) { - normalizeAndPrint(ch[offset + i]); + normalizeAndPrint(ch[offset + i], isAttValue); } - } // normalizeAndPrint(char[],int,int) + } // normalizeAndPrint(char[],int,int,boolean) /** Normalizes and print the given character. */ - protected void normalizeAndPrint(char c) { + protected void normalizeAndPrint(char c, boolean isAttValue) { switch (c) { case '<': { @@ -392,25 +444,53 @@ break; } case '"': { - fOut.print("&quot;"); + // A '"' that appears in character data + // does not need to be escaped. + if (isAttValue) { + fOut.print("&quot;"); + } + else { + fOut.print("\""); + } break; } - case '\r': + case '\r': { + // If CR is part of the document's content, it + // must not be printed as a literal otherwise + // it would be normalized to LF when the document + // is reparsed. + fOut.print("&#xD;"); + break; + } case '\n': { if (fCanonical) { - fOut.print("&#"); - fOut.print(Integer.toString(c)); - fOut.print(';'); + fOut.print("&#xA;"); break; } // else, default print char } default: { - fOut.print(c); + // In XML 1.1, control chars in the ranges [#x1-#x1F, #x7F-#x9F] must be escaped. + // + // Escape space characters that would be normalized to #x20 in attribute values + // when the document is reparsed. + // + // Escape NEL (0x85) and LSEP (0x2028) that appear in content + // if the document is XML 1.1, since they would be normalized to LF + // when the document is reparsed. + if (fXML11 && ((c >= 0x01 && c <= 0x1F && c != 0x09 && c != 0x0A) + || (c >= 0x7F && c <= 0x9F) || c == 0x2028) + || isAttValue && (c == 0x09 || c == 0x0A)) { + fOut.print("&#x"); + fOut.print(Integer.toHexString(c).toUpperCase()); + fOut.print(";"); + } + else { + fOut.print(c); + } } } - - } // normalizeAndPrint(char) + } // normalizeAndPrint(char,boolean) /** Prints the error message. */ protected void printError(String type, SAXParseException ex) { @@ -435,6 +515,27 @@ System.err.flush(); } // printError(String,SAXParseException) + + /** Extracts the XML version from the Locator. */ + protected String getVersion() { + if (fLocator == null) { + return null; + } + String version = null; + Method getXMLVersion = null; + try { + getXMLVersion = fLocator.getClass().getMethod("getXMLVersion", new Class[]{}); + // If Locator implements Locator2, this method will exist. + if (getXMLVersion != null) { + version = (String) getXMLVersion.invoke(fLocator, null); + } + } + catch (Exception e) { + // Either this locator object doesn't have + // this method, or we're on an old JDK. + } + return version; + } // getVersion() // // Main
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]