cziegeler 2003/02/26 07:20:45
Modified: src/java/org/apache/cocoon/xml/dom DOMStreamer.java src/java/org/apache/cocoon/serialization AbstractTextSerializer.java src/java/org/apache/cocoon/xml XMLUtils.java Log: Applying patch from Bruno Dumon ([EMAIL PROTECTED]) that is a workaround for Xalan bug in serializing dom to sax Revision Changes Path 1.7 +514 -46 xml-cocoon2/src/java/org/apache/cocoon/xml/dom/DOMStreamer.java Index: DOMStreamer.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/xml/dom/DOMStreamer.java,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- DOMStreamer.java 31 Jan 2003 22:52:00 -0000 1.6 +++ DOMStreamer.java 26 Feb 2003 15:20:44 -0000 1.7 @@ -51,102 +51,570 @@ package org.apache.cocoon.xml.dom; import org.apache.cocoon.xml.AbstractXMLProducer; -import org.apache.cocoon.xml.EmbeddedXMLPipe; import org.apache.cocoon.xml.XMLConsumer; -import org.w3c.dom.Node; +import org.apache.cocoon.xml.EmbeddedXMLPipe; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; +import org.xml.sax.helpers.AttributesImpl; import org.xml.sax.ext.LexicalHandler; +import org.w3c.dom.*; +import javax.xml.transform.TransformerFactory; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; import javax.xml.transform.sax.SAXResult; +import javax.xml.transform.dom.DOMSource; +import java.util.Iterator; +import java.util.Map; +import java.util.HashMap; /** * The <code>DOMStreamer</code> is a utility class that will generate SAX * events from a W3C DOM Document. 
* - * @author <a href="mailto:[EMAIL PROTECTED]">Carsten Ziegeler</a> - * @author <a href="mailto:[EMAIL PROTECTED]">Pierpaolo Fumagalli</a> - * (Apache Software Foundation, Exoffice Technologies) + * <p>The DOMStreamer uses a different strategy based on the value of the + * normalizeNamespacesOn property: + * <ul> + * <li>if true (the default), the DOMStreamer will normalize namespace + * declarations (i.e. add missing xmlns attributes or correct them). See + * also [EMAIL PROTECTED] NamespaceNormalizingDOMStreamer}. + * <li>if false, the standard JAXP identity transformer is used. + * </ul> + * * @version CVS $Id$ */ public class DOMStreamer extends AbstractXMLProducer { - /** The transformer factory shared by all instances */ - protected static TransformerFactory factory = TransformerFactory.newInstance(); + /** Indicates whether namespace normalization should happen. */ + protected boolean normalizeNamespacesOn = true; - /** The private transformer for this instance */ - protected Transformer transformer; + /** DOMStreamer used in case of namespace normalization. */ + protected NamespaceNormalizingDOMStreamer namespaceNormalizingDOMStreamer = new NamespaceNormalizingDOMStreamer(); + + /** DOMStreamer used when namespace normalization should not explicitely happen. */ + protected DefaultDOMStreamer defaultDOMStreamer = new DefaultDOMStreamer(); + + /** The transformer factory shared by all instances (only used by DefaultDOMStreamer) */ + protected static TransformerFactory factory = TransformerFactory.newInstance(); /** - * Create a new <code>DOMStreamer</code> instance. + * Create a new <code>NamespaceNormalizingDOMStreamer</code> instance. */ public DOMStreamer() { super(); } /** - * Create a new <code>DOMStreamer</code> instance. + * Create a new <code>NamespaceNormalizingDOMStreamer</code> instance. */ public DOMStreamer(XMLConsumer consumer) { this(consumer, consumer); } /** - * Create a new <code>DOMStreamer</code> instance. 
+ * Create a new <code>NamespaceNormalizingDOMStreamer</code> instance. */ public DOMStreamer(ContentHandler content) { - this(content,null); + this(content, null); if (content instanceof LexicalHandler) { - this.setLexicalHandler((LexicalHandler)content); + setLexicalHandler((LexicalHandler) content); } } /** - * Create a new <code>DOMStreamer</code> instance. + * Create a new <code>NamespaceNormalizingDOMStreamer</code> instance. */ public DOMStreamer(ContentHandler content, LexicalHandler lexical) { this(); - super.setContentHandler(content); - super.setLexicalHandler(lexical); + setContentHandler(content); + setLexicalHandler(lexical); } /** * Start the production of SAX events. */ - public void stream(Node node) - throws SAXException { - if (this.transformer == null) { - try { - this.transformer = factory.newTransformer(); - } catch (TransformerConfigurationException e) { - getLogger().error("DOMStreamer", e); - throw new SAXException(e); + public void stream(Node node) throws SAXException { + if (normalizeNamespacesOn) + namespaceNormalizingDOMStreamer.stream(node); + else + defaultDOMStreamer.stream(node); + } + + public boolean isNormalizeNamespacesOn() { + return normalizeNamespacesOn; + } + + public void setNormalizeNamespacesOn(boolean normalizeNamespacesOn) { + this.normalizeNamespacesOn = normalizeNamespacesOn; + } + + public void recycle() { + super.recycle(); + namespaceNormalizingDOMStreamer.recycle(); + normalizeNamespacesOn = true; + } + + /** + * Streams a DOM tree to SAX events and normalizes namespace declarations on the way. + * + * <p>The code in this class is based on the org.apache.xml.utils.TreeWalker class from Xalan, + * though it differs in some important ways. + * + * <p>This class will automatically fix up ("normalize") namespace declarations + * while streaming to SAX. The original DOM-tree is not modified. 
The algorithm + * used is described in + * <a href="http://www.w3.org/TR/2002/WD-DOM-Level-3-Core-20021022/namespaces-algorithms.html#normalizeDocumentAlgo">an appendix of the DOM Level 3 spec</a>. + * + * <p>Additionally, this class does not support serializing DOM Level 1 trees. In + * other words, the localName property of elements and attributes should always have + * a value. + * + * <p>This class will NOT check the correctness of namespaces, e.g. it will not + * check that the "xml" prefix is not misused etc. + * + * <p>This class has of course more overhead then the normal DOM-streamer, so only + * use it in cases where it's needed. + * + * @author Bruno Dumon (bruno at outerthought dot org) + * @author Xalan team + */ + public class NamespaceNormalizingDOMStreamer { + /** + * Information about the current element. Used to remember the localName, qName + * and namespaceURI for generating the endElement event, and holds the namespaces + * declared on the element. This extra class is needed because we don't want to + * modify the DOM-tree itself. The currentElementInfo has a pointer to its parent + * elementInfo. + */ + protected NamespaceNormalizingDOMStreamer.ElementInfo currentElementInfo = null; + + /** Counter used when generating new namespace prefixes. */ + protected int newPrefixCounter = 0; + + public void recycle() { + currentElementInfo = null; + newPrefixCounter = 0; + } + + /** + * Start the production of SAX events. + * + * <p>Perform a pre-order traversal non-recursive style. + * + * <p>Note that TreeWalker assumes that the subtree is intended to represent + * a complete (though not necessarily well-formed) document and, during a + * traversal, startDocument and endDocument will always be issued to the + * SAX listener. 
+         *
+         * @param pos Node in the tree where to start traversal
+         *
+         */
+        protected void stream(Node pos) throws SAXException {
+
+            contentHandler.startDocument();
+
+            Node top = pos;
+
+            while (null != pos) {
+                startNode(pos);
+
+                Node nextNode = pos.getFirstChild();
+
+                while (null == nextNode) {
+                    endNode(pos);
+
+                    if (top.equals(pos))
+                        break;
+
+                    nextNode = pos.getNextSibling();
+
+                    if (null == nextNode) {
+                        pos = pos.getParentNode();
+
+                        if ((null == pos) || (top.equals(pos))) {
+                            if (null != pos)
+                                endNode(pos);
+
+                            nextNode = null;
+
+                            break;
+                        }
+                    }
+                }
+
+                pos = nextNode;
             }
+            contentHandler.endDocument();
         }
-        DOMSource source = new DOMSource(node);
-        ContentHandler handler;
-        if (node.getNodeType() == Node.DOCUMENT_NODE) {
-            // Pass all SAX events
-            handler = super.contentHandler;
-        } else {
-            // Strip start/endDocument
-            handler = new EmbeddedXMLPipe(super.contentHandler);
-        }
-
-        SAXResult result = new SAXResult(handler);
-        result.setLexicalHandler(super.lexicalHandler);
-
-        try {
-            transformer.transform(source, result);
-        } catch (TransformerException e) {
-            getLogger().error("DOMStreamer", e);
-            throw new SAXException(e);
+        /**
+         * Sends the character data of a Text (or CDATASection) node to the
+         * content handler as a single characters event.
+         *
+         * @param node the node to read the data from; it is cast to Text
+         */
+        private final void dispatchChars(Node node) throws SAXException {
+            String data = ((Text) node).getData();
+            contentHandler.characters(data.toCharArray(), 0, data.length());
+        }
+
+        /**
+         * Start processing given node.
+         *
+         * <p>For elements this collects the namespace declarations found on the
+         * element, adds the declarations that are missing for the element and its
+         * attributes, fires one startPrefixMapping event per declaration made on
+         * the element and finally fires startElement.
+         *
+         * @param node Node to process
+         * @throws SAXException if an element or attribute has no localName
+         *         (DOM Level 1 node), or if a handler throws it
+         */
+        protected void startNode(Node node) throws SAXException {
+
+            switch (node.getNodeType()) {
+                case Node.COMMENT_NODE:
+                    {
+                        if (lexicalHandler != null) {
+                            String data = ((Comment) node).getData();
+                            lexicalHandler.comment(data.toCharArray(), 0, data.length());
+                        }
+                    }
+                    break;
+                case Node.DOCUMENT_FRAGMENT_NODE:
+
+                    // ??;
+                    break;
+                case Node.DOCUMENT_NODE:
+
+                    break;
+                case Node.ELEMENT_NODE:
+                    NamedNodeMap atts = node.getAttributes();
+                    int nAttrs = atts.getLength();
+
+                    // create a list of locally declared namespace prefixes
+                    currentElementInfo = new NamespaceNormalizingDOMStreamer.ElementInfo(currentElementInfo);
+                    for (int i = 0; i < nAttrs; i++) {
+                        Node attr = atts.item(i);
+                        String attrName = attr.getNodeName();
+
+                        if (attrName.equals("xmlns") || attrName.startsWith("xmlns:")) {
+                            int index;
+                            String prefix = (index = attrName.indexOf(":")) < 0
+                                ? "" : attrName.substring(index + 1);
+
+                            currentElementInfo.put(prefix, attr.getNodeValue());
+                        }
+                    }
+
+                    String namespaceURI = node.getNamespaceURI();
+                    String prefix = node.getPrefix();
+                    String localName = node.getLocalName();
+
+                    if (localName == null)
+                        throw new SAXException("[NamespaceNormalizingDOMStreamer] Encountered a DOM Element without a localName. DOM Level 1 trees are not supported by this DOMStreamer.");
+
+                    if (namespaceURI != null) {
+                        // no prefix means: make this the default namespace
+                        if (prefix == null)
+                            prefix = "";
+
+                        // check that is declared
+                        String uri = currentElementInfo.findNamespaceURI(prefix);
+                        if (uri != null && uri.equals(namespaceURI)) {
+                            // prefix is declared correctly, do nothing
+                        } else if (uri != null) {
+                            // prefix exists but is bound to another namespace, overwrite it
+                            currentElementInfo.put(prefix, namespaceURI);
+                        } else {
+                            // prefix is not yet declared, declare it now
+                            currentElementInfo.put(prefix, namespaceURI);
+                        }
+                    } else {
+                        // element has no namespace
+                        // check if there is a default namespace, if so undeclare it
+                        String uri = currentElementInfo.findNamespaceURI("");
+                        if (uri != null && !uri.equals("")) {
+                            currentElementInfo.put("", "");
+                        }
+                    }
+
+                    // SAX uses empty string to denote no namespace, while DOM uses null.
+                    if (namespaceURI == null)
+                        namespaceURI = "";
+
+                    String qName;
+                    if (prefix != null && prefix.length() > 0)
+                        qName = prefix + ":" + localName;
+                    else
+                        qName = localName;
+
+                    // make the attributes
+                    AttributesImpl newAttrs = new AttributesImpl();
+                    for (int i = 0; i < nAttrs; i++) {
+                        Node attr = atts.item(i);
+                        String attrName = attr.getNodeName();
+                        String attrPrefix = null;
+
+                        if (attr.getLocalName() == null)
+                            throw new SAXException("[NamespaceNormalizingDOMStreamer] Encountered an attribute without a local name, this DOM streamer does not support that.");
+
+                        // skip the xmlns declarations here, they are regenerated below
+                        if (!(attrName.equals("xmlns") || attrName.startsWith("xmlns:"))) {
+                            if (attr.getNamespaceURI() != null) {
+                                String declaredUri = currentElementInfo.findNamespaceURI(attr.getPrefix());
+                                // if the prefix is null, or the prefix has not been declared, or conflicts with an in-scope binding
+                                if (declaredUri == null || !declaredUri.equals(attr.getNamespaceURI())) {
+                                    String availablePrefix = currentElementInfo.findPrefix(attr.getNamespaceURI());
+                                    // the default namespace never applies to attributes, so an
+                                    // empty prefix cannot be reused for a namespaced attribute
+                                    if (availablePrefix != null && availablePrefix.length() > 0)
+                                        attrPrefix = availablePrefix;
+                                    else {
+                                        if (attr.getPrefix() != null && declaredUri == null) {
+                                            // prefix is not null and is not yet declared: declare the
+                                            // attribute's own prefix (not the element's) and use it
+                                            attrPrefix = attr.getPrefix();
+                                            currentElementInfo.put(attrPrefix, attr.getNamespaceURI());
+                                        } else {
+                                            // attribute has no prefix (which is not allowed for namespaced attributes) or
+                                            // the prefix is already bound to something else: generate a new prefix
+                                            newPrefixCounter++;
+                                            attrPrefix = "NS" + newPrefixCounter;
+                                            currentElementInfo.put(attrPrefix, attr.getNamespaceURI());
+                                        }
+                                    }
+                                } else {
+                                    attrPrefix = attr.getPrefix();
+                                }
+                            }
+
+                            String attrNamespaceURI = attr.getNamespaceURI() != null ? attr.getNamespaceURI() : "";
+                            String attrQName;
+                            if (attrPrefix != null)
+                                attrQName = attrPrefix + ":" + attr.getLocalName();
+                            else
+                                attrQName = attr.getLocalName();
+                            newAttrs.addAttribute(attrNamespaceURI, attr.getLocalName(), attrQName, "CDATA", attr.getNodeValue());
+                        }
+                    }
+
+                    // add local namespace declaration and fire startPrefixMapping events
+                    if (currentElementInfo.namespaceDeclarations != null && currentElementInfo.namespaceDeclarations.size() > 0) {
+                        Iterator localNsDeclIt = currentElementInfo.namespaceDeclarations.entrySet().iterator();
+                        while (localNsDeclIt.hasNext()) {
+                            Map.Entry entry = (Map.Entry) localNsDeclIt.next();
+                            String pr = (String) entry.getKey();
+                            String ns = (String) entry.getValue();
+                            String pr1 = pr.equals("") ? "xmlns" : pr;
+                            String qn = pr.equals("") ? "xmlns" : "xmlns:" + pr;
+                            newAttrs.addAttribute("", pr1, qn, "CDATA", ns);
+                            // report the prefix of this declaration, so that it matches the
+                            // endPrefixMapping event fired for the same map key in endNode
+                            contentHandler.startPrefixMapping(pr, ns);
+                        }
+                    }
+
+                    contentHandler.startElement(namespaceURI, localName, qName, newAttrs);
+
+                    currentElementInfo.localName = localName;
+                    currentElementInfo.namespaceURI = namespaceURI;
+                    currentElementInfo.qName = qName;
+                    break;
+                case Node.PROCESSING_INSTRUCTION_NODE:
+                    {
+                        ProcessingInstruction pi = (ProcessingInstruction) node;
+                        contentHandler.processingInstruction(pi.getNodeName(), pi.getData());
+                    }
+                    break;
+                case Node.CDATA_SECTION_NODE:
+                    {
+                        if (lexicalHandler != null)
+                            lexicalHandler.startCDATA();
+
+                        dispatchChars(node);
+
+                        if (lexicalHandler != null)
+                            lexicalHandler.endCDATA();
+                    }
+                    break;
+                case Node.TEXT_NODE:
+                    {
+                        dispatchChars(node);
+                    }
+                    break;
+                case Node.ENTITY_REFERENCE_NODE:
+                    {
+                        EntityReference eref = (EntityReference) node;
+
+                        if (lexicalHandler != null) {
+                            lexicalHandler.startEntity(eref.getNodeName());
+                        } else {
+                            // warning("Can not output entity to a pure SAX ContentHandler");
+                        }
+                    }
+                    break;
+                default :
+            }
+        }
+
+
+        /**
+         * End processing of given node
+         *
+         * @param node Node we just finished processing
+         */
+        protected void endNode(Node node) throws org.xml.sax.SAXException {
+
+            switch (node.getNodeType()) {
+                case Node.DOCUMENT_NODE:
+                    break;
+
+                case Node.ELEMENT_NODE:
+                    contentHandler.endElement(currentElementInfo.namespaceURI,
+                            currentElementInfo.localName, currentElementInfo.qName);
+
+                    // generate endPrefixMapping events if needed
+                    if (currentElementInfo.namespaceDeclarations != null && currentElementInfo.namespaceDeclarations.size() > 0) {
+                        Iterator namespaceIt = currentElementInfo.namespaceDeclarations.entrySet().iterator();
+                        while (namespaceIt.hasNext()) {
+                            Map.Entry entry = (Map.Entry) namespaceIt.next();
+                            contentHandler.endPrefixMapping((String) entry.getKey());
+                        }
+                    }
+
+                    // leave the scope of this element
+                    currentElementInfo = currentElementInfo.parent;
+                    break;
+                case Node.CDATA_SECTION_NODE:
+                    break;
+                case Node.ENTITY_REFERENCE_NODE:
+                    {
+                        EntityReference eref = (EntityReference) node;
+
+                        if (lexicalHandler != null) {
+                            lexicalHandler.endEntity(eref.getNodeName());
+                        }
+                    }
+                    break;
+                default :
+            }
+        }
+
+        public class ElementInfo {
+            public String localName;
+            public String namespaceURI;
+            public String qName;
+            public Map namespaceDeclarations = null;
+            public NamespaceNormalizingDOMStreamer.ElementInfo parent;
+
+            public ElementInfo(NamespaceNormalizingDOMStreamer.ElementInfo parent) {
+                this.parent = parent;
+            }
+
+            /**
+             * Declare a new namespace prefix on this element, possibly overriding
+             * an existing one.
+             */
+            public void put(String prefix, String namespaceURI) {
+                if (namespaceDeclarations == null)
+                    namespaceDeclarations = new HashMap();
+                namespaceDeclarations.put(prefix, namespaceURI);
+            }
+
+            /**
+             * Finds a prefix declared on this element.
+             */
+            public String getPrefix(String namespaceURI) {
+                if (namespaceDeclarations == null || namespaceDeclarations.size() == 0)
+                    return null;
+                // note: there could be more than one prefix for the same namespaceURI, but
+                // we return the first found one.
+                Iterator it = namespaceDeclarations.entrySet().iterator();
+                while (it.hasNext()) {
+                    Map.Entry entry = (Map.Entry) it.next();
+                    if (entry.getValue().equals(namespaceURI))
+                        return (String) entry.getKey();
+                }
+                return null;
+            }
+
+            /**
+             * Finds a namespace URI declared on this element.
+             *
+             * @return the URI bound to the prefix on this element itself, or null
+             *         if this element does not declare the prefix
+             */
+            public String getNamespaceURI(String prefix) {
+                if (namespaceDeclarations == null || namespaceDeclarations.size() == 0)
+                    return null;
+
+                return (String) namespaceDeclarations.get(prefix);
+            }
+
+            /**
+             * Finds a prefix declaration on this element or containing elements.
+             *
+             * @return a prefix that is bound to the given namespace URI in the
+             *         scope of this element, or null if none was found
+             */
+            public String findPrefix(String namespaceURI) {
+                // getPrefix copes with an element that has no declarations of its own,
+                // so the search always continues in the containing elements
+                String prefix = getPrefix(namespaceURI);
+                if (prefix != null)
+                    return prefix;
+                if (parent == null)
+                    return null;
+
+                String inherited = parent.findPrefix(namespaceURI);
+                // no local prefix maps to namespaceURI at this point, so a local binding
+                // of the inherited prefix points to another URI and hides the inherited one
+                if (inherited != null && getNamespaceURI(inherited) != null)
+                    return null;
+                return inherited;
+            }
+
+            /**
+             * Finds a namespace declaration on this element or containing elements.
+             *
+             * @return the URI bound to the prefix by the nearest declaration, or
+             *         null if the prefix is not declared in scope
+             */
+            public String findNamespaceURI(String prefix) {
+                // an element without local declarations inherits everything from its parent
+                String uri = getNamespaceURI(prefix);
+                if (uri != null)
+                    return uri;
+                else if (parent != null)
+                    return parent.findNamespaceURI(prefix);
+                else
+                    return null;
+            }
+        }
+    }
+
+    /**
+     * The <code>DefaultDOMStreamer</code> is a utility class that will generate SAX
+     * events from a W3C DOM Document.
+ * + * @author <a href="mailto:[EMAIL PROTECTED]">Carsten Ziegeler</a> + * @author <a href="mailto:[EMAIL PROTECTED]">Pierpaolo Fumagalli</a> + * (Apache Software Foundation, Exoffice Technologies) + */ + public class DefaultDOMStreamer { + + /** The private transformer for this instance */ + protected Transformer transformer; + + /** + * Start the production of SAX events. + */ + public void stream(Node node) + throws SAXException { + if (this.transformer == null) { + try { + this.transformer = factory.newTransformer(); + } catch (TransformerConfigurationException e) { + getLogger().error("DefaultDOMStreamer", e); + throw new SAXException(e); + } + } + DOMSource source = new DOMSource(node); + + ContentHandler handler; + if (node.getNodeType() == Node.DOCUMENT_NODE) { + // Pass all SAX events + handler = contentHandler; + } else { + // Strip start/endDocument + handler = new EmbeddedXMLPipe(contentHandler); + } + + SAXResult result = new SAXResult(handler); + result.setLexicalHandler(lexicalHandler); + + try { + transformer.transform(source, result); + } catch (TransformerException e) { + getLogger().error("DefaultDOMStreamer", e); + throw new SAXException(e); + } } } } 1.21 +2 -6 xml-cocoon2/src/java/org/apache/cocoon/serialization/AbstractTextSerializer.java Index: AbstractTextSerializer.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/serialization/AbstractTextSerializer.java,v retrieving revision 1.20 retrieving revision 1.21 diff -u -r1.20 -r1.21 --- AbstractTextSerializer.java 31 Jan 2003 22:51:54 -0000 1.20 +++ AbstractTextSerializer.java 26 Feb 2003 15:20:44 -0000 1.21 @@ -174,11 +174,7 @@ */ protected TransformerHandler getTransformerHandler() throws javax.xml.transform.TransformerException { - // return this.getTransformerFactory().newTransformerHandler(); - // FIXME - This is a workaround for bug #5779 of Xalan - return this.getTransformerFactory().newTransformerHandler( 
- new javax.xml.transform.stream.StreamSource(new java.io.StringReader(org.apache.cocoon.xml.XMLUtils.xalanBugStylesheet))); - // End workaround + return this.getTransformerFactory().newTransformerHandler(); } /** 1.15 +11 -17 xml-cocoon2/src/java/org/apache/cocoon/xml/XMLUtils.java Index: XMLUtils.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/xml/XMLUtils.java,v retrieving revision 1.14 retrieving revision 1.15 diff -u -r1.14 -r1.15 --- XMLUtils.java 26 Feb 2003 15:02:39 -0000 1.14 +++ XMLUtils.java 26 Feb 2003 15:20:45 -0000 1.15 @@ -57,11 +57,10 @@ import java.util.Properties; import javax.xml.transform.OutputKeys; -import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.sax.SAXTransformerFactory; +import javax.xml.transform.sax.TransformerHandler; import javax.xml.transform.stream.StreamResult; -import javax.xml.transform.stream.StreamSource; import org.apache.cocoon.ProcessingException; import org.apache.cocoon.xml.dom.DOMStreamer; @@ -213,12 +212,6 @@ return format; } - // FIXME - for Xalan bug - public static String xalanBugStylesheet = "<?xml version=\"1.0\"?><xsl:stylesheet version=\"1.0\""+ - " xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">"+ - "<xsl:template match=\"node()|@*\"><xsl:copy>"+ - "<xsl:apply-templates select=\"node()|@*\"/>"+ - "</xsl:copy></xsl:template></xsl:stylesheet>"; /** * Serialize a DOM node to a String. * The format of the output can be specified with the properties. 
@@ -229,16 +222,17 @@ try { if (node == null) return ""; StringWriter writer = new StringWriter(); - Transformer transformer; - // transformer = TransformerFactory.newInstance().newTransformer(); - // FIXME - This is a workaround for bug #5779 of Xalan - transformer = TransformerFactory.newInstance().newTransformer(new StreamSource(new java.io.StringReader(xalanBugStylesheet))); - // End workaround - transformer.setOutputProperties(format); - transformer.transform(new DOMSource(node), new StreamResult(writer)); + TransformerHandler transformerHandler; + transformerHandler = ((SAXTransformerFactory)TransformerFactory.newInstance()).newTransformerHandler(); + transformerHandler.getTransformer().setOutputProperties(format); + transformerHandler.setResult(new StreamResult(writer)); + DOMStreamer domStreamer = new DOMStreamer(transformerHandler, transformerHandler); + domStreamer.stream(node); return writer.toString(); } catch (javax.xml.transform.TransformerException local) { throw new ProcessingException("TransformerException: " + local, local); + } catch (SAXException local) { + throw new ProcessingException("SAXException while streaming DOM node to SAX: " + local, local); } }