Hello again! A few hours ago, I wrote: […] > > I’m attaching my test code, which I hope is correct and readable.
// It seems that the list software stripped off the attachment with the test
// program, so here it is again, inline: :-/

// This document is encoded in UTF-8, with no BOM and with LF line endings.
// (The source below is pure ASCII; the one non-ASCII character it needs is
// written as a Unicode escape so that compilation does not depend on the
// -encoding setting of the compiler.)

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Text;

/**
 * Small stand-alone test program: parses a tiny XML document that is labelled
 * as ISO-8859-1, appends a text node containing a non-ASCII character, and
 * then serializes the DOM as UTF-8 with each available transformer factory,
 * dumping the resulting bytes so the encodings can be compared.
 */
public class TransformerTest {

    /** Number of byte values printed per output line by {@link #printArray}. */
    private static final int BYTES_PER_LINE = 16;

    /**
     * Convenience method, prints a byte array in human-readable form.
     *
     * <p>Printable ASCII bytes (0x20 up to and including 0x7E) are printed as
     * quoted characters, every other byte as a two-digit hexadecimal value.
     * Both forms are four columns wide. Values are separated by commas, with
     * a line break after every {@value #BYTES_PER_LINE} values.
     *
     * @param data the bytes to print; {@code null} and the empty array are
     *             reported as {@code (null)} and {@code (empty array)}
     * @param out  the stream to print to
     */
    private static void printArray(byte[] data, PrintStream out) {
        if (data == null) {
            out.println("(null)");
            return;
        }
        if (data.length == 0) {
            out.println("(empty array)");
            return;
        }
        for (int i = 0; i < data.length; i++) {
            if (i > 0) {
                // separator between the previous value and this one
                if (i % BYTES_PER_LINE == 0) {
                    out.println(',');
                } else {
                    out.print(", ");
                }
            }
            byte b = data[i];
            // 0x7F (DEL) is a control character, so it is excluded here and
            // shown in hex; bytes >= 0x80 are negative and fail the first test.
            if (b >= 0x20 && b < 0x7F) {
                out.print(" '" + (char) b + "'");
            } else {
                out.format("0x%02X", b & 0xFF);
            }
        }
        out.println();
    }

    /**
     * Parses a {@code Document} from an {@code InputStream} (and not from a
     * {@code StringReader}, in order to make sure it's not a problem when
     * parsing internal Unicode {@code String}s).
     *
     * @return the parsed document, consisting of a single empty {@code x}
     *         element
     * @throws Exception if the parser cannot be created or parsing fails
     */
    private static Document makeDocument() throws Exception {
        DocumentBuilder builder =
                DocumentBuilderFactory.newInstance().newDocumentBuilder();
        // document is encoded in US-ASCII, but labelled as a superset
        byte[] docBytes = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?><x/>"
                .getBytes(StandardCharsets.US_ASCII);
        return builder.parse(new ByteArrayInputStream(docBytes));
    }

    /**
     * Prepares a list of transformer factories to test.
     *
     * <p>Each candidate implementation is instantiated by class name; a
     * candidate that cannot be instantiated is reported on standard output
     * and skipped.
     *
     * @return the factories that could be instantiated, in candidate order
     */
    private static List<TransformerFactory> makeTransformerFactories() {
        String[] facNames = {
            "org.apache.xalan.processor.TransformerFactoryImpl",
            "org.apache.xalan.xsltc.trax.TransformerFactoryImpl",
            "com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl"
        };
        List<TransformerFactory> transFactories = new ArrayList<>();
        for (String facName : facNames) {
            try {
                transFactories.add(TransformerFactory.newInstance(facName, null));
            } catch (TransformerFactoryConfigurationError ex) {
                System.out.println("cannot build instance of " + facName);
            }
        }
        // To also test the platform default implementation, add:
        //transFactories.add(TransformerFactory.newInstance()); // default impl
        return transFactories;
    }

    /**
     * Serializes {@code doc} as XML with a transformer obtained from
     * {@code transFac}, requesting UTF-8 as the output encoding, and prints
     * the class names of the factory and of the transformer used.
     *
     * @param transFac the factory that creates the identity transformer
     * @param doc      the document to serialize
     * @return the serialized document as raw bytes
     * @throws Exception if the transformer cannot be created or the
     *                   transformation fails
     */
    private static byte[] transformWith(TransformerFactory transFac, Document doc)
            throws Exception {
        Transformer trans = transFac.newTransformer();
        trans.setOutputProperty(OutputKeys.METHOD, "xml");
        trans.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        System.out.println("transformer factory: " + transFac.getClass().getName());
        System.out.println("transformer: " + trans.getClass().getName());
        ByteArrayOutputStream bout = new ByteArrayOutputStream(128);
        trans.transform(new DOMSource(doc), new StreamResult(bout));
        return bout.toByteArray();
    }

    /**
     * Runs the test: builds the document, appends the text node, and prints
     * the serialized bytes produced by every available transformer factory.
     *
     * @param args ignored
     * @throws Exception if parsing or any transformation fails
     */
    public static void main(String[] args) throws Exception {
        Document doc = makeDocument();
        System.out.println(
                "encoding declared in document : " + doc.getXmlEncoding());

        // now insert a text node containing a character which can be
        // represented in ISO-8859-1, as well as (differently) in UTF-8, but not
        // in US-ASCII: U+00F6, LATIN SMALL LETTER O WITH DIAERESIS
        Element dstRoot = doc.getDocumentElement();
        Text text = doc.createTextNode("sch\u00F6n");
        dstRoot.insertBefore(text, null); // null reference node: append at end
        System.out.println("root element text: " + dstRoot.getTextContent());

        List<TransformerFactory> factories = makeTransformerFactories();
        for (TransformerFactory factory : factories) {
            System.out.println();
            byte[] bytes = transformWith(factory, doc);
            System.out.println("resulting bytes:");
            printArray(bytes, System.out);
        }
    }
}

// -- Nico