http://git-wip-us.apache.org/repos/asf/incubator-juneau/blob/21c0e1ea/juneau-core/src/main/java/org/apache/juneau/xml/XmlParser.java ---------------------------------------------------------------------- diff --git a/juneau-core/src/main/java/org/apache/juneau/xml/XmlParser.java b/juneau-core/src/main/java/org/apache/juneau/xml/XmlParser.java index 85af340..378c22b 100644 --- a/juneau-core/src/main/java/org/apache/juneau/xml/XmlParser.java +++ b/juneau-core/src/main/java/org/apache/juneau/xml/XmlParser.java @@ -23,7 +23,6 @@ import javax.xml.stream.*; import org.apache.juneau.*; import org.apache.juneau.annotation.*; -import org.apache.juneau.internal.*; import org.apache.juneau.parser.*; import org.apache.juneau.transform.*; import org.apache.juneau.xml.annotation.*; @@ -81,11 +80,7 @@ public class XmlParser extends ReaderParser { String wrapperAttr = (isRoot && session.isPreserveRootElement()) ? r.getName().getLocalPart() : null; String typeAttr = r.getAttributeValue(null, session.getBeanTypePropertyName()); int jsonType = getJsonType(typeAttr); - String b = r.getAttributeValue(session.getXsiNs(), "nil"); - if (b == null) - b = r.getAttributeValue(null, "nil"); - boolean isNull = b != null && b.equals("true"); - String elementName = session.decodeString(r.getLocalName()); + String elementName = session.getElementName(r); if (jsonType == 0) { if (elementName == null || elementName.equals(currAttr)) jsonType = UNKNOWN; @@ -97,7 +92,7 @@ public class XmlParser extends ReaderParser { if (breg.hasName(typeAttr)) { sType = eType = (ClassMeta<T>)breg.getClassMeta(typeAttr); - } else if (breg.hasName(elementName)) { + } else if (elementName != null && breg.hasName(elementName) && ! 
elementName.equals(currAttr)) { sType = eType = (ClassMeta<T>)breg.getClassMeta(elementName); } @@ -107,13 +102,6 @@ public class XmlParser extends ReaderParser { r.nextTag(); // Discard end tag return null; } - if (isNull) { - while (true) { - int e = r.next(); - if (e == END_ELEMENT) - return null; - } - } if (sType.isObject()) { if (jsonType == OBJECT) { @@ -125,22 +113,22 @@ public class XmlParser extends ReaderParser { } else if (jsonType == ARRAY) o = parseIntoCollection(session, r, new ObjectList(session), object(), pMeta); else if (jsonType == STRING) { - o = session.decodeString(r); + o = session.getElementText(r); if (sType.isChar()) o = o.toString().charAt(0); } else if (jsonType == NUMBER) - o = parseNumber(session.decodeText(r), null); + o = parseNumber(session.getElementText(r), null); else if (jsonType == BOOLEAN) - o = Boolean.parseBoolean(session.decodeText(r)); + o = Boolean.parseBoolean(session.getElementText(r)); else if (jsonType == UNKNOWN) o = getUnknown(session, r); } else if (sType.isBoolean()) { - o = Boolean.parseBoolean(session.decodeText(r)); + o = Boolean.parseBoolean(session.getElementText(r)); } else if (sType.isCharSequence()) { - o = session.decodeString(r); + o = session.getElementText(r); } else if (sType.isChar()) { - String s = session.decodeString(r); + String s = session.getElementText(r); o = s.length() == 0 ? 0 : s.charAt(0); } else if (sType.isMap()) { Map m = (sType.canCreateNewInstance(outer) ? (Map)sType.newInstance(outer) : new ObjectMap(session)); @@ -151,7 +139,7 @@ public class XmlParser extends ReaderParser { Collection l = (sType.canCreateNewInstance(outer) ? (Collection)sType.newInstance(outer) : new ObjectList(session)); o = parseIntoCollection(session, r, l, sType.getElementType(), pMeta); } else if (sType.isNumber()) { - o = parseNumber(session.decodeText(r), (Class<? extends Number>)sType.getInnerClass()); + o = parseNumber(session.getElementText(r), (Class<? 
extends Number>)sType.getInnerClass()); } else if (sType.canCreateNewInstanceFromObjectMap(outer)) { ObjectMap m = new ObjectMap(session); parseIntoMap(session, r, m, string(), object(), pMeta); @@ -174,9 +162,9 @@ public class XmlParser extends ReaderParser { ArrayList l = (ArrayList)parseIntoCollection(session, r, new ArrayList(), sType.getElementType(), pMeta); o = session.toArray(sType, l); } else if (sType.canCreateNewInstanceFromString(outer)) { - o = sType.newInstanceFromString(outer, session.decodeString(r)); + o = sType.newInstanceFromString(outer, session.getElementText(r)); } else if (sType.canCreateNewInstanceFromNumber(outer)) { - o = sType.newInstanceFromNumber(session, outer, parseNumber(session.decodeText(r), sType.getNewInstanceFromNumberClass())); + o = sType.newInstanceFromNumber(session, outer, parseNumber(session.getElementText(r), sType.getNewInstanceFromNumberClass())); } else { throw new ParseException(session, "Class ''{0}'' could not be instantiated. Reason: ''{1}'', property: ''{2}''", sType.getInnerClass().getName(), sType.getNotABeanReason(), pMeta == null ? 
null : pMeta.getName()); } @@ -207,7 +195,7 @@ public class XmlParser extends ReaderParser { String currAttr; if (event == START_ELEMENT) { depth++; - currAttr = session.decodeString(r.getLocalName()); + currAttr = session.getElementName(r); K key = convertAttrToType(session, m, currAttr, keyType); V value = parseAnything(session, valueType, currAttr, r, m, false, pMeta); setName(valueType, value, currAttr); @@ -288,7 +276,7 @@ public class XmlParser extends ReaderParser { XmlBeanMeta xmlMeta = bMeta.getExtendedMeta(XmlBeanMeta.class); for (int i = 0; i < r.getAttributeCount(); i++) { - String key = session.decodeString(r.getAttributeLocalName(i)); + String key = session.getAttributeName(r, i); String val = r.getAttributeValue(i); BeanPropertyMeta bpm = xmlMeta.getPropertyMeta(key); if (bpm == null) { @@ -307,8 +295,11 @@ public class XmlParser extends ReaderParser { BeanPropertyMeta cp = xmlMeta.getContentProperty(); XmlFormat cpf = xmlMeta.getContentFormat(); + boolean trim = cp == null || ! cpf.isOneOf(MIXED_PWS, TEXT_PWS); ClassMeta<?> cpcm = (cp == null ? session.object() : cp.getClassMeta()); - StringBuilder sb = (cpf != null && cpf.isOneOf(TEXT,XMLTEXT) ? session.getStringBuilder() : null); + StringBuilder sb = null; + BeanRegistry breg = cp == null ? null : cp.getBeanRegistry(); + LinkedList<Object> l = null; int depth = 0; do { @@ -317,41 +308,68 @@ public class XmlParser extends ReaderParser { // We only care about text in MIXED mode. // Ignore if in ELEMENTS mode. if (event == CHARACTERS) { - if (cpf == MIXED && cp != null) { - if (cpcm.isCollectionOrArray()) - cp.add(m, session.decodeString(r.getText())); - else - cp.set(m, session.decodeString(r.getText())); - } else if (sb != null) { - String s = r.getText(); - if (! 
StringUtils.isEmpty(s)) + if (cp != null && cpf.isOneOf(MIXED, MIXED_PWS)) { + if (cpcm.isCollectionOrArray()) { + if (l == null) + l = new LinkedList<Object>(); + l.add(session.getText(r, false)); + } else { + cp.set(m, session.getText(r, trim)); + } + } else if (cpf != ELEMENTS) { + String s = session.getText(r, trim); + if (s != null) { + if (sb == null) + sb = session.getStringBuilder(); sb.append(s); + } } else { // Do nothing...we're in ELEMENTS mode. } } else if (event == START_ELEMENT) { - if (cpf == TEXT) { - throw new ParseException("Element found where simple text was expected. {0}", XmlUtils.toReadableEvent(r)); - } else if (cpf == XMLTEXT && sb != null) { - sb.append(session.elementAsString(r)); + if (cp != null && cpf.isOneOf(TEXT, TEXT_PWS)) { + String s = session.parseText(r); + if (s != null) { + if (sb == null) + sb = session.getStringBuilder(); + sb.append(s); + } + depth--; + } else if (cpf == XMLTEXT) { + if (sb == null) + sb = session.getStringBuilder(); + sb.append(session.getElementAsString(r)); depth++; - } else if (cpf == MIXED && cp != null) { - if (cpcm.isCollectionOrArray()) - cp.add(m, parseAnything(session, cpcm.getElementType(), cp.getName(), r, m.getBean(false), false, cp)); - else - cp.set(m, parseAnything(session, cpcm, cp.getName(), r, m.getBean(false), false, cp)); - } else if (cpf == ELEMENTS && cp != null) { + } else if (cp != null && cpf.isOneOf(MIXED, MIXED_PWS)) { + if (session.isWhitespaceElement(r) && (breg == null || ! 
breg.hasName(r.getLocalName()))) { + if (cpcm.isCollectionOrArray()) { + if (l == null) + l = new LinkedList<Object>(); + l.add(session.parseWhitespaceElement(r)); + } else { + cp.set(m, session.parseWhitespaceElement(r)); + } + } else { + if (cpcm.isCollectionOrArray()) { + if (l == null) + l = new LinkedList<Object>(); + l.add(parseAnything(session, cpcm.getElementType(), cp.getName(), r, m.getBean(false), false, cp)); + } else { + cp.set(m, parseAnything(session, cpcm, cp.getName(), r, m.getBean(false), false, cp)); + } + } + } else if (cp != null && cpf == ELEMENTS) { cp.add(m, parseAnything(session, cpcm.getElementType(), cp.getName(), r, m.getBean(false), false, cp)); } else { - currAttr = session.decodeString(r.getLocalName()); + currAttr = session.getElementName(r); BeanPropertyMeta pMeta = xmlMeta.getPropertyMeta(currAttr); if (pMeta == null) { if (m.getMeta().isSubTyped()) { Object value = parseAnything(session, string(), currAttr, r, m.getBean(false), false, null); m.put(currAttr, value); } else { - Location l = r.getLocation(); - onUnknownProperty(session, currAttr, m, l.getLineNumber(), l.getColumnNumber()); + Location loc = r.getLocation(); + onUnknownProperty(session, currAttr, m, loc.getLineNumber(), loc.getColumnNumber()); skipCurrentTag(r); } } else { @@ -363,7 +381,7 @@ public class XmlParser extends ReaderParser { setName(et, value, currAttr); pMeta.add(m, value); } else if (xf == ATTR) { - pMeta.set(m, session.decodeString(r.getAttributeValue(0))); + pMeta.set(m, session.getAttributeValue(r, 0)); r.nextTag(); } else { ClassMeta<?> cm = pMeta.getClassMeta(); @@ -376,18 +394,25 @@ public class XmlParser extends ReaderParser { } } else if (event == END_ELEMENT) { if (depth > 0) { - if (cpf == XMLTEXT && sb != null) - sb.append(session.elementAsString(r)); + if (cpf == XMLTEXT) { + if (sb == null) + sb = session.getStringBuilder(); + sb.append(session.getElementAsString(r)); + } else throw new ParseException("End element found where one was not 
expected. {0}", XmlUtils.toReadableEvent(r)); } depth--; } else { - throw new ParseException("Unexpected event type: {0}", event); + throw new ParseException("Unexpected event type: {0}", XmlUtils.toReadableEvent(r)); } } while (depth >= 0); + if (sb != null && cp != null) - cp.set(m, session.decodeString(sb.toString())); + cp.set(m, sb.toString()); + else if (l != null && cp != null) + cp.set(m, XmlUtils.collapseTextNodes(l)); + session.returnStringBuilder(sb); return m; } @@ -413,7 +438,7 @@ public class XmlParser extends ReaderParser { if (r.getAttributeCount() > 0) { m = new ObjectMap(session); for (int i = 0; i < r.getAttributeCount(); i++) { - String key = session.decodeString(r.getAttributeLocalName(i)); + String key = session.getAttributeName(r, i); String val = r.getAttributeValue(i); if (! key.equals(session.getBeanTypePropertyName())) m.put(key, val); @@ -439,7 +464,7 @@ public class XmlParser extends ReaderParser { String currAttr; if (event == START_ELEMENT) { depth++; - currAttr = session.decodeString(r.getLocalName()); + currAttr = session.getElementName(r); String key = convertAttrToType(session, null, currAttr, string()); Object value = parseAnything(session, object(), currAttr, r, null, false, null); if (m.containsKey(key)) {
http://git-wip-us.apache.org/repos/asf/incubator-juneau/blob/21c0e1ea/juneau-core/src/main/java/org/apache/juneau/xml/XmlParserContext.java ---------------------------------------------------------------------- diff --git a/juneau-core/src/main/java/org/apache/juneau/xml/XmlParserContext.java b/juneau-core/src/main/java/org/apache/juneau/xml/XmlParserContext.java index 7b4ae26..3e67c33 100644 --- a/juneau-core/src/main/java/org/apache/juneau/xml/XmlParserContext.java +++ b/juneau-core/src/main/java/org/apache/juneau/xml/XmlParserContext.java @@ -48,13 +48,6 @@ import org.apache.juneau.parser.*; * <td><jk>true</jk></td> * </tr> * <tr> - * <td>{@link #XML_trimWhitespace}</td> - * <td>Trim whitespace from text elements.</td> - * <td><code>Boolean<code></td> - * <td><jk>true</jk></td> - * <td><jk>true</jk></td> - * </tr> - * <tr> * <td>{@link #XML_validating}</td> * <td>Enable validation.</td> * <td><code>Boolean<code></td> @@ -116,20 +109,6 @@ public class XmlParserContext extends ParserContext { public static final String XML_xsiNs = "XmlParser.xsiNs"; /** - * <b>Configuration property:</b> Trim whitespace from text elements. - * <p> - * <ul> - * <li><b>Name:</b> <js>"XmlParser.trimWhitespace"</js> - * <li><b>Data type:</b> <code>Boolean</code> - * <li><b>Default:</b> <jk>true</jk> - * <li><b>Session-overridable:</b> <jk>true</jk> - * </ul> - * <p> - * If <jk>true</jk>, whitespace in text elements will be automatically trimmed. - */ - public static final String XML_trimWhitespace = "XmlParser.trimWhitespace"; - - /** * <b>Configuration property:</b> Enable validation. 
* <p> * <ul> @@ -219,7 +198,6 @@ public class XmlParserContext extends ParserContext { final String xsiNs; final boolean - trimWhitespace, validating, preserveRootElement; final XMLReporter reporter; @@ -236,7 +214,6 @@ public class XmlParserContext extends ParserContext { public XmlParserContext(ContextFactory cf) { super(cf); xsiNs = cf.getProperty(XML_xsiNs, String.class, "http://www.w3.org/2001/XMLSchema-instance"); - trimWhitespace = cf.getProperty(XML_trimWhitespace, boolean.class, true); validating = cf.getProperty(XML_validating, boolean.class, false); preserveRootElement = cf.getProperty(XML_preserveRootElement, boolean.class, false); reporter = cf.getProperty(XML_reporter, XMLReporter.class, null); @@ -249,7 +226,6 @@ public class XmlParserContext extends ParserContext { return super.asMap() .append("XmlParserContext", new ObjectMap() .append("xsiNs", xsiNs) - .append("trimWhitespace", trimWhitespace) .append("validating", validating) .append("preserveRootElement", preserveRootElement) .append("reporter", reporter) http://git-wip-us.apache.org/repos/asf/incubator-juneau/blob/21c0e1ea/juneau-core/src/main/java/org/apache/juneau/xml/XmlParserSession.java ---------------------------------------------------------------------- diff --git a/juneau-core/src/main/java/org/apache/juneau/xml/XmlParserSession.java b/juneau-core/src/main/java/org/apache/juneau/xml/XmlParserSession.java index 5aeb93d..fce4243 100644 --- a/juneau-core/src/main/java/org/apache/juneau/xml/XmlParserSession.java +++ b/juneau-core/src/main/java/org/apache/juneau/xml/XmlParserSession.java @@ -13,6 +13,7 @@ package org.apache.juneau.xml; import static org.apache.juneau.xml.XmlParserContext.*; +import static javax.xml.stream.XMLStreamConstants.*; import java.io.*; import java.lang.reflect.*; @@ -35,7 +36,6 @@ public class XmlParserSession extends ParserSession { private final String xsiNs; private final boolean - trimWhitespace, validating, preserveRootElement; private final XMLReporter 
reporter; @@ -70,7 +70,6 @@ public class XmlParserSession extends ParserSession { super(ctx, op, input, javaMethod, outer, locale, timeZone); if (op == null || op.isEmpty()) { xsiNs = ctx.xsiNs; - trimWhitespace = ctx.trimWhitespace; validating = ctx.validating; reporter = ctx.reporter; resolver = ctx.resolver; @@ -78,7 +77,6 @@ public class XmlParserSession extends ParserSession { preserveRootElement = ctx.preserveRootElement; } else { xsiNs = op.getString(XML_xsiNs, ctx.xsiNs); - trimWhitespace = op.getBoolean(XML_trimWhitespace, ctx.trimWhitespace); validating = op.getBoolean(XML_validating, ctx.validating); reporter = (XMLReporter)op.get(XML_reporter, ctx.reporter); resolver = (XMLResolver)op.get(XML_resolver, ctx.resolver); @@ -117,7 +115,7 @@ public class XmlParserSession extends ParserSession { XMLInputFactory factory = XMLInputFactory.newInstance(); factory.setProperty(XMLInputFactory.IS_VALIDATING, validating); factory.setProperty(XMLInputFactory.IS_COALESCING, true); - factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, true); + factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, true); // This usually has no effect anyway. if (factory.isPropertySupported(XMLInputFactory.REPORTER) && reporter != null) factory.setProperty(XMLInputFactory.REPORTER, reporter); if (factory.isPropertySupported(XMLInputFactory.RESOLVER) && resolver != null) @@ -139,15 +137,15 @@ public class XmlParserSession extends ParserSession { /** * Decodes and trims the specified string. + * <p> + * Any <js>'_x####_'</js> sequences in the string will be decoded. * * @param s The string to be decoded. * @return The decoded string. 
*/ public final String decodeString(String s) { - if (s == null || s.isEmpty()) - return s; - if (trimWhitespace) - s = s.trim(); + if (s == null) + return null; sb.setLength(0); s = XmlUtils.decode(s, sb); if (isTrimStrings()) @@ -156,43 +154,93 @@ public class XmlParserSession extends ParserSession { } /** - * Shortcut for calling <code>decodeString(r.getElementText());</code>. + * Returns the name of the current XML element. + * <p> + * Any <js>'_x####_'</js> sequences in the string will be decoded. + * + * @param r The reader to read from. + * @return The decoded element name. + * @throws XMLStreamException + */ + public final String getElementName(XMLStreamReader r) throws XMLStreamException { + return decodeString(r.getLocalName()); + } + + /** + * Returns the name of the specified attribute on the current XML element. + * <p> + * Any <js>'_x####_'</js> sequences in the string will be decoded. + * + * @param r The reader to read from. + * @param i The attribute index. + * @return The decoded attribute name. + * @throws XMLStreamException + */ + public final String getAttributeName(XMLStreamReader r, int i) throws XMLStreamException { + return decodeString(r.getAttributeLocalName(i)); + } + + /** + * Returns the value of the specified attribute on the current XML element. + * <p> + * Any <js>'_x####_'</js> sequences in the string will be decoded. + * + * @param r The reader to read from. + * @param i The attribute index. + * @return The decoded attribute value. + * @throws XMLStreamException + */ + public final String getAttributeValue(XMLStreamReader r, int i) throws XMLStreamException { + return decodeString(r.getAttributeValue(i)); + } + + /** + * Returns the text content of the current XML element. + * <p> + * Any <js>'_x####_'</js> sequences in the string will be decoded. + * <p> + * Leading and trailing whitespace (unencoded) will be trimmed from the result. * * @param r The reader to read the element text from. - * @return The decoded text. 
+ * @return The decoded text. <jk>null</jk> if the text consists of the sequence <js>'_x0000_'</js>. * @throws XMLStreamException */ - public final String decodeString(XMLStreamReader r) throws XMLStreamException { - return decodeString(r.getElementText()); + public String getElementText(XMLStreamReader r) throws XMLStreamException { + String s = r.getElementText().trim(); + return decodeString(s); } /** - * Decodes the specified literal (e.g. <js>"true"</js>, <js>"123"</js>). + * Returns the content of the current CHARACTERS node. + * <p> + * Any <js>'_x####_'</js> sequences in the string will be decoded. * <p> - * Unlike <code>decodeString(String)</code>, the input string is ALWAYS trimmed before decoding, and - * NEVER trimmed after decoding. + * Leading and trailing whitespace (unencoded) will be trimmed from the result. * - * @param s The string to trim. - * @return The trimmed string, or <jk>null</jk> if the string was <jk>null</jk>. + * @param r The reader to read the element text from. + * @param trim If <jk>true</jk>, trim the contents of the text node BEFORE decoding escape sequences. + * Typically <jk>true</jk> for {@link XmlFormat#MIXED_PWS} and {@link XmlFormat#TEXT_PWS}. + * @return The decoded text. <jk>null</jk> if the text consists of the sequence <js>'_x0000_'</js>. + * @throws XMLStreamException */ - public final String decodeLiteral(String s) { - if (s == null || s.isEmpty()) - return s; - s = s.trim(); - sb.setLength(0); - s = XmlUtils.decode(s, sb); - return s; + public String getText(XMLStreamReader r, boolean trim) throws XMLStreamException { + String s = r.getText(); + if (trim) + s = s.trim(); + if (s.isEmpty()) + return null; + return decodeString(s); } /** - * Shortcut for calling <code>decodeLiteral(r.getElementText());</code>. + * Shortcut for calling <code>getText(r, <jk>true</jk>);</code>. * * @param r The reader to read the element text from. - * @return The decoded text. + * @return The decoded text. 
<jk>null</jk> if the text consists of the sequence <js>'_x0000_'</js>. * @throws XMLStreamException */ - public final String decodeText(XMLStreamReader r) throws XMLStreamException { - return decodeLiteral(r.getElementText()); + public String getText(XMLStreamReader r) throws XMLStreamException { + return getText(r, true); } /** @@ -204,7 +252,7 @@ public class XmlParserSession extends ParserSession { * @return The event as XML. * @throws RuntimeException if the event is not a start or end tag. */ - public final String elementAsString(XMLStreamReader r) { + public final String getElementAsString(XMLStreamReader r) { int t = r.getEventType(); if (t > 2) throw new RuntimeException("Invalid event type on stream reader for elementToString() method: " + XmlUtils.toReadableEvent(r)); @@ -218,6 +266,66 @@ public class XmlParserSession extends ParserSession { } /** + * Parses the current element as text. + * Note that this is different than {@link #getText(XMLStreamReader)} since it + * assumes that we're pointing to a whitespace element. + * + * @param r + * @return The parsed text. + * @throws XMLStreamException + */ + public String parseText(XMLStreamReader r) throws XMLStreamException { + StringBuilder sb2 = getStringBuilder(); + + int depth = 0; + while (true) { + int et = r.getEventType(); + if (et == START_ELEMENT) { + sb2.append(getElementAsString(r)); + depth++; + } else if (et == CHARACTERS) { + sb2.append(getText(r)); + } else if (et == END_ELEMENT) { + sb2.append(getElementAsString(r)); + depth--; + if (depth <= 0) + break; + } + et = r.next(); + } + String s = sb2.toString(); + returnStringBuilder(sb2); + return s; + } + + /** + * Returns <jk>true</jk> if the current element is a whitespace element. + * <p> + * For the XML parser, this always returns <jk>false</jk>. + * However, the HTML parser defines various whitespace elements such as <js>"br"</js> and <js>"sp"</js>. + * + * @param r The XML stream reader to read the current event from. 
+ * @return <jk>true</jk> if the current element is a whitespace element. + */ + public boolean isWhitespaceElement(XMLStreamReader r) { + return false; + } + + /** + * Parses the current whitespace element. + * <p> + * For the XML parser, this always returns <jk>null</jk> since there is no concept of a whitespace element. + * However, the HTML parser defines various whitespace elements such as <js>"br"</js> and <js>"sp"</js>. + * + * @param r The XML stream reader to read the current event from. + * @return The whitespace character or characters. + * @throws XMLStreamException + */ + public String parseWhitespaceElement(XMLStreamReader r) throws XMLStreamException { + return null; + } + + /** * Silently closes the XML stream. */ @Override /* ParserContext */ http://git-wip-us.apache.org/repos/asf/incubator-juneau/blob/21c0e1ea/juneau-core/src/main/java/org/apache/juneau/xml/XmlSchemaSerializer.java ---------------------------------------------------------------------- diff --git a/juneau-core/src/main/java/org/apache/juneau/xml/XmlSchemaSerializer.java b/juneau-core/src/main/java/org/apache/juneau/xml/XmlSchemaSerializer.java index 2e107a5..4a0938e 100644 --- a/juneau-core/src/main/java/org/apache/juneau/xml/XmlSchemaSerializer.java +++ b/juneau-core/src/main/java/org/apache/juneau/xml/XmlSchemaSerializer.java @@ -336,7 +336,7 @@ public class XmlSchemaSerializer extends XmlSerializer { // This element can have mixed content if: // 1) It's a generic Object (so it can theoretically be anything) // 2) The bean has a property defined with @XmlFormat.CONTENT. - if ((xbm != null && (xbm.getContentFormat() != null && xbm.getContentFormat().isOneOf(TEXT,MIXED,XMLTEXT))) || ! cm.isMapOrBean()) + if ((xbm != null && (xbm.getContentFormat() != null && xbm.getContentFormat().isOneOf(TEXT,TEXT_PWS,MIXED,MIXED_PWS,XMLTEXT))) || ! 
cm.isMapOrBean()) w.attr("mixed", "true"); w.cTag().nl(); http://git-wip-us.apache.org/repos/asf/incubator-juneau/blob/21c0e1ea/juneau-core/src/main/java/org/apache/juneau/xml/XmlSerializer.java ---------------------------------------------------------------------- diff --git a/juneau-core/src/main/java/org/apache/juneau/xml/XmlSerializer.java b/juneau-core/src/main/java/org/apache/juneau/xml/XmlSerializer.java index cf5f007..0d07a99 100644 --- a/juneau-core/src/main/java/org/apache/juneau/xml/XmlSerializer.java +++ b/juneau-core/src/main/java/org/apache/juneau/xml/XmlSerializer.java @@ -279,6 +279,7 @@ public class XmlSerializer extends WriterSerializer { * @param addNamespaceUris Flag indicating that namespace URIs need to be added. * @param format The format to serialize the output to. * @param isMixed We're serializing mixed content, so don't use whitespace. + * @param preserveWhitespace <jk>true</jk> if we're serializing {@link XmlFormat#MIXED_PWS} or {@link XmlFormat#TEXT_PWS}. * @param pMeta The bean property metadata if this is a bean property being serialized. * @return The same writer passed in so that calls to the writer can be chained. * @throws Exception If a problem occurred trying to convert the output. @@ -293,6 +294,7 @@ public class XmlSerializer extends WriterSerializer { boolean addNamespaceUris, XmlFormat format, boolean isMixed, + boolean preserveWhitespace, BeanPropertyMeta pMeta) throws Exception { JsonType type = null; // The type string (e.g. <type> or <x x='type'> @@ -376,7 +378,7 @@ public class XmlSerializer extends WriterSerializer { type = STRING; } - if (format.isOneOf(MIXED,TEXT,XMLTEXT) && type.isOneOf(NULL,STRING,NUMBER,BOOLEAN)) + if (format.isOneOf(MIXED,MIXED_PWS,TEXT,TEXT_PWS,XMLTEXT) && type.isOneOf(NULL,STRING,NUMBER,BOOLEAN)) isCollapsed = true; // Is there a name associated with this bean? 
@@ -453,7 +455,7 @@ public class XmlSerializer extends WriterSerializer { if (format == XMLTEXT) out.append(o); else - out.encodeText(session.trim(o)); + out.text(o, preserveWhitespace); } else if (sType.isNumber() || sType.isBoolean()) { out.append(o); } else if (sType.isMap() || (wType != null && wType.isMap())) { @@ -483,7 +485,7 @@ public class XmlSerializer extends WriterSerializer { if (format == XMLTEXT) out.append(session.toString(o)); else - out.encodeText(session.toString(o)); + out.text(session.toString(o)); } } @@ -528,7 +530,7 @@ public class XmlSerializer extends WriterSerializer { hasChildren = true; out.append('>').nlIf(! isMixed); } - serializeAnything(session, out, value, valueType, session.toString(k), null, false, XmlFormat.DEFAULT, isMixed, null); + serializeAnything(session, out, value, valueType, session.toString(k), null, false, XmlFormat.DEFAULT, isMixed, false, null); } return hasChildren ? CR_ELEMENTS : CR_EMPTY; } @@ -595,7 +597,7 @@ public class XmlSerializer extends WriterSerializer { } } - boolean hasContent = false; + boolean hasContent = false, preserveWhitespace = false; for (BeanPropertyValue p : lp) { BeanPropertyMeta pMeta = p.getMeta(); @@ -607,8 +609,14 @@ public class XmlSerializer extends WriterSerializer { contentType = p.getClassMeta(); hasContent = true; cf = xbm.getContentFormat(); - if (cf.isOneOf(MIXED,TEXT,XMLTEXT)) + if (cf.isOneOf(MIXED,MIXED_PWS,TEXT,TEXT_PWS,XMLTEXT)) isMixed = true; + if (cf.isOneOf(MIXED_PWS, TEXT_PWS)) + preserveWhitespace = true; + if (contentType.isCollection() && ((Collection)content).isEmpty()) + hasContent = false; + else if (contentType.isArray() && Array.getLength(content) == 0) + hasContent = false; } else if (elements.contains(n) || collapsedElements.contains(n)) { String key = p.getName(); Object value = p.getValue(); @@ -625,10 +633,10 @@ public class XmlSerializer extends WriterSerializer { } XmlBeanPropertyMeta xbpm = pMeta.getExtendedMeta(XmlBeanPropertyMeta.class); - 
serializeAnything(session, out, value, cMeta, key, xbpm.getNamespace(), false, xbpm.getXmlFormat(), isMixed, pMeta); + serializeAnything(session, out, value, cMeta, key, xbpm.getNamespace(), false, xbpm.getXmlFormat(), isMixed, false, pMeta); } } - if ((! hasContent) || session.canIgnoreValue(string(), null, content)) + if (! hasContent) return (hasChildren ? CR_ELEMENTS : CR_EMPTY); out.append('>').nlIf(! isMixed); @@ -639,22 +647,22 @@ public class XmlSerializer extends WriterSerializer { Collection c = (Collection)content; for (Iterator i = c.iterator(); i.hasNext();) { Object value = i.next(); - serializeAnything(session, out, value, contentType.getElementType(), null, null, false, cf, isMixed, null); + serializeAnything(session, out, value, contentType.getElementType(), null, null, false, cf, isMixed, preserveWhitespace, null); } } else if (contentType.isArray()) { Collection c = toList(Object[].class, content); for (Iterator i = c.iterator(); i.hasNext();) { Object value = i.next(); - serializeAnything(session, out, value, contentType.getElementType(), null, null, false, cf, isMixed, null); + serializeAnything(session, out, value, contentType.getElementType(), null, null, false, cf, isMixed, preserveWhitespace, null); } } else { - serializeAnything(session, out, content, contentType, null, null, false, cf, isMixed, null); + serializeAnything(session, out, content, contentType, null, null, false, cf, isMixed, preserveWhitespace, null); } } else { if (! session.isTrimNulls()) { if (! isMixed) out.i(session.indent); - out.encodeText(content); + out.text(content); if (! 
isMixed) out.nl(); } @@ -688,7 +696,7 @@ public class XmlSerializer extends WriterSerializer { for (Iterator i = c.iterator(); i.hasNext();) { Object value = i.next(); - serializeAnything(session, out, value, eeType, eName, eNs, false, XmlFormat.DEFAULT, isMixed, null); + serializeAnything(session, out, value, eeType, eName, eNs, false, XmlFormat.DEFAULT, isMixed, false, null); } return out; } @@ -742,7 +750,7 @@ public class XmlSerializer extends WriterSerializer { XmlSerializerSession s = (XmlSerializerSession)session; if (s.isEnableNamespaces() && s.isAutoDetectNamespaces()) findNsfMappings(s, o); - serializeAnything(s, s.getWriter(), o, null, null, null, s.isEnableNamespaces() && s.isAddNamespaceUrlsToRoot(), XmlFormat.DEFAULT, false, null); + serializeAnything(s, s.getWriter(), o, null, null, null, s.isEnableNamespaces() && s.isAddNamespaceUrlsToRoot(), XmlFormat.DEFAULT, false, false, null); } @Override /* Serializer */ http://git-wip-us.apache.org/repos/asf/incubator-juneau/blob/21c0e1ea/juneau-core/src/main/java/org/apache/juneau/xml/XmlUtils.java ---------------------------------------------------------------------- diff --git a/juneau-core/src/main/java/org/apache/juneau/xml/XmlUtils.java b/juneau-core/src/main/java/org/apache/juneau/xml/XmlUtils.java index 4ca4b20..8220ef3 100644 --- a/juneau-core/src/main/java/org/apache/juneau/xml/XmlUtils.java +++ b/juneau-core/src/main/java/org/apache/juneau/xml/XmlUtils.java @@ -12,12 +12,9 @@ // *************************************************************************************************************************** package org.apache.juneau.xml; -import static javax.xml.stream.XMLStreamConstants.*; - import java.io.*; import java.util.*; -import javax.xml.namespace.*; import javax.xml.stream.*; import org.apache.juneau.*; @@ -34,15 +31,12 @@ public final class XmlUtils { //-------------------------------------------------------------------------------- /** - * Encodes invalid XML text characters. 
- * <p> - * Encodes <js>'&'</js>, <js>'<'</js>, and <js>'>'</js> as XML entities.<br> - * Encodes any other invalid XML text characters to <code>_x####_</code> sequences. + * Encodes invalid XML text characters to <code>_x####_</code> sequences. * * @param o The object being encoded. * @return The encoded string. */ - public static final String encodeText(Object o) { + public static final String encodeInvalidCharsForText(Object o) { if (o == null) return "_x0000_"; @@ -50,166 +44,81 @@ public final class XmlUtils { String s = o.toString(); try { - if (needsTextEncoding(s)) - return encodeTextInner(new StringBuilderWriter(s.length()*2), s).toString(); - } catch (IOException e) { - throw new RuntimeException(e); // Never happens - } - - return s; - } - - /** - * Same as {@link #encodeText(Object)}, but does not convert <js>'&'</js>, <js>'<'</js>, and <js>'>'</js> - * to entities. - * - * @param o The object being encoded. - * @return The encoded string. - */ - public static final String encodeTextInvalidChars(Object o) { - - if (o == null) - return "_x0000_"; - - String s = o.toString(); - - try { - if (needsTextEncoding(s)) - return encodeTextInvalidCharsInner(new StringBuilderWriter(s.length()*2), s).toString(); + if (! needsTextEncoding(s)) + return s; + final int len = s.length(); + StringWriter sw = new StringWriter(s.length()*2); + for (int i = 0; i < len; i++) { + char c = s.charAt(i); + if ((i == 0 || i == len-1) && Character.isWhitespace(c)) + appendPaddedHexChar(sw, c); + else if (c == '_' && isEscapeSequence(s,i)) + appendPaddedHexChar(sw, c); + else if (isValidXmlCharacter(c)) + sw.append(c); + else + appendPaddedHexChar(sw, c); + } + return sw.toString(); } catch (IOException e) { throw new RuntimeException(e); // Never happens } - - return s; } /** * Encodes any invalid XML text characters to <code>_x####_</code> sequences and sends the response * to the specified writer. 
+ * Encodes <js>'&'</js>, <js>'<'</js>, and <js>'>'</js> as XML entities.<br> + * Encodes invalid XML text characters to <code>_x####_</code> sequences. * * @param w The writer to send the output to. * @param o The object being encoded. + * @param trim Trim the text before serializing it. + * @param preserveWhitespace Specifies whether we're in preserve-whitespace mode. + * (e.g. {@link XmlFormat#MIXED_PWS} or {@link XmlFormat#TEXT_PWS}. + * If <jk>true</jk>, leading and trailing whitespace characters will be encoded. * @return The same writer passed in. * @throws IOException Thrown from the writer. */ - public static final Writer encodeText(Writer w, Object o) throws IOException { + public static final Writer encodeText(Writer w, Object o, boolean trim, boolean preserveWhitespace) throws IOException { if (o == null) return w.append("_x0000_"); String s = o.toString(); - - if (needsTextEncoding(s)) - return encodeTextInner(w, s); - - w.append(s); - - return w; - } - - /** - * Same as {@link #encodeText(Object)}, but does not convert <js>'&'</js>, <js>'<'</js>, and <js>'>'</js> - * to entities. - * - * @param w The writer to write to. - * @param o The object being encoded. - * @return The encoded string. - * @throws IOException - */ - public static final Writer encodeTextInvalidChars(Writer w, Object o) throws IOException { - - if (o == null) - return w.append("_x0000_"); - - String s = o.toString(); - - if (needsTextEncoding(s)) - return encodeTextInvalidCharsInner(w, s); - - w.append(s); - - return w; - } - - /** - * Same as {@link #encodeText(Object)}, but only converts <js>'&'</js>, <js>'<'</js>, and <js>'>'</js> - * to entities. - * - * @param w The writer to write to. - * @param o The object being encoded. - * @return The encoded string. 
- * @throws IOException - */ - public static final Writer encodeTextXmlChars(Writer w, Object o) throws IOException { - if (o == null) - return w; - - String s = o.toString(); - - if (needsTextEncoding(s)) - return encodeTextXmlCharsInner(w, s); - - w.append(s); - - return w; - - } - - private static final Writer encodeTextInner(Writer w, String s) throws IOException { - final int len = s.length(); - for (int i = 0; i < len; i++) { - char c = s.charAt(i); - if (c == '&') - w.append("&"); - else if (c == '<') - w.append("<"); - else if (c == '>') - w.append(">"); - else if (c == '_' && isEscapeSequence(s,i)) - appendPaddedHexChar(w, c); - else if ((i == 0 || i == len-1) && Character.isWhitespace(c)) - appendPaddedHexChar(w, c); - else if (isValidXmlCharacter(c)) - w.append(c); - else if (c == 0x09 || c == 0x0A || c == 0x0D) - w.append("�").append(Integer.toHexString(c)).append(";"); - else - appendPaddedHexChar(w, c); + if (s.isEmpty()) + return w.append("_xE000_"); + if (trim) + s = s.trim(); + + if (needsTextEncoding(s)) { + final int len = s.length(); + for (int i = 0; i < len; i++) { + char c = s.charAt(i); + if ((i == 0 || i == len-1) && Character.isWhitespace(c) && ! 
preserveWhitespace) + appendPaddedHexChar(w, c); + else if (c == '&') + w.append("&"); + else if (c == '<') + w.append("<"); + else if (c == '>') + w.append(">"); + else if (c == '_' && isEscapeSequence(s,i)) + appendPaddedHexChar(w, c); + else if (isValidXmlCharacter(c)) + w.append(c); + else if (c == 0x09 || c == 0x0A || c == 0x0D) + w.append("�").append(Integer.toHexString(c)).append(";"); + else + appendPaddedHexChar(w, c); + } + } else { + w.append(s); } - return w; - } - private static final Writer encodeTextInvalidCharsInner(Writer w, String s) throws IOException { - final int len = s.length(); - for (int i = 0; i < len; i++) { - char c = s.charAt(i); - if ((i == 0 || i == len-1) && Character.isWhitespace(c)) - appendPaddedHexChar(w, c); - else if (c == '_' && isEscapeSequence(s,i)) - appendPaddedHexChar(w, c); - else if (isValidXmlCharacter(c)) - w.append(c); - else - appendPaddedHexChar(w, c); - } return w; } - private static final Writer encodeTextXmlCharsInner(Writer w, String s) throws IOException { - final int len = s.length(); - for (int i = 0; i < len; i++) { - char c = s.charAt(i); - if (c == '&') - w.append("&"); - else if (c == '<') - w.append("<"); - else if (c == '>') - w.append(">"); - else - w.append(c); - } - return w; - } private static final boolean needsTextEncoding(String s) { // See if we need to convert the string. @@ -253,10 +162,12 @@ public final class XmlUtils { int x = Integer.parseInt(s.substring(i+2, i+6), 16); // If we find _x0000_, then that means a null. + // If we find _xE000_, then that means an empty string. if (x == 0) return null; + else if (x != 0xE000) + sb.append((char)x); - sb.append((char)x); i+=6; } else { sb.append(c); @@ -266,6 +177,34 @@ public final class XmlUtils { } + /** + * Given a list of Strings and other Objects, combines Strings that are next to each other in the list. + * + * @param l The list of text nodes to collapse. + * @return The same list. 
+ */ + public static LinkedList<Object> collapseTextNodes(LinkedList<Object> l) { + + String prev = null; + for (ListIterator<Object> i = l.listIterator(); i.hasNext();) { + Object o = i.next(); + if (o instanceof String) { + if (prev == null) + prev = o.toString(); + else { + prev += o; + i.remove(); + i.previous(); + i.remove(); + i.add(prev); + } + } else { + prev = null; + } + } + return l; + } + //-------------------------------------------------------------------------------- // Encode XML attributes //-------------------------------------------------------------------------------- @@ -285,35 +224,33 @@ public final class XmlUtils { String s = o.toString(); - if (needsAttributeEncoding(s)) - return encodeAttrInner(w, s); + if (needsAttributeEncoding(s)) { + for (int i = 0; i < s.length(); i++) { + char c = s.charAt(i); + if (c == '&') + w.append("&"); + else if (c == '<') + w.append("<"); + else if (c == '>') + w.append(">"); + else if (c == '\'') + w.append("'"); + else if (c == '"') + w.append("""); + else if (c == '_' && isEscapeSequence(s,i)) + appendPaddedHexChar(w, c); + else if (isValidXmlCharacter(c)) + w.append(c); + else + appendPaddedHexChar(w, c); + } + } else { + w.append(s); + } - w.append(s); return w; } - private static final Writer encodeAttrInner(Writer w, String s) throws IOException { - for (int i = 0; i < s.length(); i++) { - char c = s.charAt(i); - if (c == '&') - w.append("&"); - else if (c == '<') - w.append("<"); - else if (c == '>') - w.append(">"); - else if (c == '\'') - w.append("'"); - else if (c == '"') - w.append("""); - else if (c == '_' && isEscapeSequence(s,i)) - appendPaddedHexChar(w, c); - else if (isValidXmlCharacter(c)) - w.append(c); - else - appendPaddedHexChar(w, c); - } - return w; - } private static boolean needsAttributeEncoding(String s) { // See if we need to convert the string. 
@@ -364,7 +301,8 @@ public final class XmlUtils { return "_x0000_"; String s = o.toString(); - + if (s.isEmpty()) + return "_xE000_"; try { if (needsElementNameEncoding(s)) return encodeElementNameInner(new StringBuilderWriter(s.length() * 2), s).toString(); @@ -425,48 +363,6 @@ public final class XmlUtils { // Other methods //-------------------------------------------------------------------------------- - /** - * Utility method for reading XML mixed content from an XML element and returning it as text. - * - * @param r The reader to read from. - * @return The contents read as a string. - * @throws XMLStreamException - * @throws IOException - */ - public static String readXmlContents(XMLStreamReader r) throws XMLStreamException, IOException { - StringWriter sw = new StringWriter(); - XmlWriter w = new XmlWriter(sw, false, false, '"', null, null, false, null); - try { - int depth = 0; - do { - int event = r.next(); - if (event == START_ELEMENT) { - depth++; - QName n = r.getName(); - w.oTag(n.getPrefix(), n.getLocalPart()); - for (int i = 0; i < r.getNamespaceCount(); i++) - w.attr(r.getNamespacePrefix(i), "xmlns", r.getNamespaceURI(i)); - for (int i = 0; i < r.getAttributeCount(); i++) - w.attr(r.getAttributePrefix(i), r.getAttributeLocalName(i), r.getAttributeValue(i)); - w.append('>'); - } else if (r.hasText()) { - w.encodeTextXmlChars(r.getText()); - } else if (event == ATTRIBUTE) { - // attributes handled above. - } else if (event == END_ELEMENT) { - QName n = r.getName(); - if (depth > 0) - w.eTag(n.getPrefix(), n.getLocalPart()); - depth--; - } - if (depth < 0) - return sw.toString(); - } while (true); - } finally { - w.close(); - } - } - // Returns true if the specified character can safely be used in XML text or an attribute. 
private static final boolean isValidXmlCharacter(char c) { return (c >= 0x20 && c <= 0xD7FF) /*|| c == 0xA || c == 0xD*/ || (c >= 0xE000 && c <= 0xFFFD); @@ -493,17 +389,8 @@ public final class XmlUtils { // Converts an integer to a hexadecimal string padded to 4 places. private static final Writer appendPaddedHexChar(Writer out, int num) throws IOException { out.append("_x"); - char[] n = new char[4]; - int a = num%16; - n[3] = (char)(a > 9 ? 'A'+a-10 : '0'+a); - int base = 16; - for (int i = 1; i < 4; i++) { - a = (num/base)%16; - base <<= 4; - n[3-i] = (char)(a > 9 ? 'A'+a-10 : '0'+a); - } - for (int i = 0; i < 4; i++) - out.append(n[i]); + for (char c : StringUtils.toHex(num)) + out.append(c); return out.append('_'); } http://git-wip-us.apache.org/repos/asf/incubator-juneau/blob/21c0e1ea/juneau-core/src/main/java/org/apache/juneau/xml/XmlWriter.java ---------------------------------------------------------------------- diff --git a/juneau-core/src/main/java/org/apache/juneau/xml/XmlWriter.java b/juneau-core/src/main/java/org/apache/juneau/xml/XmlWriter.java index 7610c1c..281e664 100644 --- a/juneau-core/src/main/java/org/apache/juneau/xml/XmlWriter.java +++ b/juneau-core/src/main/java/org/apache/juneau/xml/XmlWriter.java @@ -15,6 +15,7 @@ package org.apache.juneau.xml; import java.io.*; import org.apache.juneau.serializer.*; +import org.apache.juneau.xml.annotation.*; /** * Specialized writer for serializing XML. @@ -58,7 +59,7 @@ public class XmlWriter extends SerializerWriter { if (enableNs && ns != null && ! (ns.isEmpty() || ns.equals(defaultNsPrefix))) append(ns).append(':'); if (needsEncoding) - encodeElement(name); + XmlUtils.encodeElementName(out, name); else append(name); return this; @@ -164,7 +165,7 @@ public class XmlWriter extends SerializerWriter { if (enableNs && ns != null && ! 
(ns.isEmpty() || ns.equals(defaultNsPrefix))) append(ns).append(':'); if (needsEncoding) - encodeElement(name); + XmlUtils.encodeElementName(out, name); else append(name); return append('/').append('>'); @@ -323,7 +324,7 @@ public class XmlWriter extends SerializerWriter { if (enableNs && ns != null && ! (ns.isEmpty() || ns.equals(defaultNsPrefix))) append(ns).append(':'); if (needsEncoding) - encodeElement(name); + XmlUtils.encodeElementName(out, name); else append(name); return append('>'); @@ -519,46 +520,27 @@ public class XmlWriter extends SerializerWriter { } /** - * Serializes and encodes the specified object as valid XML text. - * - * @param o The object being serialized. - * @return This object (for method chaining). - * @throws IOException If a problem occurred. - */ - public XmlWriter encodeText(Object o) throws IOException { - XmlUtils.encodeText(this, o); - return this; - } - - /** - * Serializes and encodes the specified object as valid XML text. - * <p> - * Does NOT encode XML characters (<js>'<'</js>, <js>'>'</js>, and <js>'&'</js>). - * <p> - * Use on XML text that you just want to replace invalid XML characters with <js>"_x####_"</js> sequences. + * Shortcut for calling <code>text(o, <jk>false</jk>);</code> * * @param o The object being serialized. * @return This object (for method chaining). * @throws IOException If a problem occurred. */ - public XmlWriter encodeTextInvalidChars(Object o) throws IOException { - XmlUtils.encodeTextInvalidChars(this, o); + public XmlWriter text(Object o) throws IOException { + text(o, false); return this; } /** * Serializes and encodes the specified object as valid XML text. - * <p> - * Only encodes XML characters (<js>'<'</js>, <js>'>'</js>, and <js>'&'</js>). - * <p> - * Use on XML text where the invalid characters have already been replaced. * * @param o The object being serialized. 
+ * @param preserveWhitespace If <jk>true</jk>, then we're serializing {@link XmlFormat#MIXED_PWS} or {@link XmlFormat#TEXT_PWS} content. * @return This object (for method chaining). - * @throws IOException If a problem occurred. + * @throws IOException */ - public XmlWriter encodeTextXmlChars(Object o) throws IOException { - XmlUtils.encodeTextXmlChars(this, o); + public XmlWriter text(Object o, boolean preserveWhitespace) throws IOException { + XmlUtils.encodeText(this, o, trimStrings, preserveWhitespace); return this; } @@ -574,18 +556,6 @@ public class XmlWriter extends SerializerWriter { return this; } - /** - * Serializes and encodes the specified object as valid XML element name. - * - * @param o The object being serialized. - * @return This object (for method chaining). - * @throws IOException If a problem occurred. - */ - public XmlWriter encodeElement(Object o) throws IOException { - XmlUtils.encodeElementName(out, o); - return this; - } - @Override /* SerializerWriter */ public XmlWriter cr(int depth) throws IOException { super.cr(depth); http://git-wip-us.apache.org/repos/asf/incubator-juneau/blob/21c0e1ea/juneau-core/src/main/java/org/apache/juneau/xml/annotation/XmlFormat.java ---------------------------------------------------------------------- diff --git a/juneau-core/src/main/java/org/apache/juneau/xml/annotation/XmlFormat.java b/juneau-core/src/main/java/org/apache/juneau/xml/annotation/XmlFormat.java index 98140b6..905cc40 100644 --- a/juneau-core/src/main/java/org/apache/juneau/xml/annotation/XmlFormat.java +++ b/juneau-core/src/main/java/org/apache/juneau/xml/annotation/XmlFormat.java @@ -79,6 +79,13 @@ public enum XmlFormat { MIXED, /** + * Same as {@link XmlFormat#MIXED}, but whitespace in text nodes are not trimmed during parsing. + * <p> + * An example use is HTML5 <xt><pre></xt> where whitespace should not be discarded. + */ + MIXED_PWS, + + /** * Render property value as the text content of the element. 
* <p> * Similar to {@link #MIXED} but value must be a single value, not a collection. @@ -90,6 +97,11 @@ public enum XmlFormat { TEXT, /** + * Same as {@link XmlFormat#TEXT}, but whitespace in text node is not trimmed during parsing. + */ + TEXT_PWS, + + /** * Same as {@link #TEXT} except the content is expected to be fully-formed XML that will * get serialized as-is. * <p> http://git-wip-us.apache.org/repos/asf/incubator-juneau/blob/21c0e1ea/juneau-core/src/main/java/org/apache/juneau/xml/package.html ---------------------------------------------------------------------- diff --git a/juneau-core/src/main/java/org/apache/juneau/xml/package.html b/juneau-core/src/main/java/org/apache/juneau/xml/package.html index d6b48b9..9c23da6 100644 --- a/juneau-core/src/main/java/org/apache/juneau/xml/package.html +++ b/juneau-core/src/main/java/org/apache/juneau/xml/package.html @@ -1124,6 +1124,10 @@ </table> <p> On a side note, characters that cannot be represented in XML 1.0 are encoded using a simple encoding. + Note that in the examples below, some characters such as <js>'\n'</js>, <js>'\t'</js>, and <js>'\r'</js> + can be represented as XML entities when used in text but not in element names. Other characters such as + <js>'\b'</js> and <js>'\f'</js> cannot be encoded in XML 1.0 at all without inventing our own notation. + Whitespace characters in element names are encoded as well as leading and trailing whitespace characters in text. 
</p> <table class='styled' style='width:auto'> <tr> @@ -1133,30 +1137,36 @@ <tr> <td class='code'> <jk>class</jk> BeanWithSpecialCharacters { - <jk>public</jk> String a = <js>"\n\b\f\t"</js>; + <jk>public</jk> String a = <js>" \b\f\n\t\r "</js>; } </td> <td class='code'><xt> <object> - <a><xv>_x000A__x0008__x000C__x0009_</xv></a> + <a><xv>_x0020_ _x0008__x000C_&#x000a;&#x0009;&#x000d; _x0020_</xv></a> </object> </xt></td> </tr> <tr> <td class='code'> - <ja>@Bean</ja>(typeName=<js>"$#!"</js>) + <ja>@Bean</ja>(typeName=<js>" \b\f\n\t\r "</js>) <jk>class</jk> BeanWithNamesWithSpecialCharacters { - <ja>@BeanProperty</ja>(name=<js>"*()"</js>) - <jk>public</jk> String a = <js>"\n\b\f\t"</js>; + <ja>@BeanProperty</ja>(name=<js>" \b\f\n\t\r "</js>) + <jk>public</jk> String a = <js>" \b\f\n\t\r "</js>; } </td> <td class='code'><xt> - <_x0024__x0023__x0021_> - <_x002A__x0028__x0029_><xv>_x000A__x0008__x000C__x0009_</xv></_x002A__x0028__x0029_> - </_x0024__x0023__x0021_> + <_x0020__x0020__x0008__x000C__x000A__x0009__x000D__x0020__x0020_> + <_x0020__x0020__x0008__x000C__x000A__x0009__x000D__x0020__x0020_><xv>_x0020_ _x0008__x000C_&#x000a;&#x0009;&#x000d; _x0020_</xv></_x0020__x0020__x0008__x000C__x000A__x0009__x000D__x0020__x0020_> + </_x0020__x0020__x0008__x000C__x000A__x0009__x000D__x0020__x0020_> </xt></td> </tr> </table> + <p> + While it's true that these characters CAN be represented in XML 1.1, it's impossible to parse XML 1.1 text in + Java without the XML containing an XML declaration. + Unfortunately, this, and the uselessness of the {@link javax.xml.stream.XMLInputFactory#IS_REPLACING_ENTITY_REFERENCES} setting in Java + forced us to make some hard design decisions that may not be the most elegant. + </p> </div> @@ -1539,6 +1549,10 @@ This format particularly useful when combined with bean dictionaries to produce mixed content. <br>The bean dictionary isn't used during serialization, but it is needed during parsing to resolve bean types. 
</p> + <p> + The {@link org.apache.juneau.xml.annotation.XmlFormat#MIXED_PWS} format is identical to {@link org.apache.juneau.xml.annotation.XmlFormat#MIXED} + except whitespace characters are preserved in the output. + </p> <table class='styled' style='width:auto'> <tr> <th>Data type</th> @@ -1598,6 +1612,272 @@ </tr> </table> <p> + Whitespace (tabs and newlines) is not added to MIXED child nodes in readable-output mode. + This helps ensure strings in the serialized output can be losslessly parsed back into their original forms when they contain whitespace characters. + If the {@link javax.xml.stream.XMLInputFactory#IS_REPLACING_ENTITY_REFERENCES} setting was not useless in Java, we could support lossless + readable XML for MIXED content. But as of Java 8, it still does not work. + </p> + <p> + XML suffers from other deficiencies as well that affect MIXED content. For example, <xt><X></X></xt> and <xt><X/></xt> are + equivalent in XML and indistinguishable by the Java XML parsers. This makes it impossible to differentiate between an empty element and an + element containing an empty string. This causes empty strings to get lost in translation. To alleviate this, we use the constructs <js>"_xE000_"</js> to + represent an empty string, and <js>"_x0020_"</js> to represent leading and trailing spaces. 
+ </p> + <p> + The examples below show how whitespace is handled under various circumstances: + </p> + <table class='styled' style='width:auto'> + <tr> + <th>Data type</th> + <th>XML</th> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>TEXT</jsf>) + <jk>public</jk> String a = <jk>null</jk>; + } + </td> + <td class='code'><xt> + <X/> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>TEXT</jsf>) + <jk>public</jk> String a = <js>""</js>; + } + </td> + <td class='code'><xt> + <X><xv>_xE000_</xv></X> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>TEXT</jsf>) + <jk>public</jk> String a = <js>" "</js>; + } + </td> + <td class='code'><xt> + <X><xv>_x0020_</xv></X> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>TEXT</jsf>) + <jk>public</jk> String a = <js>" "</js>; + } + </td> + <td class='code'><xt> + <X><xv>_x0020__x0020_</xv></X> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>TEXT</jsf>) + <jk>public</jk> String a = <js>" foobar "</js>; + } + </td> + <td class='code'><xt> + <X><xv>_x0020_ foobar _x0020_</xv></X> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>TEXT_PWS</jsf>) + <jk>public</jk> String a = <jk>null</jk>; + } + </td> + <td class='code'><xt> + <X/> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>TEXT_PWS</jsf>) + <jk>public</jk> String a = <js>""</js>; + } 
+ </td> + <td class='code'><xt> + <X><xv>_xE000_</xv></X> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>TEXT_PWS</jsf>) + <jk>public</jk> String a = <js>" "</js>; + } + </td> + <td class='code'><xt> + <X><xv> </xv></X> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>TEXT_PWS</jsf>) + <jk>public</jk> String a = <js>" "</js>; + } + </td> + <td class='code'><xt> + <X><xv> </xv></X> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>TEXT_PWS</jsf>) + <jk>public</jk> String a = <js>" foobar "</js>; + } + </td> + <td class='code'><xt> + <X><xv> foobar </xv></X> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>MIXED</jsf>) + <jk>public</jk> String[] a = <jk>null</jk>; + } + </td> + <td class='code'><xt> + <X/> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>MIXED</jsf>) + <jk>public</jk> String a[] = <jk>new</jk> String[]{<js>""</js>}; + } + </td> + <td class='code'><xt> + <X><xv>_xE000_</xv></X> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>MIXED</jsf>) + <jk>public</jk> String a[] = <jk>new</jk> String[]{<js>" "</js>}; + } + </td> + <td class='code'><xt> + <X><xv>_x0020_</xv></X> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>MIXED</jsf>) + <jk>public</jk> String a[] = <jk>new</jk> String[]{<js>" "</js>}; + } + </td> + <td class='code'><xt> + 
<X><xv>_x0020__x0020_</xv></X> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>MIXED</jsf>) + <jk>public</jk> String a[] = <jk>new</jk> String[]{ + <js>" foobar "</js> + }; + } + </td> + <td class='code'><xt> + <X><xv>_x0020_ foobar _x0020_</xv></X> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>MIXED_PWS</jsf>) + <jk>public</jk> String[] a = <jk>null</jk>; + } + </td> + <td class='code'><xt> + <X/> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>MIXED_PWS</jsf>) + <jk>public</jk> String a[] = <jk>new</jk> String[]{<js>""</js>}; + } + </td> + <td class='code'><xt> + <X><xv>_xE000_</xv></X> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>MIXED_PWS</jsf>) + <jk>public</jk> String a[] = <jk>new</jk> String[]{<js>" "</js>}; + } + </td> + <td class='code'><xt> + <X><xv> </xv></X> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>MIXED_PWS</jsf>) + <jk>public</jk> String a[] = <jk>new</jk> String[]{<js>" "</js>}; + } + </td> + <td class='code'><xt> + <X><xv> </xv></X> + </xt></td> + </tr> + <tr> + <td class='code'> + <ja>@Bean</ja>(typeName=<js>"X"</js>) + <jk>class</jk> MyBean { + <ja>@Xml</ja>(format=XmlFormat.<jsf>MIXED_PWS</jsf>) + <jk>public</jk> String a[] = <jk>new</jk> String[]{ + <js>" foobar "</js> + }; + } + </td> + <td class='code'><xt> + <X><xv> foobar </xv></X> + </xt></td> + </tr> + </table> + + <p> It should be noted that when using <jsf>MIXED</jsf>, you are not guaranteed to parse back the exact same content since side-by-side strings in the 
content will end up concatenated when parsed. </p> @@ -1605,6 +1885,7 @@ The {@link org.apache.juneau.xml.annotation.XmlFormat#TEXT} format is similar to {@link org.apache.juneau.xml.annotation.XmlFormat#MIXED} except it's meant for solitary objects that get serialized as simple child text nodes. <br>Any object that can be serialized to a <code>String</code> can be used. + <br>The {@link org.apache.juneau.xml.annotation.XmlFormat#TEXT_PWS} format is the same except whitespace is preserved in the output. </p> <table class='styled' style='width:auto'> <tr> @@ -2560,6 +2841,7 @@ When the map or list type is not specified, or is the abstract <code>Map</code>, <code>Collection</code>, or <code>List</code> types, the parser will use <code>ObjectMap</code> and <code>ObjectList</code> by default. </p> + </div> <!-- ======================================================================================================== --> @@ -2989,4 +3271,4 @@ <p align="center"><i><b>*** fín ***</b></i></p> </body> -</html> \ No newline at end of file +</html> \ No newline at end of file
