Author: natalia Date: Sun Mar 9 17:52:12 2008 New Revision: 635409 URL: http://svn.apache.org/viewvc?rev=635409&view=rev Log: Fix for XML escaping
Added: xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java (with props) Modified: xml/xindice/trunk/java/src/org/apache/xindice/xml/sax/SetContentHandler.java Added: xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java URL: http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java?rev=635409&view=auto ============================================================================== --- xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java (added) +++ xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java Sun Mar 9 17:52:12 2008 @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $Id$ + */ + +package org.apache.xindice.util; + +/** + * Set of XML-related utilities. + * + * @version $Revision$, $Date$ + */ +public class XMLUtilities { + private static final String REPLACEMENT = "�"; + + /** + * Converts input text into its XML representation by escaping all special symbols, + * if any are present. + * + * @param value Input array + * @param offset Start position in the array + * @param length Number of characters to process + * @param strict Method will throw an exception when it encounter illegal surrogate + * character if <code>strict</code> is true, otherwise illegal surrogate character + * will be replaced by character \uFFFD. + * @return String with all the special symbols escaped + * @throws XindiceRuntimeException If <code>strict</code> is true and <code>value</code> + * contains illegal surrogate character + */ + public static String escape(char[] value, int offset, int length, boolean strict) { + StringBuffer buf = new StringBuffer(); + int start = offset; + int blockLength = 0; + + for (int i = offset; i < length; i++) { + String outval = escape(value[i], strict); + + if (outval == null) { + if (isLeadingSurrogate(value[i])) { + if (i + 1 < length && isTrailingSurrogate(value[i + 1])) { + outval = getSurrogateValue(value[i], value[i + 1]); + i++; + } else { + if (strict) { + throw new XindiceRuntimeException("Leading surrogate &#" + Integer.toString(value[i]) + ";" + + "must be followed by trailing surrogate"); + } else { + outval = REPLACEMENT; + } + } + } else { + blockLength++; + } + } + + if (outval != null) { + if (blockLength > 0) { + buf.append(value, start, blockLength); + } + buf.append(outval); + start = i + 1; + blockLength = 0; + } + } + + if (blockLength > 0 && start > offset) { + buf.append(value, start, blockLength); + } + + return buf.length() > 0 ? buf.toString() : new String(value, offset, length); + } + + /** + * Converts input text into its XML representation by escaping all special symbols, + * if any are present. + * + * @param value Input array + * @param offset Start position in the array + * @param length Number of characters to process + * @return String with all the special symbols escaped + */ + public static String escape(char[] value, int offset, int length) { + return escape(value, offset, length, false); + } + + /** + * Converts input text into its XML representation by escaping all special symbols, + * if any are present. + * + * @param text Input string + * @param strict Method will throw an exception when it encounter illegal surrogate + * character if <code>strict</code> is true, otherwise illegal surrogate character + * will be replaced by character \uFFFD. + * @return String with all the special symbols escaped + * @throws XindiceRuntimeException If <code>strict</code> is true and <code>text</code> + * contains illegal surrogate character + */ + public static String escape(String text, boolean strict) { + StringBuffer buf = null; + int length = text.length(); + + for (int i = 0; i < length; i++) { + char ch = text.charAt(i); + String outval = escape(ch, strict); + + if (outval == null) { + if (isLeadingSurrogate(ch)) { + if (i + 1 < length && isTrailingSurrogate(text.charAt(i + 1))) { + outval = getSurrogateValue(ch, text.charAt(i + 1)); + + if (buf == null) { + buf = new StringBuffer(text.substring(0, i)); + } + i++; + } else { + if (strict) { + throw new XindiceRuntimeException("Leading surrogate &#" + Integer.toString(ch) + ";" + + "must be followed by trailing surrogate"); + } else { + outval = REPLACEMENT; + } + } + } + } + + if (outval != null && buf == null) { + buf = new StringBuffer(text.substring(0, i)); + } + + if (outval != null) { + buf.append(outval); + } else if (buf != null) { + buf.append(ch); + } + } + + return buf != null ? buf.toString() : text; + } + + /** + * Converts input text into its XML representation by escaping all special symbols, + * if any are present. + * + * @param text Input string + * @return String with all the special symbols escaped + */ + public static String escape(String text) { + return escape(text, false); + } + + private static String escape(char ch, boolean strict) { + String outval = null; + + switch (ch) { + case '&': + outval = "&"; + break; + case '\'': + outval = "'"; + break; + case '\"': + outval = """; + break; + case '<': + outval = "<"; + break; + case '>': + outval = ">"; + break; + default: + if (isTrailingSurrogate(ch)) { + if (strict) { + throw new XindiceRuntimeException("Trailing surrogate &#" + Integer.toString(ch) + + "; must follow leading surrogate"); + } else { + outval = REPLACEMENT; + } + } else if (!isLeadingSurrogate(ch) && !isLegal(ch)) { + outval = "&#" + Integer.toString(ch) + ";"; + } + break; + } + + return outval; + } + + private static boolean isLegal(char ch) { + return ch == 0x9 || ch == 0xA || ch == 0xD || + (ch >= 0x20 && ch <= 0xD7FF) || + (ch >= 0xE000 && ch <= 0xFFFD); + } + + /** + * Converts UTF-16 surrogate pair to UTF-8 + * @param high Leading surrogate + * @param low Trailing surrogate + * @return String with escaped 4-byte value + */ + private static String getSurrogateValue(char high, char low) { + int val = (high & 0x3FF) << 10 | (low & 0x3FF) + 0x10000; + return "&#" + Integer.toString(val) + ";"; + } + + private static boolean isLeadingSurrogate(char ch) { + return ch >= 0xD800 && ch <= 0xDBFF; + } + + private static boolean isTrailingSurrogate(char ch) { + return ch >= 0xDC00 && ch <= 0xDFFF; + } +} Propchange: xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: xml/xindice/trunk/java/src/org/apache/xindice/util/XMLUtilities.java ------------------------------------------------------------------------------ svn:keywords = Id Revision Author Date Modified: xml/xindice/trunk/java/src/org/apache/xindice/xml/sax/SetContentHandler.java URL: http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/xml/sax/SetContentHandler.java?rev=635409&r1=635408&r2=635409&view=diff ============================================================================== --- xml/xindice/trunk/java/src/org/apache/xindice/xml/sax/SetContentHandler.java (original) +++ xml/xindice/trunk/java/src/org/apache/xindice/xml/sax/SetContentHandler.java Sun Mar 9 17:52:12 2008 @@ -21,7 +21,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; - +import org.apache.xindice.util.XMLUtilities; +import org.apache.xindice.util.XindiceRuntimeException; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; @@ -122,14 +123,12 @@ } private String getQNameAtt(String uri, String localName) throws SAXException { - - String prefix = null; - if ("".equals(uri)) { return localName; } /* Look for prefix */ + String prefix = null; Iterator prefixes = namespaces.keySet().iterator(); while (prefixes.hasNext()) { String key = (String) prefixes.next(); @@ -149,8 +148,6 @@ private String getQNameElement(String uri, String localName) throws SAXException { - String prefix = null; - if ("".equals(uri)) { if (namespaces.get("") != null) { throw new SAXException("default namespace is declared here!"); @@ -161,6 +158,7 @@ } /* Look for prefix */ + String prefix = null; Iterator prefixes = namespaces.keySet().iterator(); while (prefixes.hasNext()) { String key = (String) prefixes.next(); @@ -189,8 +187,7 @@ * @exception SAXException Description of Exception * @see org.xml.sax.ContentHandler#startElement */ - public void startElement(String uri, String localName, - String qName, Attributes attributes) + public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { newContent.append("<"); @@ -213,7 +210,11 @@ newContent.append(qn); newContent.append("="); newContent.append("\""); - newContent.append(attributes.getValue(i)); + try { + newContent.append(XMLUtilities.escape(attributes.getValue(i), true)); + } catch (XindiceRuntimeException e) { + throw new SAXException(e); + } newContent.append("\""); // Avoid duplicate namespace declarations @@ -277,38 +278,11 @@ * @exception SAXException Description of Exception * @see org.xml.sax.ContentHandler#characters */ - public void characters(char ch[], int start, int length) - throws SAXException { - int i = 0; - while (i < length) { - char c = ch[start + i]; - switch (c) { - case '&': - newContent.append("&"); - break; - case '<': - newContent.append("<"); - break; - case '>': - newContent.append(">"); - break; - case '"': - newContent.append("""); - break; - case '\'': - newContent.append("'"); - break; - default: - // If we're outside 7 bit ascii encode as a character ref. - // Not sure what the proper behavior here should be. - if ((int) c > 127) { - newContent.append("&#" + (int) c + ";"); - } else { - newContent.append(c); - } - } - - i++; + public void characters(char ch[], int start, int length) throws SAXException { + try { + XMLUtilities.escape(ch, start, length, true); + } catch (XindiceRuntimeException e) { + throw new SAXException(e); } }