Author: siren Date: Tue Jun 20 12:11:00 2006 New Revision: 415772 URL: http://svn.apache.org/viewvc?rev=415772&view=rev Log: NUTCH-110 fix illegal xml output contributed by [EMAIL PROTECTED]
Added: lucene/nutch/trunk/src/test/org/apache/nutch/searcher/TestOpenSearchServlet.java Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java?rev=415772&r1=415771&r2=415772&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java Tue Jun 20 12:11:00 2006 @@ -262,23 +262,57 @@ private static void addNode(Document doc, Node parent, String name, String text) { Element child = doc.createElement(name); - child.appendChild(doc.createTextNode(text)); + child.appendChild(doc.createTextNode(getLegalXml(text))); parent.appendChild(child); } private static void addNode(Document doc, Node parent, String ns, String name, String text) { Element child = doc.createElementNS((String)NS_MAP.get(ns), ns+":"+name); - child.appendChild(doc.createTextNode(text)); + child.appendChild(doc.createTextNode(getLegalXml(text))); parent.appendChild(child); } private static void addAttribute(Document doc, Element node, String name, String value) { Attr attribute = doc.createAttribute(name); - attribute.setValue(value); + attribute.setValue(getLegalXml(value)); node.getAttributes().setNamedItem(attribute); } -} + /* + * Ensure string is legal xml. + * @param text String to verify. + * @return Passed <code>text</code> or a new string with illegal + * characters removed if any found in <code>text</code>. + * @see http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char + */ + protected static String getLegalXml(final String text) { + if (text == null) { + return null; + } + StringBuffer buffer = null; + for (int i = 0; i < text.length(); i++) { + char c = text.charAt(i); + if (!isLegalXml(c)) { + if (buffer == null) { + // Start up a buffer. Copy characters here from now on + // now we've found at least one bad character in original. + buffer = new StringBuffer(text.length()); + buffer.append(text.substring(0, i)); + } + } else { + if (buffer != null) { + buffer.append(c); + } + } + } + return (buffer != null)? buffer.toString(): text; + } + + private static boolean isLegalXml(final char c) { + return c == 0x9 || c == 0xa || c == 0xd || (c >= 0x20 && c <= 0xd7ff) + || (c >= 0xe000 && c <= 0xfffd) || (c >= 0x10000 && c <= 0x10ffff); + } +} Added: lucene/nutch/trunk/src/test/org/apache/nutch/searcher/TestOpenSearchServlet.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/searcher/TestOpenSearchServlet.java?rev=415772&view=auto ============================================================================== --- lucene/nutch/trunk/src/test/org/apache/nutch/searcher/TestOpenSearchServlet.java (added) +++ lucene/nutch/trunk/src/test/org/apache/nutch/searcher/TestOpenSearchServlet.java Tue Jun 20 12:11:00 2006 @@ -0,0 +1,17 @@ +package org.apache.nutch.searcher; + +import junit.framework.TestCase; + +public class TestOpenSearchServlet extends TestCase { + + /** + * Test removing of illegal xml chars from string + */ + public void testGetLegalXml(){ + assertEquals("hello",OpenSearchServlet.getLegalXml("hello")); + assertEquals("hello",OpenSearchServlet.getLegalXml("he\u0000llo")); + assertEquals("hello",OpenSearchServlet.getLegalXml("\u0000he\u0000llo")); + assertEquals("hello",OpenSearchServlet.getLegalXml("\u0000he\u0000llo\u0000")); + } + +} _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs