Author: scottbw
Date: Fri May 27 20:27:11 2011
New Revision: 1128443

URL: http://svn.apache.org/viewvc?rev=1128443&view=rev
Log:
Fixed a potential issue in the parser with double-encoding XML content, 
particularly nested text nodes, when recursively extracting text from localized 
text fields.

Modified:
    
incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/impl/AbstractLocalizedEntity.java

Modified: 
incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/impl/AbstractLocalizedEntity.java
URL: 
http://svn.apache.org/viewvc/incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/impl/AbstractLocalizedEntity.java?rev=1128443&r1=1128442&r2=1128443&view=diff
==============================================================================
--- 
incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/impl/AbstractLocalizedEntity.java
 (original)
+++ 
incubator/wookie/trunk/parser/java/src/org/apache/wookie/w3c/impl/AbstractLocalizedEntity.java
 Fri May 27 20:27:11 2011
@@ -13,6 +13,7 @@
  */
 package org.apache.wookie.w3c.impl;
 
+import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.wookie.w3c.ILocalizedEntity;
 import org.apache.wookie.w3c.IW3CXMLConfiguration;
 import org.apache.wookie.w3c.util.LocalizationUtils;
@@ -99,8 +100,13 @@ public abstract class AbstractLocalizedE
                                        
content.append(getLocalizedTextContent((Element)node));
                                }
                        }
+                       // Append text to the string
+                       // First we have to unescape any XML special characters 
so we don't
+                       // double-encode them (e.g. &acute; = &amp;acute;) when 
exporting to 
+                       // HTML or XML later
                        if (node instanceof Text){
-                               content.append(((Text)node).getText());
+                         String text = ((Text)node).getText();
+                               
content.append(StringEscapeUtils.unescapeXml(text));
                        }
                }
                return UnicodeUtils.normalizeWhitespace(content.toString());


Reply via email to