Author: jukka
Date: Wed Feb  4 00:58:39 2009
New Revision: 740541

URL: http://svn.apache.org/viewvc?rev=740541&view=rev
Log:
TIKA-196: Configuration parser fails in Java 1.4

Fixed based on suggestion by Dmitry Kudryavtsev.

Modified:
    lucene/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java

Modified: lucene/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java?rev=740541&r1=740540&r2=740541&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java Wed Feb  4 00:58:39 2009
@@ -87,8 +87,7 @@
                 Parser parser = (Parser) Class.forName(name).newInstance();
                 NodeList mimes = node.getElementsByTagName("mime");
                 for (int j = 0; j < mimes.getLength(); j++) {
-                    Element mime = (Element) mimes.item(j);
-                    parsers.put(mime.getTextContent().trim(), parser);
+                    parsers.put(getText(mimes.item(j)).trim(), parser);
                 }
             } catch (Exception e) {
                 throw new TikaException(
@@ -97,6 +96,36 @@
         }
     }
 
+    /**
+     * Returns the concatenated character data found in the given DOM node
+     * and, for elements, in all of its descendants.
+     * <p>
+     * Stands in for <code>Node.getTextContent()</code>, a DOM Level 3
+     * method that is not available on Java 1.4, and for the same reason
+     * uses <code>StringBuffer</code> instead of the Java 5
+     * <code>StringBuilder</code>.
+     *
+     * @param node text, CDATA section or element node to read
+     * @return text of the node, or an empty string for other node types
+     */
+    private String getText(Node node) {
+        short type = node.getNodeType();
+        // CDATA sections carry character data just like plain text nodes
+        if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) {
+            return node.getNodeValue();
+        } else if (type == Node.ELEMENT_NODE) {
+            StringBuffer buffer = new StringBuffer();
+            NodeList list = node.getChildNodes();
+            for (int i = 0; i < list.getLength(); i++) {
+                buffer.append(getText(list.item(i)));
+            }
+            return buffer.toString();
+        } else {
+            // comments, processing instructions etc. contribute no text
+            return "";
+        }
+    }
+
     /**
      * Returns the parser instance configured for the given MIME type.
      * Returns <code>null</code> if the given MIME type is unknown.


Reply via email to