Author: jukka
Date: Mon Jan 21 13:47:46 2008
New Revision: 614024

URL: http://svn.apache.org/viewvc?rev=614024&view=rev
Log:
TIKA-117: Drop JDOM and Jaxen dependencies
    - Note the signature changes in TikaConfig constructors!
    - Dropped a few obsolete Utils methods

Modified:
    incubator/tika/trunk/CHANGES.txt
    incubator/tika/trunk/pom.xml
    incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
    incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java
    incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java

Modified: incubator/tika/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=614024&r1=614023&r2=614024&view=diff
==============================================================================
--- incubator/tika/trunk/CHANGES.txt (original)
+++ incubator/tika/trunk/CHANGES.txt Mon Jan 21 13:47:46 2008
@@ -9,6 +9,8 @@
 
 3. TIKA-116 - Streaming parser for OpenDocument files (Jukka Zitting)
 
+4. TIKA-117 - Drop JDOM and Jaxen dependencies (Jukka Zitting)
+
 Release 0.1-incubating - 12/27/2007
 
 1. TIKA-5 - Port Metadata Framework from Nutch (mattmann)

Modified: incubator/tika/trunk/pom.xml
URL: 
http://svn.apache.org/viewvc/incubator/tika/trunk/pom.xml?rev=614024&r1=614023&r2=614024&view=diff
==============================================================================
--- incubator/tika/trunk/pom.xml (original)
+++ incubator/tika/trunk/pom.xml Mon Jan 21 13:47:46 2008
@@ -184,16 +184,6 @@
       <version>3.0-FINAL</version>
     </dependency>
     <dependency>
-      <groupId>jdom</groupId>
-      <artifactId>jdom</artifactId>
-      <version>1.0</version>
-    </dependency>
-    <dependency>
-      <groupId>jaxen</groupId>
-      <artifactId>jaxen</artifactId>
-      <version>1.1.1</version>
-    </dependency>
-    <dependency>
       <groupId>nekohtml</groupId>
       <artifactId>nekohtml</artifactId>
       <version>0.9.5</version>

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java
URL: 
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java?rev=614024&r1=614023&r2=614024&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java Mon Jan 21 13:47:46 2008
@@ -16,9 +16,6 @@
  */
 package org.apache.tika.config;
 
-//JDK imports
-
-
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
@@ -26,14 +23,19 @@
 import java.util.HashMap;
 import java.util.Map;
 
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.mime.MimeTypesFactory;
 import org.apache.tika.parser.Parser;
-import org.jdom.Document;
-import org.jdom.Element;
-import org.jdom.JDOMException;
-import org.jdom.input.SAXBuilder;
-import org.jdom.xpath.XPath;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
 
 /**
  * Parse xml config file.
@@ -47,42 +49,50 @@
     
     private static MimeTypes mimeTypes;
 
-    public TikaConfig(String file) throws JDOMException, IOException {
+    public TikaConfig(String file)
+            throws TikaException, IOException, SAXException {
         this(new File(file));
     }
 
-    public TikaConfig(File file) throws JDOMException, IOException {
-        this(new SAXBuilder().build(file));
+    public TikaConfig(File file)
+            throws TikaException, IOException, SAXException {
+        this(getBuilder().parse(file));
     }
 
-    public TikaConfig(URL url) throws JDOMException, IOException {
-        this(new SAXBuilder().build(url));
+    public TikaConfig(URL url)
+            throws TikaException, IOException, SAXException {
+        this(getBuilder().parse(url.toString()));
     }
 
-    public TikaConfig(InputStream stream) throws JDOMException, IOException {
-        this(new SAXBuilder().build(stream));
+    public TikaConfig(InputStream stream)
+            throws TikaException, IOException, SAXException {
+        this(getBuilder().parse(stream));
     }
 
-    public TikaConfig(Document document) throws JDOMException, IOException {
-        this(document.getRootElement());
+    public TikaConfig(Document document) throws TikaException, IOException {
+        this(document.getDocumentElement());
     }
 
-    public TikaConfig(Element element) throws JDOMException, IOException {
-        Element mtr = element.getChild("mimeTypeRepository");
-        String mimeTypeRepoResource = mtr.getAttributeValue("resource");
-        mimeTypes = MimeTypesFactory.create(mimeTypeRepoResource);
+    public TikaConfig(Element element) throws TikaException, IOException {
+        Element mtr = getChild(element, "mimeTypeRepository");
+        if (mtr != null) {
+            mimeTypes = MimeTypesFactory.create(mtr.getAttribute("resource"));
+        }
 
-        for (Object node : XPath.selectNodes(element, "//parser")) {
-            String className = ((Element) node).getAttributeValue("class");
+        NodeList nodes = element.getElementsByTagName("parser");
+        for (int i = 0; i < nodes.getLength(); i++) {
+            Element node = (Element) nodes.item(i);
+            String name = node.getAttribute("class");
             try {
-                Parser parser =
-                        (Parser) Class.forName(className).newInstance();
-                for (Object child : ((Element) node).getChildren("mime")) {
-                    parsers.put(((Element) child).getTextTrim(), parser);
+                Parser parser = (Parser) Class.forName(name).newInstance();
+                NodeList mimes = node.getElementsByTagName("mime");
+                for (int j = 0; j < mimes.getLength(); j++) {
+                    Element mime = (Element) mimes.item(j);
+                    parsers.put(mime.getTextContent().trim(), parser);
                 }
             } catch (Exception e) {
-                throw new JDOMException(
-                        "Invalid parser configuration: " + className, e);
+                throw new TikaException(
+                        "Invalid parser configuration: " + name, e);
             }
         }
     }
@@ -101,21 +111,45 @@
     public MimeTypes getMimeRepository(){
         return mimeTypes;
     }
-    
+
     /**
      * Provides a default configuration (TikaConfig).  Currently creates a
      * new instance each time it's called; we may be able to have it
      * return a shared instance once it is completely immutable.
      *
-     * @return
-     * @throws IOException
-     * @throws JDOMException
+     * @return default configuration
+     * @throws TikaException if the default configuration is not available
      */
-    public static TikaConfig getDefaultConfig()
-            throws IOException, JDOMException {
+    public static TikaConfig getDefaultConfig() throws TikaException {
+        try {
+            InputStream stream =
+                TikaConfig.class.getResourceAsStream(DEFAULT_CONFIG_LOCATION);
+            return new TikaConfig(stream);
+        } catch (IOException e) {
+            throw new TikaException("Unable to read default configuration", e);
+        } catch (SAXException e) {
+            throw new TikaException("Unable to parse default configuration", e);
+        }
+    }
+
+    private static DocumentBuilder getBuilder() throws TikaException {
+        try {
+            return DocumentBuilderFactory.newInstance().newDocumentBuilder();
+        } catch (ParserConfigurationException e) {
+            throw new TikaException("XML parser not available", e);
+        }
+    }
 
-        return new TikaConfig(
-                TikaConfig.class.getResourceAsStream(DEFAULT_CONFIG_LOCATION));
+    private static Element getChild(Element element, String name) {
+        Node child = element.getFirstChild();
+        while (child != null) {
+            if (child.getNodeType() == Node.ELEMENT_NODE
+                    && name.equals(child.getNodeName())) {
+                return (Element) child;
+            }
+            child = child.getNextSibling();
+        }
+        return null;
     }
 
 }

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL: 
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=614024&r1=614023&r2=614024&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java Mon Jan 21 13:47:46 2008
@@ -27,7 +27,6 @@
 import org.apache.tika.mime.MimeType;
 import org.apache.tika.mime.MimeTypeException;
 import org.apache.tika.mime.MimeTypes;
-import org.jdom.JDOMException;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -42,10 +41,7 @@
     public AutoDetectParser() {
         try {
             config = TikaConfig.getDefaultConfig();
-        } catch (IOException e) {
-            // FIXME: This should never happen
-            throw new RuntimeException(e);
-        } catch (JDOMException e) {
+        } catch (TikaException e) {
             // FIXME: This should never happen
             throw new RuntimeException(e);
         }

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java
URL: 
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java?rev=614024&r1=614023&r2=614024&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java Mon Jan 21 13:47:46 2008
@@ -35,11 +35,6 @@
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.HttpHeaders;
 import org.apache.tika.metadata.Metadata;
-import org.jdom.Document;
-import org.jdom.JDOMException;
-import org.jdom.input.SAXBuilder;
-import org.jdom.output.Format;
-import org.jdom.output.XMLOutputter;
 
 import com.ibm.icu.text.CharsetDetector;
 import com.ibm.icu.text.CharsetMatch;
@@ -54,20 +49,6 @@
 
     static Logger logger = Logger.getRootLogger();
 
-    public static Document parse(InputStream is) {
-        org.jdom.Document xmlDoc = new org.jdom.Document();
-        try {
-            SAXBuilder builder = new SAXBuilder();
-            builder.setValidation(false);
-            xmlDoc = builder.build(is);
-        } catch (JDOMException e) {
-            logger.error(e.getMessage());
-        } catch (IOException e) {
-            logger.error(e.getMessage());
-        }
-        return xmlDoc;
-    }
-
     public static List unzip(InputStream is) {
         List res = new ArrayList();
         try {
@@ -109,24 +90,6 @@
 
         in.close();
         out.close();
-    }
-
-    public static void saveInXmlFile(Document doc, String file) {
-        Format f = Format.getPrettyFormat().setEncoding("UTF-8");
-
-        XMLOutputter xop = new XMLOutputter(f);
-
-        try {
-
-            xop.output(doc, new FileOutputStream(file));
-
-        }
-
-        catch (IOException ex) {
-
-            logger.error(ex.getMessage());
-
-        }
     }
 
     /**

Modified: incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
URL: 
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java?rev=614024&r1=614023&r2=614024&view=diff
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java (original)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java Mon Jan 21 13:47:46 2008
@@ -18,7 +18,6 @@
 
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.IOException;
 import java.io.InputStream;
 import java.util.List;
 
@@ -29,7 +28,6 @@
 import org.apache.tika.parser.Parser;
 import org.apache.tika.utils.ParseUtils;
 import org.apache.tika.utils.Utils;
-import org.jdom.JDOMException;
 import org.xml.sax.helpers.DefaultHandler;
 
 /**
@@ -41,7 +39,7 @@
 
     private File testFilesBaseDir;
 
-    public void setUp() throws JDOMException, IOException {
+    public void setUp() throws Exception {
         /*
          * FIXME the old mechanism does not work anymore when running the tests
          * with Maven - need a resource-based one, but this means more changes


Reply via email to