Author: jukka
Date: Mon Jan 21 13:47:46 2008
New Revision: 614024
URL: http://svn.apache.org/viewvc?rev=614024&view=rev
Log:
TIKA-117: Drop JDOM and Jaxen dependencies
- Note the signature changes in TikaConfig constructors!
- Dropped a few obsolete Utils methods
Modified:
incubator/tika/trunk/CHANGES.txt
incubator/tika/trunk/pom.xml
incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java
incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java
incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
Modified: incubator/tika/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=614024&r1=614023&r2=614024&view=diff
==============================================================================
--- incubator/tika/trunk/CHANGES.txt (original)
+++ incubator/tika/trunk/CHANGES.txt Mon Jan 21 13:47:46 2008
@@ -9,6 +9,8 @@
3. TIKA-116 - Streaming parser for OpenDocument files (Jukka Zitting)
+4. TIKA-117 - Drop JDOM and Jaxen dependencies (Jukka Zitting)
+
Release 0.1-incubating - 12/27/2007
1. TIKA-5 - Port Metadata Framework from Nutch (mattmann)
Modified: incubator/tika/trunk/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/pom.xml?rev=614024&r1=614023&r2=614024&view=diff
==============================================================================
--- incubator/tika/trunk/pom.xml (original)
+++ incubator/tika/trunk/pom.xml Mon Jan 21 13:47:46 2008
@@ -184,16 +184,6 @@
<version>3.0-FINAL</version>
</dependency>
<dependency>
- <groupId>jdom</groupId>
- <artifactId>jdom</artifactId>
- <version>1.0</version>
- </dependency>
- <dependency>
- <groupId>jaxen</groupId>
- <artifactId>jaxen</artifactId>
- <version>1.1.1</version>
- </dependency>
- <dependency>
<groupId>nekohtml</groupId>
<artifactId>nekohtml</artifactId>
<version>0.9.5</version>
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java?rev=614024&r1=614023&r2=614024&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java Mon Jan 21 13:47:46 2008
@@ -16,9 +16,6 @@
*/
package org.apache.tika.config;
-//JDK imports
-
-
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
@@ -26,14 +23,19 @@
import java.util.HashMap;
import java.util.Map;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.apache.tika.exception.TikaException;
import org.apache.tika.mime.MimeTypes;
import org.apache.tika.mime.MimeTypesFactory;
import org.apache.tika.parser.Parser;
-import org.jdom.Document;
-import org.jdom.Element;
-import org.jdom.JDOMException;
-import org.jdom.input.SAXBuilder;
-import org.jdom.xpath.XPath;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
/**
* Parse xml config file.
@@ -47,42 +49,50 @@
private static MimeTypes mimeTypes;
- public TikaConfig(String file) throws JDOMException, IOException {
+ public TikaConfig(String file)
+ throws TikaException, IOException, SAXException {
this(new File(file));
}
- public TikaConfig(File file) throws JDOMException, IOException {
- this(new SAXBuilder().build(file));
+ public TikaConfig(File file)
+ throws TikaException, IOException, SAXException {
+ this(getBuilder().parse(file));
}
- public TikaConfig(URL url) throws JDOMException, IOException {
- this(new SAXBuilder().build(url));
+ public TikaConfig(URL url)
+ throws TikaException, IOException, SAXException {
+ this(getBuilder().parse(url.toString()));
}
- public TikaConfig(InputStream stream) throws JDOMException, IOException {
- this(new SAXBuilder().build(stream));
+ public TikaConfig(InputStream stream)
+ throws TikaException, IOException, SAXException {
+ this(getBuilder().parse(stream));
}
- public TikaConfig(Document document) throws JDOMException, IOException {
- this(document.getRootElement());
+ public TikaConfig(Document document) throws TikaException, IOException {
+ this(document.getDocumentElement());
}
- public TikaConfig(Element element) throws JDOMException, IOException {
- Element mtr = element.getChild("mimeTypeRepository");
- String mimeTypeRepoResource = mtr.getAttributeValue("resource");
- mimeTypes = MimeTypesFactory.create(mimeTypeRepoResource);
+ public TikaConfig(Element element) throws TikaException, IOException {
+ Element mtr = getChild(element, "mimeTypeRepository");
+ if (mtr != null) {
+ mimeTypes = MimeTypesFactory.create(mtr.getAttribute("resource"));
+ }
- for (Object node : XPath.selectNodes(element, "//parser")) {
- String className = ((Element) node).getAttributeValue("class");
+ NodeList nodes = element.getElementsByTagName("parser");
+ for (int i = 0; i < nodes.getLength(); i++) {
+ Element node = (Element) nodes.item(i);
+ String name = node.getAttribute("class");
try {
- Parser parser =
- (Parser) Class.forName(className).newInstance();
- for (Object child : ((Element) node).getChildren("mime")) {
- parsers.put(((Element) child).getTextTrim(), parser);
+ Parser parser = (Parser) Class.forName(name).newInstance();
+ NodeList mimes = node.getElementsByTagName("mime");
+ for (int j = 0; j < mimes.getLength(); j++) {
+ Element mime = (Element) mimes.item(j);
+ parsers.put(mime.getTextContent().trim(), parser);
}
} catch (Exception e) {
- throw new JDOMException(
- "Invalid parser configuration: " + className, e);
+ throw new TikaException(
+ "Invalid parser configuration: " + name, e);
}
}
}
@@ -101,21 +111,45 @@
public MimeTypes getMimeRepository(){
return mimeTypes;
}
-
+
/**
* Provides a default configuration (TikaConfig). Currently creates a
* new instance each time it's called; we may be able to have it
* return a shared instance once it is completely immutable.
*
- * @return
- * @throws IOException
- * @throws JDOMException
+ * @return default configuration
+ * @throws TikaException if the default configuration is not available
*/
- public static TikaConfig getDefaultConfig()
- throws IOException, JDOMException {
+ public static TikaConfig getDefaultConfig() throws TikaException {
+ try {
+ InputStream stream =
+ TikaConfig.class.getResourceAsStream(DEFAULT_CONFIG_LOCATION);
+ return new TikaConfig(stream);
+ } catch (IOException e) {
+ throw new TikaException("Unable to read default configuration", e);
+ } catch (SAXException e) {
+ throw new TikaException("Unable to parse default configuration", e);
+ }
+ }
+
+ private static DocumentBuilder getBuilder() throws TikaException {
+ try {
+ return DocumentBuilderFactory.newInstance().newDocumentBuilder();
+ } catch (ParserConfigurationException e) {
+ throw new TikaException("XML parser not available", e);
+ }
+ }
- return new TikaConfig(
- TikaConfig.class.getResourceAsStream(DEFAULT_CONFIG_LOCATION));
+ private static Element getChild(Element element, String name) {
+ Node child = element.getFirstChild();
+ while (child != null) {
+ if (child.getNodeType() == Node.ELEMENT_NODE
+ && name.equals(child.getNodeName())) {
+ return (Element) child;
+ }
+ child = child.getNextSibling();
+ }
+ return null;
}
}
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=614024&r1=614023&r2=614024&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java Mon Jan 21 13:47:46 2008
@@ -27,7 +27,6 @@
import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.mime.MimeTypes;
-import org.jdom.JDOMException;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -42,10 +41,7 @@
public AutoDetectParser() {
try {
config = TikaConfig.getDefaultConfig();
- } catch (IOException e) {
- // FIXME: This should never happen
- throw new RuntimeException(e);
- } catch (JDOMException e) {
+ } catch (TikaException e) {
// FIXME: This should never happen
throw new RuntimeException(e);
}
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java?rev=614024&r1=614023&r2=614024&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java Mon Jan 21 13:47:46 2008
@@ -35,11 +35,6 @@
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.HttpHeaders;
import org.apache.tika.metadata.Metadata;
-import org.jdom.Document;
-import org.jdom.JDOMException;
-import org.jdom.input.SAXBuilder;
-import org.jdom.output.Format;
-import org.jdom.output.XMLOutputter;
import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
@@ -54,20 +49,6 @@
static Logger logger = Logger.getRootLogger();
- public static Document parse(InputStream is) {
- org.jdom.Document xmlDoc = new org.jdom.Document();
- try {
- SAXBuilder builder = new SAXBuilder();
- builder.setValidation(false);
- xmlDoc = builder.build(is);
- } catch (JDOMException e) {
- logger.error(e.getMessage());
- } catch (IOException e) {
- logger.error(e.getMessage());
- }
- return xmlDoc;
- }
-
public static List unzip(InputStream is) {
List res = new ArrayList();
try {
@@ -109,24 +90,6 @@
in.close();
out.close();
- }
-
- public static void saveInXmlFile(Document doc, String file) {
- Format f = Format.getPrettyFormat().setEncoding("UTF-8");
-
- XMLOutputter xop = new XMLOutputter(f);
-
- try {
-
- xop.output(doc, new FileOutputStream(file));
-
- }
-
- catch (IOException ex) {
-
- logger.error(ex.getMessage());
-
- }
}
/**
Modified: incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java?rev=614024&r1=614023&r2=614024&view=diff
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java (original)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java Mon Jan 21 13:47:46 2008
@@ -18,7 +18,6 @@
import java.io.File;
import java.io.FileInputStream;
-import java.io.IOException;
import java.io.InputStream;
import java.util.List;
@@ -29,7 +28,6 @@
import org.apache.tika.parser.Parser;
import org.apache.tika.utils.ParseUtils;
import org.apache.tika.utils.Utils;
-import org.jdom.JDOMException;
import org.xml.sax.helpers.DefaultHandler;
/**
@@ -41,7 +39,7 @@
private File testFilesBaseDir;
- public void setUp() throws JDOMException, IOException {
+ public void setUp() throws Exception {
/*
* FIXME the old mechanism does not work anymore when running the tests
* with Maven - need a resource-based one, but this means more changes