Author: jukka
Date: Tue Jun 23 20:43:09 2009
New Revision: 787830
URL: http://svn.apache.org/viewvc?rev=787830&view=rev
Log:
TIKA-248: No logging in tika-core
Replace warning logs with exceptions. Drop commons-logging dependency.
Modified:
lucene/tika/trunk/tika-core/pom.xml
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MagicMatch.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypeException.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesFactory.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
Modified: lucene/tika/trunk/tika-core/pom.xml
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/pom.xml?rev=787830&r1=787829&r2=787830&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/pom.xml (original)
+++ lucene/tika/trunk/tika-core/pom.xml Tue Jun 23 20:43:09 2009
@@ -39,11 +39,6 @@
<dependencies>
<dependency>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- <version>1.0.4</version>
- </dependency>
- <dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>1.4</version>
@@ -68,7 +63,6 @@
</Export-Package>
<Import-Package>
org.apache.commons.io.*;version="[1.4,2.0)",
- org.apache.commons.logging.*;version="[1.0.4,2.0)",
*
</Import-Package>
<Bundle-DocURL>${project.url}</Bundle-DocURL>
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MagicMatch.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MagicMatch.java?rev=787830&r1=787829&r2=787830&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MagicMatch.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MagicMatch.java Tue Jun 23 20:43:09 2009
@@ -143,7 +143,7 @@
}
return decoded.toByteArray();
} catch (NumberFormatException e) {
- throw new MimeTypeException(e.toString() + " for " + value);
+ throw new MimeTypeException("Invalid string value: " + value, e);
}
}
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypeException.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypeException.java?rev=787830&r1=787829&r2=787830&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypeException.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypeException.java Tue Jun 23 20:43:09 2009
@@ -16,37 +16,31 @@
*/
package org.apache.tika.mime;
+import org.apache.tika.exception.TikaException;
+
/**
* A class to encapsulate MimeType related exceptions.
- *
- *
*/
-public class MimeTypeException extends Exception {
-
- /**
- * Constructs a MimeTypeException with no specified detail message.
- */
- public MimeTypeException() {
- super();
- }
+public class MimeTypeException extends TikaException {
/**
* Constructs a MimeTypeException with the specified detail message.
*
- * @param msg
- * the detail message.
+ * @param message the detail message.
*/
- public MimeTypeException(String msg) {
- super(msg);
+ public MimeTypeException(String message) {
+ super(message);
}
/**
- * Constructs a MimeTypeException with the specified cause.
+ * Constructs a MimeTypeException with the specified detail message
+ * and root cause.
*
- * @param t
- * the cause.
+ * @param message the detail message.
+ * @param cause root cause
*/
- public MimeTypeException(Throwable t) {
- super(t);
+ public MimeTypeException(String message, Throwable cause) {
+ super(message, cause);
}
+
}
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesFactory.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesFactory.java?rev=787830&r1=787829&r2=787830&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesFactory.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesFactory.java Tue Jun 23 20:43:09 2009
@@ -28,7 +28,6 @@
*/
public class MimeTypesFactory {
-
/**
* Creates an empty instance; same as calling new MimeTypes().
*
@@ -40,8 +39,9 @@
/**
* Creates and returns a MimeTypes instance from the specified document.
+ * @throws MimeTypeException if the type configuration is invalid
*/
- public static MimeTypes create(Document document) {
+ public static MimeTypes create(Document document) throws MimeTypeException {
MimeTypes mimeTypes = new MimeTypes();
new MimeTypesReader(mimeTypes).read(document);
return mimeTypes;
@@ -50,8 +50,11 @@
/**
* Creates and returns a MimeTypes instance from the specified input stream.
* Does not close the input stream.
+ * @throws IOException if the stream can not be read
+ * @throws MimeTypeException if the type configuration is invalid
*/
- public static MimeTypes create(InputStream inputStream) {
+ public static MimeTypes create(InputStream inputStream)
+ throws IOException, MimeTypeException {
MimeTypes mimeTypes = new MimeTypes();
new MimeTypesReader(mimeTypes).read(inputStream);
return mimeTypes;
@@ -61,8 +64,12 @@
* Creates and returns a MimeTypes instance from the resource
* at the location specified by the URL. Opens and closes the
* InputStream from the URL.
+ *
+ * @throws IOException if the URL can not be accessed
+ * @throws MimeTypeException if the type configuration is invalid
*/
- public static MimeTypes create(URL url) throws IOException {
+ public static MimeTypes create(URL url)
+ throws IOException, MimeTypeException {
InputStream stream = url.openStream();
try {
return create(stream);
@@ -74,8 +81,12 @@
/**
* Creates and returns a MimeTypes instance from the specified file path,
* as interpreted by the class loader in getResource().
+ *
+ * @throws IOException if the file can not be accessed
+ * @throws MimeTypeException if the type configuration is invalid
*/
- public static MimeTypes create(String filePath) throws IOException {
+ public static MimeTypes create(String filePath)
+ throws IOException, MimeTypeException {
return create(MimeTypesReader.class.getResource(filePath));
}
}
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java?rev=787830&r1=787829&r2=787830&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java Tue Jun 23 20:43:09 2009
@@ -16,10 +16,6 @@
*/
package org.apache.tika.mime;
-// Commons Logging imports
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
// DOM imports
import org.w3c.dom.Attr;
import org.w3c.dom.Node;
@@ -28,11 +24,14 @@
import org.w3c.dom.NodeList;
import org.w3c.dom.NamedNodeMap;
import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
// JDK imports
+import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
/**
* A reader for XML files compliant with the freedesktop MIME-info DTD.
@@ -91,43 +90,30 @@
*/
final class MimeTypesReader implements MimeTypesReaderMetKeys {
- /** The logger to use */
- private Log logger = null;
-
private final MimeTypes types;
MimeTypesReader(MimeTypes types) {
- this(types, null);
- }
-
- MimeTypesReader(MimeTypes types, Log logger) {
this.types = types;
- if (logger == null) {
- this.logger = LogFactory.getLog(this.getClass());
- } else {
- this.logger = logger;
- }
}
- void read(String filepath) {
+ void read(String filepath) throws IOException, MimeTypeException {
read(MimeTypesReader.class.getClassLoader().getResourceAsStream(filepath));
}
- void read(InputStream stream) {
+ void read(InputStream stream) throws IOException, MimeTypeException {
try {
- DocumentBuilderFactory factory = DocumentBuilderFactory
- .newInstance();
+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document document = builder.parse(new InputSource(stream));
read(document);
- } catch (Exception e) {
- if (logger.isWarnEnabled()) {
- logger.warn(e.toString() + " while loading mime-types");
- }
+ } catch (ParserConfigurationException e) {
+ throw new MimeTypeException("Unable to create an XML parser", e);
+ } catch (SAXException e) {
+ throw new MimeTypeException("Invalid type configuration", e);
}
}
- void read(Document document) {
+ void read(Document document) throws MimeTypeException {
Element element = document.getDocumentElement();
if (element != null && element.getTagName().equals(MIME_INFO_TAG)) {
NodeList nodes = element.getChildNodes();
@@ -141,57 +127,51 @@
}
}
} else {
- logger.warn("Not a <"+MIME_INFO_TAG+"/> configuration document");
+ throw new MimeTypeException(
+ "Not a <" + MIME_INFO_TAG + "/> configuration document: "
+ + element.getTagName());
}
}
/** Read Element named mime-type. */
- private void readMimeType(Element element) {
+ private void readMimeType(Element element) throws MimeTypeException {
String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
- try {
- MimeType type = types.forName(name);
+ MimeType type = types.forName(name);
- NodeList nodes = element.getChildNodes();
- for (int i = 0; i < nodes.getLength(); i++) {
- Node node = nodes.item(i);
- if (node.getNodeType() == Node.ELEMENT_NODE) {
- Element nodeElement = (Element) node;
- if (nodeElement.getTagName().equals(COMMENT_TAG)) {
- type.setDescription(
- nodeElement.getFirstChild().getNodeValue());
- } else if (nodeElement.getTagName().equals(GLOB_TAG)) {
- boolean useRegex =
Boolean.valueOf(nodeElement.getAttribute(ISREGEX_ATTR));
- types.addPattern(type,
nodeElement.getAttribute(PATTERN_ATTR), useRegex);
- } else if (nodeElement.getTagName().equals(MAGIC_TAG)) {
- readMagic(nodeElement, type);
- } else if (nodeElement.getTagName().equals(ALIAS_TAG)) {
- String alias =
nodeElement.getAttribute(ALIAS_TYPE_ATTR);
- try {
- type.addAlias(alias);
- } catch (MimeTypeException e) {
- logger.warn("Invalid media type alias: " + alias,
e);
- }
- } else if (nodeElement.getTagName().equals(ROOT_XML_TAG)) {
- readRootXML(nodeElement, type);
- } else if
(nodeElement.getTagName().equals(SUB_CLASS_OF_TAG)) {
- String parent =
nodeElement.getAttribute(SUB_CLASS_TYPE_ATTR);
- try {
- type.setSuperType(types.forName(parent));
- } catch (MimeTypeException e) {
- logger.warn("Invalid parent type: " + parent, e);
- }
- }
+ NodeList nodes = element.getChildNodes();
+ for (int i = 0; i < nodes.getLength(); i++) {
+ Node node = nodes.item(i);
+ if (node.getNodeType() == Node.ELEMENT_NODE) {
+ Element nodeElement = (Element) node;
+ if (nodeElement.getTagName().equals(COMMENT_TAG)) {
+ type.setDescription(
+ nodeElement.getFirstChild().getNodeValue());
+ } else if (nodeElement.getTagName().equals(GLOB_TAG)) {
+ boolean useRegex = Boolean.valueOf(nodeElement.getAttribute(ISREGEX_ATTR));
+ types.addPattern(type, nodeElement.getAttribute(PATTERN_ATTR), useRegex);
+ } else if (nodeElement.getTagName().equals(MAGIC_TAG)) {
+ readMagic(nodeElement, type);
+ } else if (nodeElement.getTagName().equals(ALIAS_TAG)) {
+ String alias = nodeElement.getAttribute(ALIAS_TYPE_ATTR);
+ type.addAlias(alias);
+ } else if (nodeElement.getTagName().equals(ROOT_XML_TAG)) {
+ readRootXML(nodeElement, type);
+ } else if (nodeElement.getTagName().equals(SUB_CLASS_OF_TAG)) {
+ String parent = nodeElement.getAttribute(SUB_CLASS_TYPE_ATTR);
+ type.setSuperType(types.forName(parent));
}
}
-
- types.add(type);
- } catch (MimeTypeException e) {
- logger.warn("Invalid media type configuration entry: " + name, e);
}
+
+ types.add(type);
}
- /** Read Element named magic. */
- private void readMagic(Element element, MimeType mimeType) {
+ /**
+ * Read Element named magic.
+ * @throws MimeTypeException if the configuration is invalid
+ */
+ private void readMagic(Element element, MimeType mimeType)
+ throws MimeTypeException {
Magic magic = new Magic(mimeType);
String priority = element.getAttribute(MAGIC_PRIORITY_ATTR);
@@ -204,8 +184,7 @@
mimeType.addMagic(magic);
}
- private Clause readMatches(Element element) {
- Clause sub = null;
+ private Clause readMatches(Element element) throws MimeTypeException {
Clause prev = Clause.FALSE;
Clause clause = null;
NodeList nodes = element.getChildNodes();
@@ -214,20 +193,13 @@
if (node.getNodeType() == Node.ELEMENT_NODE) {
Element nodeElement = (Element) node;
if (nodeElement.getTagName().equals(MATCH_TAG)) {
- sub = readMatches(nodeElement);
- try {
- if (sub != null) {
- clause = new MagicClause(Operator.AND,
- readMatch(nodeElement), sub);
- } else {
- clause = readMatch(nodeElement);
- }
- clause = new MagicClause(Operator.OR, prev, clause);
- prev = clause;
- } catch (MimeTypeException mte) {
- logger.warn(mte + " while reading magic-match ["
- + nodeElement + "], Ignoring!");
+ clause = readMatch(nodeElement);
+ Clause sub = readMatches(nodeElement);
+ if (sub != null) {
+ clause = new MagicClause(Operator.AND, clause, sub);
}
+ clause = new MagicClause(Operator.OR, prev, clause);
+ prev = clause;
}
}
}
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java?rev=787830&r1=787829&r2=787830&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java Tue Jun 23 20:43:09 2009
@@ -27,8 +27,6 @@
import java.util.SortedMap;
import java.util.TreeMap;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
@@ -74,9 +72,6 @@
*/
public class ExcelExtractor {
- /** Logging instance */
- private static final Log log = LogFactory.getLog(ExcelExtractor.class);
-
/**
* <code>true</code> if the HSSFListener should be registered
* to listen for all records or <code>false</code> (the default)
@@ -119,8 +114,6 @@
*/
protected void parse(POIFSFileSystem filesystem, XHTMLContentHandler xhtml)
throws IOException, SAXException {
- log.debug("Starting listenForAllRecords=" + listenForAllRecords);
-
// Set up listener and register the records we want to process
TikaHSSFListener listener = new TikaHSSFListener(xhtml);
HSSFRequest hssfRequest = new HSSFRequest();
@@ -216,9 +209,6 @@
public void processRecord(Record record) {
if (exception == null) {
try {
- if (log.isDebugEnabled()) {
- log.debug(record.toString());
- }
internalProcessRecord(record);
} catch (SAXException e) {
exception = e;