Author: jnioche
Date: Mon Jul 25 12:38:16 2011
New Revision: 1150669
URL: http://svn.apache.org/viewvc?rev=1150669&view=rev
Log:
NUTCH-1045 MimeUtil uses default Tika config unless overridden
Removed:
nutch/branches/branch-1.4/conf/tika-mimetypes.xml
Modified:
nutch/branches/branch-1.4/CHANGES.txt
nutch/branches/branch-1.4/conf/nutch-default.xml
nutch/branches/branch-1.4/src/java/org/apache/nutch/util/MimeUtil.java
Modified: nutch/branches/branch-1.4/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1150669&r1=1150668&r2=1150669&view=diff
==============================================================================
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Mon Jul 25 12:38:16 2011
@@ -2,6 +2,8 @@ Nutch Change Log
Release 1.4 - Current development
+* NUTCH-1045 MimeUtil to rely on default config provided by Tika (jnioche)
+
* NUTCH-1057 Fetcher thread time out configurable (markus)
* NUTCH-1037 Option to deduplicate anchors prior to indexing (markus)
Modified: nutch/branches/branch-1.4/conf/nutch-default.xml
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/conf/nutch-default.xml?rev=1150669&r1=1150668&r2=1150669&view=diff
==============================================================================
--- nutch/branches/branch-1.4/conf/nutch-default.xml (original)
+++ nutch/branches/branch-1.4/conf/nutch-default.xml Mon Jul 25 12:38:16 2011
@@ -782,12 +782,16 @@
<!-- mime properties -->
+<!--
<property>
<name>mime.types.file</name>
<value>tika-mimetypes.xml</value>
<description>Name of file in CLASSPATH containing filename extension and
- magic sequence to mime types mapping information</description>
+ magic sequence to mime types mapping information. Overrides the default Tika config
+ if specified.
+ </description>
</property>
+-->
<property>
<name>mime.type.magic</name>
Modified: nutch/branches/branch-1.4/src/java/org/apache/nutch/util/MimeUtil.java
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/util/MimeUtil.java?rev=1150669&r1=1150668&r2=1150669&view=diff
==============================================================================
--- nutch/branches/branch-1.4/src/java/org/apache/nutch/util/MimeUtil.java (original)
+++ nutch/branches/branch-1.4/src/java/org/apache/nutch/util/MimeUtil.java Mon Jul 25 12:38:16 2011
@@ -19,8 +19,6 @@ package org.apache.nutch.util;
// JDK imports
import java.io.File;
-import java.io.IOException;
-import java.util.logging.Logger;
// Hadoop imports
import org.apache.hadoop.conf.Configuration;
@@ -30,6 +28,8 @@ import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.mime.MimeTypes;
import org.apache.tika.mime.MimeTypesFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* @author mattmann
@@ -53,7 +53,7 @@ public final class MimeUtil {
private boolean mimeMagic;
/* our log stream */
- private static final Logger LOG = Logger.getLogger(MimeUtil.class.getName());
+ private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class.getName());
public MimeUtil(Configuration conf) {
ObjectCache objectCache = ObjectCache.get(conf);
@@ -61,14 +61,23 @@ public final class MimeUtil {
.getName());
if (mimeTypez == null) {
try {
- mimeTypez = MimeTypesFactory.create(conf
- .getConfResourceAsInputStream(conf.get("mime.types.file")));
+ String customMimeTypeFile = conf.get("mime.types.file");
+ if (customMimeTypeFile!=null && customMimeTypeFile.equals("")==false){
+ try {
+ mimeTypez = MimeTypesFactory.create(conf
+ .getConfResourceAsInputStream(customMimeTypeFile));
+ }
+ catch (Exception e){
+ LOG.error("Can't load mime.types.file : "+customMimeTypeFile+" using Tika's default");
+ }
+ }
+ if (mimeTypez==null)
+ mimeTypez = MimeTypes.getDefaultMimeTypes();
} catch (Exception e) {
- e.printStackTrace();
+ LOG.error("Exception in MimeUtil "+e.getMessage());
throw new RuntimeException(e);
}
objectCache.setObject(MimeTypes.class.getName(), mimeTypez);
-
}
this.mimeTypes = mimeTypez;
@@ -206,7 +215,7 @@ public final class MimeUtil {
try {
return this.mimeTypes.forName(name);
} catch (MimeTypeException e) {
- LOG.warning("Exception getting mime type by name: [" + name
+ LOG.error("Exception getting mime type by name: [" + name
+ "]: Message: " + e.getMessage());
return null;
}