Author: jnioche
Date: Mon Jul 25 12:38:16 2011
New Revision: 1150669

URL: http://svn.apache.org/viewvc?rev=1150669&view=rev
Log:
NUTCH-1045 MimeUtil uses default Tika config unless overridden

Removed:
    nutch/branches/branch-1.4/conf/tika-mimetypes.xml
Modified:
    nutch/branches/branch-1.4/CHANGES.txt
    nutch/branches/branch-1.4/conf/nutch-default.xml
    nutch/branches/branch-1.4/src/java/org/apache/nutch/util/MimeUtil.java

Modified: nutch/branches/branch-1.4/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1150669&r1=1150668&r2=1150669&view=diff
==============================================================================
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Mon Jul 25 12:38:16 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 1.4 - Current development
 
+* NUTCH-1045 MimeUtil to rely on default config provided by Tika (jnioche)
+
 * NUTCH-1057 Fetcher thread time out configurable (markus)
 
 * NUTCH-1037 Option to deduplicate anchors prior to indexing (markus)

Modified: nutch/branches/branch-1.4/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/conf/nutch-default.xml?rev=1150669&r1=1150668&r2=1150669&view=diff
==============================================================================
--- nutch/branches/branch-1.4/conf/nutch-default.xml (original)
+++ nutch/branches/branch-1.4/conf/nutch-default.xml Mon Jul 25 12:38:16 2011
@@ -782,12 +782,16 @@
 
 <!-- mime properties -->
 
+<!--
 <property>
   <name>mime.types.file</name>
   <value>tika-mimetypes.xml</value>
   <description>Name of file in CLASSPATH containing filename extension and
-  magic sequence to mime types mapping information</description>
+  magic sequence to mime types mapping information. Overrides the default Tika config
+  if specified.
+  </description>
 </property>
+-->
 
 <property>
   <name>mime.type.magic</name>

Modified: nutch/branches/branch-1.4/src/java/org/apache/nutch/util/MimeUtil.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/util/MimeUtil.java?rev=1150669&r1=1150668&r2=1150669&view=diff
==============================================================================
--- nutch/branches/branch-1.4/src/java/org/apache/nutch/util/MimeUtil.java (original)
+++ nutch/branches/branch-1.4/src/java/org/apache/nutch/util/MimeUtil.java Mon Jul 25 12:38:16 2011
@@ -19,8 +19,6 @@ package org.apache.nutch.util;
 
 // JDK imports
 import java.io.File;
-import java.io.IOException;
-import java.util.logging.Logger;
 
 // Hadoop imports
 import org.apache.hadoop.conf.Configuration;
@@ -30,6 +28,8 @@ import org.apache.tika.mime.MimeType;
 import org.apache.tika.mime.MimeTypeException;
 import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.mime.MimeTypesFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * @author mattmann
@@ -53,7 +53,7 @@ public final class MimeUtil {
   private boolean mimeMagic;
 
   /* our log stream */
-  private static final Logger LOG = Logger.getLogger(MimeUtil.class.getName());
+  private static final Logger LOG = LoggerFactory.getLogger(MimeUtil.class.getName());
 
   public MimeUtil(Configuration conf) {
     ObjectCache objectCache = ObjectCache.get(conf);
@@ -61,14 +61,23 @@ public final class MimeUtil {
         .getName());
     if (mimeTypez == null) {
       try {
-        mimeTypez = MimeTypesFactory.create(conf
-            .getConfResourceAsInputStream(conf.get("mime.types.file")));
+          String customMimeTypeFile = conf.get("mime.types.file");
+          if (customMimeTypeFile!=null && customMimeTypeFile.equals("")==false){
+              try {
+              mimeTypez = MimeTypesFactory.create(conf
+                      .getConfResourceAsInputStream(customMimeTypeFile));
+              }
+              catch (Exception e){
+                  LOG.error("Can't load mime.types.file : "+customMimeTypeFile+" using Tika's default");
+              }
+          }
+          if (mimeTypez==null)
+              mimeTypez = MimeTypes.getDefaultMimeTypes();
       } catch (Exception e) {
-        e.printStackTrace();
+        LOG.error("Exception in MimeUtil "+e.getMessage());
         throw new RuntimeException(e);
       }
       objectCache.setObject(MimeTypes.class.getName(), mimeTypez);
-
     }
     
     this.mimeTypes = mimeTypez;
@@ -206,7 +215,7 @@ public final class MimeUtil {
     try {
       return this.mimeTypes.forName(name);
     } catch (MimeTypeException e) {
-      LOG.warning("Exception getting mime type by name: [" + name
+      LOG.error("Exception getting mime type by name: [" + name
           + "]: Message: " + e.getMessage());
       return null;
     }


Reply via email to