Author: mattmann
Date: Sat Apr 25 15:48:29 2015
New Revision: 1676028

URL: http://svn.apache.org/r1676028
Log:
Fix for NUTCH-1991: Tika mime detection not using Nutch-supplied
tika-mimetypes.xml for content-based detection. Contributed by Iain Lopata and
Sebastien Nagel.

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1676028&r1=1676027&r2=1676028&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sat Apr 25 15:48:29 2015
@@ -2,6 +2,9 @@ Nutch Change Log
  
 Nutch Current Development 1.10-SNAPSHOT
 
+* NUTCH-1991 Tika mime detection not using Nutch supplied tika-mimetypes.xml for content based
+  detection (Iain Lopata, snagel via mattmann)
+
 * NUTCH-1994 Upgrade to Apache Tika 1.8 (lewismc)
 
 * NUTCH-1996 Make protocol-selenium README part of plugin (lewismc)

Modified: nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java?rev=1676028&r1=1676027&r2=1676028&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java Sat Apr 25 15:48:29 2015
@@ -77,9 +77,11 @@ public final class MimeUtil {
     if (mimeTypez == null) {
       try {
         String customMimeTypeFile = conf.get("mime.types.file");
+        LOG.warn(">>"+customMimeTypeFile);
         if (customMimeTypeFile != null
             && customMimeTypeFile.equals("") == false) {
           try {
+            LOG.warn(customMimeTypeFile);
             mimeTypez = MimeTypesFactory.create(conf
                 .getConfResourceAsInputStream(customMimeTypeFile));
           } catch (Exception e) {
@@ -194,7 +196,7 @@ public final class MimeUtil {
       try {
         InputStream stream = TikaInputStream.get(data);
         try {
-          magicType = tika.detect(stream, tikaMeta);
+          magicType = mimeTypes.detect(stream, tikaMeta).toString();
         } finally {
           stream.close();
         }


Reply via email to