Author: mattmann
Date: Sat Apr 25 15:48:29 2015
New Revision: 1676028
URL: http://svn.apache.org/r1676028
Log:
Fix for NUTCH-1991: Tika mime detection not using Nutch-supplied
tika-mimetypes.xml for content-based detection. Contributed by Iain Lopata and
Sebastien Nagel.
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1676028&r1=1676027&r2=1676028&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sat Apr 25 15:48:29 2015
@@ -2,6 +2,9 @@ Nutch Change Log
Nutch Current Development 1.10-SNAPSHOT
+* NUTCH-1991 Tika mime detection not using Nutch supplied tika-mimetypes.xml for content based
+ detection (Iain Lopata, snagel via mattmann)
+
* NUTCH-1994 Upgrade to Apache Tika 1.8 (lewismc)
* NUTCH-1996 Make protocol-selenium README part of plugin (lewismc)
Modified: nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java?rev=1676028&r1=1676027&r2=1676028&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java Sat Apr 25 15:48:29 2015
@@ -77,9 +77,11 @@ public final class MimeUtil {
if (mimeTypez == null) {
try {
String customMimeTypeFile = conf.get("mime.types.file");
+ LOG.warn(">>"+customMimeTypeFile);
if (customMimeTypeFile != null
&& customMimeTypeFile.equals("") == false) {
try {
+ LOG.warn(customMimeTypeFile);
mimeTypez = MimeTypesFactory.create(conf
.getConfResourceAsInputStream(customMimeTypeFile));
} catch (Exception e) {
@@ -194,7 +196,7 @@ public final class MimeUtil {
try {
InputStream stream = TikaInputStream.get(data);
try {
- magicType = tika.detect(stream, tikaMeta);
+ magicType = mimeTypes.detect(stream, tikaMeta).toString();
} finally {
stream.close();
}