Author: jnioche
Date: Mon Jan 11 10:13:21 2010
New Revision: 897825

URL: http://svn.apache.org/viewvc?rev=897825&view=rev
Log:
fix for NUTCH-767 : reverted original expected values for test + treat 
text/plain as a default mime-type from Tika

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
    lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java?rev=897825&r1=897824&r2=897825&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java Mon Jan 11 10:13:21 2010
@@ -159,6 +159,7 @@
     if (this.mimeMagic) {
       MimeType magicType = this.mimeTypes.getMimeType(data);
       if (magicType != null && !magicType.getName().equals(MimeTypes.OCTET_STREAM)
+          && !magicType.getName().equals(MimeTypes.PLAIN_TEXT)
           && type != null && !type.getName().equals(magicType.getName())) {
         // If magic enabled and the current mime type differs from that of the
         // one returned from the magic, take the magic mimeType

Modified: lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java?rev=897825&r1=897824&r2=897825&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java (original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java Mon Jan 11 10:13:21 2010
@@ -63,28 +63,19 @@
                     "http://www.foo.com/",
                     "".getBytes("UTF8"),
                     "text/html; charset=UTF-8", p, conf);
-    // TODO check potential Tika issue and 
-    // revert the expected value to text/html
-    // see https://issues.apache.org/jira/browse/NUTCH-767
-    assertEquals("text/plain", c.getContentType());
+    assertEquals("text/html", c.getContentType());
 
     c = new Content("http://www.foo.com/foo.html",
                     "http://www.foo.com/",
                     "".getBytes("UTF8"),
                     "", p, conf);
-    // TODO check potential Tika issue and 
-    // revert the expected value to text/html
-    // see https://issues.apache.org/jira/browse/NUTCH-767
-    assertEquals("text/plain", c.getContentType());
+    assertEquals("text/html", c.getContentType());
 
     c = new Content("http://www.foo.com/foo.html",
                     "http://www.foo.com/",
                     "".getBytes("UTF8"),
                     null, p, conf);
-    // TODO check potential Tika issue and 
-    // revert the expected value to text/html
-    // see https://issues.apache.org/jira/browse/NUTCH-767
-    assertEquals("text/plain", c.getContentType());
+    assertEquals("text/html", c.getContentType());
 
     c = new Content("http://www.foo.com/",
                     "http://www.foo.com/",
@@ -108,10 +99,7 @@
                     "http://www.foo.com/",
                     "".getBytes("UTF8"),
                     "", p, conf);
-    // TODO check that Tika returns the right value and
-    // revert to the default type
-    // see https://issues.apache.org/jira/browse/NUTCH-767
-    assertEquals("text/plain", c.getContentType());
+    assertEquals(MimeTypes.OCTET_STREAM, c.getContentType());
 
     c = new Content("http://www.foo.com/",
                     "http://www.foo.com/",


Reply via email to