Author: markus
Date: Wed Mar 18 08:05:11 2015
New Revision: 1667463

URL: http://svn.apache.org/r1667463
Log:
NUTCH-1967 Possible SIooBE (StringIndexOutOfBoundsException) in MimeAdaptiveFetchSchedule


Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1667463&r1=1667462&r2=1667463&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Mar 18 08:05:11 2015
@@ -2,6 +2,8 @@ Nutch Change Log
  
 Nutch Current Development 1.10-SNAPSHOT
 
+* NUTCH-1967 Possible SIooBE in MimeAdaptiveFetchSchedule (markus)
+
 * NUTCH-1957 FileDumper output file name collisions (Renxia Wang via mattmann)
 
 * NUTCH-1955 ByteWritable missing in NutchWritable (markus)

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java?rev=1667463&r1=1667462&r2=1667463&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java Wed Mar 18 08:05:11 2015
@@ -29,6 +29,7 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.*;
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.metadata.HttpHeaders;
+import org.apache.nutch.util.MimeUtil;
 import org.apache.nutch.util.NutchConfiguration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -111,11 +112,8 @@ public class MimeAdaptiveFetchSchedule e
     // Check if the Content-Type field is available in the CrawlDatum
     if (datum.getMetaData().containsKey(HttpHeaders.WRITABLE_CONTENT_TYPE)) {
       // Get the MIME-type of the current URL
-      String currentMime = datum.getMetaData()
-          .get(HttpHeaders.WRITABLE_CONTENT_TYPE).toString();
-
-      // Get rid of charset
-      currentMime = currentMime.substring(0, currentMime.indexOf(';'));
+      String currentMime = MimeUtil.cleanMimeType(datum.getMetaData()
+          .get(HttpHeaders.WRITABLE_CONTENT_TYPE).toString());
 
       // Check if this MIME-type exists in our map
       if (mimeMap.containsKey(currentMime)) {


Reply via email to