Author: markus
Date: Wed Mar 18 08:05:11 2015
New Revision: 1667463
URL: http://svn.apache.org/r1667463
Log:
NUTCH-1967 Possible SIooBE (StringIndexOutOfBoundsException) in MimeAdaptiveFetchSchedule
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1667463&r1=1667462&r2=1667463&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Mar 18 08:05:11 2015
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development 1.10-SNAPSHOT
+* NUTCH-1967 Possible SIooBE in MimeAdaptiveFetchSchedule (markus)
+
* NUTCH-1957 FileDumper output file name collisions (Renxia Wang via mattmann)
* NUTCH-1955 ByteWritable missing in NutchWritable (markus)
Modified:
nutch/trunk/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java?rev=1667463&r1=1667462&r2=1667463&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.java Wed Mar 18 08:05:11 2015
@@ -29,6 +29,7 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.*;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.metadata.HttpHeaders;
+import org.apache.nutch.util.MimeUtil;
import org.apache.nutch.util.NutchConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -111,11 +112,8 @@ public class MimeAdaptiveFetchSchedule e
// Check if the Content-Type field is available in the CrawlDatum
if (datum.getMetaData().containsKey(HttpHeaders.WRITABLE_CONTENT_TYPE)) {
// Get the MIME-type of the current URL
- String currentMime = datum.getMetaData()
- .get(HttpHeaders.WRITABLE_CONTENT_TYPE).toString();
-
- // Get rid of charset
- currentMime = currentMime.substring(0, currentMime.indexOf(';'));
+ String currentMime = MimeUtil.cleanMimeType(datum.getMetaData()
+ .get(HttpHeaders.WRITABLE_CONTENT_TYPE).toString());
// Check if this MIME-type exists in our map
if (mimeMap.containsKey(currentMime)) {