Author: lewismc
Date: Wed Oct 10 23:02:57 2012
New Revision: 1396850
URL: http://svn.apache.org/viewvc?rev=1396850&view=rev
Log:
NUTCH-874 Make sure all plugins in src/plugin are compatible with Nutch 2.0 and
Gora (part 1)
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java
nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/parse/feed/FeedParser.java
nutch/branches/2.x/src/plugin/feed/src/test/org/apache/nutch/parse/feed/TestFeedParser.java
nutch/branches/2.x/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java
nutch/branches/2.x/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java
nutch/branches/2.x/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java
nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java
nutch/branches/2.x/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
nutch/branches/2.x/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
nutch/branches/2.x/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java
Modified: nutch/branches/2.x/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1396850&r1=1396849&r2=1396850&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Wed Oct 10 23:02:57 2012
@@ -2,6 +2,8 @@ Nutch Change Log
Release 2.2 - Current Development
+* NUTCH-874 Make sure all plugins in src/plugin are compatible with Nutch 2.0 and Gora (part 1) (Kiran Chitturi via lewismc)
+
* NUTCH-1344 BasicURLNormalizer to normalize https same as http
* NUTCH-706 Url regex normalizer: pattern for session id removal not to match "newsId" (Meghna Kukreja via snagel)
Modified:
nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java?rev=1396850&r1=1396849&r2=1396850&view=diff
==============================================================================
---
nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java
(original)
+++
nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java
Wed Oct 10 23:02:57 2012
@@ -25,15 +25,12 @@ import java.util.TimeZone;
//APACHE imports
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
-import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.metadata.Feed;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.Parse;
-import org.apache.nutch.parse.ParseData;
import org.apache.solr.common.util.DateUtil;
/**
Modified:
nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/parse/feed/FeedParser.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/parse/feed/FeedParser.java?rev=1396850&r1=1396849&r2=1396850&view=diff
==============================================================================
---
nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/parse/feed/FeedParser.java
(original)
+++
nutch/branches/2.x/src/plugin/feed/src/java/org/apache/nutch/parse/feed/FeedParser.java
Wed Oct 10 23:02:57 2012
@@ -39,10 +39,6 @@ import org.apache.nutch.net.URLNormalize
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.Parse;
-import org.apache.nutch.parse.ParseData;
-import org.apache.nutch.parse.ParseResult;
-import org.apache.nutch.parse.ParseStatus;
-import org.apache.nutch.parse.ParseText;
import org.apache.nutch.parse.Parser;
import org.apache.nutch.parse.ParserFactory;
import org.apache.nutch.parse.ParserNotFound;
Modified:
nutch/branches/2.x/src/plugin/feed/src/test/org/apache/nutch/parse/feed/TestFeedParser.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/feed/src/test/org/apache/nutch/parse/feed/TestFeedParser.java?rev=1396850&r1=1396849&r2=1396850&view=diff
==============================================================================
---
nutch/branches/2.x/src/plugin/feed/src/test/org/apache/nutch/parse/feed/TestFeedParser.java
(original)
+++
nutch/branches/2.x/src/plugin/feed/src/test/org/apache/nutch/parse/feed/TestFeedParser.java
Wed Oct 10 23:02:57 2012
@@ -26,10 +26,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
-import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseException;
-import org.apache.nutch.parse.ParseResult;
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.Protocol;
Modified:
nutch/branches/2.x/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java?rev=1396850&r1=1396849&r2=1396850&view=diff
==============================================================================
---
nutch/branches/2.x/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java
(original)
+++
nutch/branches/2.x/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java
Wed Oct 10 23:02:57 2012
@@ -18,12 +18,8 @@
package org.apache.nutch.parse.ext;
import org.apache.nutch.protocol.Content;
-import org.apache.nutch.parse.ParseResult;
-import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.parse.Parser;
import org.apache.nutch.parse.Parse;
-import org.apache.nutch.parse.ParseData;
-import org.apache.nutch.parse.ParseImpl;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.OutlinkExtractor;
Modified:
nutch/branches/2.x/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java?rev=1396850&r1=1396849&r2=1396850&view=diff
==============================================================================
---
nutch/branches/2.x/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java
(original)
+++
nutch/branches/2.x/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java
Wed Oct 10 23:02:57 2012
@@ -23,14 +23,12 @@ import org.apache.nutch.protocol.Content
import org.apache.nutch.protocol.ProtocolException;
import org.apache.nutch.parse.Parse;
-import org.apache.nutch.parse.ParseImpl;
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.parse.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.hadoop.io.Text;
-import org.apache.nutch.crawl.CrawlDatum;
import junit.framework.TestCase;
Modified:
nutch/branches/2.x/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java?rev=1396850&r1=1396849&r2=1396850&view=diff
==============================================================================
---
nutch/branches/2.x/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java
(original)
+++
nutch/branches/2.x/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java
Wed Oct 10 23:02:57 2012
@@ -20,7 +20,6 @@ package org.apache.nutch.parse.swf;
import java.io.FileInputStream;
import java.io.InputStreamReader;
-import org.apache.nutch.crawl.CrawlDatum;
import org.apache.hadoop.io.Text;
import org.apache.nutch.protocol.ProtocolFactory;
import org.apache.nutch.protocol.Protocol;
Modified:
nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java?rev=1396850&r1=1396849&r2=1396850&view=diff
==============================================================================
---
nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java
(original)
+++
nutch/branches/2.x/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java
Wed Oct 10 23:02:57 2012
@@ -53,7 +53,6 @@ import org.apache.tika.metadata.Metadata
import org.apache.tika.mime.MimeType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.html.HtmlMapper;
import org.w3c.dom.DocumentFragment;
/**
Modified:
nutch/branches/2.x/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java?rev=1396850&r1=1396849&r2=1396850&view=diff
==============================================================================
---
nutch/branches/2.x/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
(original)
+++
nutch/branches/2.x/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
Wed Oct 10 23:02:57 2012
@@ -29,10 +29,6 @@ import org.slf4j.LoggerFactory;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.parse.Outlink;
-import org.apache.nutch.parse.ParseData;
-import org.apache.nutch.parse.ParseImpl;
-import org.apache.nutch.parse.ParseResult;
-import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.parse.Parser;
import org.apache.nutch.protocol.Content;
import org.apache.hadoop.conf.Configuration;
Modified:
nutch/branches/2.x/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java?rev=1396850&r1=1396849&r2=1396850&view=diff
==============================================================================
---
nutch/branches/2.x/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
(original)
+++
nutch/branches/2.x/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
Wed Oct 10 23:02:57 2012
@@ -36,9 +36,7 @@ import org.apache.hadoop.conf.Configurat
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.parse.Parse;
-import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseUtil;
-import org.apache.nutch.parse.ParseImpl;
import org.apache.nutch.parse.ParseException;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.protocol.Content;
Modified:
nutch/branches/2.x/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java?rev=1396850&r1=1396849&r2=1396850&view=diff
==============================================================================
---
nutch/branches/2.x/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java
(original)
+++
nutch/branches/2.x/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java
Wed Oct 10 23:02:57 2012
@@ -23,14 +23,12 @@ import org.apache.nutch.protocol.Content
import org.apache.nutch.protocol.ProtocolException;
import org.apache.nutch.parse.Parse;
-import org.apache.nutch.parse.ParseImpl;
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.parse.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.hadoop.io.Text;
-import org.apache.nutch.crawl.CrawlDatum;
import junit.framework.TestCase;