Author: ferdy
Date: Fri Sep 7 08:17:58 2012
New Revision: 1381931
URL: http://svn.apache.org/viewvc?rev=1381931&view=rev
Log:
NUTCH-1459 Remove dead code (phase2) from InjectorJob
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/java/org/apache/nutch/crawl/InjectorJob.java
Modified: nutch/branches/2.x/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1381931&r1=1381930&r2=1381931&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Fri Sep 7 08:17:58 2012
@@ -2,6 +2,8 @@ Nutch Change Log
Release 2.1 - Current Development
+* NUTCH-1459 Remove dead code (phase2) from InjectorJob (ferdy)
+
* NUTCH-1431 Introduce link 'distance' and add configurable max distance in
the generator (ferdy)
* NUTCH-1448 Redirected urls should be handled more cleanly (more like an
outlink url) (ferdy)
Modified: nutch/branches/2.x/src/java/org/apache/nutch/crawl/InjectorJob.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/crawl/InjectorJob.java?rev=1381931&r1=1381930&r2=1381931&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/crawl/InjectorJob.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/crawl/InjectorJob.java Fri Sep 7 08:17:58 2012
@@ -25,9 +25,6 @@ import java.util.Set;
import java.util.TreeMap;
import org.apache.avro.util.Utf8;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.gora.mapreduce.GoraMapper;
import org.apache.gora.mapreduce.GoraOutputFormat;
import org.apache.gora.store.DataStore;
import org.apache.hadoop.conf.Configuration;
@@ -53,6 +50,8 @@ import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.NutchTool;
import org.apache.nutch.util.TableUtil;
import org.apache.nutch.util.ToolUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/** This class takes a flat file of URLs and adds them to the of pages to be
* crawled. Useful for bootstrapping the system.
@@ -186,35 +185,6 @@ public class InjectorJob extends NutchTo
context.write(reversedUrl, row);
}
}
-
- public static class InjectorMapper
- extends GoraMapper<String, WebPage, String, WebPage> {
- private FetchSchedule schedule;
-
- @Override
- public void setup(Context context) throws IOException {
- Configuration conf = context.getConfiguration();
- schedule = FetchScheduleFactory.getFetchSchedule(conf);
- // scoreInjected = conf.getFloat("db.score.injected", 1.0f);
- }
-
- @Override
- protected void map(String key, WebPage row, Context context)
- throws IOException, InterruptedException {
- if (Mark.INJECT_MARK.checkMark(row) == null) {
- return;
- }
- Mark.INJECT_MARK.removeMark(row);
- if (!row.isReadable(WebPage.Field.STATUS.getIndex())) {
- row.setStatus(CrawlStatus.STATUS_UNFETCHED);
- schedule.initializeSchedule(key, row);
- // row.setScore(scoreInjected);
- }
-
- context.write(key, row);
- }
-
- }
public InjectorJob() {
@@ -233,10 +203,9 @@ public class InjectorJob extends NutchTo
} else {
input = new Path(path.toString());
}
- numJobs = 2;
+ numJobs = 1;
currentJobNum = 0;
- status.put(Nutch.STAT_PHASE, "convert input");
- currentJob = new NutchJob(getConf(), "inject-p1 " + input);
+ currentJob = new NutchJob(getConf(), "inject " + input);
FileInputFormat.addInputPath(currentJob, input);
currentJob.setMapperClass(UrlMapper.class);
currentJob.setMapOutputKeyClass(String.class);
@@ -249,17 +218,6 @@ public class InjectorJob extends NutchTo
currentJob.setNumReduceTasks(0);
currentJob.waitForCompletion(true);
ToolUtil.recordJobStatus(null, currentJob, results);
- currentJob = null;
-
- status.put(Nutch.STAT_PHASE, "merge input with db");
- status.put(Nutch.STAT_PROGRESS, 0.5f);
- currentJobNum = 1;
- currentJob = new NutchJob(getConf(), "inject-p2 " + input);
- StorageUtils.initMapperJob(currentJob, FIELDS, String.class,
- WebPage.class, InjectorMapper.class);
- currentJob.setNumReduceTasks(0);
- ToolUtil.recordJobStatus(null, currentJob, results);
- status.put(Nutch.STAT_PROGRESS, 1.0f);
return results;
}