Author: jnioche
Date: Mon Jul 18 09:20:02 2011
New Revision: 1147794

URL: http://svn.apache.org/viewvc?rev=1147794&view=rev
Log:
NUTCH-1054 : linkDB optional during indexing

Modified:
    nutch/branches/branch-1.4/CHANGES.txt
    nutch/branches/branch-1.4/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
    nutch/branches/branch-1.4/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java

Modified: nutch/branches/branch-1.4/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1147794&r1=1147793&r2=1147794&view=diff
==============================================================================
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Mon Jul 18 09:20:02 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 1.4 - Current development
 
+* NUTCH-1054 LinkDB optional during indexing (jnioche)
+
 * NUTCH-1029 Readdb throws EOFException (markus)
 
 * NUTCH-1036 Solr jobs should increment counters in Reporter (markus)

Modified: nutch/branches/branch-1.4/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/indexer/IndexerMapReduce.java?rev=1147794&r1=1147793&r2=1147794&view=diff
==============================================================================
--- nutch/branches/branch-1.4/src/java/org/apache/nutch/indexer/IndexerMapReduce.java (original)
+++ nutch/branches/branch-1.4/src/java/org/apache/nutch/indexer/IndexerMapReduce.java Mon Jul 18 09:20:02 2011
@@ -23,6 +23,7 @@ import java.util.Iterator;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
@@ -172,7 +173,9 @@ implements Mapper<Text, Writable, Text, 
                            JobConf job) {
 
     LOG.info("IndexerMapReduce: crawldb: " + crawlDb);
-    LOG.info("IndexerMapReduce: linkdb: " + linkDb);
+    
+    if (linkDb!=null)
+      LOG.info("IndexerMapReduce: linkdb: " + linkDb);
 
     for (final Path segment : segments) {
       LOG.info("IndexerMapReduces: adding segment: " + segment);
@@ -183,7 +186,10 @@ implements Mapper<Text, Writable, Text, 
     }
 
     FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
-    FileInputFormat.addInputPath(job, new Path(linkDb, LinkDb.CURRENT_NAME));
+    
+    if (linkDb!=null)
+         FileInputFormat.addInputPath(job, new Path(linkDb, LinkDb.CURRENT_NAME));
+    
     job.setInputFormat(SequenceFileInputFormat.class);
 
     job.setMapperClass(IndexerMapReduce.class);

Modified: nutch/branches/branch-1.4/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java?rev=1147794&r1=1147793&r2=1147794&view=diff
==============================================================================
--- nutch/branches/branch-1.4/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java (original)
+++ nutch/branches/branch-1.4/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java Mon Jul 18 09:20:02 2011
@@ -99,20 +99,23 @@ public class SolrIndexer extends Configu
   }
 
   public int run(String[] args) throws Exception {
-    if (args.length < 4) {
-      System.err.println("Usage: SolrIndexer <solr url> <crawldb> <linkdb> (<segment> ... | -dir <segments>) [-noCommit]");
+    if (args.length < 3) {
+      System.err.println("Usage: SolrIndexer <solr url> <crawldb> [-linkdb <linkdb>] (<segment> ... | -dir <segments>) [-noCommit]");
       return -1;
     }
 
     final Path crawlDb = new Path(args[1]);
-    final Path linkDb = new Path(args[2]);
+    Path linkDb = null;
 
     final List<Path> segments = new ArrayList<Path>();
 
     boolean noCommit = false;
 
-    for (int i = 3; i < args.length; i++) {
-      if (args[i].equals("-dir")) {
+    for (int i = 2; i < args.length; i++) {
+       if (args[i].equals("-linkdb")) {
+               linkDb = new Path(args[++i]);
+       }
+       else if (args[i].equals("-dir")) {
         Path dir = new Path(args[++i]);
         FileSystem fs = dir.getFileSystem(getConf());
         FileStatus[] fstats = fs.listStatus(dir,


Reply via email to