Author: ab
Date: Mon Nov 28 14:26:48 2011
New Revision: 1207217

URL: http://svn.apache.org/viewvc?rev=1207217&view=rev
Log:
NUTCH-1213 Pass additional SolrParams when indexing to Solr.

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrConstants.java
    nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java
    nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1207217&r1=1207216&r2=1207217&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon Nov 28 14:26:48 2011
@@ -1,5 +1,7 @@
 Nutch Change Log
 
+* NUTCH-1213 Pass additional SolrParams when indexing to Solr (ab)
+
 * NUTCH-1211 URLFilterChecker command line help doesn't inform user of 
   STDIN requirements (mattmann)
 

Modified: nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrConstants.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrConstants.java?rev=1207217&r1=1207216&r2=1207217&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrConstants.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrConstants.java Mon Nov 28 14:26:48 2011
@@ -31,6 +31,8 @@ public interface SolrConstants {
 
   public static final String PASSWORD = SOLR_PREFIX + "auth.password";
   
+  public static final String PARAMS = SOLR_PREFIX + "params";
+
   public static final String ID_FIELD = "id";
   
   public static final String URL_FIELD = "url";

Modified: nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java?rev=1207217&r1=1207216&r2=1207217&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java Mon Nov 28 14:26:48 2011
@@ -57,11 +57,16 @@ public class SolrIndexer extends Configu
 
   public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
       List<Path> segments) throws IOException {
-      indexSolr(solrUrl, crawlDb, linkDb, segments, false);
+      indexSolr(solrUrl, crawlDb, linkDb, segments, false, null);
   }
 
   public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
-      List<Path> segments, boolean noCommit) throws IOException {
+          List<Path> segments, boolean noCommit) throws IOException {
+    indexSolr(solrUrl, crawlDb, linkDb, segments, noCommit, null);
+  }
+  
+  public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
+      List<Path> segments, boolean noCommit, String solrParams) throws IOException {
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
     LOG.info("SolrIndexer: starting at " + sdf.format(start));
@@ -72,6 +77,9 @@ public class SolrIndexer extends Configu
     IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);
 
     job.set(SolrConstants.SERVER_URL, solrUrl);
+    if (solrParams != null) {
+      job.set(SolrConstants.PARAMS, solrParams);
+    }
     NutchIndexWriterFactory.addClassToConf(job, SolrWriter.class);
 
     job.setReduceSpeculativeExecution(false);
@@ -100,7 +108,7 @@ public class SolrIndexer extends Configu
 
   public int run(String[] args) throws Exception {
     if (args.length < 3) {
-      System.err.println("Usage: SolrIndexer <solr url> <crawldb> [-linkdb <linkdb>] (<segment> ... | -dir <segments>) [-noCommit]");
+      System.err.println("Usage: SolrIndexer <solr url> <crawldb> [-linkdb <linkdb>] [-params k1=v1&k2=v2...] (<segment> ... | -dir <segments>) [-noCommit]");
       return -1;
     }
 
@@ -108,6 +116,7 @@ public class SolrIndexer extends Configu
     Path linkDb = null;
 
     final List<Path> segments = new ArrayList<Path>();
+    String params = null;
 
     boolean noCommit = false;
 
@@ -126,13 +135,15 @@ public class SolrIndexer extends Configu
         }
       } else if (args[i].equals("-noCommit")) {
         noCommit = true;
+      } else if (args[i].equals("-params")) {
+        params = args[++i];
       } else {
         segments.add(new Path(args[i]));
       }
     }
 
     try {
-      indexSolr(args[0], crawlDb, linkDb, segments, noCommit);
+      indexSolr(args[0], crawlDb, linkDb, segments, noCommit, params);
       return 0;
     } catch (final Exception e) {
       LOG.error("SolrIndexer: " + StringUtils.stringifyException(e));

Modified: nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java?rev=1207217&r1=1207216&r2=1207217&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java Mon Nov 28 14:26:48 2011
@@ -30,15 +30,18 @@ import org.apache.nutch.indexer.NutchFie
 import org.apache.nutch.indexer.NutchIndexWriter;
 import org.apache.solr.client.solrj.SolrServer;
 import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.DateUtil;
 
 public class SolrWriter implements NutchIndexWriter {
-
-  public static Logger LOG = LoggerFactory.getLogger(SolrWriter.class);
+  
+  public static final Logger LOG = LoggerFactory.getLogger(SolrWriter.class);
 
   private SolrServer solr;
   private SolrMappingReader solrMapping;
+  private ModifiableSolrParams params;
 
   private final List<SolrInputDocument> inputDocs =
     new ArrayList<SolrInputDocument>();
@@ -46,9 +49,28 @@ public class SolrWriter implements Nutch
   private int commitSize;
 
   public void open(JobConf job, String name) throws IOException {
-    solr = SolrUtils.getCommonsHttpSolrServer(job);
+    SolrServer server = SolrUtils.getCommonsHttpSolrServer(job);
+    init(server, job);
+  }
+  
+  // package protected for tests
+  void init(SolrServer server, JobConf job) throws IOException {
+    solr = server;
     commitSize = job.getInt(SolrConstants.COMMIT_SIZE, 1000);
     solrMapping = SolrMappingReader.getInstance(job);
+    // parse optional params
+    params = new ModifiableSolrParams();
+    String paramString = job.get(SolrConstants.PARAMS);
+    if (paramString != null) {
+      String[] values = paramString.split("&");
+      for (String v : values) {
+        String[] kv = v.split("=");
+        if (kv.length < 2) {
+          continue;
+        }
+        params.add(kv[0], kv[1]);
+      }
+    }
   }
 
   public void write(NutchDocument doc) throws IOException {
@@ -78,7 +100,10 @@ public class SolrWriter implements Nutch
     if (inputDocs.size() >= commitSize) {
       try {
         LOG.info("Adding " + Integer.toString(inputDocs.size()) + " documents");
-        solr.add(inputDocs);
+        UpdateRequest req = new UpdateRequest();
+        req.add(inputDocs);
+        req.setParams(params);
+        req.process(solr);
       } catch (final SolrServerException e) {
         throw makeIOException(e);
       }
@@ -90,7 +115,10 @@ public class SolrWriter implements Nutch
     try {
       if (!inputDocs.isEmpty()) {
         LOG.info("Adding " + Integer.toString(inputDocs.size()) + " documents");
-        solr.add(inputDocs);
+        UpdateRequest req = new UpdateRequest();
+        req.add(inputDocs);
+        req.setParams(params);
+        req.process(solr);
         inputDocs.clear();
       }
       // solr.commit();


Reply via email to