Author: ab
Date: Mon Nov 28 14:26:48 2011
New Revision: 1207217
URL: http://svn.apache.org/viewvc?rev=1207217&view=rev
Log:
NUTCH-1213 Pass additional SolrParams when indexing to Solr.
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrConstants.java
nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java
nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1207217&r1=1207216&r2=1207217&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon Nov 28 14:26:48 2011
@@ -1,5 +1,7 @@
Nutch Change Log
+* NUTCH-1213 Pass additional SolrParams when indexing to Solr (ab)
+
* NUTCH-1211 URLFilterChecker command line help doesn't inform user of
STDIN requirements (mattmann)
Modified: nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrConstants.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrConstants.java?rev=1207217&r1=1207216&r2=1207217&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrConstants.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrConstants.java Mon Nov 28 14:26:48 2011
@@ -31,6 +31,8 @@ public interface SolrConstants {
public static final String PASSWORD = SOLR_PREFIX + "auth.password";
+ public static final String PARAMS = SOLR_PREFIX + "params";
+
public static final String ID_FIELD = "id";
public static final String URL_FIELD = "url";
Modified: nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java?rev=1207217&r1=1207216&r2=1207217&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java Mon Nov 28 14:26:48 2011
@@ -57,11 +57,16 @@ public class SolrIndexer extends Configu
public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
List<Path> segments) throws IOException {
- indexSolr(solrUrl, crawlDb, linkDb, segments, false);
+ indexSolr(solrUrl, crawlDb, linkDb, segments, false, null);
}
public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
- List<Path> segments, boolean noCommit) throws IOException {
+ List<Path> segments, boolean noCommit) throws IOException {
+ indexSolr(solrUrl, crawlDb, linkDb, segments, noCommit, null);
+ }
+
+ public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
+ List<Path> segments, boolean noCommit, String solrParams) throws IOException {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
long start = System.currentTimeMillis();
LOG.info("SolrIndexer: starting at " + sdf.format(start));
@@ -72,6 +77,9 @@ public class SolrIndexer extends Configu
IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);
job.set(SolrConstants.SERVER_URL, solrUrl);
+ if (solrParams != null) {
+ job.set(SolrConstants.PARAMS, solrParams);
+ }
NutchIndexWriterFactory.addClassToConf(job, SolrWriter.class);
job.setReduceSpeculativeExecution(false);
@@ -100,7 +108,7 @@ public class SolrIndexer extends Configu
public int run(String[] args) throws Exception {
if (args.length < 3) {
- System.err.println("Usage: SolrIndexer <solr url> <crawldb> [-linkdb <linkdb>] (<segment> ... | -dir <segments>) [-noCommit]");
+ System.err.println("Usage: SolrIndexer <solr url> <crawldb> [-linkdb <linkdb>] [-params k1=v1&k2=v2...] (<segment> ... | -dir <segments>) [-noCommit]");
return -1;
}
@@ -108,6 +116,7 @@ public class SolrIndexer extends Configu
Path linkDb = null;
final List<Path> segments = new ArrayList<Path>();
+ String params = null;
boolean noCommit = false;
@@ -126,13 +135,15 @@ public class SolrIndexer extends Configu
}
} else if (args[i].equals("-noCommit")) {
noCommit = true;
+ } else if (args[i].equals("-params")) {
+ params = args[++i];
} else {
segments.add(new Path(args[i]));
}
}
try {
- indexSolr(args[0], crawlDb, linkDb, segments, noCommit);
+ indexSolr(args[0], crawlDb, linkDb, segments, noCommit, params);
return 0;
} catch (final Exception e) {
LOG.error("SolrIndexer: " + StringUtils.stringifyException(e));
Modified: nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java?rev=1207217&r1=1207216&r2=1207217&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java Mon Nov 28 14:26:48 2011
@@ -30,15 +30,18 @@ import org.apache.nutch.indexer.NutchFie
import org.apache.nutch.indexer.NutchIndexWriter;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.DateUtil;
public class SolrWriter implements NutchIndexWriter {
-
- public static Logger LOG = LoggerFactory.getLogger(SolrWriter.class);
+
+ public static final Logger LOG = LoggerFactory.getLogger(SolrWriter.class);
private SolrServer solr;
private SolrMappingReader solrMapping;
+ private ModifiableSolrParams params;
private final List<SolrInputDocument> inputDocs =
new ArrayList<SolrInputDocument>();
@@ -46,9 +49,28 @@ public class SolrWriter implements Nutch
private int commitSize;
public void open(JobConf job, String name) throws IOException {
- solr = SolrUtils.getCommonsHttpSolrServer(job);
+ SolrServer server = SolrUtils.getCommonsHttpSolrServer(job);
+ init(server, job);
+ }
+
+ // package protected for tests
+ void init(SolrServer server, JobConf job) throws IOException {
+ solr = server;
commitSize = job.getInt(SolrConstants.COMMIT_SIZE, 1000);
solrMapping = SolrMappingReader.getInstance(job);
+ // parse optional params
+ params = new ModifiableSolrParams();
+ String paramString = job.get(SolrConstants.PARAMS);
+ if (paramString != null) {
+ String[] values = paramString.split("&");
+ for (String v : values) {
+ String[] kv = v.split("=");
+ if (kv.length < 2) {
+ continue;
+ }
+ params.add(kv[0], kv[1]);
+ }
+ }
}
public void write(NutchDocument doc) throws IOException {
@@ -78,7 +100,10 @@ public class SolrWriter implements Nutch
if (inputDocs.size() >= commitSize) {
try {
LOG.info("Adding " + Integer.toString(inputDocs.size()) + " documents");
- solr.add(inputDocs);
+ UpdateRequest req = new UpdateRequest();
+ req.add(inputDocs);
+ req.setParams(params);
+ req.process(solr);
} catch (final SolrServerException e) {
throw makeIOException(e);
}
@@ -90,7 +115,10 @@ public class SolrWriter implements Nutch
try {
if (!inputDocs.isEmpty()) {
LOG.info("Adding " + Integer.toString(inputDocs.size()) + " documents");
- solr.add(inputDocs);
+ UpdateRequest req = new UpdateRequest();
+ req.add(inputDocs);
+ req.setParams(params);
+ req.process(solr);
inputDocs.clear();
}
// solr.commit();