Author: markus
Date: Tue Nov 6 09:16:46 2012
New Revision: 1406076
URL: http://svn.apache.org/viewvc?rev=1406076&view=rev
Log:
NUTCH-1491 Strip UTF-8 non-character codepoints in title
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1406076&r1=1406075&r2=1406076&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Nov 6 09:16:46 2012
@@ -2,6 +2,10 @@ Nutch Change Log
(trunk) Current Development:
+* NUTCH-1491 Strip UTF-8 non-character codepoints in title (Nathan Gass via markus)
+
+* NUTCH-1480 SolrIndexer to write to multiple server (markus)
+
* NUTCH-1421 RegexURLNormalizer to only skip rules with invalid patterns (snagel)
* NUTCH-1341 NotModified time set to now but page not modified (markus)
Modified: nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java?rev=1406076&r1=1406075&r2=1406076&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java Tue Nov 6 09:16:46 2012
@@ -37,7 +37,7 @@ import org.apache.solr.common.params.Mod
import org.apache.solr.common.util.DateUtil;
public class SolrWriter implements NutchIndexWriter {
-
+
public static final Logger LOG = LoggerFactory.getLogger(SolrWriter.class);
private SolrServer solr;
@@ -55,7 +55,7 @@ public class SolrWriter implements Nutch
SolrServer server = SolrUtils.getCommonsHttpSolrServer(job);
init(server, job);
}
-
+
// package protected for tests
void init(SolrServer server, JobConf job) throws IOException {
solr = server;
@@ -99,14 +99,14 @@ public class SolrWriter implements Nutch
val2 = DateUtil.getThreadLocalDateFormat().format(val);
}
- if (e.getKey().equals("content")) {
+ if (e.getKey().equals("content") || e.getKey().equals("title")) {
val2 = SolrUtils.stripNonCharCodepoints((String)val);
}
inputDoc.addField(solrMapping.mapKey(e.getKey()), val2,
e.getValue().getWeight());
String sCopy = solrMapping.mapCopyKey(e.getKey());
if (sCopy != e.getKey()) {
- inputDoc.addField(sCopy, val);
+ inputDoc.addField(sCopy, val);
}
}
}