Author: markus
Date: Tue Nov  6 09:16:46 2012
New Revision: 1406076

URL: http://svn.apache.org/viewvc?rev=1406076&view=rev
Log:
NUTCH-1491 Strip UTF-8 non-character codepoints in title

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1406076&r1=1406075&r2=1406076&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Nov  6 09:16:46 2012
@@ -2,6 +2,10 @@ Nutch Change Log
 
 (trunk) Current Development:
 
+* NUTCH-1491 Strip UTF-8 non-character codepoints in title (Nathan Gass via markus)
+
+* NUTCH-1480 SolrIndexer to write to multiple server (markus)
+
 * NUTCH-1421 RegexURLNormalizer to only skip rules with invalid patterns (snagel)
 
 * NUTCH-1341 NotModified time set to now but page not modified (markus)

Modified: nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java?rev=1406076&r1=1406075&r2=1406076&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java Tue Nov  6 09:16:46 2012
@@ -37,7 +37,7 @@ import org.apache.solr.common.params.Mod
 import org.apache.solr.common.util.DateUtil;
 
 public class SolrWriter implements NutchIndexWriter {
-  
+
   public static final Logger LOG = LoggerFactory.getLogger(SolrWriter.class);
 
   private SolrServer solr;
@@ -55,7 +55,7 @@ public class SolrWriter implements Nutch
     SolrServer server = SolrUtils.getCommonsHttpSolrServer(job);
     init(server, job);
   }
-  
+
   // package protected for tests
   void init(SolrServer server, JobConf job) throws IOException {
     solr = server;
@@ -99,14 +99,14 @@ public class SolrWriter implements Nutch
           val2 = DateUtil.getThreadLocalDateFormat().format(val);
         }
 
-        if (e.getKey().equals("content")) {
+        if (e.getKey().equals("content") || e.getKey().equals("title")) {
           val2 = SolrUtils.stripNonCharCodepoints((String)val);
         }
 
         inputDoc.addField(solrMapping.mapKey(e.getKey()), val2, 
e.getValue().getWeight());
         String sCopy = solrMapping.mapCopyKey(e.getKey());
         if (sCopy != e.getKey()) {
-               inputDoc.addField(sCopy, val);  
+               inputDoc.addField(sCopy, val);
         }
       }
     }


Reply via email to