plugin...

lewismc Wed, 15 Jan 2014 04:02:50 -0800

Added: 
nutch/branches/2.x/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrMappingReader.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrMappingReader.java?rev=1558349&view=auto
==============================================================================
--- 
nutch/branches/2.x/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrMappingReader.java
 (added)
+++ 
nutch/branches/2.x/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrMappingReader.java
 Wed Jan 15 12:01:45 2014
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexwriter.solr;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.util.HashMap;
+import java.util.Map;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.ObjectCache;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+public class SolrMappingReader {
+  public static Logger LOG = LoggerFactory.getLogger(SolrMappingReader.class);
+  
+  private Configuration conf;
+  
+  private Map<String, String> keyMap = new HashMap<String, String>();
+  private Map<String, String> copyMap = new HashMap<String, String>();
+  private String uniqueKey = "id";
+  
+  public static synchronized SolrMappingReader getInstance(Configuration conf) 
{
+    ObjectCache cache = ObjectCache.get(conf);
+    SolrMappingReader instance = 
(SolrMappingReader)cache.getObject(SolrMappingReader.class.getName());
+    if (instance == null) {
+      instance = new SolrMappingReader(conf);
+      cache.setObject(SolrMappingReader.class.getName(), instance);
+    }
+    return instance;
+  }
+
+  protected SolrMappingReader(Configuration conf) {
+    this.conf = conf;
+    parseMapping();
+  }
+
+  private void parseMapping() {    
+    InputStream ssInputStream = null;
+    ssInputStream = 
conf.getConfResourceAsInputStream(conf.get(SolrConstants.MAPPING_FILE, 
"solrindex-mapping.xml"));
+
+    InputSource inputSource = new InputSource(ssInputStream);
+    try {
+      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+      DocumentBuilder builder = factory.newDocumentBuilder();
+      Document document = builder.parse(inputSource);
+      Element rootElement = document.getDocumentElement();
+      NodeList fieldList = rootElement.getElementsByTagName("field");
+      if (fieldList.getLength() > 0) {
+        for (int i = 0; i < fieldList.getLength(); i++) {
+          Element element = (Element) fieldList.item(i);
+          LOG.info("source: " + element.getAttribute("source") + " dest: " + 
element.getAttribute("dest"));
+          keyMap.put(element.getAttribute("source"), 
element.getAttribute("dest"));
+        }
+      }
+      NodeList copyFieldList = rootElement.getElementsByTagName("copyField");
+      if (copyFieldList.getLength() > 0) {
+        for (int i = 0; i < copyFieldList.getLength(); i++) {
+          Element element = (Element) copyFieldList.item(i);
+          LOG.info("source: " + element.getAttribute("source") + " dest: " + 
element.getAttribute("dest"));
+          copyMap.put(element.getAttribute("source"), 
element.getAttribute("dest"));
+        }
+      }
+      NodeList uniqueKeyItem = rootElement.getElementsByTagName("uniqueKey");
+      if (uniqueKeyItem.getLength() > 1) {
+        LOG.warn("More than one unique key definitions found in solr index 
mapping, using default 'id'");
+        uniqueKey = "id";
+      }
+      else if (uniqueKeyItem.getLength() == 0) {
+        LOG.warn("No unique key definition found in solr index mapping using, 
default 'id'");
+      }
+      else{
+         uniqueKey = uniqueKeyItem.item(0).getFirstChild().getNodeValue();
+      }
+    } catch (MalformedURLException e) {
+        LOG.warn(e.toString());
+    } catch (SAXException e) {
+        LOG.warn(e.toString());
+    } catch (IOException e) {
+       LOG.warn(e.toString());
+    } catch (ParserConfigurationException e) {
+       LOG.warn(e.toString());
+    } 
+  }
+         
+  public Map<String, String> getKeyMap() {
+    return keyMap;
+  }
+         
+  public Map<String, String> getCopyMap() {
+    return copyMap;
+  }
+         
+  public String getUniqueKey() {
+    return uniqueKey;
+  }
+
+  public String hasCopy(String key) {
+    if (copyMap.containsKey(key)) {
+      key = copyMap.get(key);
+    }
+    return key;
+  }
+
+  public String mapKey(String key) throws IOException {
+    if(keyMap.containsKey(key)) {
+      key = keyMap.get(key);
+    }
+    return key;
+  }
+
+  public String mapCopyKey(String key) throws IOException {
+    if(copyMap.containsKey(key)) {
+      key = copyMap.get(key);
+    }
+    return key;
+  }
+}


Added: 
nutch/branches/2.x/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java?rev=1558349&view=auto
==============================================================================
--- 
nutch/branches/2.x/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java
 (added)
+++ 
nutch/branches/2.x/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java
 Wed Jan 15 12:01:45 2014
@@ -0,0 +1,62 @@
+package org.apache.nutch.indexwriter.solr;
+
+
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.auth.AuthScope;
+import org.apache.http.auth.UsernamePasswordCredentials;
+import org.apache.http.client.params.HttpClientParams;
+import org.apache.http.params.HttpParams;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.solr.client.solrj.impl.HttpSolrServer;
+
+import java.net.MalformedURLException;
+
+/**
+ * Static helpers for the Solr index writer: building a SolrJ HTTP client from
+ * the job configuration and sanitising text before it is sent to Solr.
+ */
+public class SolrUtils {
+
+  public static final Logger LOG = LoggerFactory.getLogger(SolrUtils.class);
+
+  /**
+   * Creates an {@link HttpSolrServer} for the URL configured under
+   * {@code SolrConstants.SERVER_URL}.
+   *
+   * <p>When {@code SolrConstants.USE_AUTH} is true, the configured user name and
+   * password are registered on the underlying HTTP client and authentication
+   * handling is switched on.
+   *
+   * @param job configuration holding the Solr URL and optional credentials
+   * @return a Solr server client backed by a new {@link DefaultHttpClient}
+   * @throws MalformedURLException declared for caller compatibility
+   */
+  public static HttpSolrServer getHttpSolrServer(Configuration job)
+      throws MalformedURLException {
+    DefaultHttpClient client = new DefaultHttpClient();
+
+    // Check for username/password
+    if (job.getBoolean(SolrConstants.USE_AUTH, false)) {
+      String username = job.get(SolrConstants.USERNAME);
+
+      LOG.info("Authenticating as: {}", username);
+
+      // The scope matches any host/port/realm/scheme, so these credentials are
+      // offered to whichever server this client is asked to authenticate with.
+      AuthScope scope = new AuthScope(AuthScope.ANY_HOST, AuthScope.ANY_PORT,
+          AuthScope.ANY_REALM, AuthScope.ANY_SCHEME);
+
+      client.getCredentialsProvider().setCredentials(scope,
+          new UsernamePasswordCredentials(username, job.get(SolrConstants.PASSWORD)));
+
+      HttpParams params = client.getParams();
+      HttpClientParams.setAuthenticating(params, true);
+
+      client.setParams(params);
+    }
+
+    return new HttpSolrServer(job.get(SolrConstants.SERVER_URL), client);
+  }
+
+  /**
+   * Removes Unicode noncharacters and non-printable C0 control characters from
+   * {@code input}.
+   *
+   * <p>Stripped code points:
+   * <ul>
+   *   <li>U+FDD0..U+FDEF (inclusive);</li>
+   *   <li>the last two code points of every plane: U+FFFE, U+FFFF, U+1FFFE,
+   *       U+1FFFF, ... U+10FFFE, U+10FFFF;</li>
+   *   <li>U+0000..U+001F except tab (U+0009), line feed (U+000A) and carriage
+   *       return (U+000D).</li>
+   * </ul>
+   * The string is walked by code point rather than by {@code char}, so
+   * supplementary-plane noncharacters (encoded as surrogate pairs) are removed
+   * as a unit and ordinary supplementary characters are kept intact.
+   *
+   * @param input text to clean; may be {@code null}
+   * @return the cleaned text, or {@code null} if {@code input} was {@code null}
+   */
+  public static String stripNonCharCodepoints(String input) {
+    if (input == null) {
+      return null;
+    }
+
+    StringBuilder retval = new StringBuilder(input.length());
+    int i = 0;
+    while (i < input.length()) {
+      int codePoint = input.codePointAt(i);
+      i += Character.charCount(codePoint);
+
+      if (!isStrippedCodePoint(codePoint)) {
+        retval.appendCodePoint(codePoint);
+      }
+    }
+
+    return retval.toString();
+  }
+
+  /** Tells whether {@code codePoint} is a noncharacter or a disallowed control character. */
+  private static boolean isStrippedCodePoint(int codePoint) {
+    // U+xxFFFE and U+xxFFFF in every plane: low 16 bits are 0xFFFE or 0xFFFF.
+    if ((codePoint & 0xFFFE) == 0xFFFE) {
+      return true;
+    }
+    // Contiguous noncharacter block, both ends included.
+    if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF) {
+      return true;
+    }
+    // C0 controls, keeping tabulator, new line and carriage return.
+    return codePoint <= 0x1F
+        && codePoint != 0x9 && codePoint != 0xA && codePoint != 0xD;
+  }
+}
\ No newline at end of file

Modified: nutch/branches/2.x/src/plugin/nutch-extensionpoints/plugin.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/nutch-extensionpoints/plugin.xml?rev=1558349&r1=1558348&r2=1558349&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/nutch-extensionpoints/plugin.xml (original)
+++ nutch/branches/2.x/src/plugin/nutch-extensionpoints/plugin.xml Wed Jan 15 
12:01:45 2014
@@ -56,4 +56,7 @@
       id="org.apache.nutch.scoring.ScoringFilter"
       name="Nutch Scoring"/>
 
+<extension-point
+      id="org.apache.nutch.indexer.IndexWriter"
+      name="Nutch Index Writer"/>
 </plugin>

svn commit: r1558349 [2/2] - in /nutch/branches/2.x: ./ conf/ ivy/ src/bin/ src/java/org/apache/nutch/api/impl/ src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/indexer/elastic/ src/java/org/apache/nutch/indexer/solr/ src/plugin/ src/plugin...

Reply via email to