This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git

commit 194fc37cb5aa2879b279014bbeaf3bd207af85fd
Author: Nicola Marcacci Rossi <[email protected]>
AuthorDate: Wed Dec 13 16:33:00 2017 +0100

    Extend indexer-elastic-rest to support languages
---
 .../elasticrest/ElasticRestConstants.java          |  4 +-
 .../elasticrest/ElasticRestIndexWriter.java        | 43 +++++++++++++++++-----
 2 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestConstants.java b/src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestConstants.java
index 322ff44..74f37eb 100644
--- a/src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestConstants.java
+++ b/src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestConstants.java
@@ -30,4 +30,6 @@ public interface ElasticRestConstants {
   public static final String TYPE = ELASTIC_PREFIX + "type";
   public static final String HTTPS = ELASTIC_PREFIX + "https";
   public static final String HOSTNAME_TRUST = ELASTIC_PREFIX + 
"trustallhostnames";
-}
\ No newline at end of file
+  
+  public static final String LANGUAGES = ELASTIC_PREFIX + "languages";
+}
diff --git a/src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestIndexWriter.java b/src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestIndexWriter.java
index 1364722..dc54058 100644
--- a/src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestIndexWriter.java
+++ b/src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestIndexWriter.java
@@ -32,7 +32,6 @@ import org.apache.commons.lang.StringUtils;
 import org.apache.commons.lang3.exception.ExceptionUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.http.HttpResponse;
 import org.apache.http.concurrent.BasicFuture;
 import org.apache.http.conn.ssl.DefaultHostnameVerifier;
 import org.apache.http.conn.ssl.NoopHostnameVerifier;
@@ -48,7 +47,6 @@ import org.slf4j.LoggerFactory;
 
 import javax.net.ssl.HostnameVerifier;
 import javax.net.ssl.SSLContext;
-import java.io.BufferedReader;
 import java.io.IOException;
 import java.net.URL;
 import java.security.KeyManagementException;
@@ -58,11 +56,9 @@ import java.security.cert.CertificateException;
 import java.security.cert.X509Certificate;
 import java.util.HashMap;
 import java.util.Map;
-import java.util.MissingResourceException;
 import java.util.HashSet;
 import java.util.Set;
 import java.util.concurrent.ExecutionException;
-import java.util.concurrent.Future;
 
 /**
  */
@@ -80,7 +76,6 @@ public class ElasticRestIndexWriter implements IndexWriter {
   private Configuration config;
 
   private Bulk.Builder bulkBuilder;
-  private Future<HttpResponse> execute;
   private int port = -1;
   private String host = null;
   private Boolean https = null;
@@ -96,6 +91,8 @@ public class ElasticRestIndexWriter implements IndexWriter {
   private boolean createNewBulk = false;
   private long millis;
   private BasicFuture<JestResult> basicFuture = null;
+  
+  private String[] languages = null;
 
   @Override
   public void open(JobConf job, String name) throws IOException {
@@ -106,6 +103,7 @@ public class ElasticRestIndexWriter implements IndexWriter {
     password = job.get(ElasticRestConstants.PASSWORD);
     https = job.getBoolean(ElasticRestConstants.HTTPS, false);
     trustAllHostnames = job.getBoolean(ElasticRestConstants.HOSTNAME_TRUST, 
false);
+    languages = job.getStrings(ElasticRestConstants.LANGUAGES);
 
     // trust ALL certificates
     SSLContext sslContext = null;
@@ -195,7 +193,26 @@ public class ElasticRestIndexWriter implements IndexWriter {
         bulkLength += fieldValues[0].length();
       }
     }
-    Index indexRequest = new Index.Builder(source).index(defaultIndex)
+    
+    String index;
+    if (languages != null && languages.length > 0) {
+      String language = (String) doc.getFieldValue("lang");
+      boolean exists = false;
+      for (String lang : languages) {
+        if (lang.equals(language)) {
+          exists = true;
+          break;
+        }
+      }
+      if (exists) {
+        index = defaultIndex + "_" + language;
+      } else {
+        index = defaultIndex + "_others";
+      }
+    } else {
+      index = defaultIndex;
+    }
+    Index indexRequest = new Index.Builder(source).index(index)
         .type(type).id(id).build();
 
     // Add this indexing request to a bulk request
@@ -217,13 +234,21 @@ public class ElasticRestIndexWriter implements IndexWriter {
   @Override
   public void delete(String key) throws IOException {
     try {
-      client.execute(new Delete.Builder(key).index(defaultIndex)
-          .type(defaultType).build());
+      if (languages != null && languages.length > 0) {
+       Bulk.Builder bulkBuilder = new Bulk.Builder().defaultType(defaultType);
+       for (String lang : languages) {                   
+         bulkBuilder.addAction(new Delete.Builder(key).index(defaultIndex + 
"_" + lang).build());
+       }
+       bulkBuilder.addAction(new Delete.Builder(key).index(defaultIndex + 
"_others").build());
+       client.execute(bulkBuilder.build());
+      } else {
+       client.execute(new Delete.Builder(key).index(defaultIndex)
+           .type(defaultType).build());
+      }
     } catch (IOException e) {
       LOG.error(ExceptionUtils.getStackTrace(e));
       throw e;
     }
-
   }
 
   @Override

-- 
To stop receiving notification emails like this one, please contact
"[email protected]" <[email protected]>.

Reply via email to