[ 
https://issues.apache.org/jira/browse/NUTCH-2497?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16330845#comment-16330845
 ] 

ASF GitHub Bot commented on NUTCH-2497:
---------------------------------------

lewismc closed pull request #276: NUTCH-2497: Allow multiple hosts for Elastic 
REST Indexer
URL: https://github.com/apache/nutch/pull/276
 
 
   

This is a PR merged from a forked repository.
Because GitHub hides the original diff of a foreign pull request (one
opened from a fork) once it has been merged, the diff is reproduced
below for the sake of provenance:

diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml
index f3f48a36a..f1fb2c476 100644
--- a/conf/nutch-default.xml
+++ b/conf/nutch-default.xml
@@ -2117,8 +2117,10 @@ visit 
https://wiki.apache.org/nutch/SimilarityScoringFilter-->
 <property>
     <name>elastic.rest.host</name>
     <value></value>
-    <description>The hostname to send documents to using Elasticsearch Jest. 
Both host
-        and port must be defined</description>
+    <description>
+      The hostname or a list of comma separated hostnames to send documents to
+      using Elasticsearch Jest. Both host and port must be defined.
+    </description>
 </property>
 
 <property>
diff --git 
a/src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestIndexWriter.java
 
b/src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestIndexWriter.java
index 6371362e9..81eaf5067 100644
--- 
a/src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestIndexWriter.java
+++ 
b/src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestIndexWriter.java
@@ -54,6 +54,7 @@
 import java.security.NoSuchAlgorithmException;
 import java.security.cert.CertificateException;
 import java.security.cert.X509Certificate;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.HashSet;
@@ -80,7 +81,7 @@
 
   private Bulk.Builder bulkBuilder;
   private int port = -1;
-  private String host = null;
+  private String[] hosts = null;
   private Boolean https = null;
   private String user = null;
   private String password = null;
@@ -102,7 +103,7 @@
   @Override
   public void open(JobConf job, String name) throws IOException {
 
-    host = job.get(ElasticRestConstants.HOST);
+    hosts = job.getStrings(ElasticRestConstants.HOST);
     port = job.getInt(ElasticRestConstants.PORT, 9200);
     user = job.get(ElasticRestConstants.USER);
     password = job.get(ElasticRestConstants.PASSWORD);
@@ -139,24 +140,27 @@ public boolean isTrusted(X509Certificate[] arg0, String 
arg1) throws Certificate
     SchemeIOSessionStrategy httpsIOSessionStrategy = new 
SSLIOSessionStrategy(sslContext, hostnameVerifier);
 
     JestClientFactory jestClientFactory = new JestClientFactory();
-    URL urlOfElasticsearchNode = new URL(https ? "https" : "http", host, port, 
"");
-
-    if (host != null && port > 1) {
-      HttpClientConfig.Builder builder = new HttpClientConfig.Builder(
-          urlOfElasticsearchNode.toString()).multiThreaded(true)
-              .connTimeout(300000).readTimeout(300000);
-      if (https) {
-        if (user != null && password != null) {
-          builder.defaultCredentials(user, password);
-        }
-        builder.defaultSchemeForDiscoveredNodes("https")
-            .sslSocketFactory(sslSocketFactory) // this only affects sync calls
-            .httpsIOSessionStrategy(httpsIOSessionStrategy); // this only 
affects async calls
+
+    if (hosts == null || hosts.length == 0 || port <= 1) {
+      throw new IllegalStateException("No hosts or port specified. Please set 
the host and port in nutch-site.xml");
+    }
+
+    List<String> urlsOfElasticsearchNodes = new ArrayList<String>();
+    for (String host : hosts) {
+      urlsOfElasticsearchNodes.add(new URL(https ? "https" : "http", host, 
port, "").toString());
+    }
+    HttpClientConfig.Builder builder = new HttpClientConfig.Builder(
+            urlsOfElasticsearchNodes).multiThreaded(true)
+            .connTimeout(300000).readTimeout(300000);
+    if (https) {
+      if (user != null && password != null) {
+        builder.defaultCredentials(user, password);
       }
-      jestClientFactory.setHttpClientConfig(builder.build());
-    } else {
-      throw new IllegalStateException("No host or port specified. Please set 
the host and port in nutch-site.xml");
+      builder.defaultSchemeForDiscoveredNodes("https")
+          .sslSocketFactory(sslSocketFactory) // this only affects sync calls
+          .httpsIOSessionStrategy(httpsIOSessionStrategy); // this only 
affects async calls
     }
+    jestClientFactory.setHttpClientConfig(builder.build());
 
     client = jestClientFactory.getObject();
 
@@ -360,11 +364,11 @@ public String describe() {
   @Override
   public void setConf(Configuration conf) {
     config = conf;
-    String host = conf.get(ElasticRestConstants.HOST);
+    String[] hosts = conf.getStrings(ElasticRestConstants.HOST);
     String port = conf.get(ElasticRestConstants.PORT);
 
-    if (StringUtils.isBlank(host) && StringUtils.isBlank(port)) {
-      String message = "Missing elastic.rest.host and elastic.rest.port. At 
least one of them should be set in nutch-site.xml ";
+    if (hosts == null || hosts.length == 0 || StringUtils.isBlank(port)) {
+      String message = "No hosts or port specified. Please set the host and 
port in nutch-site.xml";
       message += "\n" + describe();
       LOG.error(message);
       throw new RuntimeException(message);


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Elastic REST Indexer: Allow multiple hosts
> ------------------------------------------
>
>                 Key: NUTCH-2497
>                 URL: https://issues.apache.org/jira/browse/NUTCH-2497
>             Project: Nutch
>          Issue Type: Improvement
>            Reporter: Moreno Feltscher
>            Assignee: Moreno Feltscher
>            Priority: Major
>             Fix For: 1.15
>
>
> Allow specifying a list of Elasticsearch hosts to index documents to. This 
> would be especially helpful when working with an Elasticsearch cluster that 
> consists of multiple nodes.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to