[
https://issues.apache.org/jira/browse/NUTCH-2296?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15419766#comment-15419766
]
ASF GitHub Bot commented on NUTCH-2296:
---------------------------------------
Github user lewismc commented on a diff in the pull request:
https://github.com/apache/nutch/pull/139#discussion_r74679042
--- Diff:
src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestIndexWriter.java
---
@@ -0,0 +1,336 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//TODO trust self signed and non matching certs:
http://stackoverflow.com/questions/2893819/telling-java-to-accept-self-signed-ssl-certificate
+//TODO refactor the dependencies out of root ivy file
+
+package org.apache.nutch.indexwriter.elasticrest;
+
+import io.searchbox.client.JestClient;
+import io.searchbox.client.JestClientFactory;
+import io.searchbox.client.JestResult;
+import io.searchbox.client.JestResultHandler;
+import io.searchbox.client.config.HttpClientConfig;
+import io.searchbox.core.Bulk;
+import io.searchbox.core.BulkResult;
+import io.searchbox.core.Delete;
+import io.searchbox.core.Index;
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.exception.ExceptionUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.http.HttpResponse;
+import org.apache.http.concurrent.BasicFuture;
+import org.apache.http.conn.ssl.DefaultHostnameVerifier;
+import org.apache.http.conn.ssl.NoopHostnameVerifier;
+import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
+import org.apache.http.nio.conn.SchemeIOSessionStrategy;
+import org.apache.http.nio.conn.ssl.SSLIOSessionStrategy;
+import org.apache.http.ssl.SSLContextBuilder;
+import org.apache.http.ssl.TrustStrategy;
+import org.apache.nutch.indexer.IndexWriter;
+import org.apache.nutch.indexer.NutchDocument;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.net.ssl.HostnameVerifier;
+import javax.net.ssl.SSLContext;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.net.URL;
+import java.security.KeyManagementException;
+import java.security.KeyStoreException;
+import java.security.NoSuchAlgorithmException;
+import java.security.cert.CertificateException;
+import java.security.cert.X509Certificate;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+
+
+/**
+ */
+public class ElasticRestIndexWriter implements IndexWriter {
+ public static Logger LOG =
LoggerFactory.getLogger(ElasticRestIndexWriter.class);
+
+ private static final int DEFAULT_MAX_BULK_DOCS = 250;
+ private static final int DEFAULT_MAX_BULK_LENGTH = 2500500;
+
+ private JestClient client;
+ private String defaultIndex;
+ private String defaultType = null;
+
+ private Configuration config;
+
+ private Bulk.Builder bulkBuilder;
+ private Future<HttpResponse> execute;
+ private int port = -1;
+ private String host = null;
+ private String user = null;
+ private Boolean https = null;
+ private String password = null;
+ private Boolean trustAllHostnames = null;
+
+ private int maxBulkDocs;
+ private int maxBulkLength;
+ private long indexedDocs = 0;
+ private int bulkDocs = 0;
+ private int bulkLength = 0;
+ private boolean createNewBulk = false;
+ private long millis;
+ private BasicFuture<JestResult> basicFuture = null;
+
+ @Override
+ public void open(JobConf job, String name) throws IOException {
+
+ host = job.get(ElasticRestConstants.HOST);
+ port = job.getInt(ElasticRestConstants.PORT, 9200);
+ user = job.get(ElasticRestConstants.USER);
+ password = job.get(ElasticRestConstants.PASSWORD);
+ https = job.getBoolean(ElasticRestConstants.HTTPS, false);
+ trustAllHostnames =
job.getBoolean(ElasticRestConstants.HOSTNAME_TRUST, false);
+
+ // trust ALL certificates
+ SSLContext sslContext = null;
+ try {
+ sslContext = new SSLContextBuilder().loadTrustMaterial(new
TrustStrategy() {
+ public boolean isTrusted(X509Certificate[] arg0, String
arg1) throws CertificateException {
+ return true;
+ }
+ }).build();
+ } catch (NoSuchAlgorithmException | KeyManagementException |
KeyStoreException e) {
+ e.printStackTrace();
--- End diff --
Can you actually LOG the stacktrace instead of printing it?
LOG.error("Error detected whilst... blah blah blah", e).
Thank you
> Elasticsearch Indexing Over Rest
> --------------------------------
>
> Key: NUTCH-2296
> URL: https://issues.apache.org/jira/browse/NUTCH-2296
> Project: Nutch
> Issue Type: Improvement
> Components: indexer
> Reporter: Brian Zhao
> Priority: Minor
>
> Open Elasticsearch to the option of REST-based indexing, via another indexing
> plugin implemented using Jest, potentially allowing the use of https.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)