[ 
https://issues.apache.org/jira/browse/NUTCH-2296?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15419766#comment-15419766
 ] 

ASF GitHub Bot commented on NUTCH-2296:
---------------------------------------

Github user lewismc commented on a diff in the pull request:

    https://github.com/apache/nutch/pull/139#discussion_r74679042
  
    --- Diff: 
src/plugin/indexer-elastic-rest/src/java/org/apache/nutch/indexwriter/elasticrest/ElasticRestIndexWriter.java
 ---
    @@ -0,0 +1,336 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +//TODO trust self signed and non matching certs: 
http://stackoverflow.com/questions/2893819/telling-java-to-accept-self-signed-ssl-certificate
    +//TODO refactor the dependencies out of root ivy file
    +
    +package org.apache.nutch.indexwriter.elasticrest;
    +
    +import io.searchbox.client.JestClient;
    +import io.searchbox.client.JestClientFactory;
    +import io.searchbox.client.JestResult;
    +import io.searchbox.client.JestResultHandler;
    +import io.searchbox.client.config.HttpClientConfig;
    +import io.searchbox.core.Bulk;
    +import io.searchbox.core.BulkResult;
    +import io.searchbox.core.Delete;
    +import io.searchbox.core.Index;
    +import org.apache.commons.lang.StringUtils;
    +import org.apache.commons.lang3.exception.ExceptionUtils;
    +import org.apache.hadoop.conf.Configuration;
    +import org.apache.hadoop.mapred.JobConf;
    +import org.apache.http.HttpResponse;
    +import org.apache.http.concurrent.BasicFuture;
    +import org.apache.http.conn.ssl.DefaultHostnameVerifier;
    +import org.apache.http.conn.ssl.NoopHostnameVerifier;
    +import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
    +import org.apache.http.nio.conn.SchemeIOSessionStrategy;
    +import org.apache.http.nio.conn.ssl.SSLIOSessionStrategy;
    +import org.apache.http.ssl.SSLContextBuilder;
    +import org.apache.http.ssl.TrustStrategy;
    +import org.apache.nutch.indexer.IndexWriter;
    +import org.apache.nutch.indexer.NutchDocument;
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +
    +import javax.net.ssl.HostnameVerifier;
    +import javax.net.ssl.SSLContext;
    +import java.io.BufferedReader;
    +import java.io.IOException;
    +import java.net.URL;
    +import java.security.KeyManagementException;
    +import java.security.KeyStoreException;
    +import java.security.NoSuchAlgorithmException;
    +import java.security.cert.CertificateException;
    +import java.security.cert.X509Certificate;
    +import java.util.HashMap;
    +import java.util.Map;
    +import java.util.concurrent.ExecutionException;
    +import java.util.concurrent.Future;
    +
    +
    +/**
    + */
    +public class ElasticRestIndexWriter implements IndexWriter {
    +    public static Logger LOG = 
LoggerFactory.getLogger(ElasticRestIndexWriter.class);
    +
    +    private static final int DEFAULT_MAX_BULK_DOCS = 250;
    +    private static final int DEFAULT_MAX_BULK_LENGTH = 2500500;
    +
    +    private JestClient client;
    +    private String defaultIndex;
    +    private String defaultType = null;
    +
    +    private Configuration config;
    +
    +    private Bulk.Builder bulkBuilder;
    +    private Future<HttpResponse> execute;
    +    private int port = -1;
    +    private String host = null;
    +    private String user = null;
    +    private Boolean https = null;
    +    private String password = null;
    +    private Boolean trustAllHostnames = null;
    +
    +    private int maxBulkDocs;
    +    private int maxBulkLength;
    +    private long indexedDocs = 0;
    +    private int bulkDocs = 0;
    +    private int bulkLength = 0;
    +    private boolean createNewBulk = false;
    +    private long millis;
    +    private BasicFuture<JestResult> basicFuture = null;
    +
    +    @Override
    +    public void open(JobConf job, String name) throws IOException {
    +
    +        host = job.get(ElasticRestConstants.HOST);
    +        port = job.getInt(ElasticRestConstants.PORT, 9200);
    +        user = job.get(ElasticRestConstants.USER);
    +        password = job.get(ElasticRestConstants.PASSWORD);
    +        https = job.getBoolean(ElasticRestConstants.HTTPS, false);
    +        trustAllHostnames = 
job.getBoolean(ElasticRestConstants.HOSTNAME_TRUST, false);
    +
    +        // trust ALL certificates
    +        SSLContext sslContext = null;
    +        try {
    +            sslContext = new SSLContextBuilder().loadTrustMaterial(new 
TrustStrategy() {
    +                public boolean isTrusted(X509Certificate[] arg0, String 
arg1) throws CertificateException {
    +                    return true;
    +                }
    +            }).build();
    +        } catch (NoSuchAlgorithmException | KeyManagementException | 
KeyStoreException e) {
    +            e.printStackTrace();
    --- End diff --
    
    Can you actually LOG the stack trace instead of printing it with e.printStackTrace()?
    For example: LOG.error("Error detected whilst... blah blah blah", e);
    Thank you


> Elasticsearch Indexing Over Rest
> --------------------------------
>
>                 Key: NUTCH-2296
>                 URL: https://issues.apache.org/jira/browse/NUTCH-2296
>             Project: Nutch
>          Issue Type: Improvement
>          Components: indexer
>            Reporter: Brian Zhao
>            Priority: Minor
>
> Open Elasticsearch to the option of REST-based indexing, via another indexing 
> plugin implemented using Jest, potentially allowing the use of HTTPS.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to