This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git

commit 71fabb2a87ff81b78997133ab7c790afa4ea6157
Author: tallison <talli...@apache.org>
AuthorDate: Wed Mar 1 13:48:57 2023 -0500

    NUTCH-2920 -- improve username/pw logic and update README.md
---
 src/plugin/indexer-opensearch-1x/README.md         | 24 +++++++++++++++++++++-
 .../opensearch1x/OpenSearch1xIndexWriter.java      | 10 ++++++---
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/src/plugin/indexer-opensearch-1x/README.md 
b/src/plugin/indexer-opensearch-1x/README.md
index b68557fae..52e5844af 100644
--- a/src/plugin/indexer-opensearch-1x/README.md
+++ b/src/plugin/indexer-opensearch-1x/README.md
@@ -36,9 +36,31 @@ scheme | The scheme (http or https) to connect to OpenSearch 
server. | https
 index | Default index to send documents to. | nutch
 username | Username for auth credentials | admin
 password | Password for auth credentials | admin
-auth | Whether to enable HTTP basic authentication with OpenSearch. Use 
`username` and `password` properties to configure your credentials. | false
+trust.store.path | Path to the trust store |
+trust.store.password | Password for trust store |
+trust.store.type | Type of trust store | JKS
+key.store.path | Path to the key store |
+key.store.password | Password for the key and the key store |
+key.store.type | Type of key store | JKS
 max.bulk.docs | Maximum size of the bulk in number of documents. | 250
 max.bulk.size | Maximum size of the bulk in bytes. | 2500500
 exponential.backoff.millis | Initial delay for the 
[BulkProcessor](https://static.javadoc.io/org.opensearch/opensearch/1.3.8/org/opensearch/action/bulk/BulkProcessor.html)
 exponential backoff policy. | 100
 exponential.backoff.retries | Number of times the 
[BulkProcessor](https://static.javadoc.io/org.opensearch/opensearch/1.3.8/org/opensearch/action/bulk/BulkProcessor.html)
 exponential backoff policy should retry bulk operations. | 10
 bulk.close.timeout | Number of seconds allowed for the 
[BulkProcessor](https://static.javadoc.io/org.opensearch/opensearch/1.3.8/org/opensearch/action/bulk/BulkProcessor.html)
 to complete its last operation. | 600
+
+## Authentication and SSL/TLS
+
+It is highly recommended that users use at least basic authentication (modify 
the `username` and `password`!!!) and that they set up at least the trust store 
(1-way TLS).
+For a "getting started" level introduction to setting up a trust store, see: 
[Connecting 
java-high-level-rest-client](https://opensearch.org/blog/connecting-java-high-level-rest-client-with-opensearch-over-https/).
+For a more in-depth treatment, see: [Configuring TLS 
certificates](https://opensearch.org/docs/latest/security/configuration/tls/).
+
+Users may opt for 2-way TLS and skip basic authentication (`username` and 
`password`).  
+To do this, specify both the `trust.store.*` parameters and the `key.store.*` 
parameters.
+
+If users do not specify at least 1-way TLS (a trust store), this indexer logs a 
warning that this is a bad idea(TM) and then proceeds with TLS protection 
effectively turned off.
+
+## Design
+This index writer was built to be as close as possible to Nutch's existing 
indexer-elastic code. We
+therefore chose to use the to-be-deprecated-in-3.x 
`opensearch-rest-high-level-client`.
+We should plan to migrate to the `java client` for 2.x once the 
BulkProcessor has been added to it.
+See the discussion on 
[NUTCH-2920](https://issues.apache.org/jira/projects/NUTCH/issues/NUTCH-2920).
\ No newline at end of file
diff --git 
a/src/plugin/indexer-opensearch-1x/src/java/org/apache/nutch/indexwriter/opensearch1x/OpenSearch1xIndexWriter.java
 
b/src/plugin/indexer-opensearch-1x/src/java/org/apache/nutch/indexwriter/opensearch1x/OpenSearch1xIndexWriter.java
index ec516e250..878c55a09 100644
--- 
a/src/plugin/indexer-opensearch-1x/src/java/org/apache/nutch/indexwriter/opensearch1x/OpenSearch1xIndexWriter.java
+++ 
b/src/plugin/indexer-opensearch-1x/src/java/org/apache/nutch/indexwriter/opensearch1x/OpenSearch1xIndexWriter.java
@@ -194,6 +194,10 @@ public class OpenSearch1xIndexWriter implements 
IndexWriter {
     keyStorePath = parameters.get(OpenSearch1xConstants.KEY_STORE_PATH);
     keyStorePassword = 
parameters.get(OpenSearch1xConstants.KEY_STORE_PASSWORD);
     keyStoreType = parameters.get(OpenSearch1xConstants.KEY_STORE_TYPE, "JKS");
+
+    if (! StringUtils.isAllBlank(user) && password == null) {
+      throw new IllegalArgumentException("Must specify a password, even if 
empty, if a 'user' is specified.");
+    }
     boolean basicAuth = user != null && password != null;
 
     final CredentialsProvider credentialsProvider = new 
BasicCredentialsProvider();
@@ -262,9 +266,9 @@ public class OpenSearch1xIndexWriter implements IndexWriter 
{
       sslBuilder.loadTrustMaterial(trustStore.get(), null);
     } else {
       LOG.warn("You haven't set up a trust store. We're effectively turning 
off " +
-          " tls.  This is 'Not a good idea'(tm). See a getting started guide: 
" +
-          
"https://opensearch.org/blog/connecting-java-high-level-rest-client-with-opensearch-over-https/
 "+
-          " or in more depth: 
https://opensearch.org/docs/latest/security/configuration/tls/");
+          " tls.  This is a 'Bad Idea'(TM). See a getting started guide: '" +
+          
"https://opensearch.org/blog/connecting-java-high-level-rest-client-with-opensearch-over-https/'
 "+
+          " or in more depth: 
'https://opensearch.org/docs/latest/security/configuration/tls/'");
       sslBuilder.loadTrustMaterial(null, new TrustSelfSignedStrategy());
     }
 

Reply via email to