This is an automated email from the ASF dual-hosted git repository. snagel pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
commit 71fabb2a87ff81b78997133ab7c790afa4ea6157 Author: tallison <talli...@apache.org> AuthorDate: Wed Mar 1 13:48:57 2023 -0500 NUTCH-2920 -- improve username/pw logic and update README.md --- src/plugin/indexer-opensearch-1x/README.md | 24 +++++++++++++++++++++- .../opensearch1x/OpenSearch1xIndexWriter.java | 10 ++++++--- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/plugin/indexer-opensearch-1x/README.md b/src/plugin/indexer-opensearch-1x/README.md index b68557fae..52e5844af 100644 --- a/src/plugin/indexer-opensearch-1x/README.md +++ b/src/plugin/indexer-opensearch-1x/README.md @@ -36,9 +36,31 @@ scheme | The scheme (http or https) to connect to OpenSearch server. | https index | Default index to send documents to. | nutch username | Username for auth credentials | admin password | Password for auth credentials | admin -auth | Whether to enable HTTP basic authentication with OpenSearch. Use `username` and `password` properties to configure your credentials. | false +trust.store.path | Path to the trust store | +trust.store.password | Password for trust store | +trust.store.type | Type of trust store | JKS +key.store.path | Path to the key store | +key.store.password | Password for the key and the key store | +key.store.type | Type of key store | JKS max.bulk.docs | Maximum size of the bulk in number of documents. | 250 max.bulk.size | Maximum size of the bulk in bytes. | 2500500 exponential.backoff.millis | Initial delay for the [BulkProcessor](https://static.javadoc.io/org.opensearch/opensearch/1.3.8/org/opensearch/action/bulk/BulkProcessor.html) exponential backoff policy. | 100 exponential.backoff.retries | Number of times the [BulkProcessor](https://static.javadoc.io/org.opensearch/opensearch/1.3.8/org/opensearch/action/bulk/BulkProcessor.html) exponential backoff policy should retry bulk operations. 
| 10 bulk.close.timeout | Number of seconds allowed for the [BulkProcessor](https://static.javadoc.io/org.opensearch/opensearch/1.3.8/org/opensearch/action/bulk/BulkProcessor.html) to complete its last operation. | 600 + +## Authentication and SSL/TLS + +It is highly recommended that users use at least basic authentication (modify the `username` and `password`!!!) and that they set up at least the trust store (1-way TLS). +For a "getting started" level introduction to setting up a trust store, see: [Connecting java-high-level-rest-client](https://opensearch.org/blog/connecting-java-high-level-rest-client-with-opensearch-over-https/). +For a more in-depth treatment, see: [Configuring TLS certificates](https://opensearch.org/docs/latest/security/configuration/tls/). + +Users may opt for 2-way TLS and skip basic authentication (`username` and `password`). +To do this, specify both the `trust.store.*` parameters and the `key.store.*` parameters. + +If users do not specify at least 1-way TLS (trust store), this indexer logs a warning that this is a bad idea(TM), and it will proceed by completely ignoring all the SSL security. + +## Design +This index writer was built to be as close as possible to Nutch's existing indexer-elastic code. We +therefore chose to use the to-be-deprecated-in-3.x `opensearch-rest-high-level-client`. +We should plan to migrate to the `java client` for 2.x, once the BulkProcessor has been added. +See the discussion on [NUTCH-2920](https://issues.apache.org/jira/projects/NUTCH/issues/NUTCH-2920). 
\ No newline at end of file diff --git a/src/plugin/indexer-opensearch-1x/src/java/org/apache/nutch/indexwriter/opensearch1x/OpenSearch1xIndexWriter.java b/src/plugin/indexer-opensearch-1x/src/java/org/apache/nutch/indexwriter/opensearch1x/OpenSearch1xIndexWriter.java index ec516e250..878c55a09 100644 --- a/src/plugin/indexer-opensearch-1x/src/java/org/apache/nutch/indexwriter/opensearch1x/OpenSearch1xIndexWriter.java +++ b/src/plugin/indexer-opensearch-1x/src/java/org/apache/nutch/indexwriter/opensearch1x/OpenSearch1xIndexWriter.java @@ -194,6 +194,10 @@ public class OpenSearch1xIndexWriter implements IndexWriter { keyStorePath = parameters.get(OpenSearch1xConstants.KEY_STORE_PATH); keyStorePassword = parameters.get(OpenSearch1xConstants.KEY_STORE_PASSWORD); keyStoreType = parameters.get(OpenSearch1xConstants.KEY_STORE_TYPE, "JKS"); + + if (! StringUtils.isAllBlank(user) && password == null) { + throw new IllegalArgumentException("Must specify a password, even if empty, if a 'user' is specified."); + } boolean basicAuth = user != null && password != null; final CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); @@ -262,9 +266,9 @@ public class OpenSearch1xIndexWriter implements IndexWriter { sslBuilder.loadTrustMaterial(trustStore.get(), null); } else { LOG.warn("You haven't set up a trust store. We're effectively turning off " + - " tls. This is 'Not a good idea'(tm). See a getting started guide: " + - "https://opensearch.org/blog/connecting-java-high-level-rest-client-with-opensearch-over-https/ "+ - " or in more depth: https://opensearch.org/docs/latest/security/configuration/tls/"); + " tls. This is a 'Bad Idea'(TM). See a getting started guide: '" + + "https://opensearch.org/blog/connecting-java-high-level-rest-client-with-opensearch-over-https/' "+ + " or in more depth: 'https://opensearch.org/docs/latest/security/configuration/tls/'"); sslBuilder.loadTrustMaterial(null, new TrustSelfSignedStrategy()); }