This is an automated email from the ASF dual-hosted git repository. snagel pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push: new 9109bdd74 NUTCH-2991 Support HTTP/S Header Authorization for Solr connections (#763) 9109bdd74 is described below commit 9109bdd740ba578fcffff17745ebc9f53f464667 Author: Sebastian Nagel <sna...@apache.org> AuthorDate: Tue Jun 6 14:51:20 2023 +0200 NUTCH-2991 Support HTTP/S Header Authorization for Solr connections (#763) NUTCH-2991 Support HTTP/S Header Authorization for Solr connections (patch contributed by Marcos Gomez) - adds params auth.header.name and auth.header.value for JWT Authentication with Bearer Tokens sent via the HTTP Authorization header connections - also document basic authentication and improve error message when reading the configuration fails --- conf/index-writers.xml.template | 19 ++++- .../org/apache/nutch/indexer/IndexWriters.java | 2 +- .../nutch/indexwriter/solr/SolrConstants.java | 4 + .../nutch/indexwriter/solr/SolrIndexWriter.java | 47 ++++++++--- .../apache/nutch/indexwriter/solr/SolrUtils.java | 94 +++++++++++++++++++++- 5 files changed, 153 insertions(+), 13 deletions(-) diff --git a/conf/index-writers.xml.template b/conf/index-writers.xml.template index 549ebd4c9..6ed341cb7 100644 --- a/conf/index-writers.xml.template +++ b/conf/index-writers.xml.template @@ -26,9 +26,24 @@ <param name="collection" value=""/> <param name="weight.field" value=""/> <param name="commitSize" value="1000"/> + <!-- enable authentication. In addition, set username and + password for basic authentication, or pass the Bearer + token via Authentication header. See below. 
+ --> <param name="auth" value="false"/> - <param name="username" value="username"/> - <param name="password" value="password"/> + <!-- username and password for basic authentication --> + <param name="username" value=""/> + <param name="password" value=""/> + <!-- Name for Authorization HTTP header + <param name="auth.header.name" value="Bearer"/> + HTTP header -> Authorization: Bearer 1234567890 + --> + <param name="auth.header.name" value=""/> + <!-- Value for Authorization HTTP header + <param name="auth.header.value" value="1234567890"/> + HTTP header -> Authorization: Bearer 1234567890 + --> + <param name="auth.header.value" value=""/> </parameters> <mapping> <copy> diff --git a/src/java/org/apache/nutch/indexer/IndexWriters.java b/src/java/org/apache/nutch/indexer/IndexWriters.java index a8ab0ec9c..f8ae8ee86 100644 --- a/src/java/org/apache/nutch/indexer/IndexWriters.java +++ b/src/java/org/apache/nutch/indexer/IndexWriters.java @@ -137,7 +137,7 @@ public class IndexWriters { return indexWriterConfigs; } catch (SAXException | IOException | ParserConfigurationException e) { - LOG.error(e.toString()); + LOG.error("Failed to read index writers configuration: {}", e.getMessage()); return new IndexWriterConfig[0]; } } diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java index 302ed75ed..ee6d5d623 100644 --- a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java +++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java @@ -34,4 +34,8 @@ public interface SolrConstants { String PASSWORD = "password"; + String AUTH_HEADER_NAME = "auth.header.name"; + + String AUTH_HEADER_VALUE = "auth.header.value"; + } diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java 
b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java index 12d3ff6b7..ec2ab46d2 100644 --- a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java +++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java @@ -16,8 +16,8 @@ */ package org.apache.nutch.indexwriter.solr; -import java.lang.invoke.MethodHandles; import java.io.IOException; +import java.lang.invoke.MethodHandles; import java.time.format.DateTimeFormatter; import java.util.AbstractMap; import java.util.ArrayList; @@ -72,6 +72,8 @@ public class SolrIndexWriter implements IndexWriter { private boolean auth; private String username; private String password; + private String authHeaderName; + private String authHeaderValue; @Override public void open(Configuration conf, String name) { @@ -99,20 +101,40 @@ public class SolrIndexWriter implements IndexWriter { this.auth = parameters.getBoolean(SolrConstants.USE_AUTH, false); this.username = parameters.get(SolrConstants.USERNAME); this.password = parameters.get(SolrConstants.PASSWORD); + this.authHeaderName = parameters.get(SolrConstants.AUTH_HEADER_NAME, ""); + this.authHeaderValue = parameters.get(SolrConstants.AUTH_HEADER_VALUE, ""); this.solrClients = new ArrayList<>(); switch (type) { case "http": for (String url : urls) { - solrClients.add(SolrUtils.getHttpSolrClient(url)); + if (this.auth && !StringUtil.isEmpty(this.authHeaderName) + && !StringUtil.isEmpty(this.authHeaderValue)) { + solrClients.add(SolrUtils.getHttpSolrClientHeaderAuthorization(url, + this.authHeaderName, this.authHeaderValue)); + } else if (this.auth && !StringUtil.isEmpty(this.username) + && !StringUtil.isEmpty(this.password)) { + solrClients.add( + SolrUtils.getHttpSolrClient(url, this.username, this.password)); + } else { + solrClients.add(SolrUtils.getHttpSolrClient(url)); + } } break; case "cloud": - CloudSolrClient sc = this.auth - ? 
SolrUtils.getCloudSolrClient(Arrays.asList(urls), this.username, - this.password) - : SolrUtils.getCloudSolrClient(Arrays.asList(urls)); + CloudSolrClient sc; + if (this.auth && !StringUtil.isEmpty(this.authHeaderName) + && !StringUtil.isEmpty(this.authHeaderValue)) { + sc = SolrUtils.getCloudSolrClientHeaderAuthorization( + Arrays.asList(urls), this.authHeaderName, this.authHeaderValue); + } else if (this.auth && !StringUtil.isEmpty(this.username) + && !StringUtil.isEmpty(this.password)) { + sc = SolrUtils.getCloudSolrClient(Arrays.asList(urls), this.username, + this.password); + } else { + sc = SolrUtils.getCloudSolrClient(Arrays.asList(urls)); + } sc.setDefaultCollection(this.collection); solrClients.add(sc); break; @@ -219,7 +241,8 @@ public class SolrIndexWriter implements IndexWriter { push(); try { for (SolrClient solrClient : solrClients) { - if (this.auth) { + if (this.auth && !StringUtil.isEmpty(this.username) + && !StringUtil.isEmpty(this.password)) { UpdateRequest req = new UpdateRequest(); req.setAction(UpdateRequest.ACTION.COMMIT, true, true); req.setBasicAuthCredentials(this.username, this.password); @@ -243,7 +266,8 @@ public class SolrIndexWriter implements IndexWriter { req.add(inputDocs); req.setAction(UpdateRequest.ACTION.OPTIMIZE, false, false); req.setParams(params); - if (this.auth) { + if (this.auth && !StringUtil.isEmpty(this.username) + && !StringUtil.isEmpty(this.password)) { req.setBasicAuthCredentials(this.username, this.password); } for (SolrClient solrClient : solrClients) { @@ -264,7 +288,8 @@ public class SolrIndexWriter implements IndexWriter { req.deleteById(deleteIds); req.setAction(UpdateRequest.ACTION.OPTIMIZE, false, false); req.setParams(params); - if (this.auth) { + if (this.auth && !StringUtil.isEmpty(this.username) + && !StringUtil.isEmpty(this.password)) { req.setBasicAuthCredentials(this.username, this.password); } @@ -326,6 +351,10 @@ public class SolrIndexWriter implements IndexWriter { 
properties.put(SolrConstants.USE_AUTH, new AbstractMap.SimpleEntry<>( "Whether to enable HTTP basic authentication for communicating with Solr. Use the username and password properties to configure your credentials.", this.auth)); + properties.put(SolrConstants.AUTH_HEADER_NAME, new AbstractMap.SimpleEntry<>( + "The authentication header content name.", this.authHeaderName)); + properties.put(SolrConstants.AUTH_HEADER_VALUE, new AbstractMap.SimpleEntry<>( + "The authentication header content value.", StringUtil.mask(this.authHeaderValue))); properties.put(SolrConstants.USERNAME, new AbstractMap.SimpleEntry<>( "The username of Solr server.", this.username)); properties.put(SolrConstants.PASSWORD, new AbstractMap.SimpleEntry<>( diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java index 8f97b166e..d307edc59 100644 --- a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java +++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java @@ -22,10 +22,12 @@ import org.apache.http.client.CredentialsProvider; import org.apache.http.client.HttpClient; import org.apache.http.impl.client.BasicCredentialsProvider; import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.message.BasicHeader; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient; +import java.util.Arrays; import java.util.List; public class SolrUtils { @@ -39,7 +41,7 @@ public class SolrUtils { static CloudSolrClient getCloudSolrClient(List<String> urls, String username, String password) { - // Building http client + // Building HTTP client CredentialsProvider provider = new BasicCredentialsProvider(); UsernamePasswordCredentials credentials = new UsernamePasswordCredentials( username, password); @@ -55,10 
+57,100 @@ public class SolrUtils { return sc; } + /** + * Creates a new SolrClient, passing an Authorization header on the requests' + * HTTP Header: + * + * <pre> + * Authorization: headerName headerValue + * </pre> + * + * ie. + * + * <pre> + * Authorization: Bearer XXXXXXXXXXX + * </pre> + * + * @param urls + * Solr URLs + * @param headerName + * Header name sent on the Authorization: Bearer, Token, etc. + * @param headerValue + * Header value sent on the Authorization: JWT_TOKEN + * @return CloudSolrClient + */ + static CloudSolrClient getCloudSolrClientHeaderAuthorization( + List<String> urls, String headerName, String headerValue) { + // Building http client + HttpClientBuilder httpClientBuilder = HttpClientBuilder.create(); + httpClientBuilder.setDefaultHeaders(Arrays.asList( + new BasicHeader("Authorization", headerName + " " + headerValue))); + // Building the client + CloudSolrClient sc = new CloudSolrClient.Builder(urls) + .withParallelUpdates(true).withHttpClient(httpClientBuilder.build()) + .build(); + sc.connect(); + return sc; + } + static SolrClient getHttpSolrClient(String url) { return new HttpSolrClient.Builder(url).build(); } + /** + * Creates a new SolrClient, passing an Authorization header on the requests' + * HTTP Header: + * + * <pre> + * Authorization: headerName headerValue + * </pre> + * + * ie. + * + * <pre> + * Authorization: Bearer XXXXXXXXXXX + * </pre> + * + * @param url + * Solr URL + * @param headerName + * Header name sent on the Authorization: Bearer, Token, etc. 
+ * @param headerValue + * Header value sent on the Authorization: JWT_TOKEN + * @return SolrClient + */ + static SolrClient getHttpSolrClientHeaderAuthorization(String url, + String headerName, String headerValue) { + HttpClientBuilder httpClientBuilder = HttpClientBuilder.create(); + httpClientBuilder.setDefaultHeaders(Arrays.asList( + new BasicHeader("Authorization", headerName + " " + headerValue))); + return new HttpSolrClient.Builder(url) + .withHttpClient(httpClientBuilder.build()).build(); + } + + /** + * Creates a new SolrClient, using Basic Authentication. + * + * @param url + * Solr URL + * @param username + * Username + * @param password + * Password + * @return SolrClient + */ + static SolrClient getHttpSolrClient(String url, String username, + String password) { + CredentialsProvider provider = new BasicCredentialsProvider(); + UsernamePasswordCredentials credentials = new UsernamePasswordCredentials( + username, password); + provider.setCredentials(AuthScope.ANY, credentials); + HttpClientBuilder httpClientBuilder = HttpClientBuilder.create(); + httpClientBuilder.setDefaultCredentialsProvider(provider); + return new HttpSolrClient.Builder(url) + .withHttpClient(httpClientBuilder.build()).build(); + } + static String stripNonCharCodepoints(String input) { StringBuilder retval = new StringBuilder(); char ch;