This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new 9109bdd74 NUTCH-2991 Support HTTP/S Header Authorization for Solr 
connections (#763)
9109bdd74 is described below

commit 9109bdd740ba578fcffff17745ebc9f53f464667
Author: Sebastian Nagel <sna...@apache.org>
AuthorDate: Tue Jun 6 14:51:20 2023 +0200

    NUTCH-2991 Support HTTP/S Header Authorization for Solr connections (#763)
    
    NUTCH-2991 Support HTTP/S Header Authorization for Solr connections
    (patch contributed by Marcos Gomez)
    - adds params auth.header.name and auth.header.value for JWT Authentication
      with Bearer Tokens sent via the HTTP Authorization header connections
    - also document basic authentication and improve error message when reading 
the configuration fails
---
 conf/index-writers.xml.template                    | 19 ++++-
 .../org/apache/nutch/indexer/IndexWriters.java     |  2 +-
 .../nutch/indexwriter/solr/SolrConstants.java      |  4 +
 .../nutch/indexwriter/solr/SolrIndexWriter.java    | 47 ++++++++---
 .../apache/nutch/indexwriter/solr/SolrUtils.java   | 94 +++++++++++++++++++++-
 5 files changed, 153 insertions(+), 13 deletions(-)

diff --git a/conf/index-writers.xml.template b/conf/index-writers.xml.template
index 549ebd4c9..6ed341cb7 100644
--- a/conf/index-writers.xml.template
+++ b/conf/index-writers.xml.template
@@ -26,9 +26,24 @@
       <param name="collection" value=""/>
       <param name="weight.field" value=""/>
       <param name="commitSize" value="1000"/>
+      <!-- enable authentication. In addition, set username and
+           password for basic authentication, or pass the Bearer
+           token via the Authorization header. See below.
+      -->
       <param name="auth" value="false"/>
-      <param name="username" value="username"/>
-      <param name="password" value="password"/>
+      <!-- username and password for basic authentication -->
+      <param name="username" value=""/>
+      <param name="password" value=""/>
+      <!-- Scheme name sent in the Authorization HTTP header, e.g.
+           <param name="auth.header.name" value="Bearer"/>
+           HTTP header -> Authorization: Bearer 1234567890
+      -->
+      <param name="auth.header.name" value=""/>
+      <!-- Value (token) sent after the scheme in the Authorization HTTP header, e.g.
+           <param name="auth.header.value" value="1234567890"/>
+           HTTP header -> Authorization: Bearer 1234567890
+      -->
+      <param name="auth.header.value" value=""/>
     </parameters>
     <mapping>
       <copy>
diff --git a/src/java/org/apache/nutch/indexer/IndexWriters.java 
b/src/java/org/apache/nutch/indexer/IndexWriters.java
index a8ab0ec9c..f8ae8ee86 100644
--- a/src/java/org/apache/nutch/indexer/IndexWriters.java
+++ b/src/java/org/apache/nutch/indexer/IndexWriters.java
@@ -137,7 +137,7 @@ public class IndexWriters {
 
       return indexWriterConfigs;
     } catch (SAXException | IOException | ParserConfigurationException e) {
-      LOG.error(e.toString());
+      LOG.error("Failed to read index writers configuration: {}", 
e.getMessage());
       return new IndexWriterConfig[0];
     }
   }
diff --git 
a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java
 
b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java
index 302ed75ed..ee6d5d623 100644
--- 
a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java
+++ 
b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrConstants.java
@@ -34,4 +34,8 @@ public interface SolrConstants {
 
   String PASSWORD = "password";
 
+  String AUTH_HEADER_NAME = "auth.header.name";
+
+  String AUTH_HEADER_VALUE = "auth.header.value";
+
 }
diff --git 
a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
 
b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
index 12d3ff6b7..ec2ab46d2 100644
--- 
a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
+++ 
b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
@@ -16,8 +16,8 @@
  */
 package org.apache.nutch.indexwriter.solr;
 
-import java.lang.invoke.MethodHandles;
 import java.io.IOException;
+import java.lang.invoke.MethodHandles;
 import java.time.format.DateTimeFormatter;
 import java.util.AbstractMap;
 import java.util.ArrayList;
@@ -72,6 +72,8 @@ public class SolrIndexWriter implements IndexWriter {
   private boolean auth;
   private String username;
   private String password;
+  private String authHeaderName;
+  private String authHeaderValue;
 
   @Override
   public void open(Configuration conf, String name) {
@@ -99,20 +101,40 @@ public class SolrIndexWriter implements IndexWriter {
     this.auth = parameters.getBoolean(SolrConstants.USE_AUTH, false);
     this.username = parameters.get(SolrConstants.USERNAME);
     this.password = parameters.get(SolrConstants.PASSWORD);
+    this.authHeaderName = parameters.get(SolrConstants.AUTH_HEADER_NAME, "");
+    this.authHeaderValue = parameters.get(SolrConstants.AUTH_HEADER_VALUE, "");
 
     this.solrClients = new ArrayList<>();
 
     switch (type) {
     case "http":
       for (String url : urls) {
-        solrClients.add(SolrUtils.getHttpSolrClient(url));
+        if (this.auth && !StringUtil.isEmpty(this.authHeaderName)
+            && !StringUtil.isEmpty(this.authHeaderValue)) {
+          solrClients.add(SolrUtils.getHttpSolrClientHeaderAuthorization(url,
+              this.authHeaderName, this.authHeaderValue));
+        } else if (this.auth && !StringUtil.isEmpty(this.username)
+            && !StringUtil.isEmpty(this.password)) {
+          solrClients.add(
+              SolrUtils.getHttpSolrClient(url, this.username, this.password));
+        } else {
+          solrClients.add(SolrUtils.getHttpSolrClient(url));
+        }
       }
       break;
     case "cloud":
-      CloudSolrClient sc = this.auth
-          ? SolrUtils.getCloudSolrClient(Arrays.asList(urls), this.username,
-              this.password)
-          : SolrUtils.getCloudSolrClient(Arrays.asList(urls));
+      CloudSolrClient sc;
+      if (this.auth && !StringUtil.isEmpty(this.authHeaderName)
+          && !StringUtil.isEmpty(this.authHeaderValue)) {
+        sc = SolrUtils.getCloudSolrClientHeaderAuthorization(
+            Arrays.asList(urls), this.authHeaderName, this.authHeaderValue);
+      } else if (this.auth && !StringUtil.isEmpty(this.username)
+          && !StringUtil.isEmpty(this.password)) {
+        sc = SolrUtils.getCloudSolrClient(Arrays.asList(urls), this.username,
+            this.password);
+      } else {
+        sc = SolrUtils.getCloudSolrClient(Arrays.asList(urls));
+      }
       sc.setDefaultCollection(this.collection);
       solrClients.add(sc);
       break;
@@ -219,7 +241,8 @@ public class SolrIndexWriter implements IndexWriter {
     push();
     try {
       for (SolrClient solrClient : solrClients) {
-        if (this.auth) {
+        if (this.auth && !StringUtil.isEmpty(this.username)
+            && !StringUtil.isEmpty(this.password)) {
           UpdateRequest req = new UpdateRequest();
           req.setAction(UpdateRequest.ACTION.COMMIT, true, true);
           req.setBasicAuthCredentials(this.username, this.password);
@@ -243,7 +266,8 @@ public class SolrIndexWriter implements IndexWriter {
         req.add(inputDocs);
         req.setAction(UpdateRequest.ACTION.OPTIMIZE, false, false);
         req.setParams(params);
-        if (this.auth) {
+        if (this.auth && !StringUtil.isEmpty(this.username)
+            && !StringUtil.isEmpty(this.password)) {
           req.setBasicAuthCredentials(this.username, this.password);
         }
         for (SolrClient solrClient : solrClients) {
@@ -264,7 +288,8 @@ public class SolrIndexWriter implements IndexWriter {
         req.deleteById(deleteIds);
         req.setAction(UpdateRequest.ACTION.OPTIMIZE, false, false);
         req.setParams(params);
-        if (this.auth) {
+        if (this.auth && !StringUtil.isEmpty(this.username)
+            && !StringUtil.isEmpty(this.password)) {
           req.setBasicAuthCredentials(this.username, this.password);
         }
 
@@ -326,6 +351,10 @@ public class SolrIndexWriter implements IndexWriter {
     properties.put(SolrConstants.USE_AUTH, new AbstractMap.SimpleEntry<>(
         "Whether to enable HTTP basic authentication for communicating with 
Solr. Use the username and password properties to configure your credentials.",
         this.auth));
+    properties.put(SolrConstants.AUTH_HEADER_NAME, new 
AbstractMap.SimpleEntry<>(
+        "The authentication header content name.", this.authHeaderName));
+    properties.put(SolrConstants.AUTH_HEADER_VALUE, new 
AbstractMap.SimpleEntry<>(
+        "The authentication header content value.", 
StringUtil.mask(this.authHeaderValue)));
     properties.put(SolrConstants.USERNAME, new AbstractMap.SimpleEntry<>(
         "The username of Solr server.", this.username));
     properties.put(SolrConstants.PASSWORD, new AbstractMap.SimpleEntry<>(
diff --git 
a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java
 
b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java
index 8f97b166e..d307edc59 100644
--- 
a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java
+++ 
b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrUtils.java
@@ -22,10 +22,12 @@ import org.apache.http.client.CredentialsProvider;
 import org.apache.http.client.HttpClient;
 import org.apache.http.impl.client.BasicCredentialsProvider;
 import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.http.message.BasicHeader;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
 
+import java.util.Arrays;
 import java.util.List;
 
 public class SolrUtils {
@@ -39,7 +41,7 @@ public class SolrUtils {
 
   static CloudSolrClient getCloudSolrClient(List<String> urls, String username,
       String password) {
-    // Building http client
+    // Building HTTP client
     CredentialsProvider provider = new BasicCredentialsProvider();
     UsernamePasswordCredentials credentials = new UsernamePasswordCredentials(
         username, password);
@@ -55,10 +57,100 @@ public class SolrUtils {
     return sc;
   }
 
+  /**
+   * Creates a new SolrClient, passing an Authorization header on the requests'
+   * HTTP Header:
+   * 
+   * <pre>
+   * Authorization: headerName headerValue
+   * </pre>
+   * 
+   * e.g.
+   * 
+   * <pre>
+   * Authorization: Bearer XXXXXXXXXXX
+   * </pre>
+   * 
+   * @param urls
+   *          Solr URLs
+   * @param headerName
+   *          Scheme name sent in the Authorization header: Bearer, Token, etc.
+   * @param headerValue
+   *          Value sent after the scheme in the Authorization header: JWT_TOKEN
+   * @return CloudSolrClient
+   */
+  static CloudSolrClient getCloudSolrClientHeaderAuthorization(
+      List<String> urls, String headerName, String headerValue) {
+    // Building HTTP client
+    HttpClientBuilder httpClientBuilder = HttpClientBuilder.create();
+    httpClientBuilder.setDefaultHeaders(Arrays.asList(
+        new BasicHeader("Authorization", headerName + " " + headerValue)));
+    // Building the client
+    CloudSolrClient sc = new CloudSolrClient.Builder(urls)
+        .withParallelUpdates(true).withHttpClient(httpClientBuilder.build())
+        .build();
+    sc.connect();
+    return sc;
+  }
+
   static SolrClient getHttpSolrClient(String url) {
     return new HttpSolrClient.Builder(url).build();
   }
 
+  /**
+   * Creates a new SolrClient, passing an Authorization header on the requests'
+   * HTTP Header:
+   * 
+   * <pre>
+   * Authorization: headerName headerValue
+   * </pre>
+   * 
+   * e.g.
+   * 
+   * <pre>
+   * Authorization: Bearer XXXXXXXXXXX
+   * </pre>
+   * 
+   * @param url
+   *          Solr URL
+   * @param headerName
+   *          Scheme name sent in the Authorization header: Bearer, Token, etc.
+   * @param headerValue
+   *          Value sent after the scheme in the Authorization header: JWT_TOKEN
+   * @return SolrClient
+   */
+  static SolrClient getHttpSolrClientHeaderAuthorization(String url,
+      String headerName, String headerValue) {
+    HttpClientBuilder httpClientBuilder = HttpClientBuilder.create();
+    httpClientBuilder.setDefaultHeaders(Arrays.asList(
+        new BasicHeader("Authorization", headerName + " " + headerValue)));
+    return new HttpSolrClient.Builder(url)
+        .withHttpClient(httpClientBuilder.build()).build();
+  }
+
+  /**
+   * Creates a new SolrClient, using Basic Authentication.
+   * 
+   * @param url
+   *          Solr URL
+   * @param username
+   *          Username
+   * @param password
+   *          Password
+   * @return SolrClient
+   */
+  static SolrClient getHttpSolrClient(String url, String username,
+      String password) {
+    CredentialsProvider provider = new BasicCredentialsProvider();
+    UsernamePasswordCredentials credentials = new UsernamePasswordCredentials(
+        username, password);
+    provider.setCredentials(AuthScope.ANY, credentials);
+    HttpClientBuilder httpClientBuilder = HttpClientBuilder.create();
+    httpClientBuilder.setDefaultCredentialsProvider(provider);
+    return new HttpSolrClient.Builder(url)
+        .withHttpClient(httpClientBuilder.build()).build();
+  }
+
   static String stripNonCharCodepoints(String input) {
     StringBuilder retval = new StringBuilder();
     char ch;

Reply via email to