This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new f8ec624  NUTCH-2905 Mask sensitive strings in log output of index writers
                  - add utility methods (StringUtil) to mask password strings or passwords in strings
                  - mask passwords in log output of index writers (Elasticsearch, Solr, RabbitMQ)
                  - mask password in trace log of protocol-httpclient when using basic authentication
     new dd27044  Merge pull request #704 from sebastian-nagel/NUTCH-2905-index-writers-logging-mask-credentials
f8ec624 is described below

commit f8ec6243fc19c20c667c0749c2292ab53f19c029
Author: Sebastian Nagel <[email protected]>
AuthorDate: Thu Nov 18 10:34:29 2021 +0100

    NUTCH-2905 Mask sensitive strings in log output of index writers
    - add utility methods (StringUtil) to mask password strings or
      passwords in strings
    - mask passwords in log output of index writers (Elasticsearch,
      Solr, RabbitMQ)
    - mask password in trace log of protocol-httpclient when using
      basic authentication
---
 src/java/org/apache/nutch/util/StringUtil.java     | 55 ++++++++++++++++++++++
 .../indexwriter/elastic/ElasticIndexWriter.java    |  3 +-
 .../indexwriter/rabbit/RabbitIndexWriter.java      |  5 +-
 .../nutch/indexwriter/solr/SolrIndexWriter.java    |  3 +-
 .../httpclient/HttpBasicAuthentication.java        |  8 ++--
 src/test/org/apache/nutch/util/TestStringUtil.java | 24 +++++++++-
 6 files changed, 91 insertions(+), 7 deletions(-)

diff --git a/src/java/org/apache/nutch/util/StringUtil.java b/src/java/org/apache/nutch/util/StringUtil.java
index 10ff51c..70e8b13 100644
--- a/src/java/org/apache/nutch/util/StringUtil.java
+++ b/src/java/org/apache/nutch/util/StringUtil.java
@@ -16,6 +16,9 @@
  */
 package org.apache.nutch.util;
 
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
 /**
  * A collection of String processing utility methods.
  */
@@ -156,6 +159,57 @@ public class StringUtil {
     return value.replaceAll("�", "");
   }
 
+  /**
+   * Mask sensitive strings - passwords, etc.
+   * 
+   * @param str input string
+   * @return the masked string, all characters replaced by <code>*</code>
+   */
+  public static String mask(final String str) {
+    return mask(str, '*');
+  }
+
+  /**
+   * Mask sensitive strings - passwords, etc.
+   * 
+   * @param str input string
+   * @param mask
+   *          char used for masking
+   * @return the masked string, all characters replaced by the mask character
+   */
+  public static String mask(final String str, final char mask) {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < str.length(); i++) {
+      sb.append(mask);
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Mask sensitive strings - passwords, etc.
+   * 
+   * @param str input string
+   * @param pattern
+   *          pattern which defines capturing groups to be masked in input
+   * @param mask
+   *          char used for masking
+   * @return the masked string, all characters matched in capturing groups of
+   *         the pattern replaced by the mask character
+   */
+  public static String mask(final String str, final Pattern pattern,
+      final char mask) {
+    StringBuilder sb = new StringBuilder(str);
+    Matcher matcher = pattern.matcher(sb);
+    while (matcher.find()) {
+      for (int i = 1; i <= matcher.groupCount(); i++) {
+        for (int j = matcher.start(i); j < matcher.end(i); j++) {
+          sb.setCharAt(j, mask);
+        }
+      }
+    }
+    return sb.toString();
+  }
+
   public static void main(String[] args) {
     if (args.length != 1)
       System.out.println("Usage: StringUtil <encoding name>");
@@ -163,4 +217,5 @@ public class StringUtil {
       System.out.println(args[0] + " is resolved to "
           + EncodingDetector.resolveEncodingAlias(args[0]));
   }
+
 }
diff --git a/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java b/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java
index e81e968..05103d5 100644
--- a/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java
+++ b/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java
@@ -37,6 +37,7 @@ import org.apache.nutch.indexer.IndexWriter;
 import org.apache.nutch.indexer.IndexWriterParams;
 import org.apache.nutch.indexer.NutchDocument;
 import org.apache.nutch.indexer.NutchField;
+import org.apache.nutch.util.StringUtil;
 import org.elasticsearch.action.bulk.BulkResponse;
 import org.elasticsearch.action.DocWriteRequest;
 import org.elasticsearch.action.bulk.BackoffPolicy;
@@ -306,7 +307,7 @@ public class ElasticIndexWriter implements IndexWriter {
     properties.put(ElasticConstants.USER, new AbstractMap.SimpleEntry<>(
         "Username for auth credentials", this.user));
     properties.put(ElasticConstants.PASSWORD, new AbstractMap.SimpleEntry<>(
-        "Password for auth credentials", this.password));
+        "Password for auth credentials", StringUtil.mask(this.password)));
     properties.put(ElasticConstants.MAX_BULK_DOCS,
         new AbstractMap.SimpleEntry<>(
             "Maximum size of the bulk in number of documents.",
diff --git a/src/plugin/indexer-rabbit/src/java/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriter.java b/src/plugin/indexer-rabbit/src/java/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriter.java
index 0ce5cbc..f7a47e2 100644
--- a/src/plugin/indexer-rabbit/src/java/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriter.java
+++ b/src/plugin/indexer-rabbit/src/java/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriter.java
@@ -27,6 +27,7 @@ import org.apache.nutch.indexer.IndexWriter;
 import org.apache.nutch.indexer.NutchField;
 import org.apache.nutch.rabbitmq.RabbitMQClient;
 import org.apache.nutch.rabbitmq.RabbitMQMessage;
+import org.apache.nutch.util.StringUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -35,6 +36,7 @@ import java.util.Arrays;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Pattern;
 
 public class RabbitIndexWriter implements IndexWriter {
 
@@ -220,9 +222,10 @@ public class RabbitIndexWriter implements IndexWriter {
   public Map<String, Map.Entry<String, Object>> describe() {
     Map<String, Map.Entry<String, Object>> properties = new LinkedHashMap<>();
 
+    Pattern maskPasswordPattern = Pattern.compile("^amqp://[^:]+:([^@]+)@");
     properties.put(RabbitMQConstants.SERVER_URI, new AbstractMap.SimpleEntry<>(
         "URI with connection parameters in the form amqp://<username>:<password>@<hostname>:<port>/<virtualHost>",
-        this.uri));
+        StringUtil.mask(this.uri, maskPasswordPattern, '*')));
     properties.put(RabbitMQConstants.BINDING, new AbstractMap.SimpleEntry<>(
         "Whether the relationship between an exchange and a queue is created automatically. "
             + "NOTE: Binding between exchanges is not supported.",
diff --git a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
index 04c08b3..12d3ff6 100644
--- a/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
+++ b/src/plugin/indexer-solr/src/java/org/apache/nutch/indexwriter/solr/SolrIndexWriter.java
@@ -34,6 +34,7 @@ import org.apache.nutch.indexer.IndexWriterParams;
 import org.apache.nutch.indexer.IndexerMapReduce;
 import org.apache.nutch.indexer.NutchDocument;
 import org.apache.nutch.indexer.NutchField;
+import org.apache.nutch.util.StringUtil;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
@@ -328,7 +329,7 @@ public class SolrIndexWriter implements IndexWriter {
     properties.put(SolrConstants.USERNAME, new AbstractMap.SimpleEntry<>(
         "The username of Solr server.", this.username));
     properties.put(SolrConstants.PASSWORD, new AbstractMap.SimpleEntry<>(
-        "The password of Solr server.", this.password));
+        "The password of Solr server.", StringUtil.mask(this.password)));
 
     return properties;
   }
diff --git a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java
index a0a255b..0603e3d 100644
--- a/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java
+++ b/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java
@@ -30,6 +30,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.StringUtil;
 import org.apache.hadoop.conf.Configurable;
 
 /**
@@ -80,9 +81,10 @@ public class HttpBasicAuthentication implements HttpAuthentication,
         + ".password");
 
     if (LOG.isTraceEnabled()) {
-      LOG.trace("BasicAuthentication challenge is " + challenge);
-      LOG.trace("BasicAuthentication username=" + username);
-      LOG.trace("BasicAuthentication password=" + password);
+      LOG.trace("BasicAuthentication challenge is {}", challenge);
+      LOG.trace("BasicAuthentication username={}", username);
+      LOG.trace("BasicAuthentication password={} (masked)",
+          StringUtil.mask(password));
     }
 
     if (username == null) {
diff --git a/src/test/org/apache/nutch/util/TestStringUtil.java b/src/test/org/apache/nutch/util/TestStringUtil.java
index d9398df..9b82912 100644
--- a/src/test/org/apache/nutch/util/TestStringUtil.java
+++ b/src/test/org/apache/nutch/util/TestStringUtil.java
@@ -16,6 +16,8 @@
  */
 package org.apache.nutch.util;
 
+import java.util.regex.Pattern;
+
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -36,7 +38,6 @@ public class TestStringUtil {
 
     ps = StringUtil.rightPad(s, 15);
     Assert.assertTrue((s + "      ").equals(ps));
-
   }
 
   @Test
@@ -54,7 +55,28 @@ public class TestStringUtil {
 
     ps = StringUtil.leftPad(s, 15);
     Assert.assertTrue(("      " + s).equals(ps));
+  }
+
+  @Test
+  public void testMaskPasswords() {
+    String secret = "password";
+    String masked = StringUtil.mask(secret);
+    Assert.assertNotEquals(secret, masked);
+    Assert.assertEquals(secret.length(), masked.length());
+
+    char mask = 'X';
+    masked = StringUtil.mask(secret, mask);
+    Assert.assertNotEquals(secret, masked);
+    Assert.assertEquals(secret.length(), masked.length());
+    masked.chars().forEach((c) -> Assert.assertEquals(mask, c));
 
+    String strWithSecret = "amqp://username:password@example.org:5672/virtualHost";
+    Pattern maskPasswordPattern = Pattern.compile("^amqp://[^:]+:([^@]+)@");
+    masked = StringUtil.mask(strWithSecret, maskPasswordPattern, mask);
+    Assert.assertNotEquals(strWithSecret, masked);
+    Assert.assertEquals(strWithSecret.length(), masked.length());
+    Assert.assertFalse(masked.contains(secret));
+    Assert.assertTrue(masked.contains(StringUtil.mask(secret, mask)));
   }
 
 }

Reply via email to