steveloughran commented on a change in pull request #2845:
URL: https://github.com/apache/hadoop/pull/2845#discussion_r618504576
##########
File path:
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java
##########
@@ -227,11 +229,30 @@ public String getLogString() {
.append(" m=")
.append(method)
.append(" u=")
- .append(getSignatureMaskedEncodedUrl());
+ .append(getMaskedEncodedUrl());
return sb.toString();
}
+ public String getMaskedUrl() {
+ if (!shouldMask) {
+ return url.toString();
+ }
+ if (this.maskedUrl != null) {
Review comment:
Remove the `this.` prefix; it is not needed.
##########
File path:
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java
##########
@@ -73,6 +98,63 @@ public static String generateUniqueTestPath() {
return testUniqueForkId == null ? "/test" : "/" + testUniqueForkId +
"/test";
}
+ public static String maskUrlQueryParameters(List<NameValuePair> keyValueList,
+ HashSet<String> queryParamsForFullMask,
+ HashSet<String> queryParamsForPartialMask) {
+ return maskUrlQueryParameters(keyValueList, queryParamsForFullMask,
+ queryParamsForPartialMask, 256);
+ }
+
+ public static String maskUrlQueryParameters(List<NameValuePair> keyValueList,
+ HashSet<String> queryParamsForFullMask,
+ HashSet<String> queryParamsForPartialMask, int queryLen) {
+ StringBuilder maskedUrl = new StringBuilder(queryLen);
+ for (NameValuePair keyValuePair : keyValueList) {
+ String key = keyValuePair.getName();
+ if (key.isEmpty()) {
+ throw new IllegalArgumentException("Query param key should not be
empty");
+ }
+ String value = keyValuePair.getValue();
+ maskedUrl.append(key);
+ maskedUrl.append(EQUAL);
+ if (value != null && !value.isEmpty()) { //no mask
+ if (queryParamsForFullMask.contains(key)) {
+ maskedUrl.append(FULL_MASK);
+ } else if (queryParamsForPartialMask.contains(key)) {
+ int valueLen = value.length();
+ int maskedLen = valueLen - Math.min(4, valueLen);
+ maskedUrl.append(StringUtils.repeat(CHAR_MASK, maskedLen));
+ maskedUrl.append(value, maskedLen, valueLen);
+ } else {
+ maskedUrl.append(value);
+ }
+ }
+ maskedUrl.append(AND_MARK);
+ }
+ maskedUrl.deleteCharAt(maskedUrl.length() - 1);
+ return maskedUrl.toString();
+ }
+
+ public static String encodedUrlStr(String url) {
+ try {
+ return URLEncoder.encode(url, "UTF-8");
+ } catch (UnsupportedEncodingException e) {
+ return "https%3A%2F%2Ffailed%2Fto%2Fencode%2Furl";
Review comment:
heh. nice.
##########
File path:
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java
##########
@@ -18,14 +18,39 @@
package org.apache.hadoop.fs.azurebfs.utils;
+import java.io.UnsupportedEncodingException;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
import java.util.regex.Pattern;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.http.NameValuePair;
+import org.apache.http.client.utils.URLEncodedUtils;
+
+import static
org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.AND_MARK;
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EQUAL;
+import static
org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SAOID;
+import static
org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SIGNATURE;
+import static
org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SKOID;
+import static
org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SUOID;
+
/**
* Utility class to help with Abfs url transformation to blob urls.
*/
public final class UriUtils {
private static final String ABFS_URI_REGEX =
"[^.]+\\.dfs\\.(preprod\\.){0,1}core\\.windows\\.net";
private static final Pattern ABFS_URI_PATTERN =
Pattern.compile(ABFS_URI_REGEX);
+ private static final HashSet<String> FULL_MASK_PARAM_KEYS = new HashSet<>(
Review comment:
Use `Set<String>` as the declared type of this variable rather than `HashSet<String>`.
##########
File path:
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java
##########
@@ -73,6 +98,63 @@ public static String generateUniqueTestPath() {
return testUniqueForkId == null ? "/test" : "/" + testUniqueForkId +
"/test";
}
+ public static String maskUrlQueryParameters(List<NameValuePair> keyValueList,
+ HashSet<String> queryParamsForFullMask,
+ HashSet<String> queryParamsForPartialMask) {
+ return maskUrlQueryParameters(keyValueList, queryParamsForFullMask,
+ queryParamsForPartialMask, 256);
+ }
+
+ public static String maskUrlQueryParameters(List<NameValuePair> keyValueList,
+ HashSet<String> queryParamsForFullMask,
+ HashSet<String> queryParamsForPartialMask, int queryLen) {
+ StringBuilder maskedUrl = new StringBuilder(queryLen);
+ for (NameValuePair keyValuePair : keyValueList) {
+ String key = keyValuePair.getName();
+ if (key.isEmpty()) {
+ throw new IllegalArgumentException("Query param key should not be
empty");
+ }
+ String value = keyValuePair.getValue();
+ maskedUrl.append(key);
+ maskedUrl.append(EQUAL);
+ if (value != null && !value.isEmpty()) { //no mask
+ if (queryParamsForFullMask.contains(key)) {
+ maskedUrl.append(FULL_MASK);
+ } else if (queryParamsForPartialMask.contains(key)) {
+ int valueLen = value.length();
+ int maskedLen = valueLen - Math.min(4, valueLen);
Review comment:
Make this 4 a constant too; that allows for different levels of masking to be
used in future.
##########
File path:
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java
##########
@@ -73,6 +98,63 @@ public static String generateUniqueTestPath() {
return testUniqueForkId == null ? "/test" : "/" + testUniqueForkId +
"/test";
}
+ public static String maskUrlQueryParameters(List<NameValuePair> keyValueList,
+ HashSet<String> queryParamsForFullMask,
Review comment:
And have these methods take `Set<>` too, just to hide the detail that a
`HashSet` is used.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]