Repository: hadoop
Updated Branches:
refs/heads/branch-2 92ff52418 -> 3d36d4737
HADOOP-11506. Configuration variable expansion regex expensive for long values.
(Gera Shegalov via gera)
(cherry picked from commit 644548f201743408904dfe24b9f5b515b2c96713)
Conflicts:
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/3d36d473
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/3d36d473
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/3d36d473
Branch: refs/heads/branch-2
Commit: 3d36d4737c160d7dc8829e9dd6b801ef6726c0c0
Parents: 92ff524
Author: Gera Shegalov <[email protected]>
Authored: Fri Feb 6 01:08:32 2015 -0800
Committer: Gera Shegalov <[email protected]>
Committed: Fri Feb 6 01:44:14 2015 -0800
----------------------------------------------------------------------
hadoop-common-project/hadoop-common/CHANGES.txt | 3 +
.../org/apache/hadoop/conf/Configuration.java | 100 +++++++++++++++++--
.../apache/hadoop/conf/TestConfiguration.java | 66 ++++++++++++
3 files changed, 159 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d36d473/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt
b/hadoop-common-project/hadoop-common/CHANGES.txt
index 4b0faf4..41efde0 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -192,6 +192,9 @@ Release 2.7.0 - UNRELEASED
HADOOP-11188. hadoop-azure: automatically expand page blobs when they
become
full. (Eric Hanson via cnauroth)
+ HADOOP-11506. Configuration variable expansion regex expensive for long
+ values. (Gera Shegalov via gera)
+
BUG FIXES
HADOOP-11488. Difference in default connection timeout for S3A FS
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d36d473/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
----------------------------------------------------------------------
diff --git
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
index 927d4b6..4271c11 100644
---
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
+++
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
@@ -844,25 +844,101 @@ public class Configuration implements
Iterable<Map.Entry<String,String>>,
resources.add(resource); // add to resources
reloadConfiguration();
}
-
- private static final Pattern VAR_PATTERN =
- Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}");
private static final int MAX_SUBST = 20;
+ private static final int SUB_START_IDX = 0;
+ private static final int SUB_END_IDX = SUB_START_IDX + 1;
+
+ /**
+ * This is a manual implementation of the following regex
+ * "\\$\\{[^\\}\\$\u0020]+\\}". It can be 15x more efficient than
+ * a regex matcher as demonstrated by HADOOP-11506. This is noticeable with
+ * Hadoop apps building on the assumption Configuration#get is an O(1)
+ * hash table lookup, especially when the eval is a long string.
+ *
+ * @param eval a string that may contain variables requiring expansion.
+ * @return a 2-element int array res such that
+ * eval.substring(res[0], res[1]) is "var" for the left-most occurrence of
+ * ${var} in eval. If no variable is found -1, -1 is returned.
+ */
+ private static int[] findSubVariable(String eval) {
+ int[] result = {-1, -1};
+
+ int matchStart;
+ int leftBrace;
+
+ // scanning for a brace first because it's less frequent than $
+ // that can occur in nested class names
+ //
+ match_loop:
+ for (matchStart = 1, leftBrace = eval.indexOf('{', matchStart);
+ // minimum left brace position (follows '$')
+ leftBrace > 0
+ // right brace of a smallest valid expression "${c}"
+ && leftBrace + "{c".length() < eval.length();
+ leftBrace = eval.indexOf('{', matchStart)) {
+ int matchedLen = 0;
+ if (eval.charAt(leftBrace - 1) == '$') {
+ int subStart = leftBrace + 1; // after '{'
+ for (int i = subStart; i < eval.length(); i++) {
+ switch (eval.charAt(i)) {
+ case '}':
+ if (matchedLen > 0) { // match
+ result[SUB_START_IDX] = subStart;
+ result[SUB_END_IDX] = subStart + matchedLen;
+ break match_loop;
+ }
+ // fall through to skip 1 char
+ case ' ':
+ case '$':
+ matchStart = i + 1;
+ continue match_loop;
+ default:
+ matchedLen++;
+ }
+ }
+ // scanned from "${" to the end of eval, and no reset via ' ', '$':
+ // no match!
+ break match_loop;
+ } else {
+ // not a start of a variable
+ //
+ matchStart = leftBrace + 1;
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Attempts to repeatedly expand the value {@code expr} by replacing the
+ * left-most substring of the form "${var}" in the following precedence order
+ * <ol>
+ * <li>by the value of the Java system property "var" if defined</li>
+ * <li>by the value of the configuration key "var" if defined</li>
+ * </ol>
+ *
+ * If var is unbounded the current state of expansion "prefix${var}suffix" is
+ * returned.
+ *
+ * @param expr the literal value of a config key
+ * @return null if expr is null, otherwise the value resulting from expanding
+ * expr using the algorithm above.
+ * @throws IllegalArgumentException when more than
+ * {@link Configuration#MAX_SUBST} replacements are required
+ */
private String substituteVars(String expr) {
if (expr == null) {
return null;
}
- Matcher match = VAR_PATTERN.matcher("");
String eval = expr;
- for(int s=0; s<MAX_SUBST; s++) {
- match.reset(eval);
- if (!match.find()) {
+ for (int s = 0; s < MAX_SUBST; s++) {
+ final int[] varBounds = findSubVariable(eval);
+ if (varBounds[SUB_START_IDX] == -1) {
return eval;
}
- String var = match.group();
- var = var.substring(2, var.length()-1); // remove ${ .. }
+ final String var = eval.substring(varBounds[SUB_START_IDX],
+ varBounds[SUB_END_IDX]);
String val = null;
try {
val = System.getProperty(var);
@@ -875,8 +951,12 @@ public class Configuration implements
Iterable<Map.Entry<String,String>>,
if (val == null) {
return eval; // return literal ${var}: var is unbound
}
+ final int dollar = varBounds[SUB_START_IDX] - "${".length();
+ final int afterRightBrace = varBounds[SUB_END_IDX] + "}".length();
// substitute
- eval = eval.substring(0, match.start())+val+eval.substring(match.end());
+ eval = eval.substring(0, dollar)
+ + val
+ + eval.substring(afterRightBrace);
}
throw new IllegalStateException("Variable substitution depth too large: "
+ MAX_SUBST + " " + expr);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d36d473/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
----------------------------------------------------------------------
diff --git
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
index 5aa8350..a4020c7 100644
---
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
+++
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
@@ -49,6 +49,8 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.net.NetUtils;
import static org.apache.hadoop.util.PlatformName.IBM_JAVA;
+import static org.junit.Assert.fail;
+
import org.codehaus.jackson.map.ObjectMapper;
public class TestConfiguration extends TestCase {
@@ -1194,6 +1196,70 @@ public class TestConfiguration extends TestCase {
}
}
+ public void testInvalidSubstitutation() {
+ final Configuration configuration = new Configuration(false);
+
+ // 2-var loops
+ //
+ final String key = "test.random.key";
+ for (String keyExpression : Arrays.asList(
+ "${" + key + "}",
+ "foo${" + key + "}",
+ "foo${" + key + "}bar",
+ "${" + key + "}bar")) {
+ configuration.set(key, keyExpression);
+ checkSubDepthException(configuration, key);
+ }
+
+ //
+ // 3-variable loops
+ //
+
+ final String expVal1 = "${test.var2}";
+ String testVar1 = "test.var1";
+ configuration.set(testVar1, expVal1);
+ configuration.set("test.var2", "${test.var3}");
+ configuration.set("test.var3", "${test.var1}");
+ checkSubDepthException(configuration, testVar1);
+
+ // 3-variable loop with non-empty value prefix/suffix
+ //
+ final String expVal2 = "foo2${test.var2}bar2";
+ configuration.set(testVar1, expVal2);
+ configuration.set("test.var2", "foo3${test.var3}bar3");
+ configuration.set("test.var3", "foo1${test.var1}bar1");
+ checkSubDepthException(configuration, testVar1);
+ }
+
+ private static void checkSubDepthException(Configuration configuration,
+ String key) {
+ try {
+ configuration.get(key);
+ fail("IllegalStateException depth too large not thrown");
+ } catch (IllegalStateException e) {
+ assertTrue("Unexpected exception text: " + e,
+ e.getMessage().contains("substitution depth"));
+ }
+ }
+
+ public void testIncompleteSubbing() {
+ Configuration configuration = new Configuration(false);
+ String key = "test.random.key";
+ for (String keyExpression : Arrays.asList(
+ "{}",
+ "${}",
+ "{" + key,
+ "${" + key,
+ "foo${" + key,
+ "foo${" + key + "bar",
+ "foo{" + key + "}bar",
+ "${" + key + "bar")) {
+ configuration.set(key, keyExpression);
+ String value = configuration.get(key);
+ assertTrue("Unexpected value " + value, value.equals(keyExpression));
+ }
+ }
+
public void testGetClassByNameOrNull() throws Exception {
Configuration config = new Configuration();
Class<?> clazz = config.getClassByNameOrNull("java.lang.Object");