HIVE-11842: Improve RuleRegExp by caching some internal data structures (Jesus Camacho Rodriguez, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/79244ab4 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/79244ab4 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/79244ab4 Branch: refs/heads/beeline-cli Commit: 79244ab453823b8787b70a08f923e25c2abbd0bf Parents: 8d524e0 Author: Jesus Camacho Rodriguez <[email protected]> Authored: Thu Sep 17 17:46:55 2015 +0100 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Thu Sep 17 17:46:55 2015 +0100 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/lib/RuleRegExp.java | 61 ++++++++++++++++---- 1 file changed, 51 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/79244ab4/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java index fd5f133..1e850d6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java @@ -19,7 +19,9 @@ package org.apache.hadoop.hive.ql.lib; import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; +import java.util.Map; import java.util.Set; import java.util.Stack; import java.util.regex.Matcher; @@ -125,6 +127,12 @@ public class RuleRegExp implements Rule { */ private int costPatternWithoutWildCardChar(Stack<Node> stack) throws SemanticException { int numElems = (stack != null ? stack.size() : 0); + + // No elements + if (numElems == 0) { + return -1; + } + int patLen = patternWithoutWildCardChar.length(); StringBuilder name = new StringBuilder(patLen + numElems); for (int pos = numElems - 1; pos >= 0; pos--) { @@ -133,9 +141,8 @@ public class RuleRegExp implements Rule { if (name.length() >= patLen) { if (patternWithoutWildCardChar.contentEquals(name)) { return patLen; - } else { - return -1; } + break; } } return -1; @@ -152,20 +159,54 @@ public class RuleRegExp implements Rule { */ private int costPatternWithORWildCardChar(Stack<Node> stack) throws SemanticException { int numElems = (stack != null ? stack.size() : 0); + + // No elements + if (numElems == 0) { + return -1; + } + + // These DS are used to cache previously created String + Map<Integer,String> cachedNames = new HashMap<Integer,String>(); + int maxDepth = numElems; + int maxLength = 0; + + // For every pattern for (String pattern : patternORWildChar) { int patLen = pattern.length(); - StringBuilder name = new StringBuilder(patLen + numElems); - for (int pos = numElems - 1; pos >= 0; pos--) { - String nodeName = stack.get(pos).getName() + "%"; - name.insert(0, nodeName); - if (name.length() >= patLen) { - if (pattern.contentEquals(name)) { - return patLen; - } else { + // If the stack has been explored already till that level, + // obtained cached String + if (cachedNames.containsKey(patLen)) { + if (pattern.contentEquals(cachedNames.get(patLen))) { + return patLen; + } + } else if (maxLength >= patLen) { + // We have already explored the stack deep enough, but + // we do not have a matching + continue; + } else { + // We are going to build the name + StringBuilder name = new StringBuilder(patLen + numElems); + if (maxLength != 0) { + name.append(cachedNames.get(maxLength)); + } + for (int pos = maxDepth - 1; pos >= 0; pos--) { + String nodeName = stack.get(pos).getName() + "%"; + name.insert(0, nodeName); + + // We cache the values + cachedNames.put(name.length(), name.toString()); + maxLength = name.length(); + maxDepth--; + + if (name.length() >= patLen) { + if (pattern.contentEquals(name)) { + return patLen; + } break; } } + } } return -1;
