KYLIN-2610 Optimize BuiltInFunctionTransformer performance

KYLIN-2610 fix three percent sign bug


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/b3ba338e
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/b3ba338e
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/b3ba338e

Branch: refs/heads/KYLIN-2624
Commit: b3ba338eaf889d42e02870007c1876eb32c884ee
Parents: 3976420
Author: Hongbin Ma <mahong...@apache.org>
Authored: Thu May 11 20:26:14 2017 +0800
Committer: Hongbin Ma <mahong...@apache.org>
Committed: Fri May 12 13:00:43 2017 +0800

----------------------------------------------------------------------
 .../kylin/dict/BuiltInFunctionTransformer.java  |   9 +-
 .../metadata/filter/function/BuiltInMethod.java |  39 +++--
 .../metadata/filter/function/LikeMatchers.java  | 141 +++++++++++++++++++
 3 files changed, 177 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/b3ba338e/core-dictionary/src/main/java/org/apache/kylin/dict/BuiltInFunctionTransformer.java
----------------------------------------------------------------------
diff --git 
a/core-dictionary/src/main/java/org/apache/kylin/dict/BuiltInFunctionTransformer.java
 
b/core-dictionary/src/main/java/org/apache/kylin/dict/BuiltInFunctionTransformer.java
index 6ef2f96..5196593 100755
--- 
a/core-dictionary/src/main/java/org/apache/kylin/dict/BuiltInFunctionTransformer.java
+++ 
b/core-dictionary/src/main/java/org/apache/kylin/dict/BuiltInFunctionTransformer.java
@@ -57,13 +57,13 @@ public class BuiltInFunctionTransformer implements 
ITupleFilterTransformer {
             //normal case
             translated = translateCompareTupleFilter((CompareTupleFilter) 
tupleFilter);
             if (translated != null) {
-                logger.debug("Translated {{}} to IN clause: {{}}", 
tupleFilter, translated);
+                logger.debug("Translated {{}} to IN clause. ", tupleFilter);
             }
         } else if (tupleFilter instanceof BuiltInFunctionTupleFilter) {
-            //like case
+            //like,tolower case
             translated = 
translateFunctionTupleFilter((BuiltInFunctionTupleFilter) tupleFilter);
             if (translated != null) {
-                logger.debug("Translated {{}} to IN clause: {{}}", 
tupleFilter, translated);
+                logger.debug("Translated {{}} to IN clause. ", tupleFilter);
             }
         } else if (tupleFilter instanceof LogicalTupleFilter) {
             @SuppressWarnings("unchecked")
@@ -99,6 +99,7 @@ public class BuiltInFunctionTransformer implements 
ITupleFilterTransformer {
                     translated.addChild(new ConstantTupleFilter(dictVal));
                 }
             }
+            logger.debug("getting a in clause with {} children", 
translated.getChildren().size());
         } catch (Exception e) {
             logger.debug(e.getMessage());
             return null;
@@ -175,4 +176,4 @@ public class BuiltInFunctionTransformer implements 
ITupleFilterTransformer {
         }
         return translated;
     }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/b3ba338e/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/BuiltInMethod.java
----------------------------------------------------------------------
diff --git 
a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/BuiltInMethod.java
 
b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/BuiltInMethod.java
index 2f28fae..31ee297 100644
--- 
a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/BuiltInMethod.java
+++ 
b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/BuiltInMethod.java
@@ -18,8 +18,11 @@
 
 package org.apache.kylin.metadata.filter.function;
 
+import static 
org.apache.kylin.metadata.filter.function.LikeMatchers.LikeMatcher;
+
 import java.lang.reflect.Method;
-import java.util.regex.Pattern;
+import java.util.HashMap;
+import java.util.Map;
 
 import org.apache.commons.lang3.reflect.MethodUtils;
 
@@ -28,9 +31,15 @@ import com.google.common.collect.ImmutableMap;
 public enum BuiltInMethod {
     UPPER(BuiltInMethod.class, "upper", String.class), 
LOWER(BuiltInMethod.class, "lower", String.class), 
SUBSTRING(BuiltInMethod.class, "substring", String.class, int.class, 
int.class), CHAR_LENGTH(BuiltInMethod.class, "charLength", String.class), 
LIKE(BuiltInMethod.class, "like", String.class, String.class), 
INITCAP(BuiltInMethod.class, "initcap", String.class);
     public final Method method;
-
     public static final ImmutableMap<String, BuiltInMethod> MAP;
 
+    private static ThreadLocal<Map<String, LikeMatcher>> likePatterns = new 
ThreadLocal<Map<String, LikeMatcher>>() {
+        @Override
+        public Map<String, LikeMatcher> initialValue() {
+            return new HashMap<>();
+        }
+    };
+
     static {
         final ImmutableMap.Builder<String, BuiltInMethod> builder = 
ImmutableMap.builder();
         for (BuiltInMethod value : BuiltInMethod.values()) {
@@ -46,12 +55,26 @@ public enum BuiltInMethod {
     }
 
     /** SQL {@code LIKE} function. */
-    public static boolean like(String s, String pattern) {
-        if (s == null)
+    public static boolean like(String s, String patternStr) {
+        //TODO: escape in like is unsupported
+        //TODO: like is case sensitive now
+
+        if (s == null || patternStr == null)
             return false;
-        
-        final String regex = Like.sqlToRegexLike(pattern, null);
-        return Pattern.matches(regex, s);
+
+        Map<String, LikeMatcher> patterns = likePatterns.get();
+        LikeMatcher p = patterns.get(patternStr);
+        if (p == null) {
+
+            p = LikeMatchers.createMatcher(patternStr);
+
+            if (patterns.size() > 100) {
+                patterns.clear();//brutal but good enough
+            }
+            patterns.put(patternStr, p);
+        }
+
+        return p.matches(s);
     }
 
     /** SQL INITCAP(string) function. */
@@ -117,4 +140,4 @@ public enum BuiltInMethod {
         return s.toLowerCase();
     }
 
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/kylin/blob/b3ba338e/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/LikeMatchers.java
----------------------------------------------------------------------
diff --git 
a/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/LikeMatchers.java
 
b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/LikeMatchers.java
new file mode 100644
index 0000000..ef1e412
--- /dev/null
+++ 
b/core-metadata/src/main/java/org/apache/kylin/metadata/filter/function/LikeMatchers.java
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kylin.metadata.filter.function;
+
+import java.util.regex.Pattern;
+
+import org.apache.commons.lang.StringUtils;
+
+import com.google.common.base.Preconditions;
+
+public class LikeMatchers {
+
+    private final static String PERCENT_SIGN = "%";
+    private final static String UNDERSCORE_SIGN = "_";
+
+    public interface LikeMatcher {
+        boolean matches(String input);
+    }
+
+    public static class DefaultLikeMatcher implements LikeMatcher {
+        private Pattern p;
+
+        private DefaultLikeMatcher(String patternStr) {
+            patternStr = patternStr.toLowerCase();
+            final String regex = Like.sqlToRegexLike(patternStr, null);
+            p = Pattern.compile(regex);
+        }
+
+        @Override
+        public boolean matches(String input) {
+            return p.matcher(input).matches();
+        }
+    }
+
+    // abc%, %abc, abc%def
+    public static class OnePercentSignLikeMatcher implements LikeMatcher {
+
+        enum SignPosition {
+            LEFT, MIDDLE, RIGHT
+        }
+
+        private SignPosition signPosition;
+        private String[] remaining;
+
+        private OnePercentSignLikeMatcher(String patternStr) {
+            if (patternStr.startsWith(PERCENT_SIGN)) {
+                signPosition = SignPosition.LEFT;
+            } else if (patternStr.endsWith(PERCENT_SIGN)) {
+                signPosition = SignPosition.RIGHT;
+            } else {
+                signPosition = SignPosition.MIDDLE;
+            }
+            remaining = StringUtils.split(patternStr, PERCENT_SIGN);
+        }
+
+        @Override
+        public boolean matches(String input) {
+            if (input == null)
+                return false;
+
+            switch (signPosition) {
+            case LEFT:
+                return input.endsWith(remaining[0]);
+            case RIGHT:
+                return input.startsWith(remaining[0]);
+            case MIDDLE:
+                return input.startsWith(remaining[0]) && 
input.endsWith(remaining[1]);
+            default:
+                throw new IllegalStateException();
+            }
+        }
+    }
+
+    //only deal with %abc%
+    public static class TwoPercentSignLikeMatcher implements LikeMatcher {
+        private String[] remaining;
+
+        private TwoPercentSignLikeMatcher(String patternStr) {
+            remaining = StringUtils.split(patternStr, PERCENT_SIGN);
+            Preconditions.checkState(remaining.length == 1);
+        }
+
+        @Override
+        public boolean matches(String input) {
+            return input.contains(remaining[0]);
+        }
+    }
+
+    //only deal with %abc%def%
+    public static class ThreePercentSignLikeMatcher implements LikeMatcher {
+        private String[] remaining;
+
+        private ThreePercentSignLikeMatcher(String patternStr) {
+            remaining = StringUtils.split(patternStr, PERCENT_SIGN);
+            Preconditions.checkState(remaining.length == 2);
+        }
+
+        @Override
+        public boolean matches(String input) {
+            int i = input.indexOf(remaining[0]);
+            int j = input.lastIndexOf(remaining[1]);
+            return (i != -1) && (j != -1) && (i <= j - remaining[0].length());
+        }
+    }
+
+    public static LikeMatcher createMatcher(String patternStr) {
+        if (patternStr == null) {
+            throw new IllegalArgumentException("pattern is null");
+        }
+
+        if (patternStr.contains(UNDERSCORE_SIGN)) {
+            return new DefaultLikeMatcher(patternStr);
+        }
+
+        int count = StringUtils.countMatches(patternStr, PERCENT_SIGN);
+        if (count == 1) {
+            return new OnePercentSignLikeMatcher(patternStr);
+        } else if (count == 2 && patternStr.startsWith(PERCENT_SIGN) && 
patternStr.endsWith(PERCENT_SIGN)) {
+            return new TwoPercentSignLikeMatcher(patternStr);
+        } else if (count == 3 && patternStr.startsWith(PERCENT_SIGN) && 
patternStr.endsWith(PERCENT_SIGN) && !patternStr.contains(PERCENT_SIGN + 
PERCENT_SIGN)) {
+            return new ThreePercentSignLikeMatcher(patternStr);
+        } else {
+            return new DefaultLikeMatcher(patternStr);
+        }
+    }
+}

Reply via email to