This is an automated email from the ASF dual-hosted git repository.

kezhenxu94 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/skywalking-python.git


The following commit(s) were added to refs/heads/master by this push:
     new 5e42df2  [Enhancement] Optimized fast_path_match() (#86)
5e42df2 is described below

commit 5e42df2e94c575e46e0d65e8bbc140992004713f
Author: Tomasz Pytel <[email protected]>
AuthorDate: Sun Nov 22 10:38:33 2020 -0300

    [Enhancement] Optimized fast_path_match() (#86)
---
 skywalking/config.py            |  2 +-
 skywalking/plugins/__init__.py  | 10 ++---
 skywalking/utils/ant_matcher.py | 87 +++++++++--------------------------------
 3 files changed, 24 insertions(+), 75 deletions(-)

diff --git a/skywalking/config.py b/skywalking/config.py
index 35540b1..339747b 100644
--- a/skywalking/config.py
+++ b/skywalking/config.py
@@ -46,7 +46,7 @@ correlation_element_max_number = int(os.getenv('SW_CORRELATION_ELEMENT_MAX_NUMBE
 correlation_value_max_length = int(os.getenv('SW_CORRELATION_VALUE_MAX_LENGTH') or '128')  # type: int
 trace_ignore = True if os.getenv('SW_TRACE_IGNORE') and \
                        os.getenv('SW_TRACE_IGNORE') == 'True' else False  # type: bool
-trace_ignore_path = (os.getenv('SW_TRACE_IGNORE_PATH') or '').split(',')  # type: List[str]
+trace_ignore_path = [s.strip() for s in (os.getenv('SW_TRACE_IGNORE_PATH') or '').split(',')]  # type: List[str]
 elasticsearch_trace_dsl = True if os.getenv('SW_ELASTICSEARCH_TRACE_DSL') and \
                                   os.getenv('SW_ELASTICSEARCH_TRACE_DSL') == 'True' else False  # type: bool
 kafka_bootstrap_servers = os.getenv('SW_KAFKA_REPORTER_BOOTSTRAP_SERVERS') or "localhost:9092"  # type: str
diff --git a/skywalking/plugins/__init__.py b/skywalking/plugins/__init__.py
index 217ef81..01d297f 100644
--- a/skywalking/plugins/__init__.py
+++ b/skywalking/plugins/__init__.py
@@ -32,12 +32,12 @@ logger = logging.getLogger(__name__)
 
 
 def install():
+    disable_patterns = config.disable_plugins
+    if isinstance(disable_patterns, str):
+        disable_patterns = [re.compile(p.strip()) for p in disable_patterns.split(',') if p.strip()]
+    else:
+        disable_patterns = [re.compile(p.strip()) for p in disable_patterns if p.strip()]
     for importer, modname, ispkg in pkgutil.iter_modules(skywalking.plugins.__path__):
-        disable_patterns = config.disable_plugins
-        if isinstance(disable_patterns, str):
-            disable_patterns = [re.compile(p.strip()) for p in disable_patterns.split(',') if p.strip()]
-        else:
-            disable_patterns = [re.compile(p.strip()) for p in disable_patterns if p.strip()]
         if any(pattern.match(modname) for pattern in disable_patterns):
             logger.info('plugin %s is disabled and thus won\'t be installed', modname)
             continue
diff --git a/skywalking/utils/ant_matcher.py b/skywalking/utils/ant_matcher.py
index 41e004a..382a1a5 100644
--- a/skywalking/utils/ant_matcher.py
+++ b/skywalking/utils/ant_matcher.py
@@ -16,75 +16,24 @@
 #
 
 
-def fast_path_match(pattern: str, path: str):
-    return normal_match(pattern, 0, path, 0)
-
-
-def normal_match(pat: str, p: int, var: str, s: int) -> bool:
-    while p < len(pat):
-        pc = pat[p]
-        sc = safe_char_at(var, s)
-
-        if pc == '*':
-            p += 1
-
-            if safe_char_at(pat, p) == '*':
-                p += 1
-
-                return multi_wildcard_match(pat, p, var, s)
-            else:
-                return wildcard_match(pat, p, var, s)
-
-        if (pc == '?' and sc != '0' and sc != '/') or pc == sc:
-            s += 1
-            p += 1
-            continue
-
-        return False
-
-    return s == len(var)
-
-
-def wildcard_match(pat: str, p: int, var: str, s: int) -> bool:
-    pc = safe_char_at(pat, p)
-
-    while True:
-        sc = safe_char_at(var, s)
+import re
 
-        if sc == '/':
+reesc = re.compile(r'([.*+?^=!:${}()|\[\]\\])')
+recache = {}
 
-            if pc == sc:
-                return normal_match(pat, p + 1, var, s + 1)
 
-            return False
-
-        if normal_match(pat, p, var, s) is False:
-            if s >= len(var):
-                return False
-
-            s += 1
-            continue
-
-        return True
-
-
-def multi_wildcard_match(pat: str, p: int, var: str, s: int) -> bool:
-    if p >= len(pat) and s < len(var):
-        return var[len(var) - 1] != '/'
-
-    while True:
-        if not normal_match(pat, p, var, s):
-            if s >= len(var):
-                return False
-
-            s += 1
-            continue
-
-        return True
-
-
-def safe_char_at(value: str, index: int) -> str:
-    if index >= len(value):
-        return '0'
-
-    return value[index]
+def fast_path_match(pattern: str, path: str):
+    repat = recache.get(pattern)
+
+    if repat is None:
+        repat = recache[pattern] = \
+            re.compile('^(?:' +                       # this could handle multiple patterns in one by joining with '|'
+                       '(?:(?:[^/]+/)*[^/]+)?'.join(  # replaces "**"
+                           '[^/]*'.join(              # replaces "*"
+                               '[^/]'.join(           # replaces "?"
+                                   reesc.sub(r'\\\1', s) for s in p2.split('?')
+                               ) for p2 in p1.split('*')
+                           ) for p1 in pattern.split('**')
+                       ) + ')$')
+
+    return bool(repat.match(path))

Reply via email to