This is an automated email from the ASF dual-hosted git repository.

dlych pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 72abd30  [ASTERIXDB-2685][FUN] regex-split avoid creating unnecessary objects if not needed
72abd30 is described below

commit 72abd30a585da23e007b4d22daa712eb751b426e
Author: Hussain Towaileb <[email protected]>
AuthorDate: Thu Jul 16 12:02:17 2020 +0300

    [ASTERIXDB-2685][FUN] regex-split avoid creating unnecessary objects if not needed
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    - Switched to using the RegExpMatcher class to avoid creating
      unnecessary objects (compiling patterns) if the next tuple
      pattern is similar to the previous one.
    
    Change-Id: Ie6920fb049f7b333e3d41de154839d83b5280926
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/7223
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Hussain Towaileb <[email protected]>
    Reviewed-by: Dmitry Lychagin <[email protected]>
---
 .../evaluators/functions/StringRegExpSplitDescriptor.java        | 6 +++++-
 .../runtime/evaluators/functions/utils/RegExpMatcher.java        | 9 +++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java
index da6a206..8de8c8a 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java
@@ -30,6 +30,7 @@ import org.apache.asterix.om.types.ATypeTag;
 import org.apache.asterix.om.types.AbstractCollectionType;
 import org.apache.asterix.om.types.BuiltinType;
 import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
+import org.apache.asterix.runtime.evaluators.functions.utils.RegExpMatcher;
 import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
 import org.apache.hyracks.algebricks.runtime.base.IEvaluatorContext;
 import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
@@ -65,10 +66,13 @@ public class StringRegExpSplitDescriptor extends AbstractScalarFunctionDynamicDe
                     private final AbstractCollectionType collectionType =
                             new AOrderedListType(BuiltinType.ASTRING, BuiltinType.ASTRING.getTypeName());
 
+                    private final RegExpMatcher matcher = new RegExpMatcher();
+
                     @Override
                     protected void process(UTF8StringPointable srcPtr, UTF8StringPointable patternPtr,
                             IPointable result) throws HyracksDataException {
-                        String[] splits = srcPtr.toString().split(patternPtr.toString());
+                        matcher.build(srcPtr, patternPtr);
+                        String[] splits = matcher.split();
 
                         // Result is a list of type strings
                         listBuilder.reset(collectionType);
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
index 778df5b..1a190cc 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
@@ -209,4 +209,13 @@ public class RegExpMatcher {
         matcher.appendTail(resultBuf);
         return resultBuf.toString();
     }
+
+    /**
+     * Splits the provided source string using the provided regular expression.
+     *
+     * @return the string split tokens
+     */
+    public String[] split() {
+        return pattern.split(charSeq);
+    }
 }

Reply via email to