This is an automated email from the ASF dual-hosted git repository.
dlych pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new 72abd30 [ASTERIXDB-2685][FUN] regex-split avoid creating unnecessary
objects if not needed
72abd30 is described below
commit 72abd30a585da23e007b4d22daa712eb751b426e
Author: Hussain Towaileb <[email protected]>
AuthorDate: Thu Jul 16 12:02:17 2020 +0300
[ASTERIXDB-2685][FUN] regex-split avoid creating unnecessary objects if not
needed
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- Switched to using the RegExpMatcher class to avoid creating
unnecessary objects (recompiling the pattern) when the next tuple's
pattern is the same as the previous one.
Change-Id: Ie6920fb049f7b333e3d41de154839d83b5280926
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/7223
Integration-Tests: Jenkins <[email protected]>
Tested-by: Jenkins <[email protected]>
Reviewed-by: Hussain Towaileb <[email protected]>
Reviewed-by: Dmitry Lychagin <[email protected]>
---
.../evaluators/functions/StringRegExpSplitDescriptor.java | 6 +++++-
.../runtime/evaluators/functions/utils/RegExpMatcher.java | 9 +++++++++
2 files changed, 14 insertions(+), 1 deletion(-)
diff --git
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java
index da6a206..8de8c8a 100644
---
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java
+++
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java
@@ -30,6 +30,7 @@ import org.apache.asterix.om.types.ATypeTag;
import org.apache.asterix.om.types.AbstractCollectionType;
import org.apache.asterix.om.types.BuiltinType;
import
org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
+import org.apache.asterix.runtime.evaluators.functions.utils.RegExpMatcher;
import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import org.apache.hyracks.algebricks.runtime.base.IEvaluatorContext;
import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
@@ -65,10 +66,13 @@ public class StringRegExpSplitDescriptor extends
AbstractScalarFunctionDynamicDe
private final AbstractCollectionType collectionType =
new AOrderedListType(BuiltinType.ASTRING,
BuiltinType.ASTRING.getTypeName());
+ private final RegExpMatcher matcher = new RegExpMatcher();
+
@Override
protected void process(UTF8StringPointable srcPtr,
UTF8StringPointable patternPtr,
IPointable result) throws HyracksDataException {
- String[] splits =
srcPtr.toString().split(patternPtr.toString());
+ matcher.build(srcPtr, patternPtr);
+ String[] splits = matcher.split();
// Result is a list of type strings
listBuilder.reset(collectionType);
diff --git
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
index 778df5b..1a190cc 100644
---
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
+++
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
@@ -209,4 +209,13 @@ public class RegExpMatcher {
matcher.appendTail(resultBuf);
return resultBuf.toString();
}
+
+ /**
+ * Splits the {@code charSeq} field around matches of the {@code pattern} field; takes no arguments.
+ * NOTE(review): both fields appear to be set by {@code build(...)}; confirm it is always called first.
+ * @return the array of tokens returned by {@code pattern.split(charSeq)}
+ */
+ public String[] split() {
+ return pattern.split(charSeq);
+ }
}