This is an automated email from the ASF dual-hosted git repository.

htowaileb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new db0e80e  [ASTERIXDB-2680][FUN] Add support to regexp_matches() and regexp_split()
db0e80e is described below

commit db0e80ee91adab00bdcb2a34aa649e1ebfb41a9d
Author: Hussain Towaileb <hussain.towai...@couchbase.com>
AuthorDate: Wed Dec 4 13:11:29 2019 +0300

    [ASTERIXDB-2680][FUN] Add support to regexp_matches() and regexp_split()
    
    - user model changes: yes
    - storage format changes: no
    - interface changes: no
    
    Details:
    - Added support to regexp_matches().
    - Added support to regexp_split().
    - Added test cases for regexp_matches().
    - Added test cases for regexp_split().
    - Changed behavior of UTF8CharSequence when
      subSequence is called with start = end: originally
      it returned a null char[] array, which caused an NPE;
      now it returns an empty char[].
    
    Change-Id: Iccf5ba14f5c8b8cf4bcd6dd6e412bb515d68dd74
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/4243
    Contrib: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
    Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
    Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Hussain Towaileb <hussai...@gmail.com>
    Reviewed-by: Dmitry Lychagin <dmitry.lycha...@couchbase.com>
---
 .../001/regexp_matches.000.ddl.sqlpp               |  30 ++++++
 .../001/regexp_matches.001.update.sqlpp            |  29 ++++++
 .../001/regexp_matches.002.query.sqlpp             |  22 ++++
 .../001/regexp_matches.003.ddl.sqlpp               |  20 ++++
 .../002/regexp_matches.000.ddl.sqlpp               |  30 ++++++
 .../002/regexp_matches.001.update.sqlpp            |  28 ++++++
 .../002/regexp_matches.002.query.sqlpp             |  28 ++++++
 .../002/regexp_matches.003.ddl.sqlpp               |  20 ++++
 .../003/regexp_matches.000.query.sqlpp             |  27 +++++
 .../004/regexp_matches.000.query.sqlpp             |  26 +++++
 .../regexp_split/001/regexp_split.000.ddl.sqlpp    |  30 ++++++
 .../regexp_split/001/regexp_split.001.update.sqlpp |  36 +++++++
 .../regexp_split/001/regexp_split.002.query.sqlpp  |  22 ++++
 .../regexp_split/001/regexp_split.003.ddl.sqlpp    |  20 ++++
 .../regexp_split/002/regexp_split.000.ddl.sqlpp    |  30 ++++++
 .../regexp_split/002/regexp_split.001.update.sqlpp |  28 ++++++
 .../regexp_split/002/regexp_split.002.query.sqlpp  |  28 ++++++
 .../regexp_split/002/regexp_split.003.ddl.sqlpp    |  20 ++++
 .../regexp_split/003/regexp_split.000.query.sqlpp  |  34 +++++++
 .../regexp_split/004/regexp_split.000.query.sqlpp  |  26 +++++
 .../regexp_matches/001/regexp_matches.002.adm      |   6 ++
 .../regexp_matches/002/regexp_matches.002.adm      |   1 +
 .../regexp_matches/003/regexp_matches.000.adm      |   1 +
 .../regexp_matches/004/regexp_matches.000.adm      |   1 +
 .../string/regexp_split/001/regexp_split.002.adm   |  13 +++
 .../string/regexp_split/002/regexp_split.002.adm   |   1 +
 .../string/regexp_split/003/regexp_split.000.adm   |   1 +
 .../string/regexp_split/004/regexp_split.000.adm   |   1 +
 .../test/resources/runtimets/testsuite_sqlpp.xml   |  40 ++++++++
 .../lang/common/util/CommonFunctionMapUtil.java    |   2 +
 .../asterix/om/functions/BuiltinFunctions.java     |   6 ++
 .../functions/AbstractBinaryStringEval.java        |  10 +-
 .../functions/StringRegExpMatchesDescriptor.java   | 111 +++++++++++++++++++++
 .../functions/StringRegExpSplitDescriptor.java     | 108 ++++++++++++++++++++
 .../evaluators/functions/utils/RegExpMatcher.java  |  10 ++
 .../runtime/functions/FunctionCollection.java      |   4 +
 .../hyracks/data/std/util/UTF8CharSequence.java    |   3 +
 37 files changed, 848 insertions(+), 5 deletions(-)

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.000.ddl.sqlpp
new file mode 100644
index 0000000..dda55b0
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.000.ddl.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop  dataverse test if exists;
+create  dataverse test;
+use test;
+
+drop type test if exists;
+create type test as open {
+id: int32
+};
+
+drop dataset test if exists;
+create dataset test(test) primary key id;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.001.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.001.update.sqlpp
new file mode 100644
index 0000000..e15e988
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.001.update.sqlpp
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+insert into test([
+{"id": 1, "f1": "So, 'twas better Betty Botter bought a bit of better butter", "f2": "\\b[Bb]\\w+"},
+{"id": 2, "f1": "So, 'twas better Betty Botter bought a bit of better butter", "f2": "\\b[Bb]\\w+ \\b[Bb]\\w+"},
+{"id": 3, "f1": "abracadabra", "f2": "[abc]"},
+{"id": 4, "f1": "abc", "f2": ""},
+{"id": 5, "f1": "abc", "f2": "123"},
+{"id": 6, "f1": "", "f2": ""}
+]);
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.002.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.002.query.sqlpp
new file mode 100644
index 0000000..83f2d9a
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.002.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+select value regexp_matches(f1, f2) from test order by id asc;
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.003.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.003.ddl.sqlpp
new file mode 100644
index 0000000..269f673
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.003.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop  dataverse test;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.000.ddl.sqlpp
new file mode 100644
index 0000000..dda55b0
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.000.ddl.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop  dataverse test if exists;
+create  dataverse test;
+use test;
+
+drop type test if exists;
+create type test as open {
+id: int32
+};
+
+drop dataset test if exists;
+create dataset test(test) primary key id;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.001.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.001.update.sqlpp
new file mode 100644
index 0000000..d77abf0
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.001.update.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+insert into test([
+{"id": 1, "f1": missing, "f2": null},
+{"id": 2, "f1": null, "f2": missing},
+{"id": 3, "f1": null, "f2": "[abc]"},
+{"id": 4, "f1": 13, "f2": ""},
+{"id": 5, "f1": "abc", "f2": true}
+]);
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.002.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.002.query.sqlpp
new file mode 100644
index 0000000..2ae8bbb
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.002.query.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+select value [
+(select value regexp_matches(f1, f2) is missing from test where id = 1)[0],
+(select value regexp_matches(f1, f2) is missing from test where id = 2)[0],
+(select value regexp_matches(f1, f2) is null from test where id = 3)[0],
+(select value regex_matches(f1, f2) is null from test where id = 4)[0],
+(select value regex_matches(f1, f2) is null from test where id = 5)[0]
+];
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.003.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.003.ddl.sqlpp
new file mode 100644
index 0000000..269f673
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.003.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop  dataverse test;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/003/regexp_matches.000.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/003/regexp_matches.000.query.sqlpp
new file mode 100644
index 0000000..a387704
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/003/regexp_matches.000.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+[
+regexp_matches("So, 'twas better Betty Botter bought a bit of better butter", "\\b[Bb]\\w+"),
+regexp_matches("So, 'twas better Betty Botter bought a bit of better butter", "\\b[Bb]\\w+ \\b[Bb]\\w+"),
+regexp_matches("abracadabra", "[abc]"),
+regex_matches("abc", ""),
+regex_matches("abc", "123"),
+regex_matches("", "")
+];
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/004/regexp_matches.000.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/004/regexp_matches.000.query.sqlpp
new file mode 100644
index 0000000..f127ce3
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/004/regexp_matches.000.query.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+[
+regexp_matches(missing, null) is missing,
+regexp_matches(null, missing) is missing,
+regexp_matches(null, "[abc]") is null,
+regexp_matches(13, "") is null,
+regexp_matches("abc", true) is null
+];
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.000.ddl.sqlpp
new file mode 100644
index 0000000..dda55b0
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.000.ddl.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop  dataverse test if exists;
+create  dataverse test;
+use test;
+
+drop type test if exists;
+create type test as open {
+id: int32
+};
+
+drop dataset test if exists;
+create dataset test(test) primary key id;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.001.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.001.update.sqlpp
new file mode 100644
index 0000000..d4f38c5
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.001.update.sqlpp
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+insert into test([
+{"id": 1, "f1": "C:\\Program Files\\asterixdb\\server\\bin", "f2": "[\\\\]"},
+{"id": 2, "f1": "/opt/asterixdb/bin", "f2": "/"},
+{"id": 3, "f1": "a + b - c * d / e < f > g >= h <= i == j", "f2": "\\s*[a-zA-Z]+\\s*"},
+{"id": 4, "f1": "abc", "f2": ""},
+{"id": 5, "f1": "abc", "f2": "/"},
+{"id": 6, "f1": "", "f2": ""},
+{"id": 7, "f1": "", "f2": "1"},
+{"id": 8, "f1": "1", "f2": ""},
+{"id": 9, "f1": "1", "f2": "2"},
+{"id": 10, "f1": "1", "f2": "1"},
+{"id": 11, "f1": "12", "f2": "1"},
+{"id": 12, "f1": "12", "f2": "2"},
+{"id": 13, "f1": "121", "f2": "2"}
+]);
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.002.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.002.query.sqlpp
new file mode 100644
index 0000000..897b20b
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.002.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+select value regexp_split(f1, f2) from test order by id asc;
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.003.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.003.ddl.sqlpp
new file mode 100644
index 0000000..269f673
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.003.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop  dataverse test;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.000.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.000.ddl.sqlpp
new file mode 100644
index 0000000..dda55b0
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.000.ddl.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop  dataverse test if exists;
+create  dataverse test;
+use test;
+
+drop type test if exists;
+create type test as open {
+id: int32
+};
+
+drop dataset test if exists;
+create dataset test(test) primary key id;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.001.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.001.update.sqlpp
new file mode 100644
index 0000000..d77abf0
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.001.update.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+insert into test([
+{"id": 1, "f1": missing, "f2": null},
+{"id": 2, "f1": null, "f2": missing},
+{"id": 3, "f1": null, "f2": "[abc]"},
+{"id": 4, "f1": 13, "f2": ""},
+{"id": 5, "f1": "abc", "f2": true}
+]);
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.002.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.002.query.sqlpp
new file mode 100644
index 0000000..bbb8b7b
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.002.query.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+select value [
+(select value regexp_split(f1, f2) is missing from test where id = 1)[0],
+(select value regexp_split(f1, f2) is missing from test where id = 2)[0],
+(select value regexp_split(f1, f2) is null from test where id = 3)[0],
+(select value regexp_split(f1, f2) is null from test where id = 4)[0],
+(select value regexp_split(f1, f2) is null from test where id = 5)[0]
+];
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.003.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.003.ddl.sqlpp
new file mode 100644
index 0000000..269f673
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.003.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop  dataverse test;
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/003/regexp_split.000.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/003/regexp_split.000.query.sqlpp
new file mode 100644
index 0000000..a48af93
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/003/regexp_split.000.query.sqlpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+[
+regexp_split("C:\\Program Files\\asterixdb\\server\\bin", "[\\\\]"),
+regexp_split("/opt/asterixdb/bin", "/"),
+regexp_split("a + b - c * d / e < f > g >= h <= i == j", "\\s*[a-zA-Z]+\\s*"),
+regexp_split("abc", ""),
+regexp_split("abc", "/"),
+regexp_split("", ""),
+regex_split("", "1"),
+regex_split("1", ""),
+regex_split("1", "2"),
+regex_split("1", "1"),
+regex_split("12", "1"),
+regex_split("12", "2"),
+regex_split("121", "2")
+];
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/004/regexp_split.000.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/004/regexp_split.000.query.sqlpp
new file mode 100644
index 0000000..69d0ca7
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/004/regexp_split.000.query.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+[
+regexp_split(missing, null) is missing,
+regexp_split(null, missing) is missing,
+regexp_split(null, "[abc]") is null,
+regexp_split(13, "") is null,
+regexp_split("abc", true) is null
+];
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/001/regexp_matches.002.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/001/regexp_matches.002.adm
new file mode 100644
index 0000000..1aa8d8c
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/001/regexp_matches.002.adm
@@ -0,0 +1,6 @@
+[ "better", "Betty", "Botter", "bought", "bit", "better", "butter" ]
+[ "better Betty", "Botter bought", "better butter" ]
+[ "a", "b", "a", "c", "a", "a", "b", "a" ]
+[ "", "", "", "" ]
+[  ]
+[ "" ]
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/002/regexp_matches.002.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/002/regexp_matches.002.adm
new file mode 100644
index 0000000..2f00e1d
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/002/regexp_matches.002.adm
@@ -0,0 +1 @@
+[ true, true, true, true, true ]
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/003/regexp_matches.000.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/003/regexp_matches.000.adm
new file mode 100644
index 0000000..32f80be
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/003/regexp_matches.000.adm
@@ -0,0 +1 @@
+[ [ "better", "Betty", "Botter", "bought", "bit", "better", "butter" ], [ 
"better Betty", "Botter bought", "better butter" ], [ "a", "b", "a", "c", "a", 
"a", "b", "a" ], [ "", "", "", "" ], [  ], [ "" ] ]
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/004/regexp_matches.000.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/004/regexp_matches.000.adm
new file mode 100644
index 0000000..2f00e1d
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/004/regexp_matches.000.adm
@@ -0,0 +1 @@
+[ true, true, true, true, true ]
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/001/regexp_split.002.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/001/regexp_split.002.adm
new file mode 100644
index 0000000..96a4a2e
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/001/regexp_split.002.adm
@@ -0,0 +1,13 @@
+[ "C:", "Program Files", "asterixdb", "server", "bin" ]
+[ "", "opt", "asterixdb", "bin" ]
+[ "", "+", "-", "*", "/", "<", ">", ">=", "<=", "==" ]
+[ "a", "b", "c" ]
+[ "abc" ]
+[ "" ]
+[ "" ]
+[ "1" ]
+[ "1" ]
+[  ]
+[ "", "2" ]
+[ "1" ]
+[ "1", "1" ]
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/002/regexp_split.002.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/002/regexp_split.002.adm
new file mode 100644
index 0000000..2f00e1d
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/002/regexp_split.002.adm
@@ -0,0 +1 @@
+[ true, true, true, true, true ]
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/003/regexp_split.000.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/003/regexp_split.000.adm
new file mode 100644
index 0000000..796e544
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/003/regexp_split.000.adm
@@ -0,0 +1 @@
+[ [ "C:", "Program Files", "asterixdb", "server", "bin" ], [ "", "opt", 
"asterixdb", "bin" ], [ "", "+", "-", "*", "/", "<", ">", ">=", "<=", "==" ], [ 
"a", "b", "c" ], [ "abc" ], [ "" ], [ "" ], [ "1" ], [ "1" ], [  ], [ "", "2" 
], [ "1" ], [ "1", "1" ] ]
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/004/regexp_split.000.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/004/regexp_split.000.adm
new file mode 100644
index 0000000..2f00e1d
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/004/regexp_split.000.adm
@@ -0,0 +1 @@
+[ true, true, true, true, true ]
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index 5bb68e0..201ec6b 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -9213,6 +9213,26 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="string">
+      <compilation-unit name="regexp_matches/001">
+        <output-dir compare="Text">regexp_matches/001</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="string">
+      <compilation-unit name="regexp_matches/002">
+        <output-dir compare="Text">regexp_matches/002</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="string">
+      <compilation-unit name="regexp_matches/003">
+        <output-dir compare="Text">regexp_matches/003</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="string">
+      <compilation-unit name="regexp_matches/004">
+        <output-dir compare="Text">regexp_matches/004</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="string">
       <compilation-unit name="regexp_position/offset0/regexp_position">
         <output-dir 
compare="Text">regexp_position/offset0/regexp_position</output-dir>
       </compilation-unit>
@@ -9333,6 +9353,26 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="string">
+      <compilation-unit name="regexp_split/001">
+        <output-dir compare="Text">regexp_split/001</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="string">
+      <compilation-unit name="regexp_split/002">
+        <output-dir compare="Text">regexp_split/002</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="string">
+      <compilation-unit name="regexp_split/003">
+        <output-dir compare="Text">regexp_split/003</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="string">
+      <compilation-unit name="regexp_split/004">
+        <output-dir compare="Text">regexp_split/004</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="string">
       <compilation-unit name="repeat">
         <output-dir compare="Text">repeat</output-dir>
       </compilation-unit>
diff --git 
a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/CommonFunctionMapUtil.java
 
b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/CommonFunctionMapUtil.java
index a7ec834..c87f4dc 100644
--- 
a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/CommonFunctionMapUtil.java
+++ 
b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/CommonFunctionMapUtil.java
@@ -70,6 +70,8 @@ public class CommonFunctionMapUtil {
         addFunctionMapping("regex_position1", "regexp-position1");
         addFunctionMapping("regexp_pos1", "regexp-position1");
         addFunctionMapping("regex_replace", "regexp-replace");
+        addFunctionMapping("regex_matches", "regexp-matches");
+        addFunctionMapping("regex_split", "regexp-split");
 
         // Type functions.
         addFunctionMapping("isnull", "is-null"); // isnull, internal: is-null
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
index 53ed003..94303a7 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
@@ -440,6 +440,10 @@ public class BuiltinFunctions {
             new FunctionIdentifier(FunctionConstants.ASTERIX_NS, 
"regexp-replace", 3);
     public static final FunctionIdentifier STRING_REGEXP_REPLACE_WITH_FLAG =
             new FunctionIdentifier(FunctionConstants.ASTERIX_NS, 
"regexp-replace", 4);
+    public static final FunctionIdentifier STRING_REGEXP_MATCHES =
+            new FunctionIdentifier(FunctionConstants.ASTERIX_NS, 
"regexp-matches", 2);
+    public static final FunctionIdentifier STRING_REGEXP_SPLIT =
+            new FunctionIdentifier(FunctionConstants.ASTERIX_NS, 
"regexp-split", 2);
     public static final FunctionIdentifier STRING_LOWERCASE =
             new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "lowercase", 
1);
     public static final FunctionIdentifier STRING_UPPERCASE =
@@ -1744,6 +1748,8 @@ public class BuiltinFunctions {
         addFunction(STRING_REGEXP_REPLACE, 
UniformInputTypeComputer.STRING_STRING_INSTANCE, true);
         addFunction(STRING_REGEXP_REPLACE_WITH_FLAG, 
AStringTypeComputer.INSTANCE_NULLABLE, true);
         addFunction(STRING_REPLACE, 
UniformInputTypeComputer.STRING_STRING_INSTANCE, true);
+        addFunction(STRING_REGEXP_MATCHES, 
UniformInputTypeComputer.STRING_STRING_LIST_INSTANCE, true);
+        addFunction(STRING_REGEXP_SPLIT, 
UniformInputTypeComputer.STRING_STRING_LIST_INSTANCE, true);
         addFunction(STRING_REPLACE_WITH_LIMIT, 
AStringTypeComputer.INSTANCE_NULLABLE, true);
         addFunction(STRING_REVERSE, 
UniformInputTypeComputer.STRING_STRING_INSTANCE, true);
         addFunction(SUBSTRING_BEFORE, 
UniformInputTypeComputer.STRING_STRING_INSTANCE, true);
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java
index 65fba47..2fc8654 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java
@@ -46,8 +46,8 @@ public abstract class AbstractBinaryStringEval implements 
IScalarEvaluator {
     // Argument pointables.
     private final IPointable argPtrLeft = new VoidPointable();
     private final IPointable argPtrSecond = new VoidPointable();
-    private final UTF8StringPointable leftPtr = new UTF8StringPointable();
-    private final UTF8StringPointable rightPtr = new UTF8StringPointable();
+    private final UTF8StringPointable leftStringPointable = new 
UTF8StringPointable();
+    private final UTF8StringPointable rightStringPointable = new 
UTF8StringPointable();
 
     // For results.
     protected final ArrayBackedValueStorage resultStorage = new 
ArrayBackedValueStorage();
@@ -100,12 +100,12 @@ public abstract class AbstractBinaryStringEval implements 
IScalarEvaluator {
         }
 
         // Sets StringUTF8Pointables.
-        leftPtr.set(bytes0, offset0 + 1, len0 - 1);
-        rightPtr.set(bytes1, offset1 + 1, len1 - 1);
+        leftStringPointable.set(bytes0, offset0 + 1, len0 - 1);
+        rightStringPointable.set(bytes1, offset1 + 1, len1 - 1);
 
         // The actual processing.
         try {
-            process(leftPtr, rightPtr, resultPointable);
+            process(leftStringPointable, rightStringPointable, 
resultPointable);
         } catch (IOException e) {
             throw HyracksDataException.create(e);
         }
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpMatchesDescriptor.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpMatchesDescriptor.java
new file mode 100644
index 0000000..595203b
--- /dev/null
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpMatchesDescriptor.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.runtime.evaluators.functions;
+
+import java.io.IOException;
+
+import org.apache.asterix.builders.IAsterixListBuilder;
+import org.apache.asterix.builders.OrderedListBuilder;
+import org.apache.asterix.common.annotations.MissingNullInOutFunction;
+import org.apache.asterix.om.functions.BuiltinFunctions;
+import org.apache.asterix.om.functions.IFunctionDescriptorFactory;
+import org.apache.asterix.om.types.AOrderedListType;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.AbstractCollectionType;
+import org.apache.asterix.om.types.BuiltinType;
+import 
org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
+import org.apache.asterix.runtime.evaluators.functions.utils.RegExpMatcher;
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.runtime.base.IEvaluatorContext;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
+
+/**
+ * This function takes 2 arguments, a string and a regex pattern, and returns the list of all matches
+ */
+@MissingNullInOutFunction
+public class StringRegExpMatchesDescriptor extends 
AbstractScalarFunctionDynamicDescriptor {
+    private static final long serialVersionUID = 1L;
+
+    public static final IFunctionDescriptorFactory FACTORY = 
StringRegExpMatchesDescriptor::new;
+
+    @Override
+    public IScalarEvaluatorFactory createEvaluatorFactory(final 
IScalarEvaluatorFactory[] args) {
+        return new IScalarEvaluatorFactory() {
+            private static final long serialVersionUID = 1L;
+
+            @Override
+            public IScalarEvaluator createScalarEvaluator(IEvaluatorContext 
ctx) throws HyracksDataException {
+                return new AbstractBinaryStringEval(ctx, args[0], args[1], 
getIdentifier(), sourceLoc) {
+                    private final RegExpMatcher matcher = new RegExpMatcher();
+
+                    private final UTF8StringBuilder stringBuilder = new 
UTF8StringBuilder();
+                    private final GrowableArray stringBuilderArray = new 
GrowableArray();
+
+                    private final IAsterixListBuilder listBuilder = new 
OrderedListBuilder();
+                    private final AbstractCollectionType collectionType =
+                            new AOrderedListType(BuiltinType.ASTRING, 
BuiltinType.ASTRING.getTypeName());
+
+                    @Override
+                    protected void process(UTF8StringPointable srcPtr, 
UTF8StringPointable patternPtr,
+                            IPointable result) throws HyracksDataException {
+                        matcher.build(srcPtr, patternPtr);
+
+                        // Result is a list of type strings
+                        listBuilder.reset(collectionType);
+
+                        try {
+                            // Add all the matches to the builder
+                            while (matcher.find()) {
+                                String match = matcher.group();
+                                stringBuilderArray.reset();
+
+                                // Estimated length is number of characters + 
1 (1 byte for string length)
+                                stringBuilder.reset(stringBuilderArray, 
match.length() + 1);
+                                stringBuilder.appendString(match);
+                                stringBuilder.finish();
+
+                                resultStorage.reset();
+                                
dataOutput.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
+                                
dataOutput.write(stringBuilderArray.getByteArray(), 0, 
stringBuilderArray.getLength());
+                                listBuilder.addItem(resultStorage);
+                            }
+
+                            resultStorage.reset();
+                            listBuilder.write(dataOutput, true);
+                            result.set(resultStorage);
+                        } catch (IOException ex) {
+                            throw HyracksDataException.create(ex);
+                        }
+                    }
+                };
+            }
+        };
+    }
+
+    @Override
+    public FunctionIdentifier getIdentifier() {
+        return BuiltinFunctions.STRING_REGEXP_MATCHES;
+    }
+}
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java
new file mode 100644
index 0000000..da6a206
--- /dev/null
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.runtime.evaluators.functions;
+
+import java.io.IOException;
+
+import org.apache.asterix.builders.IAsterixListBuilder;
+import org.apache.asterix.builders.OrderedListBuilder;
+import org.apache.asterix.common.annotations.MissingNullInOutFunction;
+import org.apache.asterix.om.functions.BuiltinFunctions;
+import org.apache.asterix.om.functions.IFunctionDescriptorFactory;
+import org.apache.asterix.om.types.AOrderedListType;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.AbstractCollectionType;
+import org.apache.asterix.om.types.BuiltinType;
+import 
org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.runtime.base.IEvaluatorContext;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
+
+/**
+ * This function takes 2 arguments, a string and a regex pattern, and returns the list of splits
+ */
+@MissingNullInOutFunction
+public class StringRegExpSplitDescriptor extends 
AbstractScalarFunctionDynamicDescriptor {
+    private static final long serialVersionUID = 1L;
+
+    public static final IFunctionDescriptorFactory FACTORY = 
StringRegExpSplitDescriptor::new;
+
+    @Override
+    public IScalarEvaluatorFactory createEvaluatorFactory(final 
IScalarEvaluatorFactory[] args) {
+        return new IScalarEvaluatorFactory() {
+            private static final long serialVersionUID = 1L;
+
+            @Override
+            public IScalarEvaluator createScalarEvaluator(IEvaluatorContext 
ctx) throws HyracksDataException {
+                return new AbstractBinaryStringEval(ctx, args[0], args[1], 
getIdentifier(), sourceLoc) {
+
+                    private final UTF8StringBuilder stringBuilder = new 
UTF8StringBuilder();
+                    private final GrowableArray stringBuilderArray = new 
GrowableArray();
+
+                    private final IAsterixListBuilder listBuilder = new 
OrderedListBuilder();
+                    private final AbstractCollectionType collectionType =
+                            new AOrderedListType(BuiltinType.ASTRING, 
BuiltinType.ASTRING.getTypeName());
+
+                    @Override
+                    protected void process(UTF8StringPointable srcPtr, 
UTF8StringPointable patternPtr,
+                            IPointable result) throws HyracksDataException {
+                        String[] splits = 
srcPtr.toString().split(patternPtr.toString());
+
+                        // Result is a list of type strings
+                        listBuilder.reset(collectionType);
+
+                        try {
+                            // Add all the splits to the builder
+                            for (String split : splits) {
+                                stringBuilderArray.reset();
+
+                                // Estimated length is number of characters + 
1 (1 byte for string length)
+                                stringBuilder.reset(stringBuilderArray, 
split.length() + 1);
+                                stringBuilder.appendString(split);
+                                stringBuilder.finish();
+
+                                resultStorage.reset();
+                                
dataOutput.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
+                                
dataOutput.write(stringBuilderArray.getByteArray(), 0, 
stringBuilderArray.getLength());
+                                listBuilder.addItem(resultStorage);
+                            }
+
+                            resultStorage.reset();
+                            listBuilder.write(dataOutput, true);
+                            result.set(resultStorage);
+                        } catch (IOException ex) {
+                            throw HyracksDataException.create(ex);
+                        }
+                    }
+                };
+            }
+        };
+    }
+
+    @Override
+    public FunctionIdentifier getIdentifier() {
+        return BuiltinFunctions.STRING_REGEXP_SPLIT;
+    }
+}
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
index 0b234f5..778df5b 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
@@ -152,6 +152,16 @@ public class RegExpMatcher {
     }
 
     /**
+     * Returns the matched string. This should be called after checking that 
the find()
+     * method returns true.
+     *
+     * @return The matched string
+     */
+    public String group() {
+        return matcher.group();
+    }
+
+    /**
      * @return the first matched position of the regular expression pattern in 
the source string.
      */
     public int position() {
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java
index 769f853..515518d 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java
@@ -417,12 +417,14 @@ import 
org.apache.asterix.runtime.evaluators.functions.StringRegExpContainsDescr
 import 
org.apache.asterix.runtime.evaluators.functions.StringRegExpContainsWithFlagDescriptor;
 import 
org.apache.asterix.runtime.evaluators.functions.StringRegExpLikeDescriptor;
 import 
org.apache.asterix.runtime.evaluators.functions.StringRegExpLikeWithFlagDescriptor;
+import 
org.apache.asterix.runtime.evaluators.functions.StringRegExpMatchesDescriptor;
 import 
org.apache.asterix.runtime.evaluators.functions.StringRegExpPositionDescriptor;
 import 
org.apache.asterix.runtime.evaluators.functions.StringRegExpPositionOffset1Descriptor;
 import 
org.apache.asterix.runtime.evaluators.functions.StringRegExpPositionOffset1WithFlagDescriptor;
 import 
org.apache.asterix.runtime.evaluators.functions.StringRegExpPositionWithFlagDescriptor;
 import 
org.apache.asterix.runtime.evaluators.functions.StringRegExpReplaceDescriptor;
 import 
org.apache.asterix.runtime.evaluators.functions.StringRegExpReplaceWithFlagDescriptor;
+import 
org.apache.asterix.runtime.evaluators.functions.StringRegExpSplitDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.StringRepeatDescriptor;
 import org.apache.asterix.runtime.evaluators.functions.StringReplaceDescriptor;
 import 
org.apache.asterix.runtime.evaluators.functions.StringReplaceWithLimitDescriptor;
@@ -999,6 +1001,8 @@ public final class FunctionCollection implements 
IFunctionCollection {
         fc.add(StringRegExpPositionOffset1WithFlagDescriptor.FACTORY);
         fc.add(StringRegExpReplaceDescriptor.FACTORY);
         fc.add(StringRegExpReplaceWithFlagDescriptor.FACTORY);
+        fc.add(StringRegExpMatchesDescriptor.FACTORY);
+        fc.add(StringRegExpSplitDescriptor.FACTORY);
         fc.add(StringInitCapDescriptor.FACTORY);
         fc.add(StringTrimDescriptor.FACTORY);
         fc.add(StringLTrimDescriptor.FACTORY);
diff --git 
a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java
 
b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java
index 9dafef1..71ca652 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java
@@ -45,6 +45,9 @@ public class UTF8CharSequence implements CharSequence {
         if (end != start) {
             carSeq.buf = new char[carSeq.length];
             System.arraycopy(buf, start, carSeq.buf, 0, carSeq.length);
+        } else {
+            // subSequence with start = end will return an empty char[]
+            carSeq.buf = new char[0];
         }
         return carSeq;
     }

Reply via email to