This is an automated email from the ASF dual-hosted git repository. htowaileb pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push: new db0e80e [ASTERIXDB-2680][FUN] Add support to regexp_matches() and regexp_split() db0e80e is described below commit db0e80ee91adab00bdcb2a34aa649e1ebfb41a9d Author: Hussain Towaileb <hussain.towai...@couchbase.com> AuthorDate: Wed Dec 4 13:11:29 2019 +0300 [ASTERIXDB-2680][FUN] Add support to regexp_matches() and regexp_split() - user model changes: yes - storage format changes: no - interface changes: no Details: - Added support to regexp_matches(). - Added support to regexp_split(). - Added test cases for regexp_matches(). - Added test cases for regexp_split(). - Changed behavior of UTF8CharSequence when subSequence is called with start = end, originally it returns a null char[] array which causes an NPE, now it returns an empty char[]. Change-Id: Iccf5ba14f5c8b8cf4bcd6dd6e412bb515d68dd74 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/4243 Contrib: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Reviewed-by: Hussain Towaileb <hussai...@gmail.com> Reviewed-by: Dmitry Lychagin <dmitry.lycha...@couchbase.com> --- .../001/regexp_matches.000.ddl.sqlpp | 30 ++++++ .../001/regexp_matches.001.update.sqlpp | 29 ++++++ .../001/regexp_matches.002.query.sqlpp | 22 ++++ .../001/regexp_matches.003.ddl.sqlpp | 20 ++++ .../002/regexp_matches.000.ddl.sqlpp | 30 ++++++ .../002/regexp_matches.001.update.sqlpp | 28 ++++++ .../002/regexp_matches.002.query.sqlpp | 28 ++++++ .../002/regexp_matches.003.ddl.sqlpp | 20 ++++ .../003/regexp_matches.000.query.sqlpp | 27 +++++ .../004/regexp_matches.000.query.sqlpp | 26 +++++ .../regexp_split/001/regexp_split.000.ddl.sqlpp | 30 ++++++ .../regexp_split/001/regexp_split.001.update.sqlpp | 36 +++++++ .../regexp_split/001/regexp_split.002.query.sqlpp | 22 ++++ .../regexp_split/001/regexp_split.003.ddl.sqlpp | 20 ++++ .../regexp_split/002/regexp_split.000.ddl.sqlpp | 30 ++++++ .../regexp_split/002/regexp_split.001.update.sqlpp | 28 ++++++ .../regexp_split/002/regexp_split.002.query.sqlpp | 28 ++++++ .../regexp_split/002/regexp_split.003.ddl.sqlpp | 20 ++++ .../regexp_split/003/regexp_split.000.query.sqlpp | 34 +++++++ .../regexp_split/004/regexp_split.000.query.sqlpp | 26 +++++ .../regexp_matches/001/regexp_matches.002.adm | 6 ++ .../regexp_matches/002/regexp_matches.002.adm | 1 + .../regexp_matches/003/regexp_matches.000.adm | 1 + .../regexp_matches/004/regexp_matches.000.adm | 1 + .../string/regexp_split/001/regexp_split.002.adm | 13 +++ .../string/regexp_split/002/regexp_split.002.adm | 1 + .../string/regexp_split/003/regexp_split.000.adm | 1 + .../string/regexp_split/004/regexp_split.000.adm | 1 + .../test/resources/runtimets/testsuite_sqlpp.xml | 40 ++++++++ .../lang/common/util/CommonFunctionMapUtil.java | 2 + .../asterix/om/functions/BuiltinFunctions.java | 6 ++ .../functions/AbstractBinaryStringEval.java | 10 +- .../functions/StringRegExpMatchesDescriptor.java | 111 +++++++++++++++++++++ .../functions/StringRegExpSplitDescriptor.java | 108 ++++++++++++++++++++ .../evaluators/functions/utils/RegExpMatcher.java | 10 ++ .../runtime/functions/FunctionCollection.java | 4 + .../hyracks/data/std/util/UTF8CharSequence.java | 3 + 37 files changed, 848 insertions(+), 5 deletions(-) diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.000.ddl.sqlpp new file mode 100644 index 0000000..dda55b0 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.000.ddl.sqlpp @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; +create dataverse test; +use test; + +drop type test if exists; +create type test as open { +id: int32 +}; + +drop dataset test if exists; +create dataset test(test) primary key id; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.001.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.001.update.sqlpp new file mode 100644 index 0000000..e15e988 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.001.update.sqlpp @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use test; + +insert into test([ +{"id": 1, "f1": "So, 'twas better Betty Botter bought a bit of better butter", "f2": "\\b[Bb]\\w+"}, +{"id": 2, "f1": "So, 'twas better Betty Botter bought a bit of better butter", "f2": "\\b[Bb]\\w+ \\b[Bb]\\w+"}, +{"id": 3, "f1": "abracadabra", "f2": "[abc]"}, +{"id": 4, "f1": "abc", "f2": ""}, +{"id": 5, "f1": "abc", "f2": "123"}, +{"id": 6, "f1": "", "f2": ""} +]); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.002.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.002.query.sqlpp new file mode 100644 index 0000000..83f2d9a --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.002.query.sqlpp @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use test; + +select value regexp_matches(f1, f2) from test order by id asc; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.003.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.003.ddl.sqlpp new file mode 100644 index 0000000..269f673 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/001/regexp_matches.003.ddl.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.000.ddl.sqlpp new file mode 100644 index 0000000..dda55b0 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.000.ddl.sqlpp @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; +create dataverse test; +use test; + +drop type test if exists; +create type test as open { +id: int32 +}; + +drop dataset test if exists; +create dataset test(test) primary key id; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.001.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.001.update.sqlpp new file mode 100644 index 0000000..d77abf0 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.001.update.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use test; + +insert into test([ +{"id": 1, "f1": missing, "f2": null}, +{"id": 2, "f1": null, "f2": missing}, +{"id": 3, "f1": null, "f2": "[abc]"}, +{"id": 4, "f1": 13, "f2": ""}, +{"id": 5, "f1": "abc", "f2": true} +]); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.002.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.002.query.sqlpp new file mode 100644 index 0000000..2ae8bbb --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.002.query.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use test; + +select value [ +(select value regexp_matches(f1, f2) is missing from test where id = 1)[0], +(select value regexp_matches(f1, f2) is missing from test where id = 2)[0], +(select value regexp_matches(f1, f2) is null from test where id = 3)[0], +(select value regex_matches(f1, f2) is null from test where id = 4)[0], +(select value regex_matches(f1, f2) is null from test where id = 5)[0] +]; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.003.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.003.ddl.sqlpp new file mode 100644 index 0000000..269f673 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/002/regexp_matches.003.ddl.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/003/regexp_matches.000.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/003/regexp_matches.000.query.sqlpp new file mode 100644 index 0000000..a387704 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/003/regexp_matches.000.query.sqlpp @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +[ +regexp_matches("So, 'twas better Betty Botter bought a bit of better butter", "\\b[Bb]\\w+"), +regexp_matches("So, 'twas better Betty Botter bought a bit of better butter", "\\b[Bb]\\w+ \\b[Bb]\\w+"), +regexp_matches("abracadabra", "[abc]"), +regex_matches("abc", ""), +regex_matches("abc", "123"), +regex_matches("", "") +]; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/004/regexp_matches.000.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/004/regexp_matches.000.query.sqlpp new file mode 100644 index 0000000..f127ce3 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_matches/004/regexp_matches.000.query.sqlpp @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +[ +regexp_matches(missing, null) is missing, +regexp_matches(null, missing) is missing, +regexp_matches(null, "[abc]") is null, +regexp_matches(13, "") is null, +regexp_matches("abc", true) is null +]; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.000.ddl.sqlpp new file mode 100644 index 0000000..dda55b0 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.000.ddl.sqlpp @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; +create dataverse test; +use test; + +drop type test if exists; +create type test as open { +id: int32 +}; + +drop dataset test if exists; +create dataset test(test) primary key id; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.001.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.001.update.sqlpp new file mode 100644 index 0000000..d4f38c5 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.001.update.sqlpp @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use test; + +insert into test([ +{"id": 1, "f1": "C:\\Program Files\\asterixdb\\server\\bin", "f2": "[\\\\]"}, +{"id": 2, "f1": "/opt/asterixdb/bin", "f2": "/"}, +{"id": 3, "f1": "a + b - c * d / e < f > g >= h <= i == j", "f2": "\\s*[a-zA-Z]+\\s*"}, +{"id": 4, "f1": "abc", "f2": ""}, +{"id": 5, "f1": "abc", "f2": "/"}, +{"id": 6, "f1": "", "f2": ""}, +{"id": 7, "f1": "", "f2": "1"}, +{"id": 8, "f1": "1", "f2": ""}, +{"id": 9, "f1": "1", "f2": "2"}, +{"id": 10, "f1": "1", "f2": "1"}, +{"id": 11, "f1": "12", "f2": "1"}, +{"id": 12, "f1": "12", "f2": "2"}, +{"id": 13, "f1": "121", "f2": "2"} +]); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.002.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.002.query.sqlpp new file mode 100644 index 0000000..897b20b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.002.query.sqlpp @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use test; + +select value regexp_split(f1, f2) from test order by id asc; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.003.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.003.ddl.sqlpp new file mode 100644 index 0000000..269f673 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/001/regexp_split.003.ddl.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.000.ddl.sqlpp new file mode 100644 index 0000000..dda55b0 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.000.ddl.sqlpp @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test if exists; +create dataverse test; +use test; + +drop type test if exists; +create type test as open { +id: int32 +}; + +drop dataset test if exists; +create dataset test(test) primary key id; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.001.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.001.update.sqlpp new file mode 100644 index 0000000..d77abf0 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.001.update.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use test; + +insert into test([ +{"id": 1, "f1": missing, "f2": null}, +{"id": 2, "f1": null, "f2": missing}, +{"id": 3, "f1": null, "f2": "[abc]"}, +{"id": 4, "f1": 13, "f2": ""}, +{"id": 5, "f1": "abc", "f2": true} +]); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.002.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.002.query.sqlpp new file mode 100644 index 0000000..bbb8b7b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.002.query.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use test; + +select value [ +(select value regexp_split(f1, f2) is missing from test where id = 1)[0], +(select value regexp_split(f1, f2) is missing from test where id = 2)[0], +(select value regexp_split(f1, f2) is null from test where id = 3)[0], +(select value regexp_split(f1, f2) is null from test where id = 4)[0], +(select value regexp_split(f1, f2) is null from test where id = 5)[0] +]; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.003.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.003.ddl.sqlpp new file mode 100644 index 0000000..269f673 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/002/regexp_split.003.ddl.sqlpp @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse test; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/003/regexp_split.000.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/003/regexp_split.000.query.sqlpp new file mode 100644 index 0000000..a48af93 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/003/regexp_split.000.query.sqlpp @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +[ +regexp_split("C:\\Program Files\\asterixdb\\server\\bin", "[\\\\]"), +regexp_split("/opt/asterixdb/bin", "/"), +regexp_split("a + b - c * d / e < f > g >= h <= i == j", "\\s*[a-zA-Z]+\\s*"), +regexp_split("abc", ""), +regexp_split("abc", "/"), +regexp_split("", ""), +regex_split("", "1"), +regex_split("1", ""), +regex_split("1", "2"), +regex_split("1", "1"), +regex_split("12", "1"), +regex_split("12", "2"), +regex_split("121", "2") +]; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/004/regexp_split.000.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/004/regexp_split.000.query.sqlpp new file mode 100644 index 0000000..69d0ca7 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/regexp_split/004/regexp_split.000.query.sqlpp @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +[ +regexp_split(missing, null) is missing, +regexp_split(null, missing) is missing, +regexp_split(null, "[abc]") is null, +regexp_split(13, "") is null, +regexp_split("abc", true) is null +]; \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/001/regexp_matches.002.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/001/regexp_matches.002.adm new file mode 100644 index 0000000..1aa8d8c --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/001/regexp_matches.002.adm @@ -0,0 +1,6 @@ +[ "better", "Betty", "Botter", "bought", "bit", "better", "butter" ] +[ "better Betty", "Botter bought", "better butter" ] +[ "a", "b", "a", "c", "a", "a", "b", "a" ] +[ "", "", "", "" ] +[ ] +[ "" ] \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/002/regexp_matches.002.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/002/regexp_matches.002.adm new file mode 100644 index 0000000..2f00e1d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/002/regexp_matches.002.adm @@ -0,0 +1 @@ +[ true, true, true, true, true ] \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/003/regexp_matches.000.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/003/regexp_matches.000.adm new file mode 100644 index 0000000..32f80be --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/003/regexp_matches.000.adm @@ -0,0 +1 @@ +[ [ "better", "Betty", "Botter", "bought", "bit", "better", "butter" ], [ "better Betty", "Botter bought", "better butter" ], [ "a", "b", "a", "c", "a", "a", "b", "a" ], [ "", "", "", "" ], [ ], [ "" ] ] \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/004/regexp_matches.000.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/004/regexp_matches.000.adm new file mode 100644 index 0000000..2f00e1d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_matches/004/regexp_matches.000.adm @@ -0,0 +1 @@ +[ true, true, true, true, true ] \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/001/regexp_split.002.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/001/regexp_split.002.adm new file mode 100644 index 0000000..96a4a2e --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/001/regexp_split.002.adm @@ -0,0 +1,13 @@ +[ "C:", "Program Files", "asterixdb", "server", "bin" ] +[ "", "opt", "asterixdb", "bin" ] +[ "", "+", "-", "*", "/", "<", ">", ">=", "<=", "==" ] +[ "a", "b", "c" ] +[ "abc" ] +[ "" ] +[ "" ] +[ "1" ] +[ "1" ] +[ ] +[ "", "2" ] +[ "1" ] +[ "1", "1" ] \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/002/regexp_split.002.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/002/regexp_split.002.adm new file mode 100644 index 0000000..2f00e1d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/002/regexp_split.002.adm @@ -0,0 +1 @@ +[ true, true, true, true, true ] \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/003/regexp_split.000.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/003/regexp_split.000.adm new file mode 100644 index 0000000..796e544 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/003/regexp_split.000.adm @@ -0,0 +1 @@ +[ [ "C:", "Program Files", "asterixdb", "server", "bin" ], [ "", "opt", "asterixdb", "bin" ], [ "", "+", "-", "*", "/", "<", ">", ">=", "<=", "==" ], [ "a", "b", "c" ], [ "abc" ], [ "" ], [ "" ], [ "1" ], [ "1" ], [ ], [ "", "2" ], [ "1" ], [ "1", "1" ] ] \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/004/regexp_split.000.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/004/regexp_split.000.adm new file mode 100644 index 0000000..2f00e1d --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_split/004/regexp_split.000.adm @@ -0,0 +1 @@ +[ true, true, true, true, true ] \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml index 5bb68e0..201ec6b 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml @@ -9213,6 +9213,26 @@ </compilation-unit> </test-case> <test-case FilePath="string"> + <compilation-unit name="regexp_matches/001"> + <output-dir compare="Text">regexp_matches/001</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> + <compilation-unit name="regexp_matches/002"> + <output-dir compare="Text">regexp_matches/002</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> + <compilation-unit name="regexp_matches/003"> + <output-dir compare="Text">regexp_matches/003</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> + <compilation-unit name="regexp_matches/004"> + <output-dir compare="Text">regexp_matches/004</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> <compilation-unit name="regexp_position/offset0/regexp_position"> <output-dir compare="Text">regexp_position/offset0/regexp_position</output-dir> </compilation-unit> @@ -9333,6 +9353,26 @@ </compilation-unit> </test-case> <test-case FilePath="string"> + <compilation-unit name="regexp_split/001"> + <output-dir compare="Text">regexp_split/001</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> + <compilation-unit name="regexp_split/002"> + <output-dir compare="Text">regexp_split/002</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> + <compilation-unit name="regexp_split/003"> + <output-dir compare="Text">regexp_split/003</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> + <compilation-unit name="regexp_split/004"> + <output-dir compare="Text">regexp_split/004</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> <compilation-unit name="repeat"> <output-dir compare="Text">repeat</output-dir> </compilation-unit> diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/CommonFunctionMapUtil.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/CommonFunctionMapUtil.java index a7ec834..c87f4dc 100644 --- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/CommonFunctionMapUtil.java +++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/CommonFunctionMapUtil.java @@ -70,6 +70,8 @@ public class CommonFunctionMapUtil { addFunctionMapping("regex_position1", "regexp-position1"); addFunctionMapping("regexp_pos1", "regexp-position1"); addFunctionMapping("regex_replace", "regexp-replace"); + addFunctionMapping("regex_matches", "regexp-matches"); + addFunctionMapping("regex_split", "regexp-split"); // Type functions. addFunctionMapping("isnull", "is-null"); // isnull, internal: is-null diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java index 53ed003..94303a7 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java @@ -440,6 +440,10 @@ public class BuiltinFunctions { new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "regexp-replace", 3); public static final FunctionIdentifier STRING_REGEXP_REPLACE_WITH_FLAG = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "regexp-replace", 4); + public static final FunctionIdentifier STRING_REGEXP_MATCHES = + new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "regexp-matches", 2); + public static final FunctionIdentifier STRING_REGEXP_SPLIT = + new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "regexp-split", 2); public static final FunctionIdentifier STRING_LOWERCASE = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, "lowercase", 1); public static final FunctionIdentifier STRING_UPPERCASE = @@ -1744,6 +1748,8 @@ public class BuiltinFunctions { addFunction(STRING_REGEXP_REPLACE, UniformInputTypeComputer.STRING_STRING_INSTANCE, true); addFunction(STRING_REGEXP_REPLACE_WITH_FLAG, AStringTypeComputer.INSTANCE_NULLABLE, true); addFunction(STRING_REPLACE, UniformInputTypeComputer.STRING_STRING_INSTANCE, true); + addFunction(STRING_REGEXP_MATCHES, UniformInputTypeComputer.STRING_STRING_LIST_INSTANCE, true); + addFunction(STRING_REGEXP_SPLIT, UniformInputTypeComputer.STRING_STRING_LIST_INSTANCE, true); addFunction(STRING_REPLACE_WITH_LIMIT, AStringTypeComputer.INSTANCE_NULLABLE, true); addFunction(STRING_REVERSE, UniformInputTypeComputer.STRING_STRING_INSTANCE, true); addFunction(SUBSTRING_BEFORE, UniformInputTypeComputer.STRING_STRING_INSTANCE, true); diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java index 65fba47..2fc8654 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/AbstractBinaryStringEval.java @@ -46,8 +46,8 @@ public abstract class AbstractBinaryStringEval implements IScalarEvaluator { // Argument pointables. private final IPointable argPtrLeft = new VoidPointable(); private final IPointable argPtrSecond = new VoidPointable(); - private final UTF8StringPointable leftPtr = new UTF8StringPointable(); - private final UTF8StringPointable rightPtr = new UTF8StringPointable(); + private final UTF8StringPointable leftStringPointable = new UTF8StringPointable(); + private final UTF8StringPointable rightStringPointable = new UTF8StringPointable(); // For results. protected final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage(); @@ -100,12 +100,12 @@ public abstract class AbstractBinaryStringEval implements IScalarEvaluator { } // Sets StringUTF8Pointables. - leftPtr.set(bytes0, offset0 + 1, len0 - 1); - rightPtr.set(bytes1, offset1 + 1, len1 - 1); + leftStringPointable.set(bytes0, offset0 + 1, len0 - 1); + rightStringPointable.set(bytes1, offset1 + 1, len1 - 1); // The actual processing. try { - process(leftPtr, rightPtr, resultPointable); + process(leftStringPointable, rightStringPointable, resultPointable); } catch (IOException e) { throw HyracksDataException.create(e); } diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpMatchesDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpMatchesDescriptor.java new file mode 100644 index 0000000..595203b --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpMatchesDescriptor.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.runtime.evaluators.functions; + +import java.io.IOException; + +import org.apache.asterix.builders.IAsterixListBuilder; +import org.apache.asterix.builders.OrderedListBuilder; +import org.apache.asterix.common.annotations.MissingNullInOutFunction; +import org.apache.asterix.om.functions.BuiltinFunctions; +import org.apache.asterix.om.functions.IFunctionDescriptorFactory; +import org.apache.asterix.om.types.AOrderedListType; +import org.apache.asterix.om.types.ATypeTag; +import org.apache.asterix.om.types.AbstractCollectionType; +import org.apache.asterix.om.types.BuiltinType; +import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor; +import org.apache.asterix.runtime.evaluators.functions.utils.RegExpMatcher; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.runtime.base.IEvaluatorContext; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; +import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.apache.hyracks.data.std.api.IPointable; +import org.apache.hyracks.data.std.primitive.UTF8StringPointable; +import org.apache.hyracks.data.std.util.GrowableArray; +import org.apache.hyracks.data.std.util.UTF8StringBuilder; + +/** + * This function takes 2 arguments, a string, and a pattern + */ +@MissingNullInOutFunction +public class StringRegExpMatchesDescriptor extends AbstractScalarFunctionDynamicDescriptor { + private static final long serialVersionUID = 1L; + + public static final IFunctionDescriptorFactory FACTORY = StringRegExpMatchesDescriptor::new; + + @Override + public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) { + return new IScalarEvaluatorFactory() { + private static final long serialVersionUID = 1L; + + @Override + public IScalarEvaluator createScalarEvaluator(IEvaluatorContext ctx) throws HyracksDataException { + return new AbstractBinaryStringEval(ctx, args[0], args[1], getIdentifier(), sourceLoc) { + private final RegExpMatcher matcher = new RegExpMatcher(); + + private final UTF8StringBuilder stringBuilder = new UTF8StringBuilder(); + private final GrowableArray stringBuilderArray = new GrowableArray(); + + private final IAsterixListBuilder listBuilder = new OrderedListBuilder(); + private final AbstractCollectionType collectionType = + new AOrderedListType(BuiltinType.ASTRING, BuiltinType.ASTRING.getTypeName()); + + @Override + protected void process(UTF8StringPointable srcPtr, UTF8StringPointable patternPtr, + IPointable result) throws HyracksDataException { + matcher.build(srcPtr, patternPtr); + + // Result is a list of type strings + listBuilder.reset(collectionType); + + try { + // Add all the matches to the builder + while (matcher.find()) { + String match = matcher.group(); + stringBuilderArray.reset(); + + // Estimated length is number of characters + 1 (1 byte for string length) + stringBuilder.reset(stringBuilderArray, match.length() + 1); + stringBuilder.appendString(match); + stringBuilder.finish(); + + resultStorage.reset(); + dataOutput.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG); + dataOutput.write(stringBuilderArray.getByteArray(), 0, stringBuilderArray.getLength()); + listBuilder.addItem(resultStorage); + } + + resultStorage.reset(); + listBuilder.write(dataOutput, true); + result.set(resultStorage); + } catch (IOException ex) { + throw HyracksDataException.create(ex); + } + } + }; + } + }; + } + + @Override + public FunctionIdentifier getIdentifier() { + return BuiltinFunctions.STRING_REGEXP_MATCHES; + } +} diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java new file mode 100644 index 0000000..da6a206 --- /dev/null +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.runtime.evaluators.functions; + +import java.io.IOException; + +import org.apache.asterix.builders.IAsterixListBuilder; +import org.apache.asterix.builders.OrderedListBuilder; +import org.apache.asterix.common.annotations.MissingNullInOutFunction; +import org.apache.asterix.om.functions.BuiltinFunctions; +import org.apache.asterix.om.functions.IFunctionDescriptorFactory; +import org.apache.asterix.om.types.AOrderedListType; +import org.apache.asterix.om.types.ATypeTag; +import org.apache.asterix.om.types.AbstractCollectionType; +import org.apache.asterix.om.types.BuiltinType; +import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor; +import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; +import org.apache.hyracks.algebricks.runtime.base.IEvaluatorContext; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator; +import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory; +import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.apache.hyracks.data.std.api.IPointable; +import org.apache.hyracks.data.std.primitive.UTF8StringPointable; +import org.apache.hyracks.data.std.util.GrowableArray; +import org.apache.hyracks.data.std.util.UTF8StringBuilder; + +/** + * This function takes 2 arguments, a string, and a pattern + */ +@MissingNullInOutFunction +public class StringRegExpSplitDescriptor extends AbstractScalarFunctionDynamicDescriptor { + private static final long serialVersionUID = 1L; + + public static final IFunctionDescriptorFactory FACTORY = StringRegExpSplitDescriptor::new; + + @Override + public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args) { + return new IScalarEvaluatorFactory() { + private static final long serialVersionUID = 1L; + + @Override + public IScalarEvaluator createScalarEvaluator(IEvaluatorContext ctx) throws HyracksDataException { + return new AbstractBinaryStringEval(ctx, args[0], args[1], getIdentifier(), sourceLoc) { + + private final UTF8StringBuilder stringBuilder = new UTF8StringBuilder(); + private final GrowableArray stringBuilderArray = new GrowableArray(); + + private final IAsterixListBuilder listBuilder = new OrderedListBuilder(); + private final AbstractCollectionType collectionType = + new AOrderedListType(BuiltinType.ASTRING, BuiltinType.ASTRING.getTypeName()); + + @Override + protected void process(UTF8StringPointable srcPtr, UTF8StringPointable patternPtr, + IPointable result) throws HyracksDataException { + String[] splits = srcPtr.toString().split(patternPtr.toString()); + + // Result is a list of type strings + listBuilder.reset(collectionType); + + try { + // Add all the splits to the builder + for (String split : splits) { + stringBuilderArray.reset(); + + // Estimated length is number of characters + 1 (1 byte for string length) + stringBuilder.reset(stringBuilderArray, split.length() + 1); + stringBuilder.appendString(split); + stringBuilder.finish(); + + resultStorage.reset(); + dataOutput.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG); + dataOutput.write(stringBuilderArray.getByteArray(), 0, stringBuilderArray.getLength()); + listBuilder.addItem(resultStorage); + } + + resultStorage.reset(); + listBuilder.write(dataOutput, true); + result.set(resultStorage); + } catch (IOException ex) { + throw HyracksDataException.create(ex); + } + } + }; + } + }; + } + + @Override + public FunctionIdentifier getIdentifier() { + return BuiltinFunctions.STRING_REGEXP_SPLIT; + } +} diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java index 0b234f5..778df5b 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java @@ -152,6 +152,16 @@ public class RegExpMatcher { } /** + * Returns the matched string. This should be called after checking that the find() + * method returns true. + * + * @return The matched string + */ + public String group() { + return matcher.group(); + } + + /** * @return the first matched position of the regular expression pattern in the source string. */ public int position() { diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java index 769f853..515518d 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionCollection.java @@ -417,12 +417,14 @@ import org.apache.asterix.runtime.evaluators.functions.StringRegExpContainsDescr import org.apache.asterix.runtime.evaluators.functions.StringRegExpContainsWithFlagDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringRegExpLikeDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringRegExpLikeWithFlagDescriptor; +import org.apache.asterix.runtime.evaluators.functions.StringRegExpMatchesDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringRegExpPositionDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringRegExpPositionOffset1Descriptor; import org.apache.asterix.runtime.evaluators.functions.StringRegExpPositionOffset1WithFlagDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringRegExpPositionWithFlagDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringRegExpReplaceDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringRegExpReplaceWithFlagDescriptor; +import org.apache.asterix.runtime.evaluators.functions.StringRegExpSplitDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringRepeatDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringReplaceDescriptor; import org.apache.asterix.runtime.evaluators.functions.StringReplaceWithLimitDescriptor; @@ -999,6 +1001,8 @@ public final class FunctionCollection implements IFunctionCollection { fc.add(StringRegExpPositionOffset1WithFlagDescriptor.FACTORY); fc.add(StringRegExpReplaceDescriptor.FACTORY); fc.add(StringRegExpReplaceWithFlagDescriptor.FACTORY); + fc.add(StringRegExpMatchesDescriptor.FACTORY); + fc.add(StringRegExpSplitDescriptor.FACTORY); fc.add(StringInitCapDescriptor.FACTORY); fc.add(StringTrimDescriptor.FACTORY); fc.add(StringLTrimDescriptor.FACTORY); diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java index 9dafef1..71ca652 100644 --- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java +++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java @@ -45,6 +45,9 @@ public class UTF8CharSequence implements CharSequence { if (end != start) { carSeq.buf = new char[carSeq.length]; System.arraycopy(buf, start, carSeq.buf, 0, carSeq.length); + } else { + // subSequence with start = end will return an empty char[] + carSeq.buf = new char[0]; } return carSeq; }