Yingyi Bu has submitted this change and it was merged. Change subject: [ASTERIXDB-1664][FUN][DOC] Fix position/regexp_position to be 1-based ......................................................................
[ASTERIXDB-1664][FUN][DOC] Fix position/regexp_postion to be 1-based - user model changes: no - storage format changes: no - interface changes: no Details: - Change function position and regexp_position to be 1-based; - Re-organize function documentations. Change-Id: I415e0ba0ac4dfceba95913ff27af95c0c1af5fee Reviewed-on: https://asterix-gerrit.ics.uci.edu/1888 Sonar-Qube: Jenkins <[email protected]> Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> BAD: Jenkins <[email protected]> Reviewed-by: Dmitry Lychagin <[email protected]> --- M asterixdb/asterix-app/src/test/resources/runtimets/results/string/position/position.1.adm M asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_position/regexp_position.1.adm M asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_position_with_flag/regexp_position_with_flag.1.adm M asterixdb/asterix-doc/pom.xml R asterixdb/asterix-doc/src/main/markdown/builtins/1_numeric_common.md A asterixdb/asterix-doc/src/main/markdown/builtins/1_numeric_delta.md R asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md A asterixdb/asterix-doc/src/main/markdown/builtins/2_string_delta.md M asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringPositionDescriptor.java M asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpPositionDescriptor.java M asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpPositionWithFlagDescriptor.java 11 files changed, 252 insertions(+), 213 deletions(-) Approvals: Dmitry Lychagin: Looks good to me, approved Jenkins: Verified; No violations found; No violations found; Verified diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/position/position.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/position/position.1.adm index 76e3e97..5c54d04 100644 --- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/position/position.1.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/position/position.1.adm @@ -1 +1 @@ -[ 2, -1 ] +[ 3, -1 ] diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_position/regexp_position.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_position/regexp_position.1.adm index 428917f..83ee615 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_position/regexp_position.1.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_position/regexp_position.1.adm @@ -1,15 +1,15 @@ -0 +1 -1 -0 -0 -0 +1 +1 +1 -1 -0 +1 -1 -1 -0 -0 -0 -0 +1 +1 +1 +1 -1 -0 +1 diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_position_with_flag/regexp_position_with_flag.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_position_with_flag/regexp_position_with_flag.1.adm index fa99af7..f962896 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_position_with_flag/regexp_position_with_flag.1.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_position_with_flag/regexp_position_with_flag.1.adm @@ -1 +1 @@ -{ "result1": 0 } +{ "result1": 1 } diff --git a/asterixdb/asterix-doc/pom.xml b/asterixdb/asterix-doc/pom.xml index 9b980e1..d2fbefc 100644 --- a/asterixdb/asterix-doc/pom.xml +++ b/asterixdb/asterix-doc/pom.xml @@ -56,7 +56,7 @@ <filelist dir="${project.basedir}/src/main/markdown/sqlpp" files="0_toc.md,1_intro.md,2_expr_title.md,2_expr.md,3_query_title.md,3_query.md,4_error_title.md,4_error.md,5_ddl.md,appendix_1_title.md,appendix_1_keywords.md,appendix_2_title.md,appendix_2_parameters.md" /> </concat> <concat destfile="${project.build.directory}/generated-site/markdown/sqlpp/builtins.md"> - <filelist 
dir="${project.basedir}/src/main/markdown/builtins" files="0_toc.md,1_numeric.md,2_string.md,3_binary.md,4_spatial.md,5_similarity.md,6_tokenizing.md,7_temporal.md,7_allens.md,8_record.md,9_aggregate_sql.md,10_comparison.md,11_type.md,13_conditional.md,12_misc.md" /> + <filelist dir="${project.basedir}/src/main/markdown/builtins" files="0_toc.md,1_numeric_common.md,1_numeric_delta.md,2_string_common.md,2_string_delta.md,3_binary.md,4_spatial.md,5_similarity.md,6_tokenizing.md,7_temporal.md,7_allens.md,8_record.md,9_aggregate_sql.md,10_comparison.md,11_type.md,13_conditional.md,12_misc.md" /> </concat> <concat destfile="${project.build.directory}/generated-site/markdown/aql/builtins.md"> <filelist dir="${project.basedir}/src/main/markdown/builtins" files="0_toc.md,1_numeric.md,2_string.md,3_binary.md,4_spatial.md,5_similarity.md,6_tokenizing.md,7_temporal.md,7_allens.md,8_record.md,9_aggregate_aql.md,10_comparison.md,11_type.md,13_conditional.md,12_misc.md" /> diff --git a/asterixdb/asterix-doc/src/main/markdown/builtins/1_numeric.md b/asterixdb/asterix-doc/src/main/markdown/builtins/1_numeric_common.md similarity index 90% rename from asterixdb/asterix-doc/src/main/markdown/builtins/1_numeric.md rename to asterixdb/asterix-doc/src/main/markdown/builtins/1_numeric_common.md index 563860c..9a4c080 100644 --- a/asterixdb/asterix-doc/src/main/markdown/builtins/1_numeric.md +++ b/asterixdb/asterix-doc/src/main/markdown/builtins/1_numeric_common.md @@ -357,44 +357,6 @@ { "v1": 2013, "v2": -4036, "v3": 1.0, "v4": -2013.0, "v5": -2014.0 } -### round_half_to_even ### - * Syntax: - - round_half_to_even(numeric_value, [precision]) - - * Computes the closest numeric value to `numeric_value` that is a multiple of ten to the power of minus `precision`. - `precision` is optional and by default value `0` is used. - * Arguments: - * `numeric_value`: a `tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. 
- * `precision`: an optional `tinyint`/`smallint`/`integer`/`bigint` field representing the - number of digits in the fraction of the the result - * Return Value: - * The rounded value for the given number in the same type as the input argument, - * `missing` if any argument is a `missing` value, - * `null` if any argument is a `null` value but no argument is a `missing` value, - * a type error will be raised if: - * the first argument is any other non-numeric value, - * or, the second argument is any other non-tinyint, non-smallint, non-integer, or non-bigint value. - - * Example: - - { - "v1": round_half_to_even(2013), - "v2": round_half_to_even(-4036), - "v3": round_half_to_even(0.8), - "v4": round_half_to_even(float("-2013.256")), - "v5": round_half_to_even(double("-2013.893823748327284")), - "v6": round_half_to_even(double("-2013.893823748327284"), 2), - "v7": round_half_to_even(2013, 4), - "v8": round_half_to_even(float("-2013.256"), 5) - }; - - - * The expected result is: - - { "v1": 2013, "v2": -4036, "v3": 1.0, "v4": -2013.0, "v5": -2014.0, "v6": -2013.89, "v7": 2013, "v8": -2013.256 } - - ### sign ### * Syntax: diff --git a/asterixdb/asterix-doc/src/main/markdown/builtins/1_numeric_delta.md b/asterixdb/asterix-doc/src/main/markdown/builtins/1_numeric_delta.md new file mode 100644 index 0000000..151233e --- /dev/null +++ b/asterixdb/asterix-doc/src/main/markdown/builtins/1_numeric_delta.md @@ -0,0 +1,57 @@ +<!-- + ! Licensed to the Apache Software Foundation (ASF) under one + ! or more contributor license agreements. See the NOTICE file + ! distributed with this work for additional information + ! regarding copyright ownership. The ASF licenses this file + ! to you under the Apache License, Version 2.0 (the + ! "License"); you may not use this file except in compliance + ! with the License. You may obtain a copy of the License at + ! + ! http://www.apache.org/licenses/LICENSE-2.0 + ! + ! Unless required by applicable law or agreed to in writing, + ! 
software distributed under the License is distributed on an + ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ! KIND, either express or implied. See the License for the + ! specific language governing permissions and limitations + ! under the License. + !--> + +### round_half_to_even ### + * Syntax: + + round_half_to_even(numeric_value, [precision]) + + * Computes the closest numeric value to `numeric_value` that is a multiple of ten to the power of minus `precision`. + `precision` is optional and by default value `0` is used. + * Arguments: + * `numeric_value`: a `tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. + * `precision`: an optional `tinyint`/`smallint`/`integer`/`bigint` field representing the + number of digits in the fraction of the the result + * Return Value: + * The rounded value for the given number in the same type as the input argument, + * `missing` if any argument is a `missing` value, + * `null` if any argument is a `null` value but no argument is a `missing` value, + * a type error will be raised if: + * the first argument is any other non-numeric value, + * or, the second argument is any other non-tinyint, non-smallint, non-integer, or non-bigint value. 
+ + * Example: + + { + "v1": round_half_to_even(2013), + "v2": round_half_to_even(-4036), + "v3": round_half_to_even(0.8), + "v4": round_half_to_even(float("-2013.256")), + "v5": round_half_to_even(double("-2013.893823748327284")), + "v6": round_half_to_even(double("-2013.893823748327284"), 2), + "v7": round_half_to_even(2013, 4), + "v8": round_half_to_even(float("-2013.256"), 5) + }; + + + * The expected result is: + + { "v1": 2013, "v2": -4036, "v3": 1.0, "v4": -2013.0, "v5": -2014.0, "v6": -2013.89, "v7": 2013, "v8": -2013.256 } + + diff --git a/asterixdb/asterix-doc/src/main/markdown/builtins/2_string.md b/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md similarity index 77% rename from asterixdb/asterix-doc/src/main/markdown/builtins/2_string.md rename to asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md index 605fd8d..95d0550 100644 --- a/asterixdb/asterix-doc/src/main/markdown/builtins/2_string.md +++ b/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md @@ -227,7 +227,7 @@ * The expected result is: - { "v1": 2, "v2": -1 } + { "v1": 3, "v2": -1 } ### regexp_contains ### @@ -322,7 +322,7 @@ * The expected result is: - { "v1": 0, "v2": -1 } + { "v1": 1, "v2": -1 } ### regexp_replace ### @@ -463,112 +463,6 @@ { "v1": true, "v2": false } -### string_concat ### - * Syntax: - - string_concat(array) - - * Concatenates an array of strings `array` into a single string. - * Arguments: - * `array` : an `array` or `multiset` of `string`s (could be `null` or `missing`) to be concatenated. 
- * Return Value: - * the concatenated `string` value, - * `missing` if the argument is a `missing` value, - * `null` if any argument is a `null` value but no argument is a `missing` value, - * `missing` if any element in the input array is `missing`, - * `null` if any element in the input array is `null` but no element in the input array is `missing`, - * any other non-array input value or non-integer element in the input array will cause a type error. - - * Example: - - string_concat(["ASTERIX", " ", "ROCKS!"]); - - - * The expected result is: - - "ASTERIX ROCKS!" - - -### string_join ### - * Syntax: - - string_join(array, string) - - * Joins an array or multiset of strings `array` with the given separator `string` into a single string. - * Arguments: - * `array` : an `array` or `multiset` of strings (could be `null`) to be joined. - * `string` : a `string` to serve as the separator. - * Return Value: - * the joined `string`, - * `missing` if any argument is a `missing` value, - * `null` if any argument is a `null` value but no argument is a `missing` value, - * `missing` if the first argument array contains a `missing`, - * `null` if the first argument array contains a `null` but does not contain a `missing`, - * a type error will be raised if: - * the first argument is any other non-array value, or contains any other non-string value, - * or, the second argument is any other non-string value. - - * Example: - - string_join(["ASTERIX", "ROCKS~"], "!! "); - - - * The expected result is: - - "ASTERIX!! ROCKS~" - - -### string_to_codepoint ### - * Syntax: - - string_to_codepoint(string) - - * Converts the string `string` to its code_based representation. - * Arguments: - * `string` : a `string` that will be converted. - * Return Value: - * an `array` of the code points for the string `string`, - * `missing` if the argument is a `missing` value, - * `null` if the argument is a `null` value, - * any other non-string input value will cause a type error. 
- - * Example: - - string_to_codepoint("Hello ASTERIX!"); - - - * The expected result is: - - [ 72, 101, 108, 108, 111, 32, 65, 83, 84, 69, 82, 73, 88, 33 ] - - -### codepoint_to_string ### - * Syntax: - - codepoint_to_string(array) - - * Converts the ordered code_based representation `array` to the corresponding string. - * Arguments: - * `array` : an `array` of integer code_points. - * Return Value: - * a `string` representation of `array`. - * `missing` if the argument is a `missing` value, - * `null` if the argument is a `null` value, - * `missing` if any element in the input array is `missing`, - * `null` if any element in the input array is `null` but no element in the input array is `missing`, - * any other non-array input value or non-integer element in the input array will cause a type error. - - * Example: - - codepoint_to_string([72, 101, 108, 108, 111, 32, 65, 83, 84, 69, 82, 73, 88, 33]); - - - * The expected result is: - - "Hello ASTERIX!" - - - ### substr ### * Syntax: @@ -596,57 +490,6 @@ * The expected result is: "str" - - -### substring_before ### - * Syntax: - - substring_before(string, string_pattern) - - * Returns the substring from the given string `string` before the given pattern `string_pattern`. - * Arguments: - * `string` : a `string` to be extracted. - * `string_pattern` : a `string` pattern to be searched. - * Return Value: - * a `string` that represents the substring, - * `missing` if any argument is a `missing` value, - * `null` if any argument is a `null` value but no argument is a `missing` value, - * any other non-string input value will cause a type error. - - * Example: - - substring_before(" like x-phone", "x-phone"); - - - * The expected result is: - - " like " - - -### substring_after ### - * Syntax: - - substring_after(string, string_pattern); - - * Returns the substring from the given string `string` after the given pattern `string_pattern`. - * Arguments: - * `string` : a `string` to be extracted. 
- * `string_pattern` : a `string` pattern to be searched. - * Return Value: - * a `string` that represents the substring, - * `missing` if any argument is a `missing` value, - * `null` if any argument is a `null` value but no argument is a `missing` value, - * any other non-string input value will cause a type error. - - - * Example: - - substring_after(" like x-phone", "xph"); - - - * The expected result is: - - "one" ### trim ### diff --git a/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_delta.md b/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_delta.md new file mode 100644 index 0000000..fcf1c20 --- /dev/null +++ b/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_delta.md @@ -0,0 +1,174 @@ +<!-- + ! Licensed to the Apache Software Foundation (ASF) under one + ! or more contributor license agreements. See the NOTICE file + ! distributed with this work for additional information + ! regarding copyright ownership. The ASF licenses this file + ! to you under the Apache License, Version 2.0 (the + ! "License"); you may not use this file except in compliance + ! with the License. You may obtain a copy of the License at + ! + ! http://www.apache.org/licenses/LICENSE-2.0 + ! + ! Unless required by applicable law or agreed to in writing, + ! software distributed under the License is distributed on an + ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ! KIND, either express or implied. See the License for the + ! specific language governing permissions and limitations + ! under the License. + !--> + +### string_concat ### + * Syntax: + + string_concat(array) + + * Concatenates an array of strings `array` into a single string. + * Arguments: + * `array` : an `array` or `multiset` of `string`s (could be `null` or `missing`) to be concatenated. 
+ * Return Value: + * the concatenated `string` value, + * `missing` if the argument is a `missing` value, + * `null` if any argument is a `null` value but no argument is a `missing` value, + * `missing` if any element in the input array is `missing`, + * `null` if any element in the input array is `null` but no element in the input array is `missing`, + * any other non-array input value or non-integer element in the input array will cause a type error. + + * Example: + + string_concat(["ASTERIX", " ", "ROCKS!"]); + + + * The expected result is: + + "ASTERIX ROCKS!" + + +### string_join ### + * Syntax: + + string_join(array, string) + + * Joins an array or multiset of strings `array` with the given separator `string` into a single string. + * Arguments: + * `array` : an `array` or `multiset` of strings (could be `null`) to be joined. + * `string` : a `string` to serve as the separator. + * Return Value: + * the joined `string`, + * `missing` if any argument is a `missing` value, + * `null` if any argument is a `null` value but no argument is a `missing` value, + * `missing` if the first argument array contains a `missing`, + * `null` if the first argument array contains a `null` but does not contain a `missing`, + * a type error will be raised if: + * the first argument is any other non-array value, or contains any other non-string value, + * or, the second argument is any other non-string value. + + * Example: + + string_join(["ASTERIX", "ROCKS~"], "!! "); + + + * The expected result is: + + "ASTERIX!! ROCKS~" + + +### string_to_codepoint ### + * Syntax: + + string_to_codepoint(string) + + * Converts the string `string` to its code_based representation. + * Arguments: + * `string` : a `string` that will be converted. + * Return Value: + * an `array` of the code points for the string `string`, + * `missing` if the argument is a `missing` value, + * `null` if the argument is a `null` value, + * any other non-string input value will cause a type error. 
+ + * Example: + + string_to_codepoint("Hello ASTERIX!"); + + + * The expected result is: + + [ 72, 101, 108, 108, 111, 32, 65, 83, 84, 69, 82, 73, 88, 33 ] + + +### codepoint_to_string ### + * Syntax: + + codepoint_to_string(array) + + * Converts the ordered code_based representation `array` to the corresponding string. + * Arguments: + * `array` : an `array` of integer code_points. + * Return Value: + * a `string` representation of `array`. + * `missing` if the argument is a `missing` value, + * `null` if the argument is a `null` value, + * `missing` if any element in the input array is `missing`, + * `null` if any element in the input array is `null` but no element in the input array is `missing`, + * any other non-array input value or non-integer element in the input array will cause a type error. + + * Example: + + codepoint_to_string([72, 101, 108, 108, 111, 32, 65, 83, 84, 69, 82, 73, 88, 33]); + + + * The expected result is: + + "Hello ASTERIX!" + + +### substring_before ### + * Syntax: + + substring_before(string, string_pattern) + + * Returns the substring from the given string `string` before the given pattern `string_pattern`. + * Arguments: + * `string` : a `string` to be extracted. + * `string_pattern` : a `string` pattern to be searched. + * Return Value: + * a `string` that represents the substring, + * `missing` if any argument is a `missing` value, + * `null` if any argument is a `null` value but no argument is a `missing` value, + * any other non-string input value will cause a type error. + + * Example: + + substring_before(" like x-phone", "x-phone"); + + + * The expected result is: + + " like " + + +### substring_after ### + * Syntax: + + substring_after(string, string_pattern); + + * Returns the substring from the given string `string` after the given pattern `string_pattern`. + * Arguments: + * `string` : a `string` to be extracted. + * `string_pattern` : a `string` pattern to be searched. 
+ * Return Value: + * a `string` that represents the substring, + * `missing` if any argument is a `missing` value, + * `null` if any argument is a `null` value but no argument is a `missing` value, + * any other non-string input value will cause a type error. + + + * Example: + + substring_after(" like x-phone", "xph"); + + + * The expected result is: + + "one" + diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringPositionDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringPositionDescriptor.java index 523207c..6f4d116 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringPositionDescriptor.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringPositionDescriptor.java @@ -54,7 +54,8 @@ @Override protected int compute(UTF8StringPointable left, UTF8StringPointable right) throws IOException { - return UTF8StringPointable.find(left, right, false); + int pos = UTF8StringPointable.find(left, right, false); + return pos < 0 ? pos : pos + 1; } }; } diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpPositionDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpPositionDescriptor.java index a41fb56..1bb0fdc 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpPositionDescriptor.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpPositionDescriptor.java @@ -58,7 +58,8 @@ protected int compute(UTF8StringPointable srcPtr, UTF8StringPointable patternPtr) throws IOException { matcher.build(srcPtr, patternPtr); - return matcher.postion(); + int pos = matcher.postion(); + return pos < 0 ? 
pos : pos + 1; } }; } diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpPositionWithFlagDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpPositionWithFlagDescriptor.java index ea04896..35eb3b1 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpPositionWithFlagDescriptor.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpPositionWithFlagDescriptor.java @@ -58,7 +58,8 @@ protected int compute(UTF8StringPointable srcPtr, UTF8StringPointable patternPtr, UTF8StringPointable flagPtr) throws IOException { matcher.build(srcPtr, patternPtr, flagPtr); - return matcher.postion(); + int pos = matcher.postion(); + return pos < 0 ? pos : pos + 1; } }; } -- To view, visit https://asterix-gerrit.ics.uci.edu/1888 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: merged Gerrit-Change-Id: I415e0ba0ac4dfceba95913ff27af95c0c1af5fee Gerrit-PatchSet: 5 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Yingyi Bu <[email protected]> Gerrit-Reviewer: Dmitry Lychagin <[email protected]> Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Yingyi Bu <[email protected]>
