Michael Blow has uploaded a new change for review. https://asterix-gerrit.ics.uci.edu/2914
Change subject: [ASTERIXDB-2442][FUN] substring() should return NULL if the operation cannot be performed ...................................................................... [ASTERIXDB-2442][FUN] substring() should return NULL if the operation cannot be performed - user model changes: yes - storage format changes: no - interface changes: no Details: - substring() should return NULL if starting offset is out of bounds for given string or length is negative Change-Id: Ia43a4266a2406ebba65809d527de896ad11fdffa --- M asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substr04/substr04.3.query.aql M asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substring2-4/substring2-4.3.query.aql M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr04/substr04.3.query.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substring2-4/substring2-4.3.query.sqlpp M asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr01/substr01.1.adm M asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr04/substr04.1.adm M asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-3/substring2-3.1.adm M asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-4/substring2-4.1.adm M asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md M asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java M asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java M asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/SubstringTypeComputer.java M asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/Substring2Descriptor.java M asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/SubstringDescriptor.java M hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java 15 files changed, 76 insertions(+), 53 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/14/2914/1 diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substr04/substr04.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substr04/substr04.3.query.aql index 1c31ea0..f792e7f 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substr04/substr04.3.query.aql +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substr04/substr04.3.query.aql @@ -30,6 +30,9 @@ substring("ABCD", 0, 4), substring("UC Irvine", 3, string-length("UC Irvine") - 3), substring("UC Irvine", 0, string-length("UC Irvine")), -substring(substring("UC Irvine", 3), 0, string-length("Irvine")) +substring(substring("UC Irvine", 3), 0, string-length("Irvine")), +substring('ABCD',-3,2), +substring('ABCD',-10,1), +substring('ABCD',1,-1) ] return $a diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substring2-4/substring2-4.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substring2-4/substring2-4.3.query.aql index ecfcd94..b0e8697 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substring2-4/substring2-4.3.query.aql +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substring2-4/substring2-4.3.query.aql @@ -19,4 +19,5 @@ use dataverse test; let $c1 := substring("HEllow",-3) -return {"result1": $c1} +let $c2 := substring("HEllow",-7) +return {"result1": $c1, "result2": $c2} diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr04/substr04.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr04/substr04.3.query.sqlpp index e1ef68b..d9f936f 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr04/substr04.3.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr04/substr04.3.query.sqlpp @@ -32,7 +32,10 @@ substring('ABCD',0,4), substring('UC Irvine',3,(`string-length`('UC Irvine') - 3)), substring('UC Irvine',0,`string-length`('UC Irvine')), - substring(substring('UC Irvine',3),0,`string-length`('Irvine')) + substring(substring('UC Irvine',3),0,`string-length`('Irvine')), + substring('ABCD',-3,2), + substring('ABCD',-10,1), + substring('ABCD',1,-1) ] as a ; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substring2-4/substring2-4.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substring2-4/substring2-4.3.query.sqlpp index ae943d4..6b63289 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substring2-4/substring2-4.3.query.sqlpp +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substring2-4/substring2-4.3.query.sqlpp @@ -19,5 +19,7 @@ use test; - -{'result1':test.substring('HEllow',-3)}; +{ + 'result1':substring('HEllow',-3), + 'result2':substring('HEllow',-7) +}; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr01/substr01.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr01/substr01.1.adm index 326e22f..c8cbcf0 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr01/substr01.1.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr01/substr01.1.adm @@ -1 +1 @@ -{ "str2": "ld", "str4": "g", "str6": "", "str8": "This is a test string", "str10": "string", "str13": "gThis is a another test string", "str14": "Irvine" } +{ "str2": "ld", "str4": "g", "str6": null, "str8": "This is a test string", "str10": "string", "str13": "gThis is a another test string", "str14": "Irvine" } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr04/substr04.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr04/substr04.1.adm index 74bd0d6..4661f40 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr04/substr04.1.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr04/substr04.1.adm @@ -6,3 +6,6 @@ "Irvine" "UC Irvine" "Irvine" +"BC" +null +null \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-3/substring2-3.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-3/substring2-3.1.adm index 04393a4..f60907d 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-3/substring2-3.1.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-3/substring2-3.1.adm @@ -1 +1 @@ -{ "result1": "" } +{ "result1": null } diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-4/substring2-4.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-4/substring2-4.1.adm index 197a7af..8355b34 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-4/substring2-4.1.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-4/substring2-4.1.adm @@ -1 +1 @@ -{ "result1": "low" } +{ "result1": "low", "result2": null } diff --git a/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md b/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md index 1c2fd7e..a7b9bbd 100644 --- a/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md +++ b/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md @@ -536,7 +536,8 @@ * Return Value: * a `string` that represents the substring, * `missing` if any argument is a `missing` value, - * `null` if any argument is a `null` value but no argument is a `missing` value, + * `null` if any argument is a `null` value but no argument is a `missing` value, or if the substring could not + be obtained because the starting offset is not within string bounds or `length` is negative. * a type error will be raised if: * the first argument is any other non-string value, * or, the second argument is not a `tinyint`, `smallint`, `integer`, or `bigint`, diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java index 1df617e..d8eeefc 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java @@ -1252,7 +1252,7 @@ addFunction(STRING_TO_CODEPOINT, StringToInt64ListTypeComputer.INSTANCE, true); addFunction(CODEPOINT_TO_STRING, AStringTypeComputer.INSTANCE, true); addFunction(STRING_CONCAT, AStringTypeComputer.INSTANCE, true); - addFunction(SUBSTRING2, StringIntToStringTypeComputer.INSTANCE, true); + addFunction(SUBSTRING2, StringIntToStringTypeComputer.INSTANCE_NULLABLE, true); addFunction(STRING_LENGTH, UnaryStringInt64TypeComputer.INSTANCE, true); addFunction(STRING_LOWERCASE, StringStringTypeComputer.INSTANCE, true); addFunction(STRING_UPPERCASE, StringStringTypeComputer.INSTANCE, true); diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java index 0f376ba..b2c95c5 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java @@ -24,6 +24,7 @@ import org.apache.asterix.om.exceptions.TypeMismatchException; import org.apache.asterix.om.typecomputer.base.AbstractResultTypeComputer; import org.apache.asterix.om.types.ATypeTag; +import org.apache.asterix.om.types.AUnionType; import org.apache.asterix.om.types.BuiltinType; import org.apache.asterix.om.types.IAType; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; @@ -31,13 +32,16 @@ import org.apache.hyracks.api.exceptions.SourceLocation; public class StringIntToStringTypeComputer extends AbstractResultTypeComputer { - public static final StringIntToStringTypeComputer INSTANCE = new StringIntToStringTypeComputer(0, 0, 1, 1); + public static final StringIntToStringTypeComputer INSTANCE = new StringIntToStringTypeComputer(0, 0, 1, 1, false); + + public static final StringIntToStringTypeComputer INSTANCE_NULLABLE = + new StringIntToStringTypeComputer(0, 0, 1, 1, true); public static final StringIntToStringTypeComputer INSTANCE_TRIPLE_STRING = - new StringIntToStringTypeComputer(0, 2, 3, 3); + new StringIntToStringTypeComputer(0, 2, 3, 3, false); public static final StringIntToStringTypeComputer INSTANCE_STRING_REGEXP_REPLACE_WITH_FLAG = - new StringIntToStringTypeComputer(0, 3, 3, 3); + new StringIntToStringTypeComputer(0, 3, 3, 3, false); private final int stringArgIdxMin; @@ -47,11 +51,15 @@ private final int intArgIdxMax; - public StringIntToStringTypeComputer(int stringArgIdxMin, int stringArgIdxMax, int intArgIdxMin, int intArgIdxMax) { + private final boolean nullable; + + public StringIntToStringTypeComputer(int stringArgIdxMin, int stringArgIdxMax, int intArgIdxMin, int intArgIdxMax, + boolean nullable) { this.stringArgIdxMin = stringArgIdxMin; this.stringArgIdxMax = stringArgIdxMax; this.intArgIdxMin = intArgIdxMin; this.intArgIdxMax = intArgIdxMax; + this.nullable = nullable; } @Override @@ -84,7 +92,11 @@ @Override public IAType getResultType(ILogicalExpression expr, IAType... types) throws AlgebricksException { - return BuiltinType.ASTRING; + IAType resultType = BuiltinType.ASTRING; + if (nullable) { + resultType = AUnionType.createNullableType(resultType); + } + return resultType; } private ATypeTag[] getExpectedTypes(boolean expectedStringType, boolean expectedIntType) { diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/SubstringTypeComputer.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/SubstringTypeComputer.java index a1d46f0..aa31942 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/SubstringTypeComputer.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/SubstringTypeComputer.java @@ -21,6 +21,7 @@ import org.apache.asterix.om.exceptions.TypeMismatchException; import org.apache.asterix.om.typecomputer.base.AbstractResultTypeComputer; import org.apache.asterix.om.types.ATypeTag; +import org.apache.asterix.om.types.AUnionType; import org.apache.asterix.om.types.BuiltinType; import org.apache.asterix.om.types.IAType; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; @@ -53,6 +54,6 @@ @Override public IAType getResultType(ILogicalExpression expr, IAType... types) throws AlgebricksException { - return BuiltinType.ASTRING; + return AUnionType.createNullableType(BuiltinType.ASTRING); } } diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/Substring2Descriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/Substring2Descriptor.java index bda5b2c..52ca6cd 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/Substring2Descriptor.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/Substring2Descriptor.java @@ -98,19 +98,18 @@ array.reset(); try { int actualStart = start >= 0 ? start - baseOffset : string.getStringLength() + start; - UTF8StringPointable.substr(string, actualStart, Integer.MAX_VALUE, builder, array); - } catch (StringIndexOutOfBoundsException e) { - throw new RuntimeDataException(ErrorCode.OUT_OF_BOUND, getIdentifier(), 1, start); + boolean success = + UTF8StringPointable.substr(string, actualStart, Integer.MAX_VALUE, builder, array); + if (success) { + out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG); + out.write(array.getByteArray(), 0, array.getLength()); + result.set(resultStorage); + } else { + PointableHelper.setNull(result); + } } catch (IOException e) { throw HyracksDataException.create(e); } - try { - out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG); - out.write(array.getByteArray(), 0, array.getLength()); - } catch (IOException e) { - throw HyracksDataException.create(e); - } - result.set(resultStorage); } }; } diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/SubstringDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/SubstringDescriptor.java index dd1bd3d..ab0f520 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/SubstringDescriptor.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/SubstringDescriptor.java @@ -110,20 +110,17 @@ array.reset(); try { int actualStart = start >= 0 ? start - baseOffset : string.getStringLength() + start; - UTF8StringPointable.substr(string, actualStart, len, builder, array); - } catch (StringIndexOutOfBoundsException e) { - throw new RuntimeDataException(ErrorCode.OUT_OF_BOUND, getIdentifier(), 1, start + len - 1); + boolean success = UTF8StringPointable.substr(string, actualStart, len, builder, array); + if (success) { + out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG); + out.write(array.getByteArray(), 0, array.getLength()); + result.set(resultStorage); + } else { + PointableHelper.setNull(result); + } } catch (IOException e) { throw HyracksDataException.create(e); } - - try { - out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG); - out.write(array.getByteArray(), 0, array.getLength()); - } catch (IOException e) { - throw HyracksDataException.create(e); - } - result.set(resultStorage); } }; } diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java index fb0c814..f683615 100644 --- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java +++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java @@ -328,21 +328,25 @@ builder.finish(); } - public void substr(int charOffset, int charLength, UTF8StringBuilder builder, GrowableArray out) + /** + * @return {@code true} if substring was successfully written into given {@code out}, or + * {@code false} if substring could not be obtained ({@code charOffset} or {@code charLength} + * are less than 0 or starting position is greater than the input length) + */ + public boolean substr(int charOffset, int charLength, UTF8StringBuilder builder, GrowableArray out) throws IOException { - substr(this, charOffset, charLength, builder, out); + return substr(this, charOffset, charLength, builder, out); } - public static void substr(UTF8StringPointable src, int charOffset, int charLength, UTF8StringBuilder builder, + /** + * @return {@code true} if substring was successfully written into given {@code out}, or + * {@code false} if substring could not be obtained ({@code charOffset} or {@code charLength} + * are less than 0 or starting position is greater than the input length) + */ + public static boolean substr(UTF8StringPointable src, int charOffset, int charLength, UTF8StringBuilder builder, GrowableArray out) throws IOException { - // Really don't understand why we need to support the charOffset < 0 case. - // At this time, usually there is mistake on user side, we'd better give him a warning. - // assert charOffset >= 0; - if (charOffset < 0) { - charOffset = 0; - } - if (charLength < 0) { - charLength = 0; + if (charOffset < 0 || charLength < 0) { + return false; } int utfLen = src.getUTF8Length(); @@ -353,11 +357,7 @@ chIdx++; } if (byteIdx >= utfLen) { - // Again, why do we tolerant this kind of mistakes? - // throw new StringIndexOutOfBoundsException(charOffset); - builder.reset(out, 0); - builder.finish(); - return; + return false; } builder.reset(out, Math.min(utfLen - byteIdx, (int) (charLength * 1.0 * byteIdx / chIdx))); @@ -368,6 +368,7 @@ byteIdx += src.charSize(src.getMetaDataLength() + byteIdx); } builder.finish(); + return true; } public void substrBefore(UTF8StringPointable match, UTF8StringBuilder builder, GrowableArray out) -- To view, visit https://asterix-gerrit.ics.uci.edu/2914 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ia43a4266a2406ebba65809d527de896ad11fdffa Gerrit-PatchSet: 1 Gerrit-Project: asterixdb Gerrit-Branch: stabilization-f69489 Gerrit-Owner: Michael Blow <mb...@apache.org>