Michael Blow has uploaded a new change for review.

  https://asterix-gerrit.ics.uci.edu/2914

Change subject: [ASTERIXDB-2442][FUN] substring() should return NULL if the 
operation cannot be performed
......................................................................

[ASTERIXDB-2442][FUN] substring() should return NULL if the operation cannot be 
performed

- user model changes: yes
- storage format changes: no
- interface changes: no

Details:
- substring() should return NULL if starting offset is out of bounds
      for given string or length is negative

Change-Id: Ia43a4266a2406ebba65809d527de896ad11fdffa
---
M 
asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substr04/substr04.3.query.aql
M 
asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substring2-4/substring2-4.3.query.aql
M 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr04/substr04.3.query.sqlpp
M 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substring2-4/substring2-4.3.query.sqlpp
M 
asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr01/substr01.1.adm
M 
asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr04/substr04.1.adm
M 
asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-3/substring2-3.1.adm
M 
asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-4/substring2-4.1.adm
M asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md
M 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
M 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java
M 
asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/SubstringTypeComputer.java
M 
asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/Substring2Descriptor.java
M 
asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/SubstringDescriptor.java
M 
hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
15 files changed, 76 insertions(+), 53 deletions(-)


  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/14/2914/1

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substr04/substr04.3.query.aql
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substr04/substr04.3.query.aql
index 1c31ea0..f792e7f 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substr04/substr04.3.query.aql
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substr04/substr04.3.query.aql
@@ -30,6 +30,9 @@
 substring("ABCD", 0, 4),
 substring("UC Irvine", 3, string-length("UC Irvine") - 3),
 substring("UC Irvine", 0, string-length("UC Irvine")),
-substring(substring("UC Irvine", 3), 0, string-length("Irvine"))
+substring(substring("UC Irvine", 3), 0, string-length("Irvine")),
+substring('ABCD',-3,2),
+substring('ABCD',-10,1),
+substring('ABCD',1,-1)
 ]
 return $a
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substring2-4/substring2-4.3.query.aql
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substring2-4/substring2-4.3.query.aql
index ecfcd94..b0e8697 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substring2-4/substring2-4.3.query.aql
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substring2-4/substring2-4.3.query.aql
@@ -19,4 +19,5 @@
 use dataverse test;
 
 let $c1 := substring("HEllow",-3)
-return {"result1": $c1}
+let $c2 := substring("HEllow",-7)
+return {"result1": $c1, "result2": $c2}
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr04/substr04.3.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr04/substr04.3.query.sqlpp
index e1ef68b..d9f936f 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr04/substr04.3.query.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr04/substr04.3.query.sqlpp
@@ -32,7 +32,10 @@
     substring('ABCD',0,4),
     substring('UC Irvine',3,(`string-length`('UC Irvine') - 3)),
     substring('UC Irvine',0,`string-length`('UC Irvine')),
-    substring(substring('UC Irvine',3),0,`string-length`('Irvine'))
+    substring(substring('UC Irvine',3),0,`string-length`('Irvine')),
+    substring('ABCD',-3,2),
+    substring('ABCD',-10,1),
+    substring('ABCD',1,-1)
   ] as a
 
 ;
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substring2-4/substring2-4.3.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substring2-4/substring2-4.3.query.sqlpp
index ae943d4..6b63289 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substring2-4/substring2-4.3.query.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substring2-4/substring2-4.3.query.sqlpp
@@ -19,5 +19,7 @@
 
 use test;
 
-
-{'result1':test.substring('HEllow',-3)};
+{
+  'result1':substring('HEllow',-3),
+  'result2':substring('HEllow',-7)
+};
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr01/substr01.1.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr01/substr01.1.adm
index 326e22f..c8cbcf0 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr01/substr01.1.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr01/substr01.1.adm
@@ -1 +1 @@
-{ "str2": "ld", "str4": "g", "str6": "", "str8": "This is a test string", 
"str10": "string", "str13": "gThis is a another test string", "str14": "Irvine" 
}
+{ "str2": "ld", "str4": "g", "str6": null, "str8": "This is a test string", 
"str10": "string", "str13": "gThis is a another test string", "str14": "Irvine" 
}
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr04/substr04.1.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr04/substr04.1.adm
index 74bd0d6..4661f40 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr04/substr04.1.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr04/substr04.1.adm
@@ -6,3 +6,6 @@
 "Irvine"
 "UC Irvine"
 "Irvine"
+"BC"
+null
+null
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-3/substring2-3.1.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-3/substring2-3.1.adm
index 04393a4..f60907d 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-3/substring2-3.1.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-3/substring2-3.1.adm
@@ -1 +1 @@
-{ "result1": "" }
+{ "result1": null }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-4/substring2-4.1.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-4/substring2-4.1.adm
index 197a7af..8355b34 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-4/substring2-4.1.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-4/substring2-4.1.adm
@@ -1 +1 @@
-{ "result1": "low" }
+{ "result1": "low", "result2": null }
diff --git 
a/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md 
b/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md
index 1c2fd7e..a7b9bbd 100644
--- a/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md
+++ b/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md
@@ -536,7 +536,8 @@
  * Return Value:
     * a `string` that represents the substring,
     * `missing` if any argument is a `missing` value,
-    * `null` if any argument is a `null` value but no argument is a `missing` 
value,
+    * `null` if any argument is a `null` value but no argument is a `missing` 
value, or if the substring could not
+             be obtained because the starting offset is not within string 
bounds or `length` is negative.
     * a type error will be raised if:
         * the first argument is any other non-string value,
         * or, the second argument is not a `tinyint`, `smallint`, `integer`, 
or `bigint`,
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
index 1df617e..d8eeefc 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
@@ -1252,7 +1252,7 @@
         addFunction(STRING_TO_CODEPOINT, 
StringToInt64ListTypeComputer.INSTANCE, true);
         addFunction(CODEPOINT_TO_STRING, AStringTypeComputer.INSTANCE, true);
         addFunction(STRING_CONCAT, AStringTypeComputer.INSTANCE, true);
-        addFunction(SUBSTRING2, StringIntToStringTypeComputer.INSTANCE, true);
+        addFunction(SUBSTRING2, 
StringIntToStringTypeComputer.INSTANCE_NULLABLE, true);
         addFunction(STRING_LENGTH, UnaryStringInt64TypeComputer.INSTANCE, 
true);
         addFunction(STRING_LOWERCASE, StringStringTypeComputer.INSTANCE, true);
         addFunction(STRING_UPPERCASE, StringStringTypeComputer.INSTANCE, true);
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java
index 0f376ba..b2c95c5 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java
@@ -24,6 +24,7 @@
 import org.apache.asterix.om.exceptions.TypeMismatchException;
 import org.apache.asterix.om.typecomputer.base.AbstractResultTypeComputer;
 import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.AUnionType;
 import org.apache.asterix.om.types.BuiltinType;
 import org.apache.asterix.om.types.IAType;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
@@ -31,13 +32,16 @@
 import org.apache.hyracks.api.exceptions.SourceLocation;
 
 public class StringIntToStringTypeComputer extends AbstractResultTypeComputer {
-    public static final StringIntToStringTypeComputer INSTANCE = new 
StringIntToStringTypeComputer(0, 0, 1, 1);
+    public static final StringIntToStringTypeComputer INSTANCE = new 
StringIntToStringTypeComputer(0, 0, 1, 1, false);
+
+    public static final StringIntToStringTypeComputer INSTANCE_NULLABLE =
+            new StringIntToStringTypeComputer(0, 0, 1, 1, true);
 
     public static final StringIntToStringTypeComputer INSTANCE_TRIPLE_STRING =
-            new StringIntToStringTypeComputer(0, 2, 3, 3);
+            new StringIntToStringTypeComputer(0, 2, 3, 3, false);
 
     public static final StringIntToStringTypeComputer 
INSTANCE_STRING_REGEXP_REPLACE_WITH_FLAG =
-            new StringIntToStringTypeComputer(0, 3, 3, 3);
+            new StringIntToStringTypeComputer(0, 3, 3, 3, false);
 
     private final int stringArgIdxMin;
 
@@ -47,11 +51,15 @@
 
     private final int intArgIdxMax;
 
-    public StringIntToStringTypeComputer(int stringArgIdxMin, int 
stringArgIdxMax, int intArgIdxMin, int intArgIdxMax) {
+    private final boolean nullable;
+
+    public StringIntToStringTypeComputer(int stringArgIdxMin, int 
stringArgIdxMax, int intArgIdxMin, int intArgIdxMax,
+            boolean nullable) {
         this.stringArgIdxMin = stringArgIdxMin;
         this.stringArgIdxMax = stringArgIdxMax;
         this.intArgIdxMin = intArgIdxMin;
         this.intArgIdxMax = intArgIdxMax;
+        this.nullable = nullable;
     }
 
     @Override
@@ -84,7 +92,11 @@
 
     @Override
     public IAType getResultType(ILogicalExpression expr, IAType... types) 
throws AlgebricksException {
-        return BuiltinType.ASTRING;
+        IAType resultType = BuiltinType.ASTRING;
+        if (nullable) {
+            resultType = AUnionType.createNullableType(resultType);
+        }
+        return resultType;
     }
 
     private ATypeTag[] getExpectedTypes(boolean expectedStringType, boolean 
expectedIntType) {
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/SubstringTypeComputer.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/SubstringTypeComputer.java
index a1d46f0..aa31942 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/SubstringTypeComputer.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/SubstringTypeComputer.java
@@ -21,6 +21,7 @@
 import org.apache.asterix.om.exceptions.TypeMismatchException;
 import org.apache.asterix.om.typecomputer.base.AbstractResultTypeComputer;
 import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.AUnionType;
 import org.apache.asterix.om.types.BuiltinType;
 import org.apache.asterix.om.types.IAType;
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
@@ -53,6 +54,6 @@
 
     @Override
     public IAType getResultType(ILogicalExpression expr, IAType... types) 
throws AlgebricksException {
-        return BuiltinType.ASTRING;
+        return AUnionType.createNullableType(BuiltinType.ASTRING);
     }
 }
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/Substring2Descriptor.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/Substring2Descriptor.java
index bda5b2c..52ca6cd 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/Substring2Descriptor.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/Substring2Descriptor.java
@@ -98,19 +98,18 @@
                         array.reset();
                         try {
                             int actualStart = start >= 0 ? start - baseOffset 
: string.getStringLength() + start;
-                            UTF8StringPointable.substr(string, actualStart, 
Integer.MAX_VALUE, builder, array);
-                        } catch (StringIndexOutOfBoundsException e) {
-                            throw new 
RuntimeDataException(ErrorCode.OUT_OF_BOUND, getIdentifier(), 1, start);
+                            boolean success =
+                                    UTF8StringPointable.substr(string, 
actualStart, Integer.MAX_VALUE, builder, array);
+                            if (success) {
+                                
out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
+                                out.write(array.getByteArray(), 0, 
array.getLength());
+                                result.set(resultStorage);
+                            } else {
+                                PointableHelper.setNull(result);
+                            }
                         } catch (IOException e) {
                             throw HyracksDataException.create(e);
                         }
-                        try {
-                            out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
-                            out.write(array.getByteArray(), 0, 
array.getLength());
-                        } catch (IOException e) {
-                            throw HyracksDataException.create(e);
-                        }
-                        result.set(resultStorage);
                     }
                 };
             }
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/SubstringDescriptor.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/SubstringDescriptor.java
index dd1bd3d..ab0f520 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/SubstringDescriptor.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/SubstringDescriptor.java
@@ -110,20 +110,17 @@
                         array.reset();
                         try {
                             int actualStart = start >= 0 ? start - baseOffset 
: string.getStringLength() + start;
-                            UTF8StringPointable.substr(string, actualStart, 
len, builder, array);
-                        } catch (StringIndexOutOfBoundsException e) {
-                            throw new 
RuntimeDataException(ErrorCode.OUT_OF_BOUND, getIdentifier(), 1, start + len - 
1);
+                            boolean success = 
UTF8StringPointable.substr(string, actualStart, len, builder, array);
+                            if (success) {
+                                
out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
+                                out.write(array.getByteArray(), 0, 
array.getLength());
+                                result.set(resultStorage);
+                            } else {
+                                PointableHelper.setNull(result);
+                            }
                         } catch (IOException e) {
                             throw HyracksDataException.create(e);
                         }
-
-                        try {
-                            out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
-                            out.write(array.getByteArray(), 0, 
array.getLength());
-                        } catch (IOException e) {
-                            throw HyracksDataException.create(e);
-                        }
-                        result.set(resultStorage);
                     }
                 };
             }
diff --git 
a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
 
b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
index fb0c814..f683615 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
@@ -328,21 +328,25 @@
         builder.finish();
     }
 
-    public void substr(int charOffset, int charLength, UTF8StringBuilder 
builder, GrowableArray out)
+    /**
+     * @return {@code true} if substring was successfully written into given 
{@code out}, or
+     *         {@code false} if substring could not be obtained ({@code 
charOffset} or {@code charLength}
+     *         are less than 0 or starting position is greater than the input 
length)
+     */
+    public boolean substr(int charOffset, int charLength, UTF8StringBuilder 
builder, GrowableArray out)
             throws IOException {
-        substr(this, charOffset, charLength, builder, out);
+        return substr(this, charOffset, charLength, builder, out);
     }
 
-    public static void substr(UTF8StringPointable src, int charOffset, int 
charLength, UTF8StringBuilder builder,
+    /**
+     * @return {@code true} if substring was successfully written into given 
{@code out}, or
+     *         {@code false} if substring could not be obtained ({@code 
charOffset} or {@code charLength}
+     *         are less than 0 or starting position is greater than the input 
length)
+     */
+    public static boolean substr(UTF8StringPointable src, int charOffset, int 
charLength, UTF8StringBuilder builder,
             GrowableArray out) throws IOException {
-        // Really don't understand why we need to support the charOffset < 0 
case.
-        // At this time, usually there is mistake on user side, we'd better 
give him a warning.
-        // assert charOffset >= 0;
-        if (charOffset < 0) {
-            charOffset = 0;
-        }
-        if (charLength < 0) {
-            charLength = 0;
+        if (charOffset < 0 || charLength < 0) {
+            return false;
         }
 
         int utfLen = src.getUTF8Length();
@@ -353,11 +357,7 @@
             chIdx++;
         }
         if (byteIdx >= utfLen) {
-            // Again, why do we tolerant this kind of mistakes?
-            // throw new StringIndexOutOfBoundsException(charOffset);
-            builder.reset(out, 0);
-            builder.finish();
-            return;
+            return false;
         }
 
         builder.reset(out, Math.min(utfLen - byteIdx, (int) (charLength * 1.0 
* byteIdx / chIdx)));
@@ -368,6 +368,7 @@
             byteIdx += src.charSize(src.getMetaDataLength() + byteIdx);
         }
         builder.finish();
+        return true;
     }
 
     public void substrBefore(UTF8StringPointable match, UTF8StringBuilder 
builder, GrowableArray out)

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/2914
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia43a4266a2406ebba65809d527de896ad11fdffa
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: stabilization-f69489
Gerrit-Owner: Michael Blow <mb...@apache.org>

Reply via email to