[ASTERIXDB-2449][FUN] Incorrect NULL/MISSING handling by concat functions

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- Infer function return type as unknownable if
  its input list item can be NULL/MISSING
- Always return MISSING if there is a MISSING argument

Change-Id: Idc364b061f3e74bdc9d7715bbadedc957e9e8223
Reviewed-on: https://asterix-gerrit.ics.uci.edu/2946
Sonar-Qube: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
Reviewed-by: Till Westmann <ti...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/asterixdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/asterixdb/commit/bcc93471
Tree: http://git-wip-us.apache.org/repos/asf/asterixdb/tree/bcc93471
Diff: http://git-wip-us.apache.org/repos/asf/asterixdb/diff/bcc93471

Branch: refs/heads/master
Commit: bcc93471dfcf6cb2947376fec7336684efd91a8b
Parents: 059ab96
Author: Dmitry Lychagin <dmitry.lycha...@couchbase.com>
Authored: Thu Aug 30 19:30:39 2018 -0700
Committer: Dmitry Lychagin <dmitry.lycha...@couchbase.com>
Committed: Fri Aug 31 10:15:40 2018 -0700

----------------------------------------------------------------------
 .../binary/concat2/concat2.1.query.sqlpp        | 25 ++++++++
 .../string-concat2/string-concat2.1.query.sqlpp | 25 ++++++++
 .../string-concat2/string-concat2.2.query.sqlpp | 25 ++++++++
 .../results/binary/concat2/concat2.1.adm        |  1 +
 .../string/string-concat2/string-concat2.1.adm  |  1 +
 .../string/string-concat2/string-concat2.2.adm  |  1 +
 .../resources/runtimets/testsuite_sqlpp.xml     | 10 +++
 .../asterix/om/functions/BuiltinFunctions.java  |  5 +-
 .../typecomputer/impl/ConcatTypeComputer.java   | 64 ++++++++++++++++++++
 .../functions/StringConcatDescriptor.java       | 11 +++-
 .../binary/BinaryConcatDescriptor.java          | 27 ++++-----
 11 files changed, 175 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/binary/concat2/concat2.1.query.sqlpp
----------------------------------------------------------------------
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/binary/concat2/concat2.1.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/binary/concat2/concat2.1.query.sqlpp
new file mode 100644
index 0000000..782cce2
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/binary/concat2/concat2.1.query.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+{
+  't1': binary_concat([null,hex('aa')]),
+  't2': binary_concat([hex('aa'),null]),
+  't3': binary_concat([null,missing,hex('aa')]),
+  't4': binary_concat([hex('aa'),null,missing])
+};

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.1.query.sqlpp
----------------------------------------------------------------------
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.1.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.1.query.sqlpp
new file mode 100644
index 0000000..3e48631
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.1.query.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+{
+  't1': string_concat([null,'aa']),
+  't2': string_concat(['aa',null]),
+  't3': string_concat([null,missing,'aa']),
+  't4': string_concat(['aa',null,missing])
+};

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.2.query.sqlpp
----------------------------------------------------------------------
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.2.query.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.2.query.sqlpp
new file mode 100644
index 0000000..12208a7
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.2.query.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+{
+  't1': null || 'aa',
+  't2': 'aa' || null,
+  't3': null || missing || 'aa',
+  't4': 'aa' || null || missing
+};

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-app/src/test/resources/runtimets/results/binary/concat2/concat2.1.adm
----------------------------------------------------------------------
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/binary/concat2/concat2.1.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/binary/concat2/concat2.1.adm
new file mode 100644
index 0000000..770f2cd
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/binary/concat2/concat2.1.adm
@@ -0,0 +1 @@
+{ "t1": null, "t2": null }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.1.adm
----------------------------------------------------------------------
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.1.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.1.adm
new file mode 100644
index 0000000..770f2cd
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.1.adm
@@ -0,0 +1 @@
+{ "t1": null, "t2": null }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.2.adm
----------------------------------------------------------------------
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.2.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.2.adm
new file mode 100644
index 0000000..770f2cd
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.2.adm
@@ -0,0 +1 @@
+{ "t1": null, "t2": null }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
----------------------------------------------------------------------
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index 904dc61..04b484e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -6619,6 +6619,11 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="string">
+      <compilation-unit name="string-concat2">
+        <output-dir compare="Text">string-concat2</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="string">
       <compilation-unit name="string-equal1">
         <output-dir compare="Text">string-equal1</output-dir>
       </compilation-unit>
@@ -9837,6 +9842,11 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="binary">
+      <compilation-unit name="concat2">
+        <output-dir compare="Text">concat2</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="binary">
       <compilation-unit name="subbinary">
         <output-dir compare="Text">subbinary</output-dir>
       </compilation-unit>

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
----------------------------------------------------------------------
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
index d8eeefc..49db062 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
@@ -68,6 +68,7 @@ import 
org.apache.asterix.om.typecomputer.impl.ClosedRecordConstructorResultType
 import org.apache.asterix.om.typecomputer.impl.CollectionMemberResultType;
 import 
org.apache.asterix.om.typecomputer.impl.CollectionToSequenceTypeComputer;
 import org.apache.asterix.om.typecomputer.impl.ConcatNonNullTypeComputer;
+import org.apache.asterix.om.typecomputer.impl.ConcatTypeComputer;
 import org.apache.asterix.om.typecomputer.impl.DoubleIfTypeComputer;
 import org.apache.asterix.om.typecomputer.impl.FieldAccessByIndexResultType;
 import org.apache.asterix.om.typecomputer.impl.FieldAccessByNameResultType;
@@ -1240,7 +1241,7 @@ public class BuiltinFunctions {
         addFunction(BINARY_LENGTH, UnaryBinaryInt64TypeComputer.INSTANCE, 
true);
         addFunction(PARSE_BINARY, ABinaryTypeComputer.INSTANCE, true);
         addFunction(PRINT_BINARY, AStringTypeComputer.INSTANCE, true);
-        addFunction(BINARY_CONCAT, ABinaryTypeComputer.INSTANCE, true);
+        addFunction(BINARY_CONCAT, ConcatTypeComputer.INSTANCE_BINARY, true);
         addFunction(SUBBINARY_FROM, ABinaryTypeComputer.INSTANCE, true);
         addFunction(SUBBINARY_FROM_TO, ABinaryTypeComputer.INSTANCE, true);
         addFunction(FIND_BINARY, AInt64TypeComputer.INSTANCE, true);
@@ -1251,7 +1252,7 @@ public class BuiltinFunctions {
         addFunction(STRING_CONTAINS, ABooleanTypeComputer.INSTANCE, true);
         addFunction(STRING_TO_CODEPOINT, 
StringToInt64ListTypeComputer.INSTANCE, true);
         addFunction(CODEPOINT_TO_STRING, AStringTypeComputer.INSTANCE, true);
-        addFunction(STRING_CONCAT, AStringTypeComputer.INSTANCE, true);
+        addFunction(STRING_CONCAT, ConcatTypeComputer.INSTANCE_STRING, true);
         addFunction(SUBSTRING2, 
StringIntToStringTypeComputer.INSTANCE_NULLABLE, true);
         addFunction(STRING_LENGTH, UnaryStringInt64TypeComputer.INSTANCE, 
true);
         addFunction(STRING_LOWERCASE, StringStringTypeComputer.INSTANCE, true);

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/ConcatTypeComputer.java
----------------------------------------------------------------------
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/ConcatTypeComputer.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/ConcatTypeComputer.java
new file mode 100644
index 0000000..db59877
--- /dev/null
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/ConcatTypeComputer.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.om.typecomputer.impl;
+
+import org.apache.asterix.om.typecomputer.base.AbstractResultTypeComputer;
+import org.apache.asterix.om.types.AUnionType;
+import org.apache.asterix.om.types.AbstractCollectionType;
+import org.apache.asterix.om.types.BuiltinType;
+import org.apache.asterix.om.types.IAType;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+
+public class ConcatTypeComputer extends AbstractResultTypeComputer {
+
+    public static final ConcatTypeComputer INSTANCE_STRING = new 
ConcatTypeComputer(BuiltinType.ASTRING);
+
+    public static final ConcatTypeComputer INSTANCE_BINARY = new 
ConcatTypeComputer(BuiltinType.ABINARY);
+
+    private final IAType resultType;
+
+    private ConcatTypeComputer(IAType resultType) {
+        this.resultType = resultType;
+    }
+
+    @Override
+    protected IAType getResultType(ILogicalExpression expr, IAType... 
strippedInputTypes) throws AlgebricksException {
+        IAType argType = strippedInputTypes[0];
+        IAType outputType = resultType;
+        if (!argType.getTypeTag().isListType() || 
isUnknownable(((AbstractCollectionType) argType).getItemType())) {
+            outputType = AUnionType.createUnknownableType(outputType);
+        }
+        return outputType;
+    }
+
+    private boolean isUnknownable(IAType type) {
+        switch (type.getTypeTag()) {
+            case ANY:
+            case MISSING:
+            case NULL:
+                return true;
+            case UNION:
+                return ((AUnionType) type).isUnknownableType();
+            default:
+                return false;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringConcatDescriptor.java
----------------------------------------------------------------------
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringConcatDescriptor.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringConcatDescriptor.java
index cb213ae..a2872bf 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringConcatDescriptor.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringConcatDescriptor.java
@@ -94,6 +94,7 @@ public class StringConcatDescriptor extends 
AbstractScalarFunctionDynamicDescrip
                             listAccessor.reset(listBytes, listOffset);
                             // calculate length first
                             int utf8Len = 0;
+                            boolean itemIsNull = false;
                             for (int i = 0; i < listAccessor.size(); i++) {
                                 int itemOffset = listAccessor.getItemOffset(i);
                                 ATypeTag itemType = 
listAccessor.getItemType(itemOffset);
@@ -104,9 +105,8 @@ public class StringConcatDescriptor extends 
AbstractScalarFunctionDynamicDescrip
                                 }
                                 if (itemType != ATypeTag.STRING) {
                                     if (itemType == ATypeTag.NULL) {
-                                        nullSerde.serialize(ANull.NULL, out);
-                                        result.set(resultStorage);
-                                        return;
+                                        itemIsNull = true;
+                                        continue;
                                     }
                                     if (itemType == ATypeTag.MISSING) {
                                         
missingSerde.serialize(AMissing.MISSING, out);
@@ -118,6 +118,11 @@ public class StringConcatDescriptor extends 
AbstractScalarFunctionDynamicDescrip
                                 }
                                 utf8Len += 
UTF8StringUtil.getUTFLength(listBytes, itemOffset);
                             }
+                            if (itemIsNull) {
+                                nullSerde.serialize(ANull.NULL, out);
+                                result.set(resultStorage);
+                                return;
+                            }
                             out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
                             int cbytes = 
UTF8StringUtil.encodeUTF8Length(utf8Len, tempLengthArray, 0);
                             out.write(tempLengthArray, 0, cbytes);

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/binary/BinaryConcatDescriptor.java
----------------------------------------------------------------------
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/binary/BinaryConcatDescriptor.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/binary/BinaryConcatDescriptor.java
index 59f3396..4eaa1d1 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/binary/BinaryConcatDescriptor.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/binary/BinaryConcatDescriptor.java
@@ -94,11 +94,17 @@ public class BinaryConcatDescriptor extends 
AbstractScalarFunctionDynamicDescrip
                         try {
                             listAccessor.reset(data, offset);
                             int concatLength = 0;
+                            boolean itemIsNull = false;
                             for (int i = 0; i < listAccessor.size(); i++) {
                                 int itemOffset = listAccessor.getItemOffset(i);
                                 ATypeTag itemType = 
listAccessor.getItemType(itemOffset);
                                 if (itemType != ATypeTag.BINARY) {
-                                    if 
(serializeUnknownIfAnyUnknown(itemType)) {
+                                    if (itemType == ATypeTag.NULL) {
+                                        itemIsNull = true;
+                                        continue;
+                                    }
+                                    if (itemType == ATypeTag.MISSING) {
+                                        
missingSerde.serialize(AMissing.MISSING, dataOutput);
                                         result.set(resultStorage);
                                         return;
                                     }
@@ -107,6 +113,11 @@ public class BinaryConcatDescriptor extends 
AbstractScalarFunctionDynamicDescrip
                                 }
                                 concatLength += 
ByteArrayPointable.getContentLength(data, itemOffset);
                             }
+                            if (itemIsNull) {
+                                nullSerde.serialize(ANull.NULL, dataOutput);
+                                result.set(resultStorage);
+                                return;
+                            }
                             
dataOutput.writeByte(ATypeTag.SERIALIZED_BINARY_TYPE_TAG);
                             int metaLen = 
VarLenIntEncoderDecoder.encode(concatLength, metaBuffer, 0);
                             dataOutput.write(metaBuffer, 0, metaLen);
@@ -122,20 +133,6 @@ public class BinaryConcatDescriptor extends 
AbstractScalarFunctionDynamicDescrip
                         }
                         result.set(resultStorage);
                     }
-
-                    private boolean serializeUnknownIfAnyUnknown(ATypeTag... 
tags) throws HyracksDataException {
-                        for (ATypeTag typeTag : tags) {
-                            if (typeTag == ATypeTag.NULL) {
-                                nullSerde.serialize(ANull.NULL, dataOutput);
-                                return true;
-                            }
-                            if (typeTag == ATypeTag.MISSING) {
-                                missingSerde.serialize(AMissing.MISSING, 
dataOutput);
-                                return true;
-                            }
-                        }
-                        return false;
-                    }
                 };
             }
         };

Reply via email to