[ASTERIXDB-2449][FUN] Incorrect NULL/MISSING handling by concat functions - user model changes: no - storage format changes: no - interface changes: no
Details: - Infer function return type as unknownable if its input list item can be NULL/MISSING - Always return MISSING if there is a MISSING argument Change-Id: Idc364b061f3e74bdc9d7715bbadedc957e9e8223 Reviewed-on: https://asterix-gerrit.ics.uci.edu/2946 Sonar-Qube: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Reviewed-by: Till Westmann <ti...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/asterixdb/repo Commit: http://git-wip-us.apache.org/repos/asf/asterixdb/commit/bcc93471 Tree: http://git-wip-us.apache.org/repos/asf/asterixdb/tree/bcc93471 Diff: http://git-wip-us.apache.org/repos/asf/asterixdb/diff/bcc93471 Branch: refs/heads/master Commit: bcc93471dfcf6cb2947376fec7336684efd91a8b Parents: 059ab96 Author: Dmitry Lychagin <dmitry.lycha...@couchbase.com> Authored: Thu Aug 30 19:30:39 2018 -0700 Committer: Dmitry Lychagin <dmitry.lycha...@couchbase.com> Committed: Fri Aug 31 10:15:40 2018 -0700 ---------------------------------------------------------------------- .../binary/concat2/concat2.1.query.sqlpp | 25 ++++++++ .../string-concat2/string-concat2.1.query.sqlpp | 25 ++++++++ .../string-concat2/string-concat2.2.query.sqlpp | 25 ++++++++ .../results/binary/concat2/concat2.1.adm | 1 + .../string/string-concat2/string-concat2.1.adm | 1 + .../string/string-concat2/string-concat2.2.adm | 1 + .../resources/runtimets/testsuite_sqlpp.xml | 10 +++ .../asterix/om/functions/BuiltinFunctions.java | 5 +- .../typecomputer/impl/ConcatTypeComputer.java | 64 ++++++++++++++++++++ .../functions/StringConcatDescriptor.java | 11 +++- .../binary/BinaryConcatDescriptor.java | 27 ++++----- 11 files changed, 175 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/binary/concat2/concat2.1.query.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/binary/concat2/concat2.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/binary/concat2/concat2.1.query.sqlpp new file mode 100644 index 0000000..782cce2 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/binary/concat2/concat2.1.query.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +{ + 't1': binary_concat([null,hex('aa')]), + 't2': binary_concat([hex('aa'),null]), + 't3': binary_concat([null,missing,hex('aa')]), + 't4': binary_concat([hex('aa'),null,missing]) +}; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.1.query.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.1.query.sqlpp new file mode 100644 index 0000000..3e48631 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.1.query.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +{ + 't1': string_concat([null,'aa']), + 't2': string_concat(['aa',null]), + 't3': string_concat([null,missing,'aa']), + 't4': string_concat(['aa',null,missing]) +}; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.2.query.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.2.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.2.query.sqlpp new file mode 100644 index 0000000..12208a7 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.2.query.sqlpp @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +{ + 't1': null || 'aa', + 't2': 'aa' || null, + 't3': null || missing || 'aa', + 't4': 'aa' || null || missing +}; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-app/src/test/resources/runtimets/results/binary/concat2/concat2.1.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/binary/concat2/concat2.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/binary/concat2/concat2.1.adm new file mode 100644 index 0000000..770f2cd --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/binary/concat2/concat2.1.adm @@ -0,0 +1 @@ +{ "t1": null, "t2": null } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.1.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.1.adm new file mode 100644 index 0000000..770f2cd --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.1.adm @@ -0,0 +1 @@ +{ "t1": null, "t2": null } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.2.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.2.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.2.adm new file mode 100644 index 0000000..770f2cd --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.2.adm @@ -0,0 +1 @@ +{ "t1": null, "t2": null } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml index 904dc61..04b484e 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml @@ -6619,6 +6619,11 @@ </compilation-unit> </test-case> <test-case FilePath="string"> + <compilation-unit name="string-concat2"> + <output-dir compare="Text">string-concat2</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="string"> <compilation-unit name="string-equal1"> <output-dir compare="Text">string-equal1</output-dir> </compilation-unit> @@ -9837,6 +9842,11 @@ </compilation-unit> </test-case> <test-case FilePath="binary"> + <compilation-unit name="concat2"> + <output-dir compare="Text">concat2</output-dir> + </compilation-unit> + </test-case> + <test-case FilePath="binary"> <compilation-unit name="subbinary"> <output-dir compare="Text">subbinary</output-dir> </compilation-unit> http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java index d8eeefc..49db062 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java @@ -68,6 +68,7 @@ import org.apache.asterix.om.typecomputer.impl.ClosedRecordConstructorResultType import org.apache.asterix.om.typecomputer.impl.CollectionMemberResultType; import org.apache.asterix.om.typecomputer.impl.CollectionToSequenceTypeComputer; import org.apache.asterix.om.typecomputer.impl.ConcatNonNullTypeComputer; +import org.apache.asterix.om.typecomputer.impl.ConcatTypeComputer; import org.apache.asterix.om.typecomputer.impl.DoubleIfTypeComputer; import org.apache.asterix.om.typecomputer.impl.FieldAccessByIndexResultType; import org.apache.asterix.om.typecomputer.impl.FieldAccessByNameResultType; @@ -1240,7 +1241,7 @@ public class BuiltinFunctions { addFunction(BINARY_LENGTH, UnaryBinaryInt64TypeComputer.INSTANCE, true); addFunction(PARSE_BINARY, ABinaryTypeComputer.INSTANCE, true); addFunction(PRINT_BINARY, AStringTypeComputer.INSTANCE, true); - addFunction(BINARY_CONCAT, ABinaryTypeComputer.INSTANCE, true); + addFunction(BINARY_CONCAT, ConcatTypeComputer.INSTANCE_BINARY, true); addFunction(SUBBINARY_FROM, ABinaryTypeComputer.INSTANCE, true); addFunction(SUBBINARY_FROM_TO, ABinaryTypeComputer.INSTANCE, true); addFunction(FIND_BINARY, AInt64TypeComputer.INSTANCE, true); @@ -1251,7 +1252,7 @@ public class BuiltinFunctions { addFunction(STRING_CONTAINS, ABooleanTypeComputer.INSTANCE, true); addFunction(STRING_TO_CODEPOINT, StringToInt64ListTypeComputer.INSTANCE, true); addFunction(CODEPOINT_TO_STRING, AStringTypeComputer.INSTANCE, true); - addFunction(STRING_CONCAT, AStringTypeComputer.INSTANCE, true); + addFunction(STRING_CONCAT, ConcatTypeComputer.INSTANCE_STRING, true); addFunction(SUBSTRING2, StringIntToStringTypeComputer.INSTANCE_NULLABLE, true); addFunction(STRING_LENGTH, UnaryStringInt64TypeComputer.INSTANCE, true); addFunction(STRING_LOWERCASE, StringStringTypeComputer.INSTANCE, true); http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/ConcatTypeComputer.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/ConcatTypeComputer.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/ConcatTypeComputer.java new file mode 100644 index 0000000..db59877 --- /dev/null +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/ConcatTypeComputer.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.asterix.om.typecomputer.impl; + +import org.apache.asterix.om.typecomputer.base.AbstractResultTypeComputer; +import org.apache.asterix.om.types.AUnionType; +import org.apache.asterix.om.types.AbstractCollectionType; +import org.apache.asterix.om.types.BuiltinType; +import org.apache.asterix.om.types.IAType; +import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; +import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression; + +public class ConcatTypeComputer extends AbstractResultTypeComputer { + + public static final ConcatTypeComputer INSTANCE_STRING = new ConcatTypeComputer(BuiltinType.ASTRING); + + public static final ConcatTypeComputer INSTANCE_BINARY = new ConcatTypeComputer(BuiltinType.ABINARY); + + private final IAType resultType; + + private ConcatTypeComputer(IAType resultType) { + this.resultType = resultType; + } + + @Override + protected IAType getResultType(ILogicalExpression expr, IAType... strippedInputTypes) throws AlgebricksException { + IAType argType = strippedInputTypes[0]; + IAType outputType = resultType; + if (!argType.getTypeTag().isListType() || isUnknownable(((AbstractCollectionType) argType).getItemType())) { + outputType = AUnionType.createUnknownableType(outputType); + } + return outputType; + } + + private boolean isUnknownable(IAType type) { + switch (type.getTypeTag()) { + case ANY: + case MISSING: + case NULL: + return true; + case UNION: + return ((AUnionType) type).isUnknownableType(); + default: + return false; + } + } +} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringConcatDescriptor.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringConcatDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringConcatDescriptor.java index cb213ae..a2872bf 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringConcatDescriptor.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringConcatDescriptor.java @@ -94,6 +94,7 @@ public class StringConcatDescriptor extends AbstractScalarFunctionDynamicDescrip listAccessor.reset(listBytes, listOffset); // calculate length first int utf8Len = 0; + boolean itemIsNull = false; for (int i = 0; i < listAccessor.size(); i++) { int itemOffset = listAccessor.getItemOffset(i); ATypeTag itemType = listAccessor.getItemType(itemOffset); @@ -104,9 +105,8 @@ public class StringConcatDescriptor extends AbstractScalarFunctionDynamicDescrip } if (itemType != ATypeTag.STRING) { if (itemType == ATypeTag.NULL) { - nullSerde.serialize(ANull.NULL, out); - result.set(resultStorage); - return; + itemIsNull = true; + continue; } if (itemType == ATypeTag.MISSING) { missingSerde.serialize(AMissing.MISSING, out); @@ -118,6 +118,11 @@ public class StringConcatDescriptor extends AbstractScalarFunctionDynamicDescrip } utf8Len += UTF8StringUtil.getUTFLength(listBytes, itemOffset); } + if (itemIsNull) { + nullSerde.serialize(ANull.NULL, out); + result.set(resultStorage); + return; + } out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG); int cbytes = UTF8StringUtil.encodeUTF8Length(utf8Len, tempLengthArray, 0); out.write(tempLengthArray, 0, cbytes); http://git-wip-us.apache.org/repos/asf/asterixdb/blob/bcc93471/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/binary/BinaryConcatDescriptor.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/binary/BinaryConcatDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/binary/BinaryConcatDescriptor.java index 59f3396..4eaa1d1 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/binary/BinaryConcatDescriptor.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/binary/BinaryConcatDescriptor.java @@ -94,11 +94,17 @@ public class BinaryConcatDescriptor extends AbstractScalarFunctionDynamicDescrip try { listAccessor.reset(data, offset); int concatLength = 0; + boolean itemIsNull = false; for (int i = 0; i < listAccessor.size(); i++) { int itemOffset = listAccessor.getItemOffset(i); ATypeTag itemType = listAccessor.getItemType(itemOffset); if (itemType != ATypeTag.BINARY) { - if (serializeUnknownIfAnyUnknown(itemType)) { + if (itemType == ATypeTag.NULL) { + itemIsNull = true; + continue; + } + if (itemType == ATypeTag.MISSING) { + missingSerde.serialize(AMissing.MISSING, dataOutput); result.set(resultStorage); return; } @@ -107,6 +113,11 @@ public class BinaryConcatDescriptor extends AbstractScalarFunctionDynamicDescrip } concatLength += ByteArrayPointable.getContentLength(data, itemOffset); } + if (itemIsNull) { + nullSerde.serialize(ANull.NULL, dataOutput); + result.set(resultStorage); + return; + } dataOutput.writeByte(ATypeTag.SERIALIZED_BINARY_TYPE_TAG); int metaLen = VarLenIntEncoderDecoder.encode(concatLength, metaBuffer, 0); dataOutput.write(metaBuffer, 0, metaLen); @@ -122,20 +133,6 @@ public class BinaryConcatDescriptor extends AbstractScalarFunctionDynamicDescrip } result.set(resultStorage); } - - private boolean serializeUnknownIfAnyUnknown(ATypeTag... tags) throws HyracksDataException { - for (ATypeTag typeTag : tags) { - if (typeTag == ATypeTag.NULL) { - nullSerde.serialize(ANull.NULL, dataOutput); - return true; - } - if (typeTag == ATypeTag.MISSING) { - missingSerde.serialize(AMissing.MISSING, dataOutput); - return true; - } - } - return false; - } }; } };