Ali Alsuliman has uploaded a new change for review. https://asterix-gerrit.ics.uci.edu/3259
Change subject: [NO ISSUE][COMP] Change logical comparators handling of null/missing ...................................................................... [NO ISSUE][COMP] Change logical comparators handling of null/missing - user model changes: no - storage format changes: no - interface changes: no Details: Change the result of comparing against null and missing. For arrays, comparing against a null or missing item will result in incomparable. For records, comparing against a null field will result in incomparable. Change-Id: Id93bea76e13658768e08a98fd373c71a901ceec5 --- M asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.007.adm M asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.014.adm M asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.021.adm M asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.022.adm M asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/records/records.005.adm M asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/CompareHashUtil.java M asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/AbstractAGenericBinaryComparator.java M asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/LogicalComplexBinaryComparator.java M asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/LogicalScalarBinaryComparator.java M asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/hash/AMurmurHash3BinaryHashFunctionFamily.java M asterixdb/asterix-om/src/main/java/org/apache/asterix/om/util/container/ObjectFactories.java 11 files changed, 67 insertions(+), 125 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/59/3259/1 diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.007.adm 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.007.adm index d386253..e1b8022 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.007.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.007.adm @@ -1 +1 @@ -{ "t1": { "c": "[[1.0,4], [5,9,11,14]] = [[1.0,4], [5,9,11,14]]", "r": true }, "t2": { "c": "[[5,2,7], ['green','black'], [date('2013-01-01')]] = [[5,2,7], ['green','black'], [date('2013-01-01')]]", "r": true }, "t3": { "c": "[['white','yellow','brown'], 6] != [['white','yellow','brown'], double('6')]", "r": false }, "t4": { "c": "[['white','yellow','brown'], 6] != [double('6'), ['white','yellow','brown']]", "r": null }, "t5": { "c": "[ [[1,2,3], 'gold', ['sql++', 5]], [tinyint('4'), tinyint('5')], smallint('2')] > [ [[1,2,3], 'gold', ['sql++', 5]], [bigint('4'), int('5')], double('0.2')]", "r": true }, "t6": { "c": "[[[1,2], 99], 77] <= [[['flute',2], 99], 77]", "r": null }, "t7": { "c": "[[[1,2], 99], 77] <= [[[missing,2], 99], 77]" }, "t8": { "c": "[5, [8,1], [[0, 4], 'b']] > [5, [8,1], [[0, 4], 'a', 'c']]", "r": true }, "t9": { "c": "[[1, null], 9] = [[1, 2], 9]", "r": null }, "t10": { "c": "[[1, null], 9] = [[1, 2], 99]", "r": false }, "t11": { "c": "[[1, null], 9] < [[1, 2], 9 ]", "r": null }, "t12": { "c": "[[1, null], 9] < [[1, 2], 99]", "r": null }, "t13": { "c": "[[1, null], 9] > [[1, 2], 9]", "r": null }, "t14": { "c": "[[1, null], 9] > [[1, 2], 99]", "r": null }, "t15": { "c": "[1,2] = {{1,2}}", "r": null }, "t16": { "c": "{'id':99, 'name':'sam'} != [99, 'sam']", "r": null }, "t17": { "c": "[[1, 'string'], 9] = [[1, 2], 9]", "r": null }, "t18": { "c": "[[1, 'string'], 9] = [[1, 2], 99]", "r": null }, "t19": { "c": "[[1, 'string'], 9] < [[1, 2], 9]", "r": null }, "t20": { "c": "[[1, 'string'], 9] < [[1, 2], 99]", "r": null }, "t21": { "c": "[[1, 'string'], 9] > [[1, 2], 9]", "r": null }, "t22": { "c": "[[1, 'string'], 9] > [[1, 
2], 99]", "r": null } } \ No newline at end of file +{ "t1": { "c": "[[1.0,4], [5,9,11,14]] = [[1.0,4], [5,9,11,14]]", "r": true }, "t2": { "c": "[[5,2,7], ['green','black'], [date('2013-01-01')]] = [[5,2,7], ['green','black'], [date('2013-01-01')]]", "r": true }, "t3": { "c": "[['white','yellow','brown'], 6] != [['white','yellow','brown'], double('6')]", "r": false }, "t4": { "c": "[['white','yellow','brown'], 6] != [double('6'), ['white','yellow','brown']]", "r": null }, "t5": { "c": "[ [[1,2,3], 'gold', ['sql++', 5]], [tinyint('4'), tinyint('5')], smallint('2')] > [ [[1,2,3], 'gold', ['sql++', 5]], [bigint('4'), int('5')], double('0.2')]", "r": true }, "t6": { "c": "[[[1,2], 99], 77] <= [[['flute',2], 99], 77]", "r": null }, "t7": { "c": "[[[1,2], 99], 77] <= [[[missing,2], 99], 77]", "r": null }, "t8": { "c": "[5, [8,1], [[0, 4], 'b']] > [5, [8,1], [[0, 4], 'a', 'c']]", "r": true }, "t9": { "c": "[[1, null], 9] = [[1, 2], 9]", "r": null }, "t10": { "c": "[[1, null], 9] = [[1, 2], 99]", "r": null }, "t11": { "c": "[[1, null], 9] < [[1, 2], 9]", "r": null }, "t12": { "c": "[[1, null], 9] < [[1, 2], 99]", "r": null }, "t13": { "c": "[[1, null], 9] > [[1, 2], 9]", "r": null }, "t14": { "c": "[[1, null], 9] > [[1, 2], 99]", "r": null }, "t15": { "c": "[1,2] = {{1,2}}", "r": null }, "t16": { "c": "{'id':99, 'name':'sam'} != [99, 'sam']", "r": null }, "t17": { "c": "[[1, 'string'], 9] = [[1, 2], 9]", "r": null }, "t18": { "c": "[[1, 'string'], 9] = [[1, 2], 99]", "r": null }, "t19": { "c": "[[1, 'string'], 9] < [[1, 2], 9]", "r": null }, "t20": { "c": "[[1, 'string'], 9] < [[1, 2], 99]", "r": null }, "t21": { "c": "[[1, 'string'], 9] > [[1, 2], 9]", "r": null }, "t22": { "c": "[[1, 'string'], 9] > [[1, 2], 99]", "r": null } } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.014.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.014.adm index 
6cfa561..a98f8bc 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.014.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.014.adm @@ -1,2 +1 @@ -{ "id": 3, "array1": [ 2, 1 ], "OP": "< is_missing", "array2": [ null, 3, 3 ] } { "id": 7, "array1": [ 2, 1 ], "OP": "< is_missing" } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.021.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.021.adm index 55b9648..e628210 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.021.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.021.adm @@ -1 +1 @@ -{ "t1": { "c": "[9,2] = null", "r": null }, "t2": { "c": "[9,2] = missing" }, "t3": { "c": "[9,2] > null", "r": null }, "t4": { "c": "[9,2] > missing" }, "t5": { "c": "['red', null] < ['red', null]", "r": null }, "t6": { "c": "[missing,2] < [null,3]" }, "t7": { "c": "[1,2] < [1,2,missing]", "r": true }, "t8": { "c": "[1,2] < [1,2,null]", "r": true }, "t9": { "c": "[null,5] >= [null,5]", "r": null }, "t10": { "c": "[null,8] < [4, 9]", "r": null }, "t11": { "c": "[1,2,missing] != [1,2,missing]" }, "t12": { "c": "[null,1] = [1,1,3]", "r": false }, "t13": { "c": "[null,1] != [1,1,3]", "r": true }, "t14": { "c": "[null,1] > [1,1,3]", "r": null }, "t15": { "c": "[null, null, null] = [null, null, null]", "r": null }, "t16": { "c": "[missing, missing] = [missing, missing]" }, "t17": { "c": "[99, null, 3] = [1, 2, 3]", "r": false }, "t18": { "c": "[1, null, 3] = [1, 2, 3]", "r": null }, "t19": { "c": "[1, missing, 3] = [1, 2, 3]" }, "t20": { "c": "[1, null, missing, 4] = [1, 2, 3, 4]" }, "t2 1": { "c": "[1, null, missing, null, 5] = [1, 2, 3, 4, 5]" }, "t22": { "c": "[1, missing, null, missing, 5] = [1, 2, 3, 4, 5]" }, "t23": { "c": "[1, null, 3] = [1, 2, 
99]", "r": false }, "t24": { "c": "[1, missing, 3] = [1, 2, 99]", "r": false }, "t25": { "c": "[1, null, missing, 4] = [1, 2, 3, 99]", "r": false }, "t26": { "c": "[1, null, missing, null, 5] = [1, 2, 3, 4, 99]", "r": false }, "t27": { "c": "[1, missing, null, missing, 5] = [1, 2, 3, 4, 99]", "r": false }, "t28": { "c": "[1, null, 3] != [1, 2, 3]", "r": null }, "t29": { "c": "[1, missing, 3] != [1, 2, 3]" }, "t30": { "c": "[1, null, missing, 4] != [1, 2, 3, 4]" }, "t31": { "c": "[1, null, 3] != [1, 2, 99]", "r": true }, "t32": { "c": "[1, missing, 3] != [1, 2, 99]", "r": true }, "t33": { "c": "[1, null, missing, 4] != [1, 2, 3, 99]", "r": true }, "t34": { "c": "[1, null, 3] < [1, 2, 3]", "r": null }, "t35": { "c": "[1, missing, 3] < [1, 2, 3]" }, "t36": { "c": "[1, null, missing, 4] < [1, 2, 3, 4]" }, "t37": { "c": "[1 , missing, null, 4] < [1, 2, 3, 4]" }, "t38": { "c": "[1, null, 3] < [1, 2, 99]", "r": null }, "t39": { "c": "[1, missing, 3] < [1, 2, 99]" }, "t40": { "c": "[1, null, 99] < [1, 2, 3]", "r": null }, "t41": { "c": "[1, missing, 99] < [1, 2, 3]" }, "t42": { "c": "[99, null, 3] < [1, 2, 3]", "r": false }, "t43": { "c": "[-99, null, 3] < [1, 2, 3]", "r": true }, "t44": { "c": "[99, null, 3] >= [1, 2, 3]", "r": true }, "t45": { "c": "[-99, null, 3] >= [1, 2, 3]", "r": false } } \ No newline at end of file +{ "t1": { "c": "[9,2] = null", "r": null }, "t2": { "c": "[9,2] = missing" }, "t3": { "c": "[9,2] > null", "r": null }, "t4": { "c": "[9,2] > missing" }, "t5": { "c": "['red', null] < ['red', null]", "r": null }, "t6": { "c": "[missing,2] < [null,3]", "r": null }, "t7": { "c": "[1,2] < [1,2,missing]", "r": true }, "t8": { "c": "[1,2] < [1,2,null]", "r": true }, "t9": { "c": "[null,5] >= [null,5]", "r": null }, "t10": { "c": "[null,8] < [4, 9]", "r": null }, "t11": { "c": "[1,2,missing] != [1,2,missing]", "r": null }, "t12": { "c": "[null,1] = [1,1,3]", "r": null }, "t13": { "c": "[null,1] != [1,1,3]", "r": null }, "t14": { "c": "[null,1] > [1,1,3]", 
"r": null }, "t15": { "c": "[null, null, null] = [null, null, null]", "r": null }, "t16": { "c": "[missing, missing] = [missing, missing]", "r": null }, "t17": { "c": "[99, null, 3] = [1, 2, 3]", "r": null }, "t18": { "c": "[1, null, 3] = [1, 2, 3]", "r": null }, "t19": { "c": "[1, missing, 3] = [1, 2, 3]", "r": null }, "t20": { "c": "[1 , null, missing, 4] = [1, 2, 3, 4]", "r": null }, "t21": { "c": "[1, null, missing, null, 5] = [1, 2, 3, 4, 5]", "r": null }, "t22": { "c": "[1, missing, null, missing, 5] = [1, 2, 3, 4, 5]", "r": null }, "t23": { "c": "[1, null, 3] = [1, 2, 99]", "r": null }, "t24": { "c": "[1, missing, 3] = [1, 2, 99]", "r": null }, "t25": { "c": "[1, null, missing, 4] = [1, 2, 3, 99]", "r": null }, "t26": { "c": "[1, null, missing, null, 5] = [1, 2, 3, 4, 99]", "r": null }, "t27": { "c": "[1, missing, null, missing, 5] = [1, 2, 3, 4, 99]", "r": null }, "t28": { "c": "[1, null, 3] != [1, 2, 3]", "r": null }, "t29": { "c": "[1, missing, 3] != [1, 2, 3]", "r": null }, "t30": { "c": "[1, null, missing, 4] != [1, 2, 3, 4]", "r": null }, "t31": { "c": "[1, null, 3] != [1, 2, 99]", "r": null }, "t32": { "c": "[1, missing, 3] != [1, 2, 99]", "r": null }, "t33": { "c": "[1, null, missing, 4] != [1, 2, 3, 99]", "r": null }, "t34": { "c": "[1, null, 3] < [1, 2, 3]", "r": null }, "t35": { "c": "[1, missing, 3] < [1, 2, 3]", "r": null }, "t36": { "c": "[1, null, missing, 4] < [1, 2, 3, 4]", "r": null }, "t37": { "c": "[1, missing, null, 4] < [1, 2, 3, 4]", "r": null }, "t38": { "c": "[1, null, 3] < [1, 2, 99]", "r": null }, "t39": { "c": "[1, missing, 3] < [1, 2, 99]", "r": null }, "t40": { "c": "[1, null, 99] < [1, 2, 3]", "r": null }, "t41": { "c": "[1, missing, 99] < [1, 2, 3]", "r": null }, "t42": { "c": "[99, null, 3] < [1, 2, 3]", "r": null }, "t43": { "c": "[-99, null, 3] < [1, 2, 3]", "r": null }, "t44": { "c": "[99, null, 3] >= [1, 2, 3]", "r": null }, "t45": { "c": "[-99, null, 3] >= [1, 2, 3]", "r": null } } \ No newline at end of file diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.022.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.022.adm index 9fc20b1..6ec62e3 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.022.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/arrays/arrays.022.adm @@ -1 +1 @@ -{ "t1": { "c": "[1, 'string'] != [2, 9]", "r": null }, "t2": { "c": "[1, 'string'] > [2, 9]", "r": null }, "t3": { "c": "[9, {'id': 2}] < [1, {'id': 3}]", "r": null }, "t4": { "c": "[1, 2] = ['string', 2, 3, 4]", "r": null }, "t5": { "c": "[null, 2, 3, 4, 5] = [1, 2]", "r": false }, "t6": { "c": "[1, null, 3] = [1, 2, 'string']", "r": null }, "t7": { "c": "[1, null] = [2, 5]", "r": false }, "t8": { "c": "[1, null, 3, 7] = [1, 2, 9, 5]", "r": false }, "t9": { "c": "[null, 'string'] < [1, 2]", "r": null }, "t10": { "c": "[missing, 'string'] < [1, 2]", "r": null }, "t12": { "c": "[null, {'id':3}] < [2, {'id': 4}]", "r": null }, "t13": { "c": "[null, {'id':3}, 8] < [2, {'id': 4}, 9]", "r": null }, "t14": { "c": "[88, [7, 1], [['string', 44]]] > [3, [-2, -3], [[5, 4]]]", "r": null }, "t15": { "c": "[88, null, [['string', 44]]] > [3, [-2, -3], [[5, 4]]]", "r": null }, "t16": { "c": "[88, missing, [['string', 44]]] > [3, [-2, -3], [[5, 4]]]", "r": null }, "t17": { "c": "[null, 88, [['strin g', 44]]] > [3, 8, [[5, 4]]]", "r": null }, "t18": { "c": "[null, missing, 88, [['string', 44]]] > [3, 5, 8, [[5, 4]]]", "r": null }, "t19": { "c": "[88, [7, 1], [[-1, -44]]] > [3, [-2, -3], [[5, 4]]]", "r": true }, "t20": { "c": "[88, null, [[-1, -44]]] > [3, [-2, -3], [[5, 4]]]", "r": true }, "t21": { "c": "[88, missing, [[-1, -44]]] > [3, [-2, -3], [[5, 4]]]", "r": true }, "t22": { "c": "[null, 88, [[-1, -44]]] > [3, 8, [[5, 4]]]", "r": null }, "t23": { "c": "[null, missing, 88, [[-1, -44]]] > [3, 5, 8, [[5, 4]]]" }, "t24": { "c": "[missing, null, 88, [[-1, -44]]] 
> [3, 5, 8, [[5, 4]]]" }, "t25": { "c": "[1, null, 9, missing] < [1, 2, 3, 4]", "r": null }, "t26": { "c": "[1, null, 3, missing] < [1, 2, 3, 4]" }, "t27": { "c": "[1, null, missing, 4] < [1, 2, 3, 4]" }, "t28": { "c": "[1, null, missing, 9] < [1, 2, 3, 4]" }, "t29": { "c": "[1, null, 9, missing] = [1, 2, 3, 4]", "r": false }, "t30": { "c": "[1, null, 3, missing] = [1, 2, 3, 4]" }, "t31": { "c": "[1, null, missing, 4] = [1, 2, 3, 4]" }, "t32": { "c": "[1, null, missing, 9] = [1, 2, 3, 4]", "r": false } } \ No newline at end of file +{ "t1": { "c": "[1, 'string'] != [2, 9]", "r": null }, "t2": { "c": "[1, 'string'] > [2, 9]", "r": null }, "t3": { "c": "[9, {'id': 2}] < [1, {'id': 3}]", "r": null }, "t4": { "c": "[1, 2] = ['string', 2, 3, 4]", "r": null }, "t5": { "c": "[null, 2, 3, 4, 5] = [1, 2]", "r": null }, "t6": { "c": "[1, null, 3] = [1, 2, 'string']", "r": null }, "t7": { "c": "[1, null] = [2, 5]", "r": null }, "t8": { "c": "[1, null, 3, 7] = [1, 2, 9, 5]", "r": null }, "t9": { "c": "[null, 'string'] < [1, 2]", "r": null }, "t10": { "c": "[missing, 'string'] < [1, 2]", "r": null }, "t12": { "c": "[null, {'id':3}] < [2, {'id': 4}]", "r": null }, "t13": { "c": "[null, {'id':3}, 8] < [2, {'id': 4}, 9]", "r": null }, "t14": { "c": "[88, [7, 1], [['string', 44]]] > [3, [-2, -3], [[5, 4]]]", "r": null }, "t15": { "c": "[88, null, [['string', 44]]] > [3, [-2, -3], [[5, 4]]]", "r": null }, "t16": { "c": "[88, missing, [['string', 44]]] > [3, [-2, -3], [[5, 4]]]", "r": null }, "t17": { "c": "[null, 88, [['string', 44]]] > [3, 8, [[5, 4]]]", "r": null }, "t18": { "c": "[null, missing, 88, [['string', 44]]] > [3, 5, 8, [[5, 4]]]", "r": null }, "t19": { "c": "[88, [7, 1], [[-1, -44]]] > [3, [-2, -3], [[5, 4]]]", "r": true }, "t20": { "c": "[88, null, [[-1, -44]]] > [3, [-2, -3], [[5, 4]]]", "r": null }, "t21": { "c": "[88, missing, [[-1, -44]]] > [3, [-2, -3], [[5, 4]]]", "r": null }, "t22": { "c": "[null, 88, [[-1, -44]]] > [3, 8, [[5, 4]]]", "r": null }, "t23": { "c": 
"[null, missing, 88, [[-1, -44]]] > [3, 5, 8, [[5, 4]]]", "r": null }, "t24": { "c": "[missing, null, 88, [[-1, -44]]] > [3, 5, 8, [[5, 4]]]", "r": null }, "t25": { "c": "[1, null, 9, missing] < [1, 2, 3, 4]", "r": null }, "t26": { "c": "[1, null, 3, missing] < [1, 2, 3, 4]", "r": null }, "t27": { "c": "[1, null, missing, 4] < [1, 2, 3, 4]", "r": null }, "t28": { "c": "[1, null, missing, 9] < [1, 2, 3, 4]", "r": null }, "t29": { "c": "[1, null, 9, missing] = [1, 2, 3, 4]", "r": null }, "t30": { "c": "[1, null, 3, missing] = [1, 2, 3, 4]", "r": null }, "t31": { "c": "[1, null, missing, 4] = [1, 2, 3, 4]", "r": null }, "t32": { "c": "[1, null, missing, 9] = [1, 2, 3, 4]", "r": null } } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/records/records.005.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/records/records.005.adm index 4b7aac1..be11304 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/records/records.005.adm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/comparison/records/records.005.adm @@ -1 +1 @@ -{ "t1": { "c": "{'a': 2, 'b': null} = {'a': 2, 'b': 3}", "r": null }, "t2": { "c": "{'a': 2, 'b': missing} = {'a': 2, 'b': 3}", "r": false }, "t3": { "c": "{'list': [1, null], 'f': 3} = {'f': 3, 'list': [1, 2]}", "r": null }, "t4": { "c": "{'list': [1, missing], 'f': 3} = {'f': 3, 'list': [1, 2]}" }, "t5": { "c": "{'a': 4, 'b': null} = {'a': 2, 'b': 3}", "r": false } } \ No newline at end of file +{ "t1": { "c": "{'a': 2, 'b': null} = {'a': 2, 'b': 3}", "r": null }, "t2": { "c": "{'a': 2, 'b': missing} = {'a': 2, 'b': 3}", "r": false }, "t3": { "c": "{'list': [1, null], 'f': 3} = {'f': 3, 'list': [1, 2]}", "r": null }, "t4": { "c": "{'list': [1, missing], 'f': 3} = {'f': 3, 'list': [1, 2]}", "r": null }, "t5": { "c": "{'a': 4, 'b': null} = {'a': 2, 'b': 3}", "r": null } } \ No newline at end of file diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/CompareHashUtil.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/CompareHashUtil.java index 833dde1..a42e5aa 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/CompareHashUtil.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/CompareHashUtil.java @@ -79,14 +79,11 @@ throw new IllegalStateException(); } - public static IAType getType(ARecordType recordType, int fieldIdx, IVisitablePointable fieldValue) - throws HyracksDataException { + public static IAType getType(ARecordType recordType, int fieldIdx, ATypeTag fieldTag) throws HyracksDataException { IAType[] fieldTypes = recordType.getFieldTypes(); if (fieldIdx >= fieldTypes.length) { - byte tag = fieldValue.getByteArray()[fieldValue.getStartOffset()]; - ATypeTag fieldRuntimeTag = VALUE_TYPE_MAPPING[tag]; - return fieldRuntimeTag.isDerivedType() ? DefaultOpenFieldType.getDefaultOpenFieldType(fieldRuntimeTag) - : TypeTagUtil.getBuiltinTypeByTag(fieldRuntimeTag); + return fieldTag.isDerivedType() ? 
DefaultOpenFieldType.getDefaultOpenFieldType(fieldTag) + : TypeTagUtil.getBuiltinTypeByTag(fieldTag); } return fieldTypes[fieldIdx]; } diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/AbstractAGenericBinaryComparator.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/AbstractAGenericBinaryComparator.java index aa6a168..da50c56 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/AbstractAGenericBinaryComparator.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/AbstractAGenericBinaryComparator.java @@ -18,6 +18,8 @@ */ package org.apache.asterix.dataflow.data.nontagged.comparators; +import static org.apache.asterix.om.types.ATypeTag.VALUE_TYPE_MAPPING; + import java.io.IOException; import java.util.Comparator; import java.util.List; @@ -113,7 +115,7 @@ protected final IAType rightType; // a storage to promote a value private final ArrayBackedValueStorage castBuffer; - private final IObjectPool<IMutableValueStorage, ATypeTag> storageAllocator; + private final IObjectPool<IMutableValueStorage, Void> storageAllocator; private final IObjectPool<IPointable, Void> voidPointableAllocator; // used for record comparison, sorting field names private final PointableAllocator recordAllocator; @@ -416,6 +418,7 @@ int leftFieldIdx, rightFieldIdx; IAType leftFieldType, rightFieldType; IVisitablePointable leftFieldName, leftFieldValue, rightFieldName, rightFieldValue; + ATypeTag fieldTag; while (!leftNamesHeap.isEmpty() && !rightNamesHeap.isEmpty()) { leftFieldName = leftNamesHeap.poll(); rightFieldName = rightNamesHeap.poll(); @@ -431,9 +434,10 @@ rightFieldIdx = CompareHashUtil.getIndex(rightFieldsNames, rightFieldName); leftFieldValue = leftFieldsValues.get(leftFieldIdx); rightFieldValue = rightFieldsValues.get(rightFieldIdx); - leftFieldType = 
CompareHashUtil.getType(leftRecordType, leftFieldIdx, leftFieldValue); - rightFieldType = CompareHashUtil.getType(rightRecordType, rightFieldIdx, rightFieldValue); - + fieldTag = VALUE_TYPE_MAPPING[leftFieldValue.getByteArray()[leftFieldValue.getStartOffset()]]; + leftFieldType = CompareHashUtil.getType(leftRecordType, leftFieldIdx, fieldTag); + fieldTag = VALUE_TYPE_MAPPING[rightFieldValue.getByteArray()[rightFieldValue.getStartOffset()]]; + rightFieldType = CompareHashUtil.getType(rightRecordType, rightFieldIdx, fieldTag); result = compare(leftFieldType, leftFieldValue.getByteArray(), leftFieldValue.getStartOffset(), leftFieldValue.getLength(), rightFieldType, rightFieldValue.getByteArray(), rightFieldValue.getStartOffset(), rightFieldValue.getLength()); diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/LogicalComplexBinaryComparator.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/LogicalComplexBinaryComparator.java index 87d21da..a5924b9 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/LogicalComplexBinaryComparator.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/LogicalComplexBinaryComparator.java @@ -20,19 +20,21 @@ import static org.apache.asterix.om.types.ATypeTag.SERIALIZED_MISSING_TYPE_TAG; import static org.apache.asterix.om.types.ATypeTag.VALUE_TYPE_MAPPING; +import static org.apache.asterix.om.util.container.ObjectFactories.BIT_SET_FACTORY; +import static org.apache.asterix.om.util.container.ObjectFactories.STORAGE_FACTORY; +import static org.apache.asterix.om.util.container.ObjectFactories.VOID_FACTORY; import java.io.IOException; import java.util.BitSet; import java.util.List; -import org.apache.asterix.builders.AbvsBuilderFactory; import org.apache.asterix.dataflow.data.common.ILogicalBinaryComparator; import 
org.apache.asterix.dataflow.data.common.ListAccessorUtil; +import org.apache.asterix.dataflow.data.nontagged.CompareHashUtil; import org.apache.asterix.formats.nontagged.BinaryComparatorFactoryProvider; import org.apache.asterix.om.base.IAObject; import org.apache.asterix.om.pointables.ARecordVisitablePointable; import org.apache.asterix.om.pointables.PointableAllocator; -import org.apache.asterix.om.pointables.base.DefaultOpenFieldType; import org.apache.asterix.om.pointables.base.IVisitablePointable; import org.apache.asterix.om.typecomputer.impl.TypeComputeUtils; import org.apache.asterix.om.types.ARecordType; @@ -40,7 +42,6 @@ import org.apache.asterix.om.types.AbstractCollectionType; import org.apache.asterix.om.types.EnumDeserializer; import org.apache.asterix.om.types.IAType; -import org.apache.asterix.om.util.container.IObjectFactory; import org.apache.asterix.om.util.container.IObjectPool; import org.apache.asterix.om.util.container.ListObjectPool; import org.apache.hyracks.api.dataflow.value.IBinaryComparator; @@ -48,36 +49,30 @@ import org.apache.hyracks.data.std.api.IMutableValueStorage; import org.apache.hyracks.data.std.api.IPointable; import org.apache.hyracks.data.std.api.IValueReference; -import org.apache.hyracks.data.std.primitive.VoidPointable; import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; -import org.apache.hyracks.util.string.UTF8StringUtil; public class LogicalComplexBinaryComparator implements ILogicalBinaryComparator { - private static final IObjectFactory<BitSet, Void> BIT_SET_FACTORY = (type) -> new BitSet(); - private static final IObjectFactory<IPointable, Void> VOID_FACTORY = (type) -> new VoidPointable(); private final IAType leftType; private final IAType rightType; private final boolean isEquality; private final LogicalScalarBinaryComparator scalarComparator; - private final IObjectPool<IMutableValueStorage, ATypeTag> storageAllocator; + private final IObjectPool<IMutableValueStorage, Void> storageAllocator; 
private final IObjectPool<IPointable, Void> voidPointableAllocator; private final IObjectPool<BitSet, Void> bitSetAllocator; private final PointableAllocator pointableAllocator; private final IBinaryComparator utf8Comp; - private final StringBuilder builder; - public LogicalComplexBinaryComparator(IAType leftType, IAType rightType, boolean isEquality) { + LogicalComplexBinaryComparator(IAType leftType, IAType rightType, boolean isEquality) { this.leftType = leftType; this.rightType = rightType; this.isEquality = isEquality; this.scalarComparator = new LogicalScalarBinaryComparator(isEquality); - storageAllocator = new ListObjectPool<>(new AbvsBuilderFactory()); + storageAllocator = new ListObjectPool<>(STORAGE_FACTORY); voidPointableAllocator = new ListObjectPool<>(VOID_FACTORY); bitSetAllocator = new ListObjectPool<>(BIT_SET_FACTORY); pointableAllocator = new PointableAllocator(); utf8Comp = BinaryComparatorFactoryProvider.UTF8STRING_POINTABLE_INSTANCE.createBinaryComparator(); - builder = new StringBuilder(); } @Override @@ -91,15 +86,10 @@ } // make sure both left and right are complex types if (!leftRuntimeTag.isDerivedType() || !rightRuntimeTag.isDerivedType()) { - throw new IllegalStateException("Input types are not complex type"); + throw new IllegalStateException("Input data is not complex type"); } - try { - return compareComplex(leftType, leftRuntimeTag, leftBytes, leftStart, leftLen, rightType, rightRuntimeTag, - rightBytes, rightStart, rightLen); - } finally { - storageAllocator.reset(); - voidPointableAllocator.reset(); - } + return compareComplex(leftType, leftRuntimeTag, leftBytes, leftStart, leftLen, rightType, rightRuntimeTag, + rightBytes, rightStart, rightLen); } @Override @@ -148,14 +138,8 @@ if (leftRuntimeTag != rightRuntimeTag) { return Result.INCOMPARABLE; } - IAType leftCompileType = TypeComputeUtils.getActualType(leftType); - if (leftCompileType.getTypeTag() == ATypeTag.ANY) { - leftCompileType = 
DefaultOpenFieldType.getDefaultOpenFieldType(leftRuntimeTag); - } - IAType rightCompileType = TypeComputeUtils.getActualType(rightType); - if (rightCompileType.getTypeTag() == ATypeTag.ANY) { - rightCompileType = DefaultOpenFieldType.getDefaultOpenFieldType(rightRuntimeTag); - } + IAType leftCompileType = TypeComputeUtils.getActualTypeOrOpen(leftType, leftRuntimeTag); + IAType rightCompileType = TypeComputeUtils.getActualTypeOrOpen(rightType, rightRuntimeTag); switch (leftRuntimeTag) { case MULTISET: return compareMultisets(leftCompileType, leftRuntimeTag, leftBytes, leftStart, rightCompileType, @@ -187,13 +171,10 @@ // TODO(ali): optimize to not need this storage, will require optimizing records comparison to not use visitable ArrayBackedValueStorage leftStorage = (ArrayBackedValueStorage) storageAllocator.allocate(null); ArrayBackedValueStorage rightStorage = (ArrayBackedValueStorage) storageAllocator.allocate(null); - Result unknownResult = null; Result determiningResult = null; Result tempResult; - byte leftItemTagByte; - byte rightItemTagByte; - ATypeTag leftItemRuntimeTag; - ATypeTag rightItemRuntimeTag; + byte leftItemTagByte, rightItemTagByte; + ATypeTag leftItemRuntimeTag, rightItemRuntimeTag; try { for (int i = 0; i < leftNumItems && i < rightNumItems; i++) { ListAccessorUtil.getItem(leftBytes, leftStart, i, leftListTag, leftItemTag, leftItem, leftStorage); @@ -215,38 +196,18 @@ rightItem.getLength()); } - if (tempResult == Result.INCOMPARABLE) { - return tempResult; + if (tempResult == Result.INCOMPARABLE || tempResult == Result.MISSING || tempResult == Result.NULL) { + return Result.INCOMPARABLE; } // skip to next pair if current one is equal or the result of the comparison has already been decided - if (tempResult != Result.EQ && determiningResult == null) { - // tempResult = NULL, MISSING, LT, GT - if ((tempResult == Result.NULL || tempResult == Result.MISSING)) { - // keep unknown response if there is no yet a determining result switching to missing 
if found - if (unknownResult != Result.MISSING) { - unknownResult = tempResult; - } - } else { - // tempResult = LT, GT - determiningResult = tempResult; - } + if (determiningResult == null && tempResult != Result.EQ) { + determiningResult = tempResult; } } - // reaching here means the two arrays are comparable - if (isEquality && leftNumItems != rightNumItems) { - return ILogicalBinaryComparator.asResult(Integer.compare(leftNumItems, rightNumItems)); - } - // for >, < make unknownResult the determiningResult if unknownResult was encountered before finding one - if (!isEquality && unknownResult != null) { - determiningResult = unknownResult; - } if (determiningResult != null) { return determiningResult; - } - if (unknownResult != null) { - return unknownResult; } return ILogicalBinaryComparator.asResult(Integer.compare(leftNumItems, rightNumItems)); } catch (IOException e) { @@ -288,20 +249,12 @@ List<IVisitablePointable> leftFieldNames = leftRecord.getFieldNames(); List<IVisitablePointable> rightFieldValues = rightRecord.getFieldValues(); List<IVisitablePointable> rightFieldNames = rightRecord.getFieldNames(); - IVisitablePointable leftFieldValue; - IVisitablePointable leftFieldName; - IVisitablePointable rightFieldValue; - IVisitablePointable rightFieldName; + IVisitablePointable leftFieldValue, leftFieldName, rightFieldValue, rightFieldName; int leftNumFields = leftFieldNames.size(); int rightNumFields = rightFieldNames.size(); - IAType leftFieldType; - IAType rightFieldType; - ATypeTag leftFTag; - ATypeTag rightFTag; + IAType leftFieldType, rightFieldType; + ATypeTag leftFTag, rightFTag; Result tempCompResult; - Result unknownResult = null; - Result determiningResult = null; - String complexFieldName; boolean foundFieldInRight; boolean notEqual = false; notMatched.set(0, rightNumFields); @@ -311,6 +264,7 @@ // ignore if the field value is missing if (leftFTag != ATypeTag.MISSING) { + // start looking for the field in the right record foundFieldInRight = false; 
leftFieldName = leftFieldNames.get(i); for (int k = 0; k < rightNumFields; k++) { @@ -326,9 +280,8 @@ if (leftFTag == ATypeTag.NULL || rightFTag == ATypeTag.NULL) { tempCompResult = Result.NULL; } else if (leftFTag.isDerivedType() && rightFTag.isDerivedType()) { - complexFieldName = getComplexFieldName(leftFieldName); - leftFieldType = getComplexFieldType(leftRecordType, complexFieldName, leftFTag); - rightFieldType = getComplexFieldType(rightRecordType, complexFieldName, rightFTag); + leftFieldType = CompareHashUtil.getType(leftRecordType, i, leftFTag); + rightFieldType = CompareHashUtil.getType(rightRecordType, k, rightFTag); tempCompResult = compareComplex(leftFieldType, leftFTag, leftFieldValue.getByteArray(), leftFieldValue.getStartOffset(), leftFieldValue.getLength(), @@ -341,15 +294,12 @@ rightFieldValue.getLength()); } - if (tempCompResult == Result.INCOMPARABLE) { - return tempCompResult; + if (tempCompResult == Result.INCOMPARABLE || tempCompResult == Result.MISSING + || tempCompResult == Result.NULL) { + return Result.INCOMPARABLE; } - if (tempCompResult == Result.MISSING || tempCompResult == Result.NULL) { - if (unknownResult != Result.MISSING) { - unknownResult = tempCompResult; - } - } else if (tempCompResult != Result.EQ && determiningResult == null) { - determiningResult = tempCompResult; + if (tempCompResult != Result.EQ) { + notEqual = true; } } break; @@ -365,43 +315,24 @@ // LT or GT does not make a difference since this is an answer to equality return Result.LT; } - // two fields with the same name but having different values - if (determiningResult != null) { - return determiningResult; - } // check if there is a field in the right record that does not exist in left record byte rightFieldTag; - for (int i = 0; i < rightNumFields; i++) { + for (int i = notMatched.nextSetBit(0); i >= 0 && i < rightNumFields; i = notMatched.nextSetBit(i + 1)) { rightFieldValue = rightFieldValues.get(i); rightFieldTag = 
rightFieldValue.getByteArray()[rightFieldValue.getStartOffset()]; - if (notMatched.get(i) && rightFieldTag != SERIALIZED_MISSING_TYPE_TAG) { - notEqual = true; - break; + if (rightFieldTag != SERIALIZED_MISSING_TYPE_TAG) { + // LT or GT does not make a difference since this is an answer to equality + return Result.LT; } } - if (notEqual) { - return Result.LT; - } + // reaching here means every field in the left record exists in the right and vice versa - if (unknownResult != null) { - return unknownResult; - } return Result.EQ; } finally { pointableAllocator.freeRecord(rightRecord); pointableAllocator.freeRecord(leftRecord); bitSetAllocator.free(notMatched); } - } - - private IAType getComplexFieldType(ARecordType recordType, String fieldName, ATypeTag fieldRuntimeTag) { - IAType fieldType = recordType.getFieldType(fieldName); - return fieldType == null ? DefaultOpenFieldType.getDefaultOpenFieldType(fieldRuntimeTag) : fieldType; - } - - private String getComplexFieldName(IValueReference fieldName) { - builder.setLength(0); - return UTF8StringUtil.toString(builder, fieldName.getByteArray(), fieldName.getStartOffset() + 1).toString(); } private boolean equalNames(IValueReference fieldName1, IValueReference fieldName2) throws HyracksDataException { diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/LogicalScalarBinaryComparator.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/LogicalScalarBinaryComparator.java index f4896dc..a25c90e 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/LogicalScalarBinaryComparator.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/comparators/LogicalScalarBinaryComparator.java @@ -74,7 +74,7 @@ private final boolean isEquality; - public LogicalScalarBinaryComparator(boolean isEquality) { + LogicalScalarBinaryComparator(boolean isEquality) { 
this.isEquality = isEquality; } diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/hash/AMurmurHash3BinaryHashFunctionFamily.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/hash/AMurmurHash3BinaryHashFunctionFamily.java index 7cd69ab..cc33faa 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/hash/AMurmurHash3BinaryHashFunctionFamily.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/hash/AMurmurHash3BinaryHashFunctionFamily.java @@ -18,6 +18,8 @@ */ package org.apache.asterix.dataflow.data.nontagged.hash; +import static org.apache.asterix.om.types.ATypeTag.VALUE_TYPE_MAPPING; + import java.io.DataOutput; import java.io.IOException; import java.util.Comparator; @@ -82,7 +84,7 @@ private final ArrayBackedValueStorage valueBuffer = new ArrayBackedValueStorage(); private final DataOutput valueOut = valueBuffer.getDataOutput(); private final IObjectPool<IPointable, Void> voidPointableAllocator; - private final IObjectPool<IMutableValueStorage, ATypeTag> storageAllocator; + private final IObjectPool<IMutableValueStorage, Void> storageAllocator; private final IAType type; private final int seed; // used for record hashing, sorting field names first @@ -189,6 +191,7 @@ CompareHashUtil.addToHeap(fieldsNames, fieldsValues, namesHeap); IVisitablePointable fieldName, fieldValue; IAType fieldType; + ATypeTag fieldTag; int hash = 0; int fieldIdx; while (!namesHeap.isEmpty()) { @@ -196,7 +199,8 @@ // TODO(ali): currently doing another lookup to find the target field index and get its value & type fieldIdx = CompareHashUtil.getIndex(fieldsNames, fieldName); fieldValue = fieldsValues.get(fieldIdx); - fieldType = CompareHashUtil.getType(recordType, fieldIdx, fieldValue); + fieldTag = VALUE_TYPE_MAPPING[fieldValue.getByteArray()[fieldValue.getStartOffset()]]; + fieldType = CompareHashUtil.getType(recordType, fieldIdx, fieldTag); 
hash ^= MurmurHash3BinaryHash.hash(fieldName.getByteArray(), fieldName.getStartOffset(), fieldName.getLength(), seed) ^ hash(fieldType, fieldValue.getByteArray(), fieldValue.getStartOffset(), diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/util/container/ObjectFactories.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/util/container/ObjectFactories.java index e9cb345..763e1d4 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/util/container/ObjectFactories.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/util/container/ObjectFactories.java @@ -18,19 +18,26 @@ */ package org.apache.asterix.om.util.container; -import org.apache.asterix.builders.AbvsBuilderFactory; -import org.apache.asterix.om.types.ATypeTag; +import java.util.BitSet; + import org.apache.hyracks.data.std.api.IMutableValueStorage; import org.apache.hyracks.data.std.api.IPointable; import org.apache.hyracks.data.std.primitive.VoidPointable; +import org.apache.hyracks.data.std.util.ArrayBackedValueStorage; // TODO(ali): look for all classes creating factories and extract them to here + +/** + * Object factories must be used in conjunction with {@link IObjectPool} to reuse objects. They should not be used + * to create objects outside the context of a pool. 
+ */ public class ObjectFactories { private ObjectFactories() { } public static final IObjectFactory<IPointable, Void> VOID_FACTORY = (type) -> new VoidPointable(); - // TODO(ali): use lambda for the storage, too - public static final IObjectFactory<IMutableValueStorage, ATypeTag> STORAGE_FACTORY = new AbvsBuilderFactory(); + public static final IObjectFactory<IMutableValueStorage, Void> STORAGE_FACTORY = + (type) -> new ArrayBackedValueStorage(); + public static final IObjectFactory<BitSet, Void> BIT_SET_FACTORY = (type) -> new BitSet(); } -- To view, visit https://asterix-gerrit.ics.uci.edu/3259 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Id93bea76e13658768e08a98fd373c71a901ceec5 Gerrit-PatchSet: 1 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Ali Alsuliman <ali.al.solai...@gmail.com>