>From Shahrzad Shirazi <[email protected]>: Shahrzad Shirazi has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21307?usp=email )
Change subject: WIP:Hashing -0.0 ...................................................................... WIP:Hashing -0.0 Change-Id: I02434aa90004008bd2e2c76cbf506597626190de --- A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/hash-based-or/hash-based-or/hash-based-or.16.update.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/hash-based-or/hash-based-or/hash-based-or.17.query.sqlpp A asterixdb/asterix-app/src/test/resources/runtimets/results/hash-based-or/hash-based-or/hash-based-or.17.adm M asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/hash/AMurmurHash3BinaryHashFunctionFamily.java 4 files changed, 116 insertions(+), 1 deletion(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/07/21307/1 diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/hash-based-or/hash-based-or/hash-based-or.16.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/hash-based-or/hash-based-or/hash-based-or.16.update.sqlpp new file mode 100644 index 0000000..6f61389 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/hash-based-or/hash-based-or/hash-based-or.16.update.sqlpp @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE gby; + +upsert INTO Employee ( + [ + { 'name': 'Bill', + 'id' : 2, + 'deptno': 'K55', + 'salary': 2000, + 'age':20, + 'level':0.0, + 'salary':234 }, + + { 'name': 'Fred', + 'deptno': 'K55', + 'id' : 12, + 'salary': 3000, + 'age':42, + 'level':0 }, + + { 'name': 'Fred2', + 'deptno': 'K15', + 'id' : 13, + 'salary': 2500, + 'age':23, + 'level':-0.0 }, + + { 'name': 'Fred1', + 'deptno': null, + 'id' : 31, + 'salary': 2500, + 'age':42, + 'level':2 }, + { 'name': 'Fred11', + 'deptno': null, + 'id' : 33, + 'salary': null, + 'age':0, + 'level':4 } + + ] +); \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/hash-based-or/hash-based-or/hash-based-or.17.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/hash-based-or/hash-based-or/hash-based-or.17.query.sqlpp new file mode 100644 index 0000000..22414be --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/hash-based-or/hash-based-or/hash-based-or.17.query.sqlpp @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +USE gby; + +SET `rewrite_or_as_join` "false"; +SET `compiler.disjunction.hash.threshold` "-1"; + + +Select * +FROM Employee e +WHERE e.level=0.0 or e.level=2 +order by e.id; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/hash-based-or/hash-based-or/hash-based-or.17.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/hash-based-or/hash-based-or/hash-based-or.17.adm new file mode 100644 index 0000000..1a7b8f7 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/hash-based-or/hash-based-or/hash-based-or.17.adm @@ -0,0 +1,4 @@ +{ "e": { "age": 20, "id": 2, "name": "Bill", "deptno": "K55", "salary": 2000, "level": 0.0 } } +{ "e": { "age": 42, "id": 12, "name": "Fred", "deptno": "K55", "salary": 3000, "level": 0 } } +{ "e": { "age": 23, "id": 13, "name": "Fred2", "deptno": "K15", "salary": 2500, "level": -0.0 } } +{ "e": { "age": 42, "id": 31, "name": "Fred1", "deptno": null, "salary": 2500, "level": 2 } } diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/hash/AMurmurHash3BinaryHashFunctionFamily.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/hash/AMurmurHash3BinaryHashFunctionFamily.java index caab273..af8bdc8 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/hash/AMurmurHash3BinaryHashFunctionFamily.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/hash/AMurmurHash3BinaryHashFunctionFamily.java @@ -26,6 +26,7 @@ import java.io.IOException; import org.apache.asterix.dataflow.data.common.ListAccessorUtil; +import org.apache.asterix.dataflow.data.nontagged.serde.ADoubleSerializerDeserializer; import org.apache.asterix.dataflow.data.nontagged.serde.AOrderedListSerializerDeserializer; import org.apache.asterix.om.pointables.nonvisitor.RecordField; import org.apache.asterix.om.pointables.nonvisitor.SortedRecord; @@ -121,6 +122,8 @@ } return MurmurHash3BinaryHash.hash(valueBuffer.getByteArray(), valueBuffer.getStartOffset(), valueBuffer.getLength(), seed); + case DOUBLE: + return hashCanonicalDouble(bytes, offset); case ARRAY: try { return hashArray(type, bytes, offset); @@ -129,12 +132,31 @@ } case OBJECT: return hashRecord(type, bytes, offset); - case DOUBLE: default: return MurmurHash3BinaryHash.hash(bytes, offset, length, seed); } } + private int hashCanonicalDouble(byte[] bytes, int offset) throws HyracksDataException { + final double numericValue = ADoubleSerializerDeserializer.getDouble(bytes, offset + 1); + final double canonicalValue; + if (numericValue == 0.0d) { + canonicalValue = 0.0d; + } else if (Double.isNaN(numericValue)) { + canonicalValue = Double.NaN; + } else { + canonicalValue = numericValue; + } + try { + valueOut.writeByte(ATypeTag.DOUBLE.serialize()); + valueOut.writeDouble(canonicalValue); + } catch (IOException e) { + throw HyracksDataException.create(ErrorCode.NUMERIC_PROMOTION_ERROR, e.getMessage()); + } + return MurmurHash3BinaryHash.hash(valueBuffer.getByteArray(), valueBuffer.getStartOffset(), + valueBuffer.getLength(), seed); + } + private int hashArray(IAType type, byte[] bytes, int offset) throws IOException { IAType arrayType = TypeComputeUtils.getActualTypeOrOpen(type, ATypeTag.ARRAY); IAType itemType = ((AbstractCollectionType) arrayType).getItemType(); -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21307?usp=email To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings?usp=email Gerrit-MessageType: newchange Gerrit-Project: asterixdb Gerrit-Branch: lumina Gerrit-Change-Id: I02434aa90004008bd2e2c76cbf506597626190de Gerrit-Change-Number: 21307 Gerrit-PatchSet: 1 Gerrit-Owner: Shahrzad Shirazi <[email protected]>
