This is an automated email from the ASF dual-hosted git repository. mblow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 3d79c9f39392d6e2e5127b716788e4335014606b Author: Dmitry Lychagin <[email protected]> AuthorDate: Wed Feb 16 16:47:08 2022 -0800 [ASTERIXDB-3016][RT] Fix failure in hash groupby - user model changes: no - storage format changes: no - interface changes: no Details: - Modify hash group by to force garbage collection on the hash table if a tuple could not be inserted into it - Make hash group by clean up its run files in case of an error Change-Id: I7a133fa1d0555ebbcb7a9e3cb7445757716c9a2a Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/15325 Integration-Tests: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Reviewed-by: Dmitry Lychagin <[email protected]> Reviewed-by: Till Westmann <[email protected]> --- .../query-ASTERIXDB-3016.1.ddl.sqlpp | 28 ++++++++++++++++++++++ .../query-ASTERIXDB-3016.2.update.sqlpp | 26 ++++++++++++++++++++ .../query-ASTERIXDB-3016.3.query.sqlpp | 27 +++++++++++++++++++++ .../query-ASTERIXDB-3016.3.adm | 1 + .../test/resources/runtimets/testsuite_sqlpp.xml | 5 ++++ .../std/group/HashSpillableTableFactory.java | 28 +++++++++++++++------- .../ExternalGroupWriteOperatorNodePushable.java | 14 +++++++---- .../std/structures/ISerializableTable.java | 2 +- 8 files changed, 117 insertions(+), 14 deletions(-) diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.1.ddl.sqlpp new file mode 100644 index 0000000..fdfee88 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.1.ddl.sqlpp @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +drop dataverse tpcds if exists; +create dataverse tpcds; + +use tpcds; + +create dataset item(i_item_sk string not unknown) open type primary key i_item_sk; + +create dataset inventory(inv_date_sk string not unknown, inv_item_sk string not unknown, + inv_warehouse_sk string not unknown) open type primary key inv_date_sk, inv_item_sk, inv_warehouse_sk; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.2.update.sqlpp new file mode 100644 index 0000000..f8fe178 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.2.update.sqlpp @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use tpcds; + +set `import-private-functions` `true`; + +insert into item (select value object_remove(t, "table_name") from tpcds_datagen("item", 0.5) t); + +insert into inventory (select value object_remove(t, "table_name") from tpcds_datagen("inventory", 0.5) t); diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.query.sqlpp new file mode 100644 index 0000000..01158b9 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.query.sqlpp @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +use tpcds; + +SELECT ROUND(AVG(inv.inv_quantity_on_hand), 1) qoh, i.i_product_name +FROM inventory inv, item i +WHERE inv.inv_item_sk /*+hash-bcast*/ = i.i_item_sk +/*+ hash */ GROUP BY i.i_product_name +ORDER BY qoh, i.i_product_name +LIMIT 1; diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.adm new file mode 100644 index 0000000..96494f4 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/group-by/query-ASTERIXDB-3016/query-ASTERIXDB-3016.3.adm @@ -0,0 +1 @@ +{ "qoh": 402.0, "i_product_name": "ableoughtn st" } \ No newline at end of file diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml index 8b6b7c0..81567ff 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml @@ -6099,6 +6099,11 @@ <output-dir compare="Text">hash-group-by-decor</output-dir> </compilation-unit> </test-case> + <test-case FilePath="group-by"> + <compilation-unit name="query-ASTERIXDB-3016"> + <output-dir compare="Text">query-ASTERIXDB-3016</output-dir> + </compilation-unit> + </test-case> </test-group> <test-group name="index-join"> <test-case FilePath="index-join"> diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java index 4f0c304..1e5c121 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/HashSpillableTableFactory.java @@ -177,16 +177,21 @@ public class HashSpillableTableFactory implements ISpillableTableFactory { } // Checks whether the garbage collection is required and conducts a garbage collection if so. - if (hashTableForTuplePointer.isGarbageCollectionNeeded()) { + collectGarbageInHashTableForTuplePointer(false); + bufferManager.clearPartition(partition); + } + + private boolean collectGarbageInHashTableForTuplePointer(boolean force) throws HyracksDataException { + if (force || hashTableForTuplePointer.isGarbageCollectionNeeded()) { int numberOfFramesReclaimed = hashTableForTuplePointer.collectGarbage(bufferAccessor, tpcIntermediate); if (LOGGER.isDebugEnabled()) { LOGGER.debug("Garbage Collection on Hash table is done. Deallocated frames:" + numberOfFramesReclaimed); } + return numberOfFramesReclaimed != -1; } - - bufferManager.clearPartition(partition); + return false; } private int getPartition(int entryInHashTable) { @@ -234,11 +239,18 @@ public class HashSpillableTableFactory implements ISpillableTableFactory { } // Insertion to the hash table - if (!hashTableForTuplePointer.insert(entryInHashTable, pointer)) { - // To preserve the atomicity of this method, we need to undo the effect - // of the above bufferManager.insertTuple() call since the given insertion has failed. - bufferManager.cancelInsertTuple(pid); - return false; + boolean inserted = hashTableForTuplePointer.insert(entryInHashTable, pointer); + if (!inserted) { + // Force garbage collection on the hash table and attempt to insert again + if (collectGarbageInHashTableForTuplePointer(true)) { + inserted = hashTableForTuplePointer.insert(entryInHashTable, pointer); + } + if (!inserted) { + // To preserve the atomicity of this method, we need to undo the effect + // of the above bufferManager.insertTuple() call since the given insertion has failed. + bufferManager.cancelInsertTuple(pid); + return false; + } } return true; diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupWriteOperatorNodePushable.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupWriteOperatorNodePushable.java index 1a6f4ef..8618528 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupWriteOperatorNodePushable.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupWriteOperatorNodePushable.java @@ -142,11 +142,15 @@ public class ExternalGroupWriteOperatorNodePushable extends AbstractUnaryOutputS runs[i].getFileSize(), gbyFields, fdFields, groupByComparators, nmkComputer, mergeAggregatorFactory, partialAggRecordDesc, outRecordDesc, frameLimit, level); RunFileWriter[] runFileWriters = new RunFileWriter[partitionTable.getNumPartitions()]; - int[] sizeInTuplesNextLevel = - buildGroup(runs[i].createDeleteOnCloseReader(), partitionTable, runFileWriters); - for (int idFile = 0; idFile < runFileWriters.length; idFile++) { - if (runFileWriters[idFile] != null) { - generatedRuns.add(runFileWriters[idFile]); + int[] sizeInTuplesNextLevel; + try { + sizeInTuplesNextLevel = + buildGroup(runs[i].createDeleteOnCloseReader(), partitionTable, runFileWriters); + } finally { + for (int idFile = 0; idFile < runFileWriters.length; idFile++) { + if (runFileWriters[idFile] != null) { + generatedRuns.add(runFileWriters[idFile]); + } } } diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/ISerializableTable.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/ISerializableTable.java index 51f9984..58ad213 100644 --- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/ISerializableTable.java +++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/structures/ISerializableTable.java @@ -54,7 +54,7 @@ public interface ISerializableTable { * required to access the real tuple to calculate the original hash value * @param tpc: * hash function - * @return the number of frames that are reclaimed. + * @return the number of frames that are reclaimed. The value -1 is returned when no compaction was happened. * @throws HyracksDataException */ int collectGarbage(ITuplePointerAccessor bufferAccessor, ITuplePartitionComputer tpc) throws HyracksDataException;
