This is an automated email from the ASF dual-hosted git repository.
luochen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new 3f75e5b [ASTERIXDB-2783] Fix hash collision for hash join/groupby
3f75e5b is described below
commit 3f75e5b627493eae80c89f8eaa0f075fb31ab3af
Author: luochen <[email protected]>
AuthorDate: Sat Sep 26 13:19:38 2020 -0700
[ASTERIXDB-2783] Fix hash collision for hash join/groupby
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- Use a fixed, arbitrarily chosen seed (instead of 0) for hash join/groupby
so that their hashing does not collide with the hash partitioning
- Increase the join memory in the cc*.conf files (from 256KB to 768KB, or
1024KB in cc3.conf) so that the large object join test case can still pass.
Change-Id: If2aa02384129293e80015efc3d1f60b57f98909c
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/8123
Integration-Tests: Jenkins <[email protected]>
Tested-by: Jenkins <[email protected]>
Reviewed-by: Dmitry Lychagin <[email protected]>
---
asterixdb/asterix-app/src/main/resources/cc.conf | 2 +-
asterixdb/asterix-app/src/main/resources/cc3.conf | 2 +-
asterixdb/asterix-app/src/test/resources/cc-compression.conf | 2 +-
asterixdb/asterix-app/src/test/resources/cc-ssl.conf | 2 +-
.../results/api/cluster_state_1/cluster_state_1.1.regexadm | 2 +-
.../api/cluster_state_1_less/cluster_state_1_less.1.regexadm | 2 +-
.../external/ExternalGroupBuildOperatorNodePushable.java | 7 ++++++-
.../std/join/OptimizedHybridHashJoinOperatorDescriptor.java | 11 +++++++++--
8 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/asterixdb/asterix-app/src/main/resources/cc.conf
b/asterixdb/asterix-app/src/main/resources/cc.conf
index ccd35f8..d5da6d4 100644
--- a/asterixdb/asterix-app/src/main/resources/cc.conf
+++ b/asterixdb/asterix-app/src/main/resources/cc.conf
@@ -55,7 +55,7 @@ log.level = INFO
compiler.framesize=32KB
compiler.sortmemory=320KB
compiler.groupmemory=160KB
-compiler.joinmemory=256KB
+compiler.joinmemory=768KB
compiler.textsearchmemory=160KB
compiler.windowmemory=192KB
compiler.sort.parallel=false
diff --git a/asterixdb/asterix-app/src/main/resources/cc3.conf
b/asterixdb/asterix-app/src/main/resources/cc3.conf
index 88362aa..d2a8556 100644
--- a/asterixdb/asterix-app/src/main/resources/cc3.conf
+++ b/asterixdb/asterix-app/src/main/resources/cc3.conf
@@ -51,7 +51,7 @@ log.level = WARN
compiler.framesize=32KB
compiler.sortmemory=320KB
compiler.groupmemory=160KB
-compiler.joinmemory=256KB
+compiler.joinmemory=1024KB
compiler.textsearchmemory=160KB
compiler.windowmemory=192KB
compiler.parallelism=3
diff --git a/asterixdb/asterix-app/src/test/resources/cc-compression.conf
b/asterixdb/asterix-app/src/test/resources/cc-compression.conf
index c8d9780..a3047a0 100644
--- a/asterixdb/asterix-app/src/test/resources/cc-compression.conf
+++ b/asterixdb/asterix-app/src/test/resources/cc-compression.conf
@@ -50,7 +50,7 @@ log.level = INFO
compiler.framesize=32KB
compiler.sortmemory=320KB
compiler.groupmemory=160KB
-compiler.joinmemory=256KB
+compiler.joinmemory=768KB
compiler.textsearchmemory=160KB
compiler.windowmemory=192KB
messaging.frame.size=4096
diff --git a/asterixdb/asterix-app/src/test/resources/cc-ssl.conf
b/asterixdb/asterix-app/src/test/resources/cc-ssl.conf
index 499e9fc..1c0a68f 100644
--- a/asterixdb/asterix-app/src/test/resources/cc-ssl.conf
+++ b/asterixdb/asterix-app/src/test/resources/cc-ssl.conf
@@ -62,7 +62,7 @@ log.level = INFO
compiler.framesize=32KB
compiler.sortmemory=320KB
compiler.groupmemory=160KB
-compiler.joinmemory=256KB
+compiler.joinmemory=768KB
compiler.textsearchmemory=160KB
compiler.windowmemory=192KB
messaging.frame.size=4096
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
index e377fd1..cc96921 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
@@ -12,7 +12,7 @@
"compiler\.groupmemory" : 163840,
"compiler\.indexonly" : true,
"compiler\.internal\.sanitycheck" : true,
- "compiler\.joinmemory" : 262144,
+ "compiler\.joinmemory" : 786432,
"compiler\.parallelism" : 0,
"compiler\.sort\.parallel" : false,
"compiler\.sort\.samples" : 100,
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
index e51d12f..68d3079 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
@@ -12,7 +12,7 @@
"compiler\.groupmemory" : 163840,
"compiler\.indexonly" : true,
"compiler\.internal\.sanitycheck" : false,
- "compiler\.joinmemory" : 262144,
+ "compiler\.joinmemory" : 1048576,
"compiler\.parallelism" : 3,
"compiler\.sort\.parallel" : true,
"compiler\.sort\.samples" : 100,
diff --git
a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupBuildOperatorNodePushable.java
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupBuildOperatorNodePushable.java
index 43f57af..20d223e 100644
---
a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupBuildOperatorNodePushable.java
+++
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/external/ExternalGroupBuildOperatorNodePushable.java
@@ -38,6 +38,11 @@ import org.apache.logging.log4j.Logger;
public class ExternalGroupBuildOperatorNodePushable extends
AbstractUnaryInputSinkOperatorNodePushable
implements IRunFileWriterGenerator {
+ /**
+ * Use a random seed to avoid hash collision with the hash exchange operator.
+ * See https://issues.apache.org/jira/browse/ASTERIXDB-2783 for more details.
+ */
+ private static final int INIT_SEED = 573275022;
private static final Logger LOGGER = LogManager.getLogger();
private final IHyracksTaskContext ctx;
@@ -85,7 +90,7 @@ public class ExternalGroupBuildOperatorNodePushable extends
AbstractUnaryInputSi
state = new ExternalGroupState(ctx.getJobletContext().getJobId(),
stateId);
ISpillableTable table = spillableTableFactory.buildSpillableTable(ctx,
tableSize, fileSize, keyFields,
comparators, firstNormalizerComputer, aggregatorFactory,
inRecordDescriptor, outRecordDescriptor,
- framesLimit, 0);
+ framesLimit, INIT_SEED);
RunFileWriter[] runFileWriters = new
RunFileWriter[table.getNumPartitions()];
this.externalGroupBy = new ExternalHashGroupBy(this, table,
runFileWriters, inRecordDescriptor);
diff --git
a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
index 97f9c24..c142113 100644
---
a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
+++
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
@@ -107,6 +107,12 @@ import org.apache.logging.log4j.Logger;
*/
public class OptimizedHybridHashJoinOperatorDescriptor extends
AbstractOperatorDescriptor {
+ /**
+ * Use a random seed to avoid hash collision with the hash exchange operator.
+ * See https://issues.apache.org/jira/browse/ASTERIXDB-2783 for more details.
+ */
+ private static final int INIT_SEED = 982028031;
+
private static final int BUILD_AND_PARTITION_ACTIVITY_ID = 0;
private static final int PARTITION_AND_JOIN_ACTIVITY_ID = 1;
@@ -269,10 +275,11 @@ public class OptimizedHybridHashJoinOperatorDescriptor
extends AbstractOperatorD
ctx.getJobletContext().getJobId(), new
TaskId(getActivityId(), partition));
ITuplePartitionComputer probeHpc =
- new FieldHashPartitionComputerFamily(probeKeys,
propHashFunctionFactories).createPartitioner(0);
+ new FieldHashPartitionComputerFamily(probeKeys,
propHashFunctionFactories)
+ .createPartitioner(INIT_SEED);
ITuplePartitionComputer buildHpc =
new FieldHashPartitionComputerFamily(buildKeys,
buildHashFunctionFactories)
- .createPartitioner(0);
+ .createPartitioner(INIT_SEED);
boolean failed = false;
@Override