HIVE-12288: Bloom-1 filters for Vectorized map-joins (Gopal V, reviewed by Matt McCline)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3bf280ff
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3bf280ff
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3bf280ff

Branch: refs/heads/master-fixed
Commit: 3bf280ff0bb848b069298dd55dd03bd6e3dac97d
Parents: 973268b
Author: Gopal V <[email protected]>
Authored: Thu Nov 5 22:18:11 2015 -0800
Committer: Gopal V <[email protected]>
Committed: Thu Nov 5 22:18:11 2015 -0800

----------------------------------------------------------------------
 .../ql/exec/persistence/HybridHashTableContainer.java | 11 +++++++++++
 1 file changed, 11 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/3bf280ff/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
index 632ba4f..a0c9b98 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java
@@ -985,6 +985,17 @@ public class HybridHashTableContainer
       int keyHash = HashCodeUtil.murmurHash(bytes, offset, length);
       partitionId = keyHash & (hashPartitions.length - 1);
 
+      if (!bloom1.testLong(keyHash)) {
+        /*
+         * if the keyHash is missing in the bloom filter, then the value cannot exist in any of the
+         * spilled partition - return NOMATCH
+         */
+        dummyRow = null;
+        aliasFilter = (byte) 0xff;
+        hashMapResult.forget();
+        return JoinResult.NOMATCH;
+      }
+
       // If the target hash table is on disk, spill this row to disk as well to be processed later
       if (isOnDisk(partitionId)) {
         return JoinUtil.JoinResult.SPILL;
