http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index 0192fb5..cee9100 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -67,7 +67,7 @@ public class MapJoinDesc extends JoinDesc implements Serializable { private boolean isBucketMapJoin; // Hash table memory usage allowed; used in case of non-staged mapjoin. - private float hashtableMemoryUsage; + private float hashtableMemoryUsage; // This is a percentage value between 0 and 1 protected boolean genJoinKeys = true; private boolean isHybridHashJoin;
http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java index eb38b19..a45275b 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java @@ -35,7 +35,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable { random = new Random(47496); VectorMapJoinFastLongHashMap map = - new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, 0); + new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); RandomLongStream randomLongKeyStream = new RandomLongStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -55,7 +55,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable { public void testPutGetMultiple() throws Exception { random = new Random(2990); - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, 0); + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); RandomLongStream randomLongKeyStream = new RandomLongStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -77,7 +77,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable { public void testGetNonExistent() throws Exception { random = new 
Random(16916); - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, 0); + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); RandomLongStream randomLongKeyStream = new RandomLongStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -101,7 +101,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable { random = new Random(26078); // Make sure the map does not expand; should be able to find space. - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE, 0); + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE); RandomLongStream randomLongKeyStream = new RandomLongStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -126,7 +126,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable { random = new Random(22470); // Start with capacity 1; make sure we expand on every put. - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, 0); + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE); RandomLongStream randomLongKeyStream = new RandomLongStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -147,7 +147,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable { random = new Random(40719); // Use a large capacity that doesn't require expansion, yet. 
- VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, 0); + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); RandomLongStream randomLongKeyStream = new RandomLongStream(random); @@ -172,7 +172,7 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable { random = new Random(46809); // Use a large capacity that doesn't require expansion, yet. - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, 0); + VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); RandomLongStream randomLongKeyStream = new RandomLongStream(random); http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java index 3c1b29a..944bda6 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java @@ -35,7 +35,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable { random = new Random(47496); VectorMapJoinFastMultiKeyHashMap map = - new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE, 0); + new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE); 
RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -55,7 +55,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable { public void testPutGetMultiple() throws Exception { random = new Random(2990); - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE, 0); + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE); RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -77,7 +77,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable { public void testGetNonExistent() throws Exception { random = new Random(16916); - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE, 0); + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE); RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -101,7 +101,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable { random = new Random(26078); // Make sure the map does not expand; should be able to find space. 
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, 1f, WB_SIZE, 0); + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, 1f, WB_SIZE); RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -126,7 +126,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable { random = new Random(22470); // Start with capacity 1; make sure we expand on every put. - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, 1, 0.0000001f, WB_SIZE, 0); + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, 1, 0.0000001f, WB_SIZE); RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); @@ -147,7 +147,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable { random = new Random(5231); // Use a large capacity that doesn't require expansion, yet. - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, 0); + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random, 10); @@ -178,7 +178,7 @@ public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable { random = new Random(46809); // Use a large capacity that doesn't require expansion, yet. 
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, 0); + VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random, 10); http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q index 096c890..e92504a 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q @@ -42,6 +42,8 @@ select * from dest2; set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=200; +set hive.mapjoin.hybridgrace.minwbsize=100; +set hive.mapjoin.hybridgrace.minnumpartitions=2; -- A SMB join followed by a multi-insert explain http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q b/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q new file mode 100644 index 0000000..c7d925e --- /dev/null +++ b/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q @@ -0,0 +1,258 @@ +-- Hybrid Grace Hash Join +-- Test basic functionalities: +-- 1. Various cases when hash partitions spill +-- 2. Partitioned table spilling +-- 3. 
Vectorization + +SELECT 1; + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask.size=1300000; +set hive.mapjoin.optimized.hashtable.wbsize=880000; +set hive.mapjoin.hybridgrace.memcheckfrequency=1024; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- Base result for inner join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +; + +set hive.mapjoin.hybridgrace.hashtable=true; + +-- Two partitions are created. One in memory, one on disk on creation. +-- The one in memory will eventually exceed memory limit, but won't spill. +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint + where c.cint < 2000000000) t1 +; + +set hive.auto.convert.join.noconditionaltask.size=3000000; +set hive.mapjoin.optimized.hashtable.wbsize=100000; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- Base result for inner join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +; + +set hive.mapjoin.hybridgrace.hashtable=true; + +-- 16 partitions are created: 3 in memory, 13 on disk on creation. 
+-- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + inner join alltypesorc cd + on cd.cint = c.cint) t1 +; + + + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- Base result for outer join +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +; + +set hive.mapjoin.hybridgrace.hashtable=true; + +-- 32 partitions are created. 3 in memory, 29 on disk on creation. +explain +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +; + +select count(*) from +(select c.ctinyint + from alltypesorc c + left outer join alltypesorc cd + on cd.cint = c.cint) t1 +; + + +-- Partitioned table +create table parttbl (key string, value char(20)) partitioned by (dt char(10)); +insert overwrite table parttbl partition(dt='2000-01-01') + select * from src; +insert overwrite table parttbl partition(dt='2000-01-02') + select * from src1; + +set hive.auto.convert.join.noconditionaltask.size=30000000; +set hive.mapjoin.optimized.hashtable.wbsize=10000000; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- No spill, base result +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +set hive.mapjoin.hybridgrace.hashtable=true; + +-- No spill, 2 partitions created in memory +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +select 
count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + + +set hive.auto.convert.join.noconditionaltask.size=20000; +set hive.mapjoin.optimized.hashtable.wbsize=10000; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- Spill case base result +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +set hive.mapjoin.hybridgrace.hashtable=true; + +-- Spill case, one partition in memory, one spilled on creation +explain +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +select count(*) from +(select p1.value + from parttbl p1 + inner join parttbl p2 + on p1.key = p2.key) t1 +; + +drop table parttbl; + + +-- Test vectorization +-- Test case borrowed from vector_decimal_mapjoin.q +CREATE TABLE decimal_mapjoin STORED AS ORC AS + SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, + CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, + cint + FROM alltypesorc; + +SET hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +SET hive.auto.convert.join.noconditionaltask.size=50000000; +set hive.mapjoin.optimized.hashtable.wbsize=10000; +SET hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; + +EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981; +SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981; + +set hive.mapjoin.hybridgrace.hashtable=true; + +EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981; +SELECT l.cint, r.cint, 
l.cdecimal1, r.cdecimal2 + FROM decimal_mapjoin l + JOIN decimal_mapjoin r ON l.cint = r.cint + WHERE l.cint = 6981; + +DROP TABLE decimal_mapjoin; http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q b/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q new file mode 100644 index 0000000..dd425f4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/hybridgrace_hashjoin_2.q @@ -0,0 +1,152 @@ +-- Hybrid Grace Hash Join +-- Test n-way join +SELECT 1; + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000000; +set hive.cbo.enable=false; + + +-- 3-way mapjoin (1 big table, 2 small tables) +SELECT 1; + +set hive.mapjoin.hybridgrace.hashtable=false; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key); + +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key); + +set hive.mapjoin.hybridgrace.hashtable=true; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key); + +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key); + + +-- 4-way mapjoin (1 big table, 3 small tables) +SELECT 1; + +set hive.mapjoin.hybridgrace.hashtable=false; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key); + +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key); + +set hive.mapjoin.hybridgrace.hashtable=true; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key); + 
+SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN srcpart w ON (x.key = w.key) +JOIN src y ON (y.key = x.key); + + +-- 2 sets of 3-way mapjoin under 2 different tasks +SELECT 1; + +set hive.mapjoin.hybridgrace.hashtable=false; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +UNION +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.value = z.value) +JOIN src y ON (y.value = x.value); + +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +UNION +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.value = z.value) +JOIN src y ON (y.value = x.value); + +set hive.mapjoin.hybridgrace.hashtable=true; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +UNION +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.value = z.value) +JOIN src y ON (y.value = x.value); + +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.key = z.key) +JOIN src y ON (y.key = x.key) +UNION +SELECT COUNT(*) +FROM src1 x JOIN srcpart z ON (x.value = z.value) +JOIN src y ON (y.value = x.value); + + +-- A chain of 2 sets of 3-way mapjoin under the same task +SELECT 1; + +set hive.mapjoin.hybridgrace.hashtable=false; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x +JOIN srcpart z1 ON (x.key = z1.key) +JOIN src y1 ON (x.key = y1.key) +JOIN srcpart z2 ON (x.value = z2.value) +JOIN src y2 ON (x.value = y2.value) +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz' + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'; + +SELECT COUNT(*) +FROM src1 x +JOIN srcpart z1 ON (x.key = z1.key) +JOIN src y1 ON (x.key = y1.key) +JOIN srcpart z2 ON (x.value = z2.value) +JOIN src y2 ON (x.value = y2.value) +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz' + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'; + +set hive.mapjoin.hybridgrace.hashtable=true; + +EXPLAIN +SELECT COUNT(*) +FROM src1 x +JOIN srcpart z1 ON (x.key = z1.key) +JOIN src y1 ON 
(x.key = y1.key) +JOIN srcpart z2 ON (x.value = z2.value) +JOIN src y2 ON (x.value = y2.value) +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz' + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'; + +SELECT COUNT(*) +FROM src1 x +JOIN srcpart z1 ON (x.key = z1.key) +JOIN src y1 ON (x.key = y1.key) +JOIN srcpart z2 ON (x.value = z2.value) +JOIN src y2 ON (x.value = y2.value) +WHERE z1.key < 'zzzzzzzz' AND z2.key < 'zzzzzzzzzz' + AND y1.value < 'zzzzzzzz' AND y2.value < 'zzzzzzzzzz'; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/hybridhashjoin.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/hybridhashjoin.q b/ql/src/test/queries/clientpositive/hybridhashjoin.q deleted file mode 100644 index fbd48ea..0000000 --- a/ql/src/test/queries/clientpositive/hybridhashjoin.q +++ /dev/null @@ -1,250 +0,0 @@ -set hive.auto.convert.join=true; -set hive.auto.convert.join.noconditionaltask.size=1300000; -set hive.mapjoin.optimized.hashtable.wbsize=880000; -set hive.mapjoin.hybridgrace.memcheckfrequency=1024; - -set hive.mapjoin.hybridgrace.hashtable=false; - --- Base result for inner join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -; - -set hive.mapjoin.hybridgrace.hashtable=true; - --- Two partitions are created. One in memory, one on disk on creation. --- The one in memory will eventually exceed memory limit, but won't spill. 
-explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint - where c.cint < 2000000000) t1 -; - -set hive.auto.convert.join.noconditionaltask.size=3000000; -set hive.mapjoin.optimized.hashtable.wbsize=100000; - -set hive.mapjoin.hybridgrace.hashtable=false; - --- Base result for inner join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -; - -set hive.mapjoin.hybridgrace.hashtable=true; - --- 16 partitions are created: 3 in memory, 13 on disk on creation. --- 1 partition is spilled during first round processing, which ends up having 2 in memory, 14 on disk -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - inner join alltypesorc cd - on cd.cint = c.cint) t1 -; - - - -set hive.mapjoin.hybridgrace.hashtable=false; - --- Base result for outer join -explain -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -; - -set hive.mapjoin.hybridgrace.hashtable=true; - --- 32 partitions are created. 3 in memory, 29 on disk on creation. 
-explain -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -; - -select count(*) from -(select c.ctinyint - from alltypesorc c - left outer join alltypesorc cd - on cd.cint = c.cint) t1 -; - - --- Partitioned table -create table parttbl (key string, value char(20)) partitioned by (dt char(10)); -insert overwrite table parttbl partition(dt='2000-01-01') - select * from src; -insert overwrite table parttbl partition(dt='2000-01-02') - select * from src1; - -set hive.auto.convert.join.noconditionaltask.size=30000000; -set hive.mapjoin.optimized.hashtable.wbsize=10000000; - -set hive.mapjoin.hybridgrace.hashtable=false; - --- No spill, base result -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -set hive.mapjoin.hybridgrace.hashtable=true; - --- No spill, 2 partitions created in memory -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - - -set hive.auto.convert.join.noconditionaltask.size=20000; -set hive.mapjoin.optimized.hashtable.wbsize=10000; - -set hive.mapjoin.hybridgrace.hashtable=false; - --- Spill case base result -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -set hive.mapjoin.hybridgrace.hashtable=true; - --- Spill case, one partition in memory, one spilled on creation -explain -select count(*) from -(select p1.value - from parttbl p1 - inner join parttbl p2 - on p1.key = p2.key) t1 -; - -select count(*) from -(select p1.value - from parttbl p1 - 
inner join parttbl p2 - on p1.key = p2.key) t1 -; - -drop table parttbl; - - --- Test vectorization --- Test case borrowed from vector_decimal_mapjoin.q -CREATE TABLE decimal_mapjoin STORED AS ORC AS - SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(20,10)) AS cdecimal1, - CAST (((cdouble*9.3)/13) AS DECIMAL(23,14)) AS cdecimal2, - cint - FROM alltypesorc; - -SET hive.auto.convert.join=true; -SET hive.auto.convert.join.noconditionaltask=true; -SET hive.auto.convert.join.noconditionaltask.size=50000000; -set hive.mapjoin.optimized.hashtable.wbsize=10000; -SET hive.vectorized.execution.enabled=true; -set hive.mapjoin.hybridgrace.hashtable=false; - -EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981; -SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981; - -set hive.mapjoin.hybridgrace.hashtable=true; - -EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981; -SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 - FROM decimal_mapjoin l - JOIN decimal_mapjoin r ON l.cint = r.cint - WHERE l.cint = 6981; - -DROP TABLE decimal_mapjoin; http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/tez_join_hash.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/tez_join_hash.q b/ql/src/test/queries/clientpositive/tez_join_hash.q index 3571cd5..67d89f8 100644 --- a/ql/src/test/queries/clientpositive/tez_join_hash.q +++ b/ql/src/test/queries/clientpositive/tez_join_hash.q @@ -14,6 +14,8 @@ SELECT count(*) FROM src, orc_src where src.key=orc_src.key; set hive.auto.convert.join=true; set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=3000; +set 
hive.mapjoin.hybridgrace.minwbsize=350; +set hive.mapjoin.hybridgrace.minnumpartitions=8; explain select count(*) from (select x.key as key, y.value as value from http://git-wip-us.apache.org/repos/asf/hive/blob/c72d073c/ql/src/test/queries/clientpositive/tez_smb_main.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/tez_smb_main.q b/ql/src/test/queries/clientpositive/tez_smb_main.q index 6398762..1802709 100644 --- a/ql/src/test/queries/clientpositive/tez_smb_main.q +++ b/ql/src/test/queries/clientpositive/tez_smb_main.q @@ -42,6 +42,8 @@ select count(*) from tab a join tab_part b on a.key = b.key; set hive.auto.convert.join.noconditionaltask.size=2000; +set hive.mapjoin.hybridgrace.minwbsize=500; +set hive.mapjoin.hybridgrace.minnumpartitions=4; explain select count (*) from tab a join tab_part b on a.key = b.key; @@ -50,6 +52,8 @@ select count(*) from tab a join tab_part b on a.key = b.key; set hive.auto.convert.join.noconditionaltask.size=1000; +set hive.mapjoin.hybridgrace.minwbsize=250; +set hive.mapjoin.hybridgrace.minnumpartitions=4; explain select count (*) from tab a join tab_part b on a.key = b.key; @@ -58,6 +62,8 @@ select count(*) from tab a join tab_part b on a.key = b.key; set hive.auto.convert.join.noconditionaltask.size=500; +set hive.mapjoin.hybridgrace.minwbsize=125; +set hive.mapjoin.hybridgrace.minnumpartitions=4; explain select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value; select count(*) from tab a join tab_part b on a.key = b.key join src1 c on a.value = c.value;