Repository: tajo Updated Branches: refs/heads/branch-0.8.0 50d433f53 -> ebc60c51e
TAJO-691: HashJoin or HashAggregation is too slow if there is many unique keys. (hyoungjunkim via hyunsik) Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/ebc60c51 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/ebc60c51 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/ebc60c51 Branch: refs/heads/branch-0.8.0 Commit: ebc60c51e819432fda8a19618cb4ff9323168ddd Parents: 50d433f Author: Hyunsik Choi <[email protected]> Authored: Fri Mar 28 23:10:49 2014 +0900 Committer: Hyunsik Choi <[email protected]> Committed: Fri Mar 28 23:35:47 2014 +0900 ---------------------------------------------------------------------- CHANGES.txt | 5 ++++- .../queries/TestBuiltinFunctions/testAvgDouble.sql | 2 +- .../queries/TestCaseByCases/testTAJO415Case.sql | 5 ++++- .../queries/TestGroupByQuery/testGroupBy4.sql | 10 +++++++++- .../testJoinCoReferredEvalsWithSameExprs1.sql | 2 +- .../testJoinCoReferredEvalsWithSameExprs2.sql | 2 +- .../resources/queries/TestNetTypes/testGroupby.sql | 9 ++++++++- .../resources/queries/TestNetTypes/testGroupby2.sql | 10 +++++++++- .../results/TestBuiltinFunctions/testAvgDouble.result | 4 ++-- .../results/TestBuiltinFunctions/testRandom.result | 1 - .../results/TestCaseByCases/testTAJO415Case.result | 2 +- .../results/TestGroupByQuery/testGroupBy4.result | 4 ++-- .../TestGroupByQuery/testHavingWithNamedTarget.result | 2 +- .../testJoinCoReferredEvalsWithSameExprs1.result | 6 +++--- .../testJoinCoReferredEvalsWithSameExprs2.result | 2 +- .../resources/results/TestNetTypes/testGroupby.result | 6 +++--- .../resources/results/TestNetTypes/testGroupby2.result | 4 ++-- .../main/java/org/apache/tajo/rpc/NettyClientBase.java | 3 ++- .../main/java/org/apache/tajo/storage/LazyTuple.java | 12 +----------- .../src/main/java/org/apache/tajo/storage/VTuple.java | 13 ++----------- 20 files changed, 57 insertions(+), 47 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 50cc01a..4bde76c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -138,7 +138,10 @@ Release 0.8.0 - unreleased IMPROVEMENTS - TAJO-685: Add prerequisite to the document of network functions and operators. + TAJO-691: HashJoin or HashAggregation is too slow if there is many unique keys. + (hyoungjunkim via hyunsik) + + TAJO-685: Add prerequisite to the document of network functions and operators. (jihoon) TAJO-644: Support quoted identifiers. (hyunsik) http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-core/tajo-core-backend/src/test/resources/queries/TestBuiltinFunctions/testAvgDouble.sql ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestBuiltinFunctions/testAvgDouble.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestBuiltinFunctions/testAvgDouble.sql index 1c8c749..a9afc5c 100644 --- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestBuiltinFunctions/testAvgDouble.sql +++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestBuiltinFunctions/testAvgDouble.sql @@ -1 +1 @@ -select l_orderkey, avg(l_discount) as revenue from lineitem group by l_orderkey; \ No newline at end of file +select l_orderkey, avg(l_discount) as revenue from lineitem group by l_orderkey order by l_orderkey; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO415Case.sql ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO415Case.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO415Case.sql index 4a73b04..4b915d3 100644 --- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO415Case.sql +++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO415Case.sql @@ -30,4 +30,7 @@ from ( group by o_orderkey -) b on (a.c_custkey = b.o_orderkey); \ No newline at end of file +) b on (a.c_custkey = b.o_orderkey) + +order by + c_custkey; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-core/tajo-core-backend/src/test/resources/queries/TestGroupByQuery/testGroupBy4.sql ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestGroupByQuery/testGroupBy4.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestGroupByQuery/testGroupBy4.sql index df354d4..4f2c63a 100644 --- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestGroupByQuery/testGroupBy4.sql +++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestGroupByQuery/testGroupBy4.sql @@ -1 +1,9 @@ -select l_orderkey as gkey, count(1) as unique_key from lineitem group by lineitem.l_orderkey; \ No newline at end of file +select + l_orderkey as gkey, + count(1) as unique_key +from + lineitem +group by + lineitem.l_orderkey +order by + gkey; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.sql ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.sql index edd69ff..3638393 100644 --- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.sql +++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.sql @@ -11,4 +11,4 @@ from where r_regionkey = n_regionkey and r_regionkey > 0 order by - n_regionkey + n_nationkey; \ No newline at end of file + n_regionkey + n_nationkey, n_regionkey; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.sql ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.sql index 03b7f8e..a5b75d4 100644 --- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.sql +++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.sql @@ -19,4 +19,4 @@ group by ((r_regionkey + n_regionkey) / 2) order by - n_regionkey + n_nationkey; \ No newline at end of file + n_regionkey + n_nationkey, n_regionkey; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby.sql ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby.sql index e3e61d8..27353a9 100644 --- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby.sql +++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby.sql @@ -1 +1,8 @@ -select name, addr, count(1) from table1 group by name, addr; \ No newline at end of file +select + name, addr, count(1) +from + table1 +group by + name, addr +order by + name, addr; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby2.sql ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby2.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby2.sql index c39c3b9..6c3c357 100644 --- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby2.sql +++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby2.sql @@ -1 +1,9 @@ -select addr, count(*) from table1 group by addr; \ No newline at end of file +select + addr, + count(*) +from + table1 +group by + addr +order by + addr; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testAvgDouble.result ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testAvgDouble.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testAvgDouble.result index 33b954e..bd2a69f 100644 --- a/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testAvgDouble.result +++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testAvgDouble.result @@ -1,5 +1,5 @@ l_orderkey,revenue ------------------------------- 1,0.065 -3,0.08 -2,0.0 \ No newline at end of file +2,0.0 +3,0.08 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testRandom.result ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testRandom.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testRandom.result deleted file mode 100644 index 9f2a456..0000000 --- a/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testRandom.result +++ /dev/null @@ -1 +0,0 @@ -select l_orderkey, random(3) as rndnum from lineitem group by l_orderkey, rndnum; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO415Case.result ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO415Case.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO415Case.result index 4b02873..675037b 100644 --- a/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO415Case.result +++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO415Case.result @@ -1,7 +1,7 @@ c_custkey,o_orderkey,cnt ------------------------------- 1,1,1 -3,3,1 2,2,1 +3,3,1 4,0,1 5,0,1 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testGroupBy4.result ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testGroupBy4.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testGroupBy4.result index 714e73d..22c6664 100644 --- a/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testGroupBy4.result +++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testGroupBy4.result @@ -1,5 +1,5 @@ gkey,unique_key ------------------------------- 1,2 -3,2 -2,1 \ No newline at end of file +2,1 +3,2 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result index 8e6eabd..627db72 100644 --- a/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result +++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result @@ -1,5 +1,5 @@ l_orderkey,total,num ------------------------------- -1,1.0,3 3,2.5,3 +1,1.0,3 2,2.0,1 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.result ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.result index 2b07dba..23776b6 100644 --- a/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.result +++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.result @@ -5,11 +5,11 @@ v1,n_regionkey,r_regionkey,plus1,plus2,result 4,1,1,2,2,1 8,4,4,8,8,4 9,3,3,6,6,3 -10,3,3,6,6,3 10,2,2,4,4,2 +10,3,3,6,6,3 11,2,2,4,4,2 -14,4,4,8,8,4 14,2,2,4,4,2 +14,4,4,8,8,4 15,4,4,8,8,4 17,4,4,8,8,4 18,1,1,2,2,1 @@ -17,6 +17,6 @@ v1,n_regionkey,r_regionkey,plus1,plus2,result 22,3,3,6,6,3 23,2,2,4,4,2 24,4,4,8,8,4 -25,3,3,6,6,3 25,1,1,2,2,1 +25,3,3,6,6,3 26,3,3,6,6,3 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.result ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.result index 91bb289..0e31362 100644 --- a/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.result +++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.result @@ -5,8 +5,8 @@ v1,n_regionkey,r_regionkey,plus1,plus2,result,total 4,1,1,2,2,1,2 8,4,4,8,8,4,8 9,3,3,6,6,3,6 -10,3,3,6,6,3,6 10,2,2,4,4,2,4 +10,3,3,6,6,3,6 11,2,2,4,4,2,4 14,2,2,4,4,2,4 14,4,4,8,8,4,8 http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby.result ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby.result index 5bedfc2..95847a6 100644 --- a/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby.result +++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby.result @@ -1,7 +1,7 @@ name,addr,?count ------------------------------- +ooo,127.0.0.1,1 ppp,127.0.1.1,1 qqq,127.0.0.8,1 -xxx,127.0.1.1,1 -ooo,127.0.0.1,1 -rrr,127.0.0.1,1 \ No newline at end of file +rrr,127.0.0.1,1 +xxx,127.0.1.1,1 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby2.result ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby2.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby2.result index 25cd91f..ffd1d9a 100644 --- a/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby2.result +++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby2.result @@ -1,5 +1,5 @@ addr,?count ------------------------------- -127.0.1.1,2 127.0.0.1,2 -127.0.0.8,1 \ No newline at end of file +127.0.0.8,1 +127.0.1.1,2 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-rpc/src/main/java/org/apache/tajo/rpc/NettyClientBase.java ---------------------------------------------------------------------- diff --git a/tajo-rpc/src/main/java/org/apache/tajo/rpc/NettyClientBase.java b/tajo-rpc/src/main/java/org/apache/tajo/rpc/NettyClientBase.java index 8373c37..60d3aa6 100644 --- a/tajo-rpc/src/main/java/org/apache/tajo/rpc/NettyClientBase.java +++ b/tajo-rpc/src/main/java/org/apache/tajo/rpc/NettyClientBase.java @@ -45,6 +45,7 @@ public abstract class NettyClientBase implements Closeable { public void init(InetSocketAddress addr, ChannelPipelineFactory pipeFactory, ClientSocketChannelFactory factory) throws IOException { try { + this.bootstrap = new ClientBootstrap(factory); this.bootstrap.setPipelineFactory(pipeFactory); // TODO - should be configurable @@ -57,7 +58,7 @@ public abstract class NettyClientBase implements Closeable { connect(addr); } catch (Throwable t) { close(); - throw new IOException(t.getCause()); + throw new IOException("Connect error to " + addr + " cause " + t.getMessage(), t.getCause()); } } http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java ---------------------------------------------------------------------- diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java b/tajo-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java index 3e7ca5f..27d2691 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java @@ -215,17 +215,7 @@ public class LazyTuple implements Tuple, Cloneable { @Override public int hashCode() { - int hashCode = 37; - for (int i = 0; i < values.length; i++) { - Datum d = get(i); - if (d != null) { - hashCode ^= (d.hashCode() * 41); - } else { - hashCode = hashCode ^ (i + 17); - } - } - - return hashCode; + return Arrays.hashCode(values); } @Override http://git-wip-us.apache.org/repos/asf/tajo/blob/ebc60c51/tajo-storage/src/main/java/org/apache/tajo/storage/VTuple.java ---------------------------------------------------------------------- diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/VTuple.java b/tajo-storage/src/main/java/org/apache/tajo/storage/VTuple.java index 72a4566..22d4fd9 100644 --- a/tajo-storage/src/main/java/org/apache/tajo/storage/VTuple.java +++ b/tajo-storage/src/main/java/org/apache/tajo/storage/VTuple.java @@ -198,19 +198,10 @@ public class VTuple implements Tuple, Cloneable { str.append(")"); return str.toString(); } - + @Override public int hashCode() { - int hashCode = 37; - for (int i=0; i < values.length; i++) { - if(values[i] != null) { - hashCode ^= (values[i].hashCode() * 41); - } else { - hashCode = hashCode ^ (i + 17); - } - } - - return hashCode; + return Arrays.hashCode(values); } @Override
