http://git-wip-us.apache.org/repos/asf/tajo/blob/bedce3aa/tajo-core/src/test/resources/results/TestTPCH/testQ2FourJoins.plan ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestTPCH/testQ2FourJoins.plan b/tajo-core/src/test/resources/results/TestTPCH/testQ2FourJoins.plan index ef0f8f8..945b6e4 100644 --- a/tajo-core/src/test/resources/results/TestTPCH/testQ2FourJoins.plan +++ b/tajo-core/src/test/resources/results/TestTPCH/testQ2FourJoins.plan @@ -34,14 +34,14 @@ JOIN(14)(INNER) => target list: default.partsupp.ps_partkey (INT4), default.partsupp.ps_supplycost (FLOAT8), default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT) => out schema: {(8) default.partsupp.ps_partkey (INT4), default.partsupp.ps_supplycost (FLOAT8), default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT)} => in schema: {(10) default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8), default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4)} - SCAN(5) on default.partsupp - => target list: default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8) - => out schema: {(3) default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8)} - => in schema: {(5) default.partsupp.ps_availqty (INT4), default.partsupp.ps_comment (TEXT), default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8)} SCAN(3) on default.supplier => target list: default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4) => out schema: {(7) default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4)} => in schema: {(7) default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4)} + SCAN(5) on default.partsupp + => target list: default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8) + => out schema: {(3) default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8)} + => in schema: {(5) default.partsupp.ps_availqty (INT4), default.partsupp.ps_comment (TEXT), default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8)} explain ------------------------------- ------------------------------------------------------------------------------- @@ -77,32 +77,32 @@ Block Id: eb_0000000000000_0000_000001 [LEAF] ======================================================= [Outgoing] -[q_0000000000000_0000] 1 => 3 (type=HASH_SHUFFLE, key=default.supplier.s_suppkey (INT4), num=32) +[q_0000000000000_0000] 1 => 3 (type=HASH_SHUFFLE, key=default.partsupp.ps_suppkey (INT4), num=32) -SCAN(3) on default.supplier - => target list: default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4) - => out schema: {(7) default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4)} - => in schema: {(7) default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4)} +SCAN(5) on default.partsupp + => target list: default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8) + => out schema: {(3) default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8)} + => in schema: {(5) default.partsupp.ps_availqty (INT4), default.partsupp.ps_comment (TEXT), default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8)} ======================================================= Block Id: eb_0000000000000_0000_000002 [LEAF] ======================================================= [Outgoing] -[q_0000000000000_0000] 2 => 3 (type=HASH_SHUFFLE, key=default.partsupp.ps_suppkey (INT4), num=32) +[q_0000000000000_0000] 2 => 3 (type=HASH_SHUFFLE, key=default.supplier.s_suppkey (INT4), num=32) -SCAN(5) on default.partsupp - => target list: default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8) - => out schema: {(3) default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8)} - => in schema: {(5) default.partsupp.ps_availqty (INT4), default.partsupp.ps_comment (TEXT), default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8)} +SCAN(3) on default.supplier + => target list: default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4) + => out schema: {(7) default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4)} + => in schema: {(7) default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4)} ======================================================= Block Id: eb_0000000000000_0000_000003 [INTERMEDIATE] ======================================================= [Incoming] -[q_0000000000000_0000] 1 => 3 (type=HASH_SHUFFLE, key=default.supplier.s_suppkey (INT4), num=32) -[q_0000000000000_0000] 2 => 3 (type=HASH_SHUFFLE, key=default.partsupp.ps_suppkey (INT4), num=32) +[q_0000000000000_0000] 1 => 3 (type=HASH_SHUFFLE, key=default.partsupp.ps_suppkey (INT4), num=32) +[q_0000000000000_0000] 2 => 3 (type=HASH_SHUFFLE, key=default.supplier.s_suppkey (INT4), num=32) [Outgoing] [q_0000000000000_0000] 3 => 5 (type=HASH_SHUFFLE, key=default.partsupp.ps_partkey (INT4), num=32) @@ -113,11 +113,11 @@ JOIN(11)(INNER) => out schema: {(8) default.partsupp.ps_partkey (INT4), default.partsupp.ps_supplycost (FLOAT8), default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT)} => in schema: {(10) default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8), default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4)} SCAN(17) on eb_0000000000000_0000_000002 - => out schema: {(3) default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8)} - => in schema: {(3) default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8)} - SCAN(16) on eb_0000000000000_0000_000001 => out schema: {(7) default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4)} => in schema: {(7) default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4)} + SCAN(16) on eb_0000000000000_0000_000001 + => out schema: {(3) default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8)} + => in schema: {(3) default.partsupp.ps_partkey (INT4), default.partsupp.ps_suppkey (INT4), default.partsupp.ps_supplycost (FLOAT8)} ======================================================= Block Id: eb_0000000000000_0000_000004 [LEAF]
http://git-wip-us.apache.org/repos/asf/tajo/blob/bedce3aa/tajo-core/src/test/resources/results/TestTPCH/testTPCHQ5.plan ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestTPCH/testTPCHQ5.plan b/tajo-core/src/test/resources/results/TestTPCH/testTPCHQ5.plan index a466b20..77c3f59 100644 --- a/tajo-core/src/test/resources/results/TestTPCH/testTPCHQ5.plan +++ b/tajo-core/src/test/resources/results/TestTPCH/testTPCHQ5.plan @@ -27,37 +27,37 @@ SORT(8) => out schema: {(3) default.nation.n_name (TEXT), default.nation.n_nationkey (INT4), default.nation.n_regionkey (INT4)} => in schema: {(4) default.nation.n_comment (TEXT), default.nation.n_name (TEXT), default.nation.n_nationkey (INT4), default.nation.n_regionkey (INT4)} JOIN(18)(INNER) - => Join Cond: default.lineitem.l_suppkey (INT4) = default.supplier.s_suppkey (INT4) + => Join Cond: default.customer.c_custkey (INT4) = default.orders.o_custkey (INT4) AND default.lineitem.l_suppkey (INT4) = default.supplier.s_suppkey (INT4) => target list: ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.supplier.s_nationkey (INT4) => out schema: {(4) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.supplier.s_nationkey (INT4)} - => in schema: {(6) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_suppkey (INT4), default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} + => in schema: {(8) ?multiply (FLOAT8), default.customer.c_custkey (INT4), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_suppkey (INT4), default.orders.o_custkey (INT4), default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} JOIN(17)(INNER) - => Join Cond: default.customer.c_nationkey (INT4) = default.supplier.s_nationkey (INT4) - => target list: default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4) - => out schema: {(2) default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} - => in schema: {(3) default.customer.c_nationkey (INT4), default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} - SCAN(0) on default.customer - => target list: default.customer.c_nationkey (INT4) - => out schema: {(1) default.customer.c_nationkey (INT4)} - => in schema: {(8) default.customer.c_acctbal (FLOAT8), default.customer.c_address (TEXT), default.customer.c_comment (TEXT), default.customer.c_custkey (INT4), default.customer.c_mktsegment (TEXT), default.customer.c_name (TEXT), default.customer.c_nationkey (INT4), default.customer.c_phone (TEXT)} - SCAN(3) on default.supplier - => target list: default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4) - => out schema: {(2) default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} - => in schema: {(7) default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4)} - JOIN(16)(INNER) => Join Cond: default.lineitem.l_orderkey (INT4) = default.orders.o_orderkey (INT4) - => target list: ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_suppkey (INT4) - => out schema: {(4) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_suppkey (INT4)} - => in schema: {(6) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_orderkey (INT4), default.lineitem.l_suppkey (INT4), default.orders.o_orderkey (INT4)} + => target list: ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_suppkey (INT4), default.orders.o_custkey (INT4) + => out schema: {(5) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_suppkey (INT4), default.orders.o_custkey (INT4)} + => in schema: {(7) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_orderkey (INT4), default.lineitem.l_suppkey (INT4), default.orders.o_custkey (INT4), default.orders.o_orderkey (INT4)} SCAN(1) on default.orders => filter: default.orders.o_orderdate (TEXT) >= 1994-01-01 AND default.orders.o_orderdate (TEXT) < 1995-01-01 - => target list: default.orders.o_orderkey (INT4) - => out schema: {(1) default.orders.o_orderkey (INT4)} + => target list: default.orders.o_custkey (INT4), default.orders.o_orderkey (INT4) + => out schema: {(2) default.orders.o_custkey (INT4), default.orders.o_orderkey (INT4)} => in schema: {(9) default.orders.o_clerk (TEXT), default.orders.o_comment (TEXT), default.orders.o_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderpriority (TEXT), default.orders.o_orderstatus (TEXT), default.orders.o_shippriority (INT4), default.orders.o_totalprice (FLOAT8)} SCAN(2) on default.lineitem => target list: default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_orderkey (INT4), default.lineitem.l_suppkey (INT4), default.lineitem.l_extendedprice (FLOAT8) * 1.0 - default.lineitem.l_discount (FLOAT8) as ?multiply => out schema: {(5) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_orderkey (INT4), default.lineitem.l_suppkey (INT4)} => in schema: {(16) default.lineitem.l_comment (TEXT), default.lineitem.l_commitdate (TEXT), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_linenumber (INT4), default.lineitem.l_linestatus (TEXT), default.lineitem.l_orderkey (INT4), default.lineitem.l_partkey (INT4), default.lineitem.l_quantity (FLOAT8), default.lineitem.l_receiptdate (TEXT), default.lineitem.l_returnflag (TEXT), default.lineitem.l_shipdate (TEXT), default.lineitem.l_shipinstruct (TEXT), default.lineitem.l_shipmode (TEXT), default.lineitem.l_suppkey (INT4), default.lineitem.l_tax (FLOAT8)} + JOIN(16)(INNER) + => Join Cond: default.customer.c_nationkey (INT4) = default.supplier.s_nationkey (INT4) + => target list: default.customer.c_custkey (INT4), default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4) + => out schema: {(3) default.customer.c_custkey (INT4), default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} + => in schema: {(4) default.customer.c_custkey (INT4), default.customer.c_nationkey (INT4), default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} + SCAN(3) on default.supplier + => target list: default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4) + => out schema: {(2) default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} + => in schema: {(7) default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4)} + SCAN(0) on default.customer + => target list: default.customer.c_custkey (INT4), default.customer.c_nationkey (INT4) + => out schema: {(2) default.customer.c_custkey (INT4), default.customer.c_nationkey (INT4)} + => in schema: {(8) default.customer.c_acctbal (FLOAT8), default.customer.c_address (TEXT), default.customer.c_comment (TEXT), default.customer.c_custkey (INT4), default.customer.c_mktsegment (TEXT), default.customer.c_name (TEXT), default.customer.c_nationkey (INT4), default.customer.c_phone (TEXT)} explain ------------------------------- ------------------------------------------------------------------------------- @@ -101,95 +101,95 @@ Block Id: eb_0000000000000_0000_000001 [LEAF] ======================================================= [Outgoing] -[q_0000000000000_0000] 1 => 3 (type=HASH_SHUFFLE, key=default.lineitem.l_orderkey (INT4), num=32) +[q_0000000000000_0000] 1 => 3 (type=HASH_SHUFFLE, key=default.customer.c_nationkey (INT4), num=32) -SCAN(2) on default.lineitem - => target list: default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_orderkey (INT4), default.lineitem.l_suppkey (INT4), default.lineitem.l_extendedprice (FLOAT8) * 1.0 - default.lineitem.l_discount (FLOAT8) as ?multiply - => out schema: {(5) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_orderkey (INT4), default.lineitem.l_suppkey (INT4)} - => in schema: {(16) default.lineitem.l_comment (TEXT), default.lineitem.l_commitdate (TEXT), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_linenumber (INT4), default.lineitem.l_linestatus (TEXT), default.lineitem.l_orderkey (INT4), default.lineitem.l_partkey (INT4), default.lineitem.l_quantity (FLOAT8), default.lineitem.l_receiptdate (TEXT), default.lineitem.l_returnflag (TEXT), default.lineitem.l_shipdate (TEXT), default.lineitem.l_shipinstruct (TEXT), default.lineitem.l_shipmode (TEXT), default.lineitem.l_suppkey (INT4), default.lineitem.l_tax (FLOAT8)} +SCAN(0) on default.customer + => target list: default.customer.c_custkey (INT4), default.customer.c_nationkey (INT4) + => out schema: {(2) default.customer.c_custkey (INT4), default.customer.c_nationkey (INT4)} + => in schema: {(8) default.customer.c_acctbal (FLOAT8), default.customer.c_address (TEXT), default.customer.c_comment (TEXT), default.customer.c_custkey (INT4), default.customer.c_mktsegment (TEXT), default.customer.c_name (TEXT), default.customer.c_nationkey (INT4), default.customer.c_phone (TEXT)} ======================================================= Block Id: eb_0000000000000_0000_000002 [LEAF] ======================================================= [Outgoing] -[q_0000000000000_0000] 2 => 3 (type=HASH_SHUFFLE, key=default.orders.o_orderkey (INT4), num=32) +[q_0000000000000_0000] 2 => 3 (type=HASH_SHUFFLE, key=default.supplier.s_nationkey (INT4), num=32) -SCAN(1) on default.orders - => filter: default.orders.o_orderdate (TEXT) >= 1994-01-01 AND default.orders.o_orderdate (TEXT) < 1995-01-01 - => target list: default.orders.o_orderkey (INT4) - => out schema: {(1) default.orders.o_orderkey (INT4)} - => in schema: {(9) default.orders.o_clerk (TEXT), default.orders.o_comment (TEXT), default.orders.o_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderpriority (TEXT), default.orders.o_orderstatus (TEXT), default.orders.o_shippriority (INT4), default.orders.o_totalprice (FLOAT8)} +SCAN(3) on default.supplier + => target list: default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4) + => out schema: {(2) default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} + => in schema: {(7) default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4)} ======================================================= Block Id: eb_0000000000000_0000_000004 [LEAF] ======================================================= [Outgoing] -[q_0000000000000_0000] 4 => 6 (type=HASH_SHUFFLE, key=default.supplier.s_nationkey (INT4), num=32) +[q_0000000000000_0000] 4 => 6 (type=HASH_SHUFFLE, key=default.lineitem.l_orderkey (INT4), num=32) -SCAN(3) on default.supplier - => target list: default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4) - => out schema: {(2) default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} - => in schema: {(7) default.supplier.s_acctbal (FLOAT8), default.supplier.s_address (TEXT), default.supplier.s_comment (TEXT), default.supplier.s_name (TEXT), default.supplier.s_nationkey (INT4), default.supplier.s_phone (TEXT), default.supplier.s_suppkey (INT4)} +SCAN(2) on default.lineitem + => target list: default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_orderkey (INT4), default.lineitem.l_suppkey (INT4), default.lineitem.l_extendedprice (FLOAT8) * 1.0 - default.lineitem.l_discount (FLOAT8) as ?multiply + => out schema: {(5) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_orderkey (INT4), default.lineitem.l_suppkey (INT4)} + => in schema: {(16) default.lineitem.l_comment (TEXT), default.lineitem.l_commitdate (TEXT), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_linenumber (INT4), default.lineitem.l_linestatus (TEXT), default.lineitem.l_orderkey (INT4), default.lineitem.l_partkey (INT4), default.lineitem.l_quantity (FLOAT8), default.lineitem.l_receiptdate (TEXT), default.lineitem.l_returnflag (TEXT), default.lineitem.l_shipdate (TEXT), default.lineitem.l_shipinstruct (TEXT), default.lineitem.l_shipmode (TEXT), default.lineitem.l_suppkey (INT4), default.lineitem.l_tax (FLOAT8)} ======================================================= Block Id: eb_0000000000000_0000_000005 [LEAF] ======================================================= [Outgoing] -[q_0000000000000_0000] 5 => 6 (type=HASH_SHUFFLE, key=default.customer.c_nationkey (INT4), num=32) +[q_0000000000000_0000] 5 => 6 (type=HASH_SHUFFLE, key=default.orders.o_orderkey (INT4), num=32) -SCAN(0) on default.customer - => target list: default.customer.c_nationkey (INT4) - => out schema: {(1) default.customer.c_nationkey (INT4)} - => in schema: {(8) default.customer.c_acctbal (FLOAT8), default.customer.c_address (TEXT), default.customer.c_comment (TEXT), default.customer.c_custkey (INT4), default.customer.c_mktsegment (TEXT), default.customer.c_name (TEXT), default.customer.c_nationkey (INT4), default.customer.c_phone (TEXT)} +SCAN(1) on default.orders + => filter: default.orders.o_orderdate (TEXT) >= 1994-01-01 AND default.orders.o_orderdate (TEXT) < 1995-01-01 + => target list: default.orders.o_custkey (INT4), default.orders.o_orderkey (INT4) + => out schema: {(2) default.orders.o_custkey (INT4), default.orders.o_orderkey (INT4)} + => in schema: {(9) default.orders.o_clerk (TEXT), default.orders.o_comment (TEXT), default.orders.o_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderpriority (TEXT), default.orders.o_orderstatus (TEXT), default.orders.o_shippriority (INT4), default.orders.o_totalprice (FLOAT8)} ======================================================= Block Id: eb_0000000000000_0000_000003 [INTERMEDIATE] ======================================================= [Incoming] -[q_0000000000000_0000] 1 => 3 (type=HASH_SHUFFLE, key=default.lineitem.l_orderkey (INT4), num=32) -[q_0000000000000_0000] 2 => 3 (type=HASH_SHUFFLE, key=default.orders.o_orderkey (INT4), num=32) +[q_0000000000000_0000] 1 => 3 (type=HASH_SHUFFLE, key=default.customer.c_nationkey (INT4), num=32) +[q_0000000000000_0000] 2 => 3 (type=HASH_SHUFFLE, key=default.supplier.s_nationkey (INT4), num=32) [Outgoing] -[q_0000000000000_0000] 3 => 7 (type=HASH_SHUFFLE, key=default.lineitem.l_suppkey (INT4), num=32) +[q_0000000000000_0000] 3 => 7 (type=HASH_SHUFFLE, key=default.customer.c_custkey (INT4), default.supplier.s_suppkey (INT4), num=32) JOIN(16)(INNER) - => Join Cond: default.lineitem.l_orderkey (INT4) = default.orders.o_orderkey (INT4) - => target list: ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_suppkey (INT4) - => out schema: {(4) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_suppkey (INT4)} - => in schema: {(6) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_orderkey (INT4), default.lineitem.l_suppkey (INT4), default.orders.o_orderkey (INT4)} + => Join Cond: default.customer.c_nationkey (INT4) = default.supplier.s_nationkey (INT4) + => target list: default.customer.c_custkey (INT4), default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4) + => out schema: {(3) default.customer.c_custkey (INT4), default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} + => in schema: {(4) default.customer.c_custkey (INT4), default.customer.c_nationkey (INT4), default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} SCAN(23) on eb_0000000000000_0000_000002 - => out schema: {(1) default.orders.o_orderkey (INT4)} - => in schema: {(1) default.orders.o_orderkey (INT4)} + => out schema: {(2) default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} + => in schema: {(2) default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} SCAN(22) on eb_0000000000000_0000_000001 - => out schema: {(5) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_orderkey (INT4), default.lineitem.l_suppkey (INT4)} - => in schema: {(5) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_orderkey (INT4), default.lineitem.l_suppkey (INT4)} + => out schema: {(2) default.customer.c_custkey (INT4), default.customer.c_nationkey (INT4)} + => in schema: {(2) default.customer.c_custkey (INT4), default.customer.c_nationkey (INT4)} ======================================================= Block Id: eb_0000000000000_0000_000006 [INTERMEDIATE] ======================================================= [Incoming] -[q_0000000000000_0000] 4 => 6 (type=HASH_SHUFFLE, key=default.supplier.s_nationkey (INT4), num=32) -[q_0000000000000_0000] 5 => 6 (type=HASH_SHUFFLE, key=default.customer.c_nationkey (INT4), num=32) +[q_0000000000000_0000] 4 => 6 (type=HASH_SHUFFLE, key=default.lineitem.l_orderkey (INT4), num=32) +[q_0000000000000_0000] 5 => 6 (type=HASH_SHUFFLE, key=default.orders.o_orderkey (INT4), num=32) [Outgoing] -[q_0000000000000_0000] 6 => 7 (type=HASH_SHUFFLE, key=default.supplier.s_suppkey (INT4), num=32) +[q_0000000000000_0000] 6 => 7 (type=HASH_SHUFFLE, key=default.lineitem.l_suppkey (INT4), default.orders.o_custkey (INT4), num=32) JOIN(17)(INNER) - => Join Cond: default.customer.c_nationkey (INT4) = default.supplier.s_nationkey (INT4) - => target list: default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4) - => out schema: {(2) default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} - => in schema: {(3) default.customer.c_nationkey (INT4), default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} + => Join Cond: default.lineitem.l_orderkey (INT4) = default.orders.o_orderkey (INT4) + => target list: ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_suppkey (INT4), default.orders.o_custkey (INT4) + => out schema: {(5) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_suppkey (INT4), default.orders.o_custkey (INT4)} + => in schema: {(7) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_orderkey (INT4), default.lineitem.l_suppkey (INT4), default.orders.o_custkey (INT4), default.orders.o_orderkey (INT4)} SCAN(25) on eb_0000000000000_0000_000005 - => out schema: {(1) default.customer.c_nationkey (INT4)} - => in schema: {(1) default.customer.c_nationkey (INT4)} + => out schema: {(2) default.orders.o_custkey (INT4), default.orders.o_orderkey (INT4)} + => in schema: {(2) default.orders.o_custkey (INT4), default.orders.o_orderkey (INT4)} SCAN(24) on eb_0000000000000_0000_000004 - => out schema: {(2) default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} - => in schema: {(2) default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} + => out schema: {(5) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_orderkey (INT4), default.lineitem.l_suppkey (INT4)} + => in schema: {(5) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_orderkey (INT4), default.lineitem.l_suppkey (INT4)} ======================================================= Block Id: eb_0000000000000_0000_000008 [LEAF] @@ -221,23 +221,23 @@ Block Id: eb_0000000000000_0000_000007 [INTERMEDIATE] ======================================================= [Incoming] -[q_0000000000000_0000] 3 => 7 (type=HASH_SHUFFLE, key=default.lineitem.l_suppkey (INT4), num=32) -[q_0000000000000_0000] 6 => 7 (type=HASH_SHUFFLE, key=default.supplier.s_suppkey (INT4), num=32) +[q_0000000000000_0000] 3 => 7 (type=HASH_SHUFFLE, key=default.customer.c_custkey (INT4), default.supplier.s_suppkey (INT4), num=32) +[q_0000000000000_0000] 6 => 7 (type=HASH_SHUFFLE, key=default.lineitem.l_suppkey (INT4), default.orders.o_custkey (INT4), num=32) [Outgoing] [q_0000000000000_0000] 7 => 11 (type=HASH_SHUFFLE, key=default.supplier.s_nationkey (INT4), num=32) JOIN(18)(INNER) - => Join Cond: default.lineitem.l_suppkey (INT4) = default.supplier.s_suppkey (INT4) + => Join Cond: default.customer.c_custkey (INT4) = default.orders.o_custkey (INT4) AND default.lineitem.l_suppkey (INT4) = default.supplier.s_suppkey (INT4) => target list: ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.supplier.s_nationkey (INT4) => out schema: {(4) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.supplier.s_nationkey (INT4)} - => in schema: {(6) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_suppkey (INT4), default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} + => in schema: {(8) ?multiply (FLOAT8), default.customer.c_custkey (INT4), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_suppkey (INT4), default.orders.o_custkey (INT4), default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} SCAN(27) on eb_0000000000000_0000_000006 - => out schema: {(2) default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} - => in schema: {(2) default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} + => out schema: {(5) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_suppkey (INT4), default.orders.o_custkey (INT4)} + => in schema: {(5) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_suppkey (INT4), default.orders.o_custkey (INT4)} SCAN(26) on eb_0000000000000_0000_000003 - => out schema: {(4) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_suppkey (INT4)} - => in schema: {(4) ?multiply (FLOAT8), default.lineitem.l_discount (FLOAT8), default.lineitem.l_extendedprice (FLOAT8), default.lineitem.l_suppkey (INT4)} + => out schema: {(3) default.customer.c_custkey (INT4), default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} + => in schema: {(3) default.customer.c_custkey (INT4), default.supplier.s_nationkey (INT4), default.supplier.s_suppkey (INT4)} ======================================================= Block Id: eb_0000000000000_0000_000010 [INTERMEDIATE] http://git-wip-us.apache.org/repos/asf/tajo/blob/bedce3aa/tajo-core/src/test/resources/results/TestUnionQuery/testUnion12.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestUnionQuery/testUnion12.result b/tajo-core/src/test/resources/results/TestUnionQuery/testUnion12.result index c130afa..4050574 100644 --- a/tajo-core/src/test/resources/results/TestUnionQuery/testUnion12.result +++ b/tajo-core/src/test/resources/results/TestUnionQuery/testUnion12.result @@ -3,4 +3,5 @@ col1,col2,col3 R,46796.47,1993-11-24F PROMO BURNISHED COPPER,90100.0,1993goldenrod lavender spring chocolate lace LARGE BRUSHED BRASS,90200.0,1993blush thistle blue yellow saddle -STANDARD POLISHED BRASS,90300.0,1993spring green yellow purple cornsilk \ No newline at end of file +STANDARD POLISHED BRASS,90300.0,1993spring green yellow purple cornsilk +SMALL PLATED BRASS,90400.0,1993cornflower chocolate smoke green pink \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/bedce3aa/tajo-core/src/test/resources/results/TestUnionQuery/testUnion13.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestUnionQuery/testUnion13.result b/tajo-core/src/test/resources/results/TestUnionQuery/testUnion13.result index c130afa..4050574 100644 --- a/tajo-core/src/test/resources/results/TestUnionQuery/testUnion13.result +++ b/tajo-core/src/test/resources/results/TestUnionQuery/testUnion13.result @@ -3,4 +3,5 @@ col1,col2,col3 R,46796.47,1993-11-24F PROMO BURNISHED COPPER,90100.0,1993goldenrod lavender spring chocolate lace LARGE BRUSHED BRASS,90200.0,1993blush thistle blue yellow saddle -STANDARD POLISHED BRASS,90300.0,1993spring green yellow purple cornsilk \ No newline at end of file +STANDARD POLISHED BRASS,90300.0,1993spring green yellow purple cornsilk +SMALL PLATED BRASS,90400.0,1993cornflower chocolate smoke green pink \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/bedce3aa/tajo-core/src/test/resources/results/TestUnionQuery/testUnionAll12.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestUnionQuery/testUnionAll12.result b/tajo-core/src/test/resources/results/TestUnionQuery/testUnionAll12.result index c130afa..4050574 100644 --- a/tajo-core/src/test/resources/results/TestUnionQuery/testUnionAll12.result +++ b/tajo-core/src/test/resources/results/TestUnionQuery/testUnionAll12.result @@ -3,4 +3,5 @@ col1,col2,col3 R,46796.47,1993-11-24F PROMO BURNISHED COPPER,90100.0,1993goldenrod lavender spring chocolate lace LARGE BRUSHED BRASS,90200.0,1993blush thistle blue yellow saddle -STANDARD POLISHED BRASS,90300.0,1993spring green yellow purple cornsilk \ No newline at end of file +STANDARD POLISHED BRASS,90300.0,1993spring green yellow purple cornsilk +SMALL PLATED BRASS,90400.0,1993cornflower chocolate smoke green pink \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/bedce3aa/tajo-core/src/test/resources/results/TestUnionQuery/testUnionAll13.result ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/resources/results/TestUnionQuery/testUnionAll13.result b/tajo-core/src/test/resources/results/TestUnionQuery/testUnionAll13.result index c130afa..4050574 100644 --- a/tajo-core/src/test/resources/results/TestUnionQuery/testUnionAll13.result +++ b/tajo-core/src/test/resources/results/TestUnionQuery/testUnionAll13.result @@ -3,4 +3,5 @@ col1,col2,col3 R,46796.47,1993-11-24F PROMO BURNISHED COPPER,90100.0,1993goldenrod lavender spring chocolate lace LARGE BRUSHED BRASS,90200.0,1993blush thistle blue yellow saddle -STANDARD POLISHED BRASS,90300.0,1993spring green yellow purple cornsilk \ No newline at end of file +STANDARD POLISHED BRASS,90300.0,1993spring green yellow purple cornsilk +SMALL PLATED BRASS,90400.0,1993cornflower chocolate smoke green pink \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/bedce3aa/tajo-core/src/test/tpch/part.tbl ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/tpch/part.tbl b/tajo-core/src/test/tpch/part.tbl index 5fbdb46..6e6fa72 100644 --- a/tajo-core/src/test/tpch/part.tbl +++ b/tajo-core/src/test/tpch/part.tbl @@ -1,3 +1,4 @@ -1|goldenrod lavender spring chocolate lace|Manufacturer#1|Brand#13|PROMO BURNISHED COPPER|7|JUMBO PKG|901.00|ly. slyly ironi| -2|blush thistle blue yellow saddle|Manufacturer#1|Brand#13|LARGE BRUSHED BRASS|15|LG CASE|902.00|lar accounts amo| -3|spring green yellow purple cornsilk|Manufacturer#4|Brand#42|STANDARD POLISHED BRASS|21|WRAP CASE|903.00|egular deposits hag| +1|goldenrod lavender spring chocolate lace|Manufacturer#1|Brand#13|PROMO BURNISHED COPPER|7|JUMBO PKG|901.00|ly. slyly ironi +2|blush thistle blue yellow saddle|Manufacturer#1|Brand#13|LARGE BRUSHED BRASS|15|LG CASE|902.00|lar accounts amo +3|spring green yellow purple cornsilk|Manufacturer#4|Brand#42|STANDARD POLISHED BRASS|21|WRAP CASE|903.00|egular deposits hag +4|cornflower chocolate smoke green pink|Manufacturer#3|Brand#34|SMALL PLATED BRASS|14|MED DRUM|904.00|p furiously r \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/bedce3aa/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalOptimizer.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalOptimizer.java b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalOptimizer.java index 18a8859..2760722 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalOptimizer.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalOptimizer.java @@ -19,8 +19,6 @@ package org.apache.tajo.plan; import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceStability; @@ -30,22 +28,20 @@ import org.apache.tajo.SessionVars; import org.apache.tajo.algebra.JoinType; import org.apache.tajo.conf.TajoConf; import org.apache.tajo.conf.TajoConf.ConfVars; -import org.apache.tajo.util.ReflectionUtil; -import org.apache.tajo.util.graph.DirectedGraphCursor; import org.apache.tajo.plan.expr.AlgebraicUtil; import org.apache.tajo.plan.expr.EvalNode; -import org.apache.tajo.plan.joinorder.FoundJoinOrder; -import org.apache.tajo.plan.joinorder.GreedyHeuristicJoinOrderAlgorithm; -import org.apache.tajo.plan.joinorder.JoinGraph; -import org.apache.tajo.plan.joinorder.JoinOrderAlgorithm; +import org.apache.tajo.plan.expr.EvalTreeUtil; +import org.apache.tajo.plan.joinorder.*; import org.apache.tajo.plan.logical.*; -import org.apache.tajo.plan.rewrite.*; +import org.apache.tajo.plan.rewrite.BaseLogicalPlanRewriteEngine; +import org.apache.tajo.plan.rewrite.LogicalPlanRewriteRuleProvider; import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.plan.visitor.BasicLogicalPlanVisitor; +import org.apache.tajo.util.ReflectionUtil; +import org.apache.tajo.util.TUtil; +import org.apache.tajo.util.graph.DirectedGraphCursor; -import java.util.LinkedHashSet; -import java.util.Set; -import java.util.Stack; +import java.util.*; import static org.apache.tajo.plan.LogicalPlan.BlockEdge; import static org.apache.tajo.plan.joinorder.GreedyHeuristicJoinOrderAlgorithm.getCost; @@ -72,12 +68,6 @@ public class LogicalOptimizer { rulesAfterToJoinOpt.addRewriteRule(provider.getPostRules()); } - public void addRuleAfterToJoinOpt(LogicalPlanRewriteRule rewriteRule) { - if (rewriteRule != null) { - rulesAfterToJoinOpt.addRewriteRule(rewriteRule); - } - } - @VisibleForTesting public LogicalNode optimize(LogicalPlan plan) throws PlanningException { OverridableConf conf = new OverridableConf(new TajoConf(), @@ -97,7 +87,7 @@ public class LogicalOptimizer { optimizeJoinOrder(plan, blockCursor.nextBlock()); } } else { - LOG.info("Skip Join Optimized."); + LOG.info("Skip join order optimization"); } rulesAfterToJoinOpt.rewrite(context, plan); return plan.getRootBlock().getRoot(); @@ -113,12 +103,13 @@ public class LogicalOptimizer { // finding relations and filter expressions JoinGraphContext joinGraphContext = JoinGraphBuilder.buildJoinGraph(plan, block); - // finding join order and restore remain filter order - FoundJoinOrder order = joinOrderAlgorithm.findBestOrder(plan, block, - joinGraphContext.joinGraph, joinGraphContext.relationsForProduct); + // finding join order and restore remaining filters + FoundJoinOrder order = joinOrderAlgorithm.findBestOrder(plan, block, joinGraphContext); // replace join node with FoundJoinOrder. JoinNode newJoinNode = order.getOrderedJoin(); + LogicalNode newNode = handleRemainingFiltersIfNecessary(joinGraphContext, plan, block, newJoinNode); + JoinNode old = PlannerUtil.findTopNode(block.getRoot(), NodeType.JOIN); JoinTargetCollector collector = new JoinTargetCollector(); @@ -130,7 +121,7 @@ public class LogicalOptimizer { } else { newJoinNode.setTargets(targets.toArray(new Target[targets.size()])); } - PlannerUtil.replaceNode(plan, block.getRoot(), old, newJoinNode); + PlannerUtil.replaceNode(plan, block.getRoot(), old, newNode); // End of replacement logic String optimizedOrder = JoinOrderStringBuilder.buildJoinOrderString(plan, block); @@ -139,6 +130,58 @@ public class LogicalOptimizer { } } + /** + * During join order optimization, every condition is checked whether it is a join condition or not. + * So, after join order is optimized, there can be remaining conditions which are not join conditions. + * This function handles such remaining conditions. It creates a new selection node for those conditions if required + * or add them to the existing selection node. + * + * @param joinGraphContext join graph context + * @param plan logical plan + * @param block query block + * @param newJoinNode the top join node after join order optimization + * @return the top logical node after handling remaining conditions + */ + private static LogicalNode handleRemainingFiltersIfNecessary(JoinGraphContext joinGraphContext, + LogicalPlan plan, + LogicalPlan.QueryBlock block, + JoinNode newJoinNode) { + // Gather filters from remaining join edges + Collection<JoinEdge> joinEdges = joinGraphContext.getJoinGraph().getEdgesAll(); + Collection<EvalNode> markAsEvaluated = new HashSet<EvalNode>(joinGraphContext.getEvaluatedJoinConditions()); + markAsEvaluated.addAll(joinGraphContext.getEvaluatedJoinFilters()); + Set<EvalNode> remainingQuals = new HashSet<EvalNode>(joinGraphContext.getCandidateJoinFilters()); + for (JoinEdge eachEdge : joinEdges) { + for (EvalNode eachQual : eachEdge.getJoinQual()) { + if (!markAsEvaluated.contains(eachQual)) { + remainingQuals.add(eachQual); + } + } + } + + if (!remainingQuals.isEmpty()) { + LogicalNode topParent = PlannerUtil.findTopParentNode(block.getRoot(), NodeType.JOIN); + if (topParent.getType() == NodeType.SELECTION) { + SelectionNode topParentSelect = (SelectionNode) topParent; + Set<EvalNode> filters = TUtil.newHashSet(); + filters.addAll(TUtil.newHashSet(AlgebraicUtil.toConjunctiveNormalFormArray(topParentSelect.getQual()))); + filters.addAll(remainingQuals); + topParentSelect.setQual(AlgebraicUtil.createSingletonExprFromCNF( + filters.toArray(new EvalNode[filters.size()]))); + return newJoinNode; + } else { + SelectionNode newSelection = plan.createNode(SelectionNode.class); + newSelection.setQual(AlgebraicUtil.createSingletonExprFromCNF( + remainingQuals.toArray(new EvalNode[remainingQuals.size()]))); + newSelection.setChild(newJoinNode); + return newSelection; + } + } + + + return newJoinNode; + } + private static class JoinTargetCollector extends BasicLogicalPlanVisitor<Set<Target>, LogicalNode> { @Override public LogicalNode visitJoin(Set<Target> ctx, LogicalPlan plan, LogicalPlan.QueryBlock block, JoinNode node, @@ -155,12 +198,23 @@ public class LogicalOptimizer { } } - private static class JoinGraphContext { - JoinGraph joinGraph = new JoinGraph(); - Set<EvalNode> quals = Sets.newHashSet(); - Set<String> relationsForProduct = Sets.newHashSet(); - } - + /** + * The first phase of the join order optimization is building a join graph from the given query. + * This initial join graph forms a tree which consists of only relation vertexes in an order of their occurrences in + * the query. For example, let me suppose the following query. + * + * default> select * from t1 inner join t2 left outer join t3 inner join t4; + * + * In this example, the initial join graph is: + * + * t1 - (inner join) - t2 - (left outer join) - t3 - (inner join) - t4. + * + * This means that the default join order is left to right. Join queries can be always processed with the + * default join order. This join order will be optimized by {@link JoinOrderAlgorithm}. + * + * JoinGraphBuilder builds an initial join graph as illustrated above. + * + */ private static class JoinGraphBuilder extends BasicLogicalPlanVisitor<JoinGraphContext, LogicalNode> { private final static JoinGraphBuilder instance; @@ -175,37 +229,84 @@ public class LogicalOptimizer { */ public static JoinGraphContext buildJoinGraph(LogicalPlan plan, LogicalPlan.QueryBlock block) throws PlanningException { - JoinGraphContext joinGraphContext = new JoinGraphContext(); - instance.visit(joinGraphContext, plan, block); - return joinGraphContext; + JoinGraphContext context = new JoinGraphContext(); + instance.visit(context, plan, block); + return context; } + @Override + public LogicalNode visit(JoinGraphContext context, LogicalPlan plan, LogicalPlan.QueryBlock block, + LogicalNode node, Stack<LogicalNode> stack) throws PlanningException { + if (node.getType() != NodeType.TABLE_SUBQUERY) { + super.visit(context, plan, block, node, stack); + } + + return node; + } + + @Override public LogicalNode visitFilter(JoinGraphContext context, LogicalPlan plan, LogicalPlan.QueryBlock block, SelectionNode node, Stack<LogicalNode> stack) throws PlanningException { + // all join predicate candidates must be collected before building the join tree + context.addCandidateJoinFilters( + TUtil.newList(AlgebraicUtil.toConjunctiveNormalFormArray(node.getQual()))); super.visitFilter(context, plan, block, node, stack); - context.quals.addAll(Lists.newArrayList(AlgebraicUtil.toConjunctiveNormalFormArray(node.getQual()))); return node; } @Override - public LogicalNode visitJoin(JoinGraphContext joinGraphContext, LogicalPlan plan, LogicalPlan.QueryBlock block, + public LogicalNode visitJoin(JoinGraphContext context, LogicalPlan plan, LogicalPlan.QueryBlock block, JoinNode joinNode, Stack<LogicalNode> stack) throws PlanningException { - super.visitJoin(joinGraphContext, plan, block, joinNode, stack); + super.visitJoin(context, plan, block, joinNode, stack); + + // given a join node, find the relations which are nearest to the join in the query. + RelationNode leftChild = findMostRightRelation(plan, block, joinNode.getLeftChild()); + RelationNode rightChild = findMostLeftRelation(plan, block, joinNode.getRightChild()); + RelationVertex leftVertex = new RelationVertex(leftChild); + RelationVertex rightVertex = new RelationVertex(rightChild); + + JoinEdge edge = context.getJoinGraph().addJoin(context, joinNode.getJoinSpec(), leftVertex, rightVertex); + + // find all possible predicates for this join edge + Set<EvalNode> joinConditions = TUtil.newHashSet(); if (joinNode.hasJoinQual()) { - joinGraphContext.joinGraph.addJoin(plan, block, joinNode); - } else { - LogicalNode leftChild = joinNode.getLeftChild(); - LogicalNode rightChild = joinNode.getRightChild(); - if (leftChild instanceof RelationNode) { - RelationNode rel = (RelationNode) leftChild; - joinGraphContext.relationsForProduct.add(rel.getCanonicalName()); - } - if (rightChild instanceof RelationNode) { - RelationNode rel = (RelationNode) rightChild; - joinGraphContext.relationsForProduct.add(rel.getCanonicalName()); + Set<EvalNode> originPredicates = joinNode.getJoinSpec().getPredicates(); + for (EvalNode predicate : joinNode.getJoinSpec().getPredicates()) { + if (EvalTreeUtil.isJoinQual(block, leftVertex.getSchema(), rightVertex.getSchema(), predicate, false)) { + if (JoinOrderingUtil.checkIfEvaluatedAtEdge(predicate, edge, true)) { + joinConditions.add(predicate); + } + } else { + joinConditions.add(predicate); + } } + // find predicates which cannot be evaluated at this join + originPredicates.removeAll(joinConditions); + context.addCandidateJoinConditions(originPredicates); + originPredicates.clear(); + originPredicates.addAll(joinConditions); + } + + joinConditions.addAll(JoinOrderingUtil.findJoinConditionForJoinVertex(context.getCandidateJoinConditions(), edge, + true)); + joinConditions.addAll(JoinOrderingUtil.findJoinConditionForJoinVertex(context.getCandidateJoinFilters(), edge, + false)); + context.markAsEvaluatedJoinConditions(joinConditions); + context.markAsEvaluatedJoinFilters(joinConditions); + edge.addJoinPredicates(joinConditions); + if (edge.getJoinType() == JoinType.INNER && edge.getJoinQual().isEmpty()) { + edge.getJoinSpec().setType(JoinType.CROSS); + } + + if (PlannerUtil.isCommutativeJoinType(edge.getJoinType())) { + JoinEdge commutativeEdge = context.getCachedOrNewJoinEdge(edge.getJoinSpec(), edge.getRightVertex(), + edge.getLeftVertex()); + commutativeEdge.addJoinPredicates(joinConditions); + context.getJoinGraph().addEdge(commutativeEdge.getLeftVertex(), commutativeEdge.getRightVertex(), + commutativeEdge); } + return joinNode; } } @@ -308,4 +409,81 @@ public class LogicalOptimizer { return joinNode; } } + + /** + * Find the most left relation node in the join tree. For the join tree, please refer to {@link JoinGraphBuilder}. + * + * @param plan logical plan + * @param block query block in which the given join node is involved + * @param from logical node where the search starts + * @return found relation + * @throws PlanningException + */ + public static RelationNode findMostLeftRelation(LogicalPlan plan, LogicalPlan.QueryBlock block, LogicalNode from) + throws PlanningException { + RelationNodeFinderContext context = new RelationNodeFinderContext(); + context.findMostLeft = true; + RelationNodeFinder finder = new RelationNodeFinder(); + finder.visit(context, plan, block, from, new Stack<LogicalNode>()); + return context.founds.isEmpty() ? null : context.founds.iterator().next(); + } + + /** + * Find the most right relation node in the join tree. For the join tree, please refer to {@link JoinGraphBuilder}. + * + * @param plan logical plan + * @param block query block in which the given join node is involved + * @param from logical node where the search starts + * @return found relation + * @throws PlanningException + */ + public static RelationNode findMostRightRelation(LogicalPlan plan, LogicalPlan.QueryBlock block, LogicalNode from) + throws PlanningException { + RelationNodeFinderContext context = new RelationNodeFinderContext(); + context.findMostRight = true; + RelationNodeFinder finder = new RelationNodeFinder(); + finder.visit(context, plan, block, from, new Stack<LogicalNode>()); + return context.founds.isEmpty() ? null : context.founds.iterator().next(); + } + + private static class RelationNodeFinderContext { + private Set<RelationNode> founds = TUtil.newHashSet(); + private boolean findMostLeft; + private boolean findMostRight; + } + + /** + * RelationNodeFinder finds the most left/right vertex from the given node in the join graph. + */ + private static class RelationNodeFinder extends BasicLogicalPlanVisitor<RelationNodeFinderContext,LogicalNode> { + + @Override + public LogicalNode visit(RelationNodeFinderContext context, LogicalPlan plan, LogicalPlan.QueryBlock block, + LogicalNode node, Stack<LogicalNode> stack) throws PlanningException { + if (node.getType() != NodeType.TABLE_SUBQUERY) { + super.visit(context, plan, block, node, stack); + } + + if (node instanceof RelationNode) { + context.founds.add((RelationNode) node); + } + + return node; + } + + @Override + public LogicalNode visitJoin(RelationNodeFinderContext context, LogicalPlan plan, LogicalPlan.QueryBlock block, + JoinNode node, Stack<LogicalNode> stack) throws PlanningException { + stack.push(node); + LogicalNode result = null; + if (context.findMostLeft) { + result = visit(context, plan, block, node.getLeftChild(), stack); + } + if (context.findMostRight) { + result = visit(context, plan, block, node.getRightChild(), stack); + } + stack.pop(); + return result; + } + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tajo/blob/bedce3aa/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java index 581fde7..85d8d55 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java @@ -2181,7 +2181,7 @@ public class LogicalPlanner extends BaseAlgebraVisitor<LogicalPlanner.PlanContex return false; } - if (PlannerUtil.isOuterJoin(node.getJoinType())) { + if (PlannerUtil.isOuterJoinType(node.getJoinType())) { /* * For outer joins, only predicates which are specified at the on clause can be evaluated during processing join. * Other predicates from the where clause must be evaluated after the join. http://git-wip-us.apache.org/repos/asf/tajo/blob/bedce3aa/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java ---------------------------------------------------------------------- diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java b/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java index 862cb8a..7eb05d7 100644 --- a/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java +++ b/tajo-plan/src/main/java/org/apache/tajo/plan/joinorder/GreedyHeuristicJoinOrderAlgorithm.java @@ -19,187 +19,191 @@ package org.apache.tajo.plan.joinorder; import org.apache.tajo.algebra.JoinType; -import org.apache.tajo.catalog.Schema; +import org.apache.tajo.catalog.SchemaUtil; import org.apache.tajo.plan.LogicalPlan; -import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.plan.PlanningException; -import org.apache.tajo.catalog.SchemaUtil; import org.apache.tajo.plan.expr.AlgebraicUtil; import org.apache.tajo.plan.logical.*; +import org.apache.tajo.plan.util.PlannerUtil; import org.apache.tajo.util.StringUtils; +import org.apache.tajo.util.TUtil; -import java.util.*; +import java.util.List; +import java.util.Set; /** * This is a greedy heuristic algorithm to find a bushy join tree. This algorithm finds * the best join order with join conditions and pushed-down join conditions to - * all join operators. + * appropriate join operators. */ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { + public static final double DEFAULT_SELECTION_FACTOR = 0.1; @Override - public FoundJoinOrder findBestOrder(LogicalPlan plan, LogicalPlan.QueryBlock block, JoinGraph joinGraph, - Set<String> relationsWithoutQual) throws PlanningException { - - // Setup a remain relation set to be joined - // Why we should use LinkedHashSet? - it should keep the deterministic for the order of joins. - // Otherwise, join orders can be different even if join costs are the same to each other. - Set<LogicalNode> remainRelations = new LinkedHashSet<LogicalNode>(); - for (RelationNode relation : block.getRelations()) { - remainRelations.add(relation); - } - - LogicalNode latestJoin; - JoinEdge bestPair; - - while (remainRelations.size() > 1) { - Set<LogicalNode> checkingRelations = new LinkedHashSet<LogicalNode>(); - - for (LogicalNode relation : remainRelations) { - Collection <String> relationStrings = PlannerUtil.getRelationLineageWithinQueryBlock(plan, relation); - List<JoinEdge> joinEdges = new ArrayList<JoinEdge>(); - String relationCollection = StringUtils.join(relationStrings, ","); - List<JoinEdge> joinEdgesForGiven = joinGraph.getIncomingEdges(relationCollection); - if (joinEdgesForGiven != null) { - joinEdges.addAll(joinEdgesForGiven); - } - if (relationStrings.size() > 1) { - for (String relationString: relationStrings) { - joinEdgesForGiven = joinGraph.getIncomingEdges(relationString); - if (joinEdgesForGiven != null) { - joinEdges.addAll(joinEdgesForGiven); - } - } - } - - // check if the relation is the last piece of outer join - boolean endInnerRelation = false; - for (JoinEdge joinEdge: joinEdges) { - switch(joinEdge.getJoinType()) { - case LEFT_ANTI: - case RIGHT_ANTI: - case LEFT_SEMI: - case RIGHT_SEMI: - case LEFT_OUTER: - case RIGHT_OUTER: - case FULL_OUTER: - endInnerRelation = true; - if (checkingRelations.size() <= 1) { - checkingRelations.add(relation); - } - break; - default: - break; - } - } - - if (endInnerRelation) { - break; - } - - checkingRelations.add(relation); - } - - remainRelations.removeAll(checkingRelations); - - // Find the best join pair among all joinable operators in candidate set. - while (checkingRelations.size() > 1) { - LinkedHashSet<String[]> removingJoinEdges = new LinkedHashSet<String[]>(); - bestPair = getBestPair(plan, joinGraph, checkingRelations, removingJoinEdges); - - checkingRelations.remove(bestPair.getLeftRelation()); - checkingRelations.remove(bestPair.getRightRelation()); - for (String[] joinEdge: removingJoinEdges) { - // remove the edge of the best pair from join graph - joinGraph.removeEdge(joinEdge[0], joinEdge[1]); - } + public FoundJoinOrder findBestOrder(LogicalPlan plan, LogicalPlan.QueryBlock block, JoinGraphContext graphContext) + throws PlanningException { - latestJoin = createJoinNode(plan, bestPair); - checkingRelations.add(latestJoin); + Set<JoinVertex> vertexes = TUtil.newHashSet(); + for (RelationNode relationNode : block.getRelations()) { + vertexes.add(new RelationVertex(relationNode)); + } - // all logical nodes should be registered to corresponding blocks - block.registerNode(latestJoin); + // As illustrated at LogicalOptimizer.JoinGraphBuilder, the join graph initially forms a kind of tree. + // This join graph can be updated by adding new join edges or removing existing join edges + // during join order optimization. + JoinEdgeFinderContext context = new JoinEdgeFinderContext(); + JoinGraph joinGraph = graphContext.getJoinGraph(); + while (vertexes.size() > 1) { + JoinEdge bestPair = getBestPair(context, graphContext, vertexes); + JoinedRelationsVertex newVertex = new JoinedRelationsVertex(bestPair); + + // A root vertex is the join vertex where the graph traverse is started. + // The root vertex should be updated if the previous root vertex is merged into a new one. + if (graphContext.getRootVertexes().contains(bestPair.getLeftVertex())) { + graphContext.replaceRootVertexes(bestPair.getLeftVertex(), newVertex); + } else if (PlannerUtil.isCommutativeJoinType(bestPair.getJoinType()) + && graphContext.getRootVertexes().contains(bestPair.getRightVertex())) { + graphContext.replaceRootVertexes(bestPair.getRightVertex(), newVertex); } - // new Logical block should be the first entry of new Set - checkingRelations.addAll(remainRelations); - remainRelations = checkingRelations; + // Once a best pair is chosen, some existing join edges should be removed and new join edges should be added. + // + // There can be some join edges which are equal to or symmetric with the best pair. + // They cannot be chosen anymore, and thus should be removed from the join graph. + // + // The chosen best pair will be regarded as a join vertex again. + // So, the join edges which share any vertexes with the best pair should be updated, too. + Set<JoinEdge> willBeRemoved = TUtil.newHashSet(); + Set<JoinEdge> willBeAdded = TUtil.newHashSet(); + + // Find every join edges which should be updated. + prepareGraphUpdate(graphContext, joinGraph, bestPair, newVertex, willBeAdded, willBeRemoved); + + // Update the join graph + updateGraph(graphContext, joinGraph, bestPair, willBeAdded, willBeRemoved); + + // Update the join vertex set + vertexes.remove(bestPair.getLeftVertex()); + vertexes.remove(bestPair.getRightVertex()); + vertexes.add(newVertex); } - JoinNode joinTree = (JoinNode) remainRelations.iterator().next(); + JoinNode joinTree = (JoinNode) vertexes.iterator().next().buildPlan(plan, block); // all generated nodes should be registered to corresponding blocks block.registerNode(joinTree); return new FoundJoinOrder(joinTree, getCost(joinTree)); } - private static JoinNode createJoinNode(LogicalPlan plan, JoinEdge joinEdge) { - LogicalNode left = joinEdge.getLeftRelation(); - LogicalNode right = joinEdge.getRightRelation(); - - JoinNode joinNode = plan.createNode(JoinNode.class); + private void updateGraph(JoinGraphContext context, JoinGraph graph, JoinEdge bestPair, + Set<JoinEdge> willBeAdded, Set<JoinEdge> willBeRemoved) { + for (JoinEdge edge : willBeRemoved) { + graph.removeEdge(edge.getLeftVertex(), edge.getRightVertex()); + context.addCandidateJoinConditions(edge.getJoinQual()); + } - if (PlannerUtil.isCommutativeJoin(joinEdge.getJoinType())) { - // if only one operator is relation - if ((left instanceof RelationNode) && !(right instanceof RelationNode)) { - // for left deep - joinNode.init(joinEdge.getJoinType(), right, left); - } else { - // if both operators are relation or if both are relations - // we don't need to concern the left-right position. - joinNode.init(joinEdge.getJoinType(), left, right); - } - } else { - joinNode.init(joinEdge.getJoinType(), left, right); + for (JoinEdge edge : willBeAdded) { + graph.addEdge(edge.getLeftVertex(), edge.getRightVertex(), edge); + context.removeCandidateJoinConditions(edge.getJoinQual()); + context.removeCandidateJoinFilters(edge.getJoinQual()); } - Schema mergedSchema = SchemaUtil.merge(joinNode.getLeftChild().getOutSchema(), - joinNode.getRightChild().getOutSchema()); - joinNode.setInSchema(mergedSchema); - joinNode.setOutSchema(mergedSchema); - if (joinEdge.hasJoinQual()) { - joinNode.setJoinQual(AlgebraicUtil.createSingletonExprFromCNF(joinEdge.getJoinQual())); + // Join quals involved by the best pair should be removed. + context.markAsEvaluatedJoinConditions(bestPair.getJoinQual()); + context.markAsEvaluatedJoinFilters(bestPair.getJoinQual()); + } + + /** + * Once a best pair is found, some existing join edges which are equal to or symmetric with the best pair should be + * removed and new join edges should be added. + * They cannot be chosen as the best pair anymore, and thus should be removed from the join graph. + * This method finds such join edges to prepare updating the join graph. + * + * @param context + * @param graph + * @param bestPair + * @param vertex + * @param willBeAdded + * @param willBeRemoved + */ + private void prepareGraphUpdate(JoinGraphContext context, JoinGraph graph, JoinEdge bestPair, + JoinedRelationsVertex vertex, Set<JoinEdge> willBeAdded, Set<JoinEdge> willBeRemoved) { + prepareGraphUpdate(context, graph.getOutgoingEdges(bestPair.getLeftVertex()), vertex, true, + willBeAdded, willBeRemoved); + + prepareGraphUpdate(context, graph.getIncomingEdges(bestPair.getLeftVertex()), vertex, false, + willBeAdded, willBeRemoved); + + prepareGraphUpdate(context, graph.getOutgoingEdges(bestPair.getRightVertex()), vertex, true, + willBeAdded, willBeRemoved); + + prepareGraphUpdate(context, graph.getIncomingEdges(bestPair.getRightVertex()), vertex, false, + willBeAdded, willBeRemoved); + } + + private void prepareGraphUpdate(JoinGraphContext context, List<JoinEdge> edges, + JoinedRelationsVertex vertex, boolean isLeftVertex, + Set<JoinEdge> willBeAdded, Set<JoinEdge> willBeRemoved) { + if (edges != null) { + for (JoinEdge edge : edges) { + if (!JoinOrderingUtil.isEqualsOrSymmetric(vertex.getJoinEdge(), edge)) { + if (isLeftVertex) { + willBeAdded.add(context.getCachedOrNewJoinEdge(edge.getJoinSpec(), vertex, edge.getRightVertex())); + } else { + willBeAdded.add(context.getCachedOrNewJoinEdge(edge.getJoinSpec(), edge.getLeftVertex(), vertex)); + } + } + willBeRemoved.add(edge); + } } - return joinNode; } /** - * Find the best join pair among all joinable operators in candidate set. + * Find the best join pair among all joinable operators in the candidate set. * - * @param plan a logical plan - * @param graph a join graph which consists of vertices and edges, where vertex is relation and - * each edge is join condition. - * @param candidateSet candidate operators to be joined. + * @param context + * @param graphContext a join graph which consists of vertices and edges, where vertex is relation and + * each edge is join condition. + * @param vertexes candidate operators to be joined. * @return The best join pair among them * @throws PlanningException */ - private JoinEdge getBestPair(LogicalPlan plan, JoinGraph graph, Set<LogicalNode> candidateSet, Set<String[]> bestJoinEdges) + private JoinEdge getBestPair(JoinEdgeFinderContext context, JoinGraphContext graphContext, Set<JoinVertex> vertexes) throws PlanningException { double minCost = Double.MAX_VALUE; JoinEdge bestJoin = null; - LinkedHashSet<String[]> relatedJoinEdges = null; - LinkedHashSet<String[]> relatedNonCrossJoinEdges = null; double minNonCrossJoinCost = Double.MAX_VALUE; JoinEdge bestNonCrossJoin = null; - for (LogicalNode outer : candidateSet) { - for (LogicalNode inner : candidateSet) { + // Brute-force algorithm + // check every possible combination of join vertexes. + for (JoinVertex outer : vertexes) { + for (JoinVertex inner : vertexes) { if (outer.equals(inner)) { continue; } - LinkedHashSet<String[]> joinEdgePairs = new LinkedHashSet<String[]>(); - JoinEdge foundJoin = findJoin(plan, graph, outer, inner, joinEdgePairs); + context.reset(); + JoinEdge foundJoin = null; + + // A root vertex is the join vertex where the graph traverse is started. + // For each root vertex, find possible joins between inner and outer join vertexes. + for (JoinVertex eachRoot : graphContext.getRootVertexes()) { + foundJoin = findJoin(context, graphContext, eachRoot, outer, inner); + if (foundJoin != null) break; + } if (foundJoin == null) { continue; } + // The found join edge may not have join quals even though they can be evaluated during join. + // So, possible join quals should be added to the join node before estimating its cost. + JoinOrderingUtil.updateQualIfNecessary(graphContext, foundJoin); double cost = getCost(foundJoin); if (cost < minCost) { minCost = cost; bestJoin = foundJoin; - relatedJoinEdges = joinEdgePairs; } // Keep the min cost join @@ -209,124 +213,211 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { if (cost < minNonCrossJoinCost) { minNonCrossJoinCost = cost; bestNonCrossJoin = foundJoin; - relatedNonCrossJoinEdges = joinEdgePairs; } } } } if (bestNonCrossJoin != null) { - bestJoinEdges.addAll(relatedNonCrossJoinEdges); - return bestNonCrossJoin; + if (bestNonCrossJoin.hasJoinQual()) { + graphContext.markAsEvaluatedJoinConditions(bestNonCrossJoin.getJoinQual()); + } + return swapLeftAndRightIfNecessary(bestNonCrossJoin); + } else if (bestJoin != null) { + if (bestJoin.hasJoinQual()) { + graphContext.markAsEvaluatedJoinFilters(bestJoin.getJoinQual()); + } + return swapLeftAndRightIfNecessary(bestJoin); } else { - bestJoinEdges.addAll(relatedJoinEdges); - return bestJoin; + throw new PlanningException("Cannot find the best join"); } } - /** - * Find a join between two logical operator trees - * - * @return If there is no join condition between two relation, it returns NULL value. - */ - private static JoinEdge findJoin(LogicalPlan plan, JoinGraph graph, LogicalNode outer, LogicalNode inner, Set<String[]> joinEdgePairs) - throws PlanningException { - JoinEdge foundJoinEdge = null; - - // If outer is outer join, make edge key using all relation names in outer. - SortedSet<String> relationNames = - new TreeSet<String>(PlannerUtil.getRelationLineageWithinQueryBlock(plan, outer)); - String outerEdgeKey = StringUtils.join(relationNames, ", "); - for (String innerName : PlannerUtil.getRelationLineageWithinQueryBlock(plan, inner)) { - if (graph.hasEdge(outerEdgeKey, innerName)) { - JoinEdge existJoinEdge = graph.getEdge(outerEdgeKey, innerName); - String[] joinEdgePair = {outerEdgeKey, innerName}; - joinEdgePairs.add(joinEdgePair); - if (foundJoinEdge == null) { - foundJoinEdge = new JoinEdge(existJoinEdge.getJoinType(), outer, inner, - existJoinEdge.getJoinQual()); - } else { - foundJoinEdge.addJoinQual(AlgebraicUtil.createSingletonExprFromCNF( - existJoinEdge.getJoinQual())); + private static JoinEdge swapLeftAndRightIfNecessary(JoinEdge edge) { + if (PlannerUtil.isCommutativeJoinType(edge.getJoinType()) || edge.getJoinType() == JoinType.FULL_OUTER) { + double leftCost = getCost(edge.getLeftVertex()); + double rightCost = getCost(edge.getRightVertex()); + if (leftCost < rightCost) { + return new JoinEdge(edge.getJoinSpec(), edge.getRightVertex(), edge.getLeftVertex()); + } else if (leftCost == rightCost) { + // compare the relation name to make the join order determinant + if (StringUtils.join(edge.getLeftVertex().getRelations(), ""). + compareTo(StringUtils.join(edge.getRightVertex().getRelations(), "")) < 0) { + return new JoinEdge(edge.getJoinSpec(), edge.getRightVertex(), edge.getLeftVertex()); } } } - if (foundJoinEdge != null) { - return foundJoinEdge; - } + return edge; + } - relationNames = - new TreeSet<String>(PlannerUtil.getRelationLineageWithinQueryBlock(plan, inner)); - outerEdgeKey = StringUtils.join(relationNames, ", "); - for (String outerName : PlannerUtil.getRelationLineageWithinQueryBlock(plan, outer)) { - if (graph.hasEdge(outerEdgeKey, outerName)) { - JoinEdge existJoinEdge = graph.getEdge(outerEdgeKey, outerName); - String[] joinEdgePair = {outerEdgeKey, outerName}; - joinEdgePairs.add(joinEdgePair); - if (foundJoinEdge == null) { - foundJoinEdge = new JoinEdge(existJoinEdge.getJoinType(), inner, outer, - existJoinEdge.getJoinQual()); - } else { - foundJoinEdge.addJoinQual(AlgebraicUtil.createSingletonExprFromCNF( - existJoinEdge.getJoinQual())); - } - } - } - if (foundJoinEdge != null) { - return foundJoinEdge; + private static class JoinEdgeFinderContext { + private Set<JoinVertex> visited = TUtil.newHashSet(); + + public void reset() { + visited.clear(); } + } - for (String outerName : PlannerUtil.getRelationLineageWithinQueryBlock(plan, outer)) { - for (String innerName : PlannerUtil.getRelationLineageWithinQueryBlock(plan, inner)) { - - // Find all joins between two relations and merge them into one join if possible - if (graph.hasEdge(outerName, innerName)) { - JoinEdge existJoinEdge = graph.getEdge(outerName, innerName); - String[] joinEdgePair = {outerName, innerName}; - joinEdgePairs.add(joinEdgePair); - if (foundJoinEdge == null) { - foundJoinEdge = new JoinEdge(existJoinEdge.getJoinType(), outer, inner, - existJoinEdge.getJoinQual()); + /** + * Find a join edge between two join vertexes. + * + * @param context context for edge finder + * @param graphContext graph context + * @param begin begin vertex to traverse the join graph + * @param leftTarget left target join vertex + * @param rightTarget right target join vertex + * @return If there is no join edge between two vertexes, it returns null. + * @throws PlanningException + */ + private static JoinEdge findJoin(final JoinEdgeFinderContext context, final JoinGraphContext graphContext, + JoinVertex begin, final JoinVertex leftTarget, final JoinVertex rightTarget) + throws PlanningException { + + context.visited.add(begin); + + JoinGraph joinGraph = graphContext.getJoinGraph(); + + // Get all interchangeable vertexes of the begin vertex. + // Please see JoinOrderingUtil.getAllInterchangeableVertexes() for interchangeable vertexes. + Set<JoinVertex> interchangeableWithBegin = JoinOrderingUtil.getAllInterchangeableVertexes(graphContext, begin); + + // If the left search target is interchangeable with the begin vertex, check every outgoing edges + // from the left target to find the join edge who has the right search target as its right vertex. + if (interchangeableWithBegin.contains(leftTarget)) { + List<JoinEdge> edgesFromLeftTarget = joinGraph.getOutgoingEdges(leftTarget); + if (edgesFromLeftTarget != null) { + for (JoinEdge edgeFromLeftTarget : edgesFromLeftTarget) { + edgeFromLeftTarget = JoinOrderingUtil.updateQualIfNecessary(graphContext, edgeFromLeftTarget); + + // Find all interchangeable vertexes with the right vertex of the current edge. + // If the right target vertex is interchangeable with the right vertex of the current edge, + // we've successfully found a join edge between the left and right targets. + Set<JoinVertex> interchangeableWithRightVertex; + if (edgeFromLeftTarget.getJoinType() == JoinType.INNER || edgeFromLeftTarget.getJoinType() == JoinType.CROSS) { + interchangeableWithRightVertex = JoinOrderingUtil.getAllInterchangeableVertexes(graphContext, + edgeFromLeftTarget.getRightVertex()); } else { - foundJoinEdge.addJoinQual(AlgebraicUtil.createSingletonExprFromCNF( - existJoinEdge.getJoinQual())); + interchangeableWithRightVertex = TUtil.newHashSet(edgeFromLeftTarget.getRightVertex()); + } + + if (interchangeableWithRightVertex.contains(rightTarget)) { + JoinEdge targetEdge = joinGraph.getEdge(leftTarget, rightTarget); + if (targetEdge == null) { + if (joinGraph.isSymmetricJoinOnly()) { + // Since the targets of the both sides are searched with symmetric characteristics, + // the join type is assumed as CROSS. + // TODO: This must be improved to consider a case when a query involves multiple commutative and + // TODO: non-commutative joins. It will be done at TAJO-1683. + joinGraph.addJoin(graphContext, new JoinSpec(JoinType.CROSS), leftTarget, rightTarget); + return JoinOrderingUtil.updateQualIfNecessary(graphContext, joinGraph.getEdge(leftTarget, rightTarget)); + } + } else { + targetEdge = JoinOrderingUtil.updateQualIfNecessary(graphContext, targetEdge); + return targetEdge; + } } } } } - if (foundJoinEdge == null) { - foundJoinEdge = new JoinEdge(JoinType.CROSS, outer, inner); - } - - return foundJoinEdge; + // If the left search target is NOT interchangeable with the begin vertex, + // we cannot find any join edges from the current begin vertex, so search from other vertexes. + // Here, we should consider the associativity to check whether other joins can be executed earlier than the join + // who has the begin vertex as its left vertex. + for (JoinVertex interchangeableVertex : interchangeableWithBegin) { + List<JoinEdge> edges = joinGraph.getOutgoingEdges(interchangeableVertex); + if (edges != null) { + for (JoinEdge edge : edges) { + for (JoinEdge associativeEdge : JoinOrderingUtil.getAllAssociativeEdges(graphContext, edge)) { + JoinVertex willBeVisited = associativeEdge.getLeftVertex(); + if (!context.visited.contains(willBeVisited)) { + JoinEdge found = findJoin(context, graphContext, associativeEdge.getLeftVertex(), leftTarget, + rightTarget); + if (found != null) { + return found; + } + } + } + } + } + } + // not found + return null; } + // COMPUTATION_FACTOR is used to give the larger cost for longer plans. + // We assume that every operation has same cost. + // TODO: more accurate cost estimation is required. + private static final double COMPUTATION_FACTOR = 1.5; + /** * Getting a cost of one join * @param joinEdge * @return */ public static double getCost(JoinEdge joinEdge) { - double filterFactor = 1; - if (joinEdge.hasJoinQual()) { - // TODO - should consider join type + double factor = 1; + double cost; + if (joinEdge.getJoinType() != JoinType.CROSS) { // TODO - should statistic information obtained from query history - filterFactor = filterFactor * Math.pow(DEFAULT_SELECTION_FACTOR, joinEdge.getJoinQual().length); - return getCost(joinEdge.getLeftRelation()) * getCost(joinEdge.getRightRelation()) * filterFactor; + switch (joinEdge.getJoinType()) { + // TODO - improve cost estimation + // for outer joins, filter factor does not matter + case LEFT_OUTER: + factor *= SchemaUtil.estimateRowByteSizeWithSchema(joinEdge.getSchema()) / + SchemaUtil.estimateRowByteSizeWithSchema(joinEdge.getLeftVertex().getSchema()); + break; + case RIGHT_OUTER: + factor *= SchemaUtil.estimateRowByteSizeWithSchema(joinEdge.getSchema()) / + SchemaUtil.estimateRowByteSizeWithSchema(joinEdge.getRightVertex().getSchema()); + break; + case FULL_OUTER: + factor *= Math.max(SchemaUtil.estimateRowByteSizeWithSchema(joinEdge.getSchema()) / + SchemaUtil.estimateRowByteSizeWithSchema(joinEdge.getLeftVertex().getSchema()), + SchemaUtil.estimateRowByteSizeWithSchema(joinEdge.getSchema()) / + SchemaUtil.estimateRowByteSizeWithSchema(joinEdge.getRightVertex().getSchema())); + break; + case INNER: + default: + // by default, do the same operation with that of inner join + // filter factor * output tuple width / input tuple width + factor *= Math.pow(DEFAULT_SELECTION_FACTOR, joinEdge.getJoinQual().size()) + * SchemaUtil.estimateRowByteSizeWithSchema(joinEdge.getSchema()) + / (SchemaUtil.estimateRowByteSizeWithSchema(joinEdge.getLeftVertex().getSchema()) + + SchemaUtil.estimateRowByteSizeWithSchema(joinEdge.getRightVertex().getSchema())); + break; + } + // cost = estimated input size * filter factor * (output tuple width / input tuple width) + cost = getCost(joinEdge.getLeftVertex()) * + getCost(joinEdge.getRightVertex()) * factor; } else { // make cost bigger if cross join - return Math.pow(getCost(joinEdge.getLeftRelation()) * getCost(joinEdge.getRightRelation()), 2); + cost = Math.pow(getCost(joinEdge.getLeftVertex()) * + getCost(joinEdge.getRightVertex()), 2); } + + return cost * COMPUTATION_FACTOR; + } + + public static double getCost(JoinVertex joinVertex) { + double cost; + if (joinVertex instanceof RelationVertex) { + cost = getCost(((RelationVertex) joinVertex).getRelationNode()); + } else { + cost = getCost(((JoinedRelationsVertex)joinVertex).getJoinEdge()); + } + return cost; } // TODO - costs of other operator operators (e.g., group-by and sort) should be computed in proper manners. public static double getCost(LogicalNode node) { + double cost; switch (node.getType()) { case PROJECTION: ProjectionNode projectionNode = (ProjectionNode) node; - return getCost(projectionNode.getChild()); + cost = getCost(projectionNode.getChild()); + break; case JOIN: JoinNode joinNode = (JoinNode) node; @@ -334,32 +425,36 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { if (joinNode.hasJoinQual()) { filterFactor = Math.pow(DEFAULT_SELECTION_FACTOR, AlgebraicUtil.toConjunctiveNormalFormArray(joinNode.getJoinQual()).length); - return getCost(joinNode.getLeftChild()) * getCost(joinNode.getRightChild()) * filterFactor; + cost = getCost(joinNode.getLeftChild()) * getCost(joinNode.getRightChild()) * filterFactor; } else { - return Math.pow(getCost(joinNode.getLeftChild()) * getCost(joinNode.getRightChild()), 2); + cost = Math.pow(getCost(joinNode.getLeftChild()) * getCost(joinNode.getRightChild()), 2); } + break; case SELECTION: SelectionNode selectionNode = (SelectionNode) node; - return getCost(selectionNode.getChild()) * + cost = getCost(selectionNode.getChild()) * Math.pow(DEFAULT_SELECTION_FACTOR, AlgebraicUtil.toConjunctiveNormalFormArray(selectionNode.getQual()).length); + break; case TABLE_SUBQUERY: TableSubQueryNode subQueryNode = (TableSubQueryNode) node; - return getCost(subQueryNode.getSubQuery()); + cost = getCost(subQueryNode.getSubQuery()); + break; case SCAN: ScanNode scanNode = (ScanNode) node; if (scanNode.getTableDesc().getStats() != null) { - double cost = ((ScanNode)node).getTableDesc().getStats().getNumBytes(); - return cost; + cost = ((ScanNode)node).getTableDesc().getStats().getNumBytes(); } else { - return Long.MAX_VALUE; + cost = Long.MAX_VALUE; } + break; case UNION: UnionNode unionNode = (UnionNode) node; - return getCost(unionNode.getLeftChild()) + getCost(unionNode.getRightChild()); + cost = getCost(unionNode.getLeftChild()) + getCost(unionNode.getRightChild()); + break; case EXCEPT: case INTERSECT: @@ -368,7 +463,10 @@ public class GreedyHeuristicJoinOrderAlgorithm implements JoinOrderAlgorithm { default: // all binary operators (join, union, except, and intersect) are handled in the above cases. // So, we need to handle only unary nodes in default. - return getCost(((UnaryNode) node).getChild()); + cost = getCost(((UnaryNode) node).getChild()); + break; } + + return cost * COMPUTATION_FACTOR; } } \ No newline at end of file
