Robert,

You do not need to use qgen.exe to generate queries as you are not
running the TPC-H benchmark test.  Attached is an example of the 22
sample TPC-H queries according to the benchmark.  

We have not tested using the TPC-H queries for this particular patch and
only use the TPC-H database as a large, skewed data set.  The simpler
queries we test involve joins of Part-Lineitem or Supplier-Lineitem such
as:

Select * from part, lineitem where p_partkey = l_partkey  

OR

Select count(*) from part, lineitem where p_partkey = l_partkey  

The count(*) version is usually more useful for comparisons as the
generation of output tuples on the client side (say with pgadmin)
dominates the actual time to complete the query.

To isolate query costs, we also test using a simple server-side
function.  The setup description I have also attached.

I would be happy to help in any way I can.

Bryce is currently working on an updated patch according to your
suggestions.

--
Dr. Ramon Lawrence
Assistant Professor, Department of Computer Science, University of
British Columbia Okanagan
E-mail: ramon.lawre...@ubc.ca


> -----Original Message-----
> From: pgsql-hackers-ow...@postgresql.org [mailto:pgsql-hackers-
> ow...@postgresql.org] On Behalf Of Robert Haas
> Sent: December 17, 2008 7:54 PM
> To: Lawrence, Ramon
> Cc: Tom Lane; pgsql-hackers@postgresql.org; Bryce Cutt
> Subject: Re: [HACKERS] Proposed Patch to Improve Performance of Multi-
> Batch Hash Join for Skewed Data Sets
> 
> Dr. Lawrence:
> 
> I'm still working on reviewing this patch.  I've managed to load the
> sample TPCH data from tpch1g1z.zip after changing the line endings to
> UNIX-style and chopping off the trailing vertical bars.  (If anyone is
> interested, I have the results of pg_dump | bzip2 -9 on the resulting
> database, which I would be happy to upload if someone has server
> space.  It is about 250MB.)
> 
> But, I'm not sure quite what to do in terms of generating queries.
> TPCHSkew contains QGEN.EXE, but that seems to require that you provide
> template queries as input, and I'm not sure where to get the
> templates.
> 
> Any suggestions?
> 
> Thanks,
> 
> ...Robert
> 
> --
> Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
> To make changes to your subscription:
> http://www.postgresql.org/mailpref/pgsql-hackers
-- using 101000000 as a seed to the RNG

-- QUERY_1

select
        l_returnflag,
        l_linestatus,
        sum(l_quantity) as sum_qty,
        sum(l_extendedprice) as sum_base_price,
        sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
        sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
        avg(l_quantity) as avg_qty,
        avg(l_extendedprice) as avg_price,
        avg(l_discount) as avg_disc,
        count(*) as count_order
from
        lineitem
where
        l_shipdate <= date '1998-09-01' 
group by
        l_returnflag,
        l_linestatus
order by
        l_returnflag,
        l_linestatus;


-- QUERY_2

select
        s_acctbal,
        s_name,
        n_name,
        p_partkey,
        p_mfgr,
        s_address,
        s_phone,
        s_comment
from
        part,
        supplier,
        partsupp,
        nation,
        region
where
        p_partkey = ps_partkey
        and s_suppkey = ps_suppkey
        and p_size = 28
        and p_type like '%STEEL'
        and s_nationkey = n_nationkey
        and n_regionkey = r_regionkey
        and r_name = 'MIDDLE EAST'
        and ps_supplycost = (
                select
                        min(ps_supplycost)
                from
                        partsupp,
                        supplier,
                        nation,
                        region
                where
                        p_partkey = ps_partkey
                        and s_suppkey = ps_suppkey
                        and s_nationkey = n_nationkey
                        and n_regionkey = r_regionkey
                        and r_name = 'MIDDLE EAST'
        )
order by
        s_acctbal desc,
        n_name,
        s_name,
        p_partkey
limit 100;


-- QUERY_3

select
        l_orderkey,
        sum(l_extendedprice * (1 - l_discount)) as revenue,
        o_orderdate,
        o_shippriority
from
        customer,
        orders,
        lineitem
where
        c_mktsegment = 'BUILDING'
        and c_custkey = o_custkey
        and l_orderkey = o_orderkey
        and o_orderdate < date '1995-03-31'
        and l_shipdate > date '1995-03-31'
group by
        l_orderkey,
        o_orderdate,
        o_shippriority
order by
        revenue desc,
        o_orderdate
limit 10;


-- QUERY_4

select
        o_orderpriority,
        count(*) as order_count
from
        orders
where
        o_orderdate >= date '1997-10-01'
        and o_orderdate < date '1998-02-01'
        and exists (
                select
                        *
                from
                        lineitem
                where
                        l_orderkey = o_orderkey
                        and l_commitdate < l_receiptdate
        )
group by
        o_orderpriority
order by
        o_orderpriority;


-- QUERY_5

select
        n_name,
        sum(l_extendedprice * (1 - l_discount)) as revenue
from
        customer,
        orders,
        lineitem,
        supplier,
        nation,
        region
where
        c_custkey = o_custkey
        and l_orderkey = o_orderkey
        and l_suppkey = s_suppkey
        and c_nationkey = s_nationkey
        and s_nationkey = n_nationkey
        and n_regionkey = r_regionkey
        and r_name = 'AMERICA'
        and o_orderdate >= date '1994-01-01'
        and o_orderdate < date '1995-01-01' 
group by
        n_name
order by
        revenue desc;


-- QUERY_6

select
        sum(l_extendedprice * l_discount) as revenue
from
        lineitem
where
        l_shipdate >= date '1994-01-01'
        and l_shipdate < date '1995-01-01'
        and l_discount between 0.05 - 0.01 and 0.05 + 0.01
        and l_quantity < 25;


-- QUERY_7

select
        supp_nation,
        cust_nation,
        l_year,
        sum(volume) as revenue
from
        (
                select
                        n1.n_name as supp_nation,
                        n2.n_name as cust_nation,
                        extract(year from l_shipdate) as l_year,
                        l_extendedprice * (1 - l_discount) as volume
                from
                        supplier,
                        lineitem,
                        orders,
                        customer,
                        nation n1,
                        nation n2
                where
                        s_suppkey = l_suppkey
                        and o_orderkey = l_orderkey
                        and c_custkey = o_custkey
                        and s_nationkey = n1.n_nationkey
                        and c_nationkey = n2.n_nationkey
                        and (
                                (n1.n_name = 'IRAN' and n2.n_name = 'UNITED 
STATES')
                                or (n1.n_name = 'UNITED STATES' and n2.n_name = 
'IRAN')
                        )
                        and l_shipdate between date '1995-01-01' and date 
'1996-12-31'
        ) as shipping
group by
        supp_nation,
        cust_nation,
        l_year
order by
        supp_nation,
        cust_nation,
        l_year;


-- QUERY_8

select
        o_year,
        sum(case
                when nation = 'UNITED STATES' then volume
                else 0
        end) / sum(volume) as mkt_share
from
        (
                select
                        extract(year from o_orderdate) as o_year,
                        l_extendedprice * (1 - l_discount) as volume,
                        n2.n_name as nation
                from
                        part,
                        supplier,
                        lineitem,
                        orders,
                        customer,
                        nation n1,
                        nation n2,
                        region
                where
                        p_partkey = l_partkey
                        and s_suppkey = l_suppkey
                        and l_orderkey = o_orderkey
                        and o_custkey = c_custkey
                        and c_nationkey = n1.n_nationkey
                        and n1.n_regionkey = r_regionkey
                        and r_name = 'AMERICA'
                        and s_nationkey = n2.n_nationkey
                        and o_orderdate between date '1995-01-01' and date 
'1996-12-31'
                        and p_type = 'MEDIUM BRUSHED STEEL'
        ) as all_nations
group by
        o_year
order by
        o_year;


-- QUERY_9

select
        nation,
        o_year,
        sum(amount) as sum_profit
from
        (
                select
                        n_name as nation,
                        extract(year from o_orderdate) as o_year,
                        l_extendedprice * (1 - l_discount) - ps_supplycost * 
l_quantity as amount
                from
                        part,
                        supplier,
                        lineitem,
                        partsupp,
                        orders,
                        nation
                where
                        s_suppkey = l_suppkey
                        and ps_suppkey = l_suppkey
                        and ps_partkey = l_partkey
                        and p_partkey = l_partkey
                        and o_orderkey = l_orderkey
                        and s_nationkey = n_nationkey
                        and p_name like '%lime%'
        ) as profit
group by
        nation,
        o_year
order by
        nation,
        o_year desc;


-- QUERY_10

select
        c_custkey,
        c_name,
        sum(l_extendedprice * (1 - l_discount)) as revenue,
        c_acctbal,
        n_name,
        c_address,
        c_phone,
        c_comment
from
        customer,
        orders,
        lineitem,
        nation
where
        c_custkey = o_custkey
        and l_orderkey = o_orderkey
        and o_orderdate >= date '1994-09-01'
        and o_orderdate < date '1994-12-01'
        and l_returnflag = 'R'
        and c_nationkey = n_nationkey
group by
        c_custkey,
        c_name,
        c_acctbal,
        c_phone,
        n_name,
        c_address,
        c_comment
order by
        revenue desc
limit 20;


-- QUERY_11

select
        ps_partkey,
        sum(ps_supplycost * ps_availqty) as value
from
        partsupp,
        supplier,
        nation
where
        ps_suppkey = s_suppkey
        and s_nationkey = n_nationkey
        and n_name = 'KENYA'
group by
        ps_partkey having
                sum(ps_supplycost * ps_availqty) > (
                        select
                                sum(ps_supplycost * ps_availqty) * 0.0001000000
                        from
                                partsupp,
                                supplier,
                                nation
                        where
                                ps_suppkey = s_suppkey
                                and s_nationkey = n_nationkey
                                and n_name = 'KENYA'
                )
order by
        value desc;



-- QUERY_12

select
        l_shipmode,
        sum(case
                when o_orderpriority = '1-URGENT'
                        or o_orderpriority = '2-HIGH'
                        then 1
                else 0
        end) as high_line_count,
        sum(case
                when o_orderpriority <> '1-URGENT'
                        and o_orderpriority <> '2-HIGH'
                        then 1
                else 0
        end) as low_line_count
from
        orders,
        lineitem
where
        o_orderkey = l_orderkey
        and l_shipmode in ('SHIP', 'AIR')
        and l_commitdate < l_receiptdate
        and l_shipdate < l_commitdate
        and l_receiptdate >= date '1993-01-01'
        and l_receiptdate < date '1994-01-01' 
group by
        l_shipmode
order by
        l_shipmode;



-- QUERY_13

select
        c_count,
        count(*) as custdist
from
        (
                select
                        c_custkey,
                        count(o_orderkey)
                from
                        customer left outer join orders on
                                c_custkey = o_custkey
                                and o_comment not like '%special%packages%'
                group by
                        c_custkey
        ) as c_orders (c_custkey, c_count)
group by
        c_count
order by
        custdist desc,
        c_count desc;



-- QUERY_14

select
        100.00 * sum(case
                when p_type like 'PROMO%'
                        then l_extendedprice * (1 - l_discount)
                else 0
        end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
from
        lineitem,
        part
where
        l_partkey = p_partkey
        and l_shipdate >= date '1993-09-01'
        and l_shipdate < date '1993-10-01';



-- QUERY_15

create view revenue0 (supplier_no, total_revenue) as
        select
                l_suppkey,
                sum(l_extendedprice * (1 - l_discount))
        from
                lineitem
        where
                l_shipdate >= date '1995-07-01'
                and l_shipdate < date '1995-10-01' 
        group by
                l_suppkey;


select
        s_suppkey,
        s_name,
        s_address,
        s_phone,
        total_revenue
from
        supplier,
        revenue0
where
        s_suppkey = supplier_no
        and total_revenue = (
                select
                        max(total_revenue)
                from
                        revenue0
        )
order by
        s_suppkey;

drop view revenue0;



-- QUERY_16

select
        p_brand,
        p_type,
        p_size,
        count(distinct ps_suppkey) as supplier_cnt
from
        partsupp,
        part
where
        p_partkey = ps_partkey
        and p_brand <> 'Brand#45'
        and p_type not like 'ECONOMY BURNISHED%'
        and p_size in (13, 11, 15, 47, 34, 3, 27, 5)
        and ps_suppkey not in (
                select
                        s_suppkey
                from
                        supplier
                where
                        s_comment like '%Customer%Complaints%'
        )
group by
        p_brand,
        p_type,
        p_size
order by
        supplier_cnt desc,
        p_brand,
        p_type,
        p_size;


-- QUERY_17

select
        sum(l_extendedprice) / 7.0 as avg_yearly
from
        lineitem,
        part
where
        p_partkey = l_partkey
        and p_brand = 'Brand#54'
        and p_container = 'WRAP DRUM'
        and l_quantity < (
                select
                        0.2 * avg(l_quantity)
                from
                        lineitem
                where
                        l_partkey = p_partkey
        );


A more efficient version of the query above:

select
        sum(l_extendedprice) / 7.0 as avg_yearly
from
        ( select
                        l_partkey, 0.2 * avg(l_quantity) as qty
                from
                        lineitem                
                group by l_partkey
        ) as avgtmp,
        lineitem,
        part
where
        p_partkey = lineitem.l_partkey and avgtmp.l_partkey = lineitem.l_partkey
        and p_brand = 'Brand#54'
        and p_container = 'WRAP DRUM'
        and l_quantity < avgtmp.qty



-- QUERY_18

select
        c_name,
        c_custkey,
        o_orderkey,
        o_orderdate,
        o_totalprice,
        sum(l_quantity)
from
        customer,
        orders,
        lineitem
where
        o_orderkey in (
                select
                        l_orderkey
                from
                        lineitem
                group by
                        l_orderkey having
                                sum(l_quantity) > 314
        )
        and c_custkey = o_custkey
        and o_orderkey = l_orderkey
group by
        c_name,
        c_custkey,
        o_orderkey,
        o_orderdate,
        o_totalprice
order by
        o_totalprice desc,
        o_orderdate
limit 100;


-- QUERY_19

select
        sum(l_extendedprice* (1 - l_discount)) as revenue
from
        lineitem,
        part
where
        (
                p_partkey = l_partkey
                and p_brand = 'Brand#53'
                and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
                and l_quantity >= 6 and l_quantity <= 6 + 10
                and p_size between 1 and 5
                and l_shipmode in ('AIR', 'AIR REG')
                and l_shipinstruct = 'DELIVER IN PERSON'
        )
        or
        (
                p_partkey = l_partkey
                and p_brand = 'Brand#55'
                and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
                and l_quantity >= 20 and l_quantity <= 20 + 10
                and p_size between 1 and 10
                and l_shipmode in ('AIR', 'AIR REG')
                and l_shipinstruct = 'DELIVER IN PERSON'
        )
        or
        (
                p_partkey = l_partkey
                and p_brand = 'Brand#55'
                and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
                and l_quantity >= 22 and l_quantity <= 22 + 10
                and p_size between 1 and 15
                and l_shipmode in ('AIR', 'AIR REG')
                and l_shipinstruct = 'DELIVER IN PERSON'
        );


-- QUERY_20

select
        s_name,
        s_address
from
        supplier,
        nation
where
        s_suppkey in (
                select
                        ps_suppkey
                from
                        partsupp
                where
                        ps_partkey in (
                                select
                                        p_partkey
                                from
                                        part
                                where
                                        p_name like 'ivory%'
                        )
                        and ps_availqty > (
                                select
                                        0.5 * sum(l_quantity)
                                from
                                        lineitem
                                where
                                        l_partkey = ps_partkey
                                        and l_suppkey = ps_suppkey
                                        and l_shipdate >= date '1997-01-01'
                                        and l_shipdate < date '1998-01-01'
                        )
        )
        and s_nationkey = n_nationkey
        and n_name = 'UNITED KINGDOM'
order by
        s_name;

-- Query 20 modified

select
        s_name,
        s_address
from
        supplier,
        nation,
        part,
        partsupp, 
        (select
                l_partkey, l_suppkey, 0.5 * sum(l_quantity)
                from
                        lineitem
                where                   
                        l_shipdate >= date '1997-01-01' and l_shipdate < date 
'1998-01-01'
                group by l_partkey, l_suppkey
        ) as l1
where
        s_suppkey = ps_suppkey and ps_partkey = p_partkey and l1.l_partkey = 
ps_partkey and l1.l_suppkey = ps_suppkey
        and p_name like 'ivory%'
        and s_nationkey = n_nationkey
        and n_name = 'UNITED KINGDOM'
order by
        s_name;
        
-- QUERY_21

select
        s_name,
        count(*) as numwait
from
        supplier,
        lineitem l1,
        orders,
        nation
where
        s_suppkey = l1.l_suppkey
        and o_orderkey = l1.l_orderkey
        and o_orderstatus = 'F'
        and l1.l_receiptdate > l1.l_commitdate
        and exists (
                select
                        *
                from
                        lineitem l2
                where
                        l2.l_orderkey = l1.l_orderkey
                        and l2.l_suppkey <> l1.l_suppkey
        )
        and not exists (
                select
                        *
                from
                        lineitem l3
                where
                        l3.l_orderkey = l1.l_orderkey
                        and l3.l_suppkey <> l1.l_suppkey
                        and l3.l_receiptdate > l3.l_commitdate
        )
        and s_nationkey = n_nationkey
        and n_name = 'UNITED KINGDOM'
group by
        s_name
order by
        numwait desc,
        s_name
limit 100;



-- QUERY_22

select
        cntrycode,
        count(*) as numcust,
        sum(c_acctbal) as totacctbal
from
        (
                select
                        substring(c_phone from 1 for 2) as cntrycode,
                        c_acctbal
                from
                        customer
                where
                        substring(c_phone from 1 for 2) in
                                ('19', '29', '25', '23', '26', '11', '34')
                        and c_acctbal > (
                                select
                                        avg(c_acctbal)
                                from
                                        customer
                                where
                                        c_acctbal > 0.00
                                        and substring(c_phone from 1 for 2) in
                                                ('19', '29', '25', '23', '26', 
'11', '34')
                        )
                        and not exists (
                                select
                                        *
                                from
                                        orders
                                where
                                        o_custkey = c_custkey
                        )
        ) as custsale
group by
        cntrycode
order by
        cntrycode;


-- Does not finish as sequential scans orders 23,000 times.
-- Should re-write to do left outer join with orders and then check for 
o_custkey is NULL.
http://www.postgresql.org/docs/8.3/interactive/xplang-install.html
http://www.postgresql.org/docs/8.3/interactive/plpgsql-control-structures.html
http://www.postgresql.org/docs/8.3/interactive/plpgsql-declarations.html


CREATE FUNCTION plpgsql_call_handler() RETURNS language_handler AS
    '$libdir/plpgsql' LANGUAGE C;

CREATE FUNCTION plpgsql_validator(oid) RETURNS void AS
    '$libdir/plpgsql' LANGUAGE C;

CREATE TRUSTED PROCEDURAL LANGUAGE plpgsql
    HANDLER plpgsql_call_handler
    VALIDATOR plpgsql_validator;

CREATE VIEW benchview AS
    SELECT * FROM lineitem, part where l_partkey = p_partkey;

CREATE OR REPLACE FUNCTION bench() RETURNS integer AS $$
DECLARE
    rr RECORD;
    cnt integer;
BEGIN
    cnt = 0;

    FOR rr IN SELECT * FROM benchview LOOP
        cnt = cnt + 1;
    END LOOP;

    RETURN cnt;
END;
$$ LANGUAGE plpgsql;


CREATE OR REPLACE FUNCTION benchtime()
  RETURNS integer AS
$BODY$
DECLARE
    rr RECORD;
    cnt integer;
    starttime timestamp;
    curtime timestamp;
    timediff interval;
BEGIN
    cnt = 0;
    
    starttime := timeofday()::timestamp;
    RAISE NOTICE 'START: %',starttime;

    FOR rr IN SELECT * FROM benchview LOOP
        cnt = cnt + 1;
        IF cnt = 100 THEN
                curtime := timeofday()::timestamp;
                RAISE NOTICE '100 results: %',curtime;
        ELSIF cnt = 1000 THEN
                curtime := timeofday()::timestamp;
                RAISE NOTICE '1000 results: %',curtime;
        END IF;
    END LOOP;
    
    curtime := timeofday()::timestamp;
    timediff = curtime - starttime;
    RAISE NOTICE 'DONE: %',curtime;
    RAISE NOTICE 'RUNTIME: %',timediff;
    --RAISE NOTICE 'RUNTIME: %',to_char(timediff, 'SSSS');
    RAISE NOTICE 'Generated results: %',cnt;

    RETURN cnt;
END;
$BODY$
  LANGUAGE 'plpgsql';
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to