On Thu, Sep 8, 2016 at 10:41 PM, Ashutosh Bapat < ashutosh.ba...@enterprisedb.com> wrote:
> > > I think we should try to measure performance gain because of aggregate > pushdown. The EXPLAIN > doesn't show actual improvement in the execution times. > I did performance testing for aggregate push down and see good performance with the patch. Attached is the script I have used to get the performance numbers along with the results I got with and without patch (pg_agg_push_down_v3.patch). Also attached few GNU plots from the readings I got. These were run on my local VM having following details: Linux centos7 3.10.0-327.28.3.el7.x86_64 #1 SMP Thu Aug 18 19:05:49 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux RAM alloted: 8 GB CPUs alloted: 8 postgresql.conf is default. With aggregate push down I see around 12x performance for count(*) operation. In another test, I have observed that if number of groups returned from remote server is same as that of input rows, then aggregate push down performs slightly poor which I think is expected. However in all other cases where number of groups are less than input rows, pushing down aggregate gives better performance than performing grouping on the local server. I did this performance testing on my local setup. I would expected even better numbers on a specialized high-end performance machine. I would be more than happy if someone does this testing on such high-end machine. Let me know if you need any help. Thanks -- Jeevan B Chalke Principal Software Engineer, Product Development EnterpriseDB Corporation The Enterprise PostgreSQL Company
=== WITH PATCH === query | rows | avg_exe_time | std_dev_exe_time | min_exe_time | max_exe_time ---------------------------------------------------------------------+---------+--------------+------------------+--------------+-------------- select count(*) from fperf1 | 1 | 141.8686 | 4.4668353500495 | 138.903 | 152.241 select c2, avg(c1) from fperf1 group by c2 having count(*) < 333334 | 2 | 405.7661 | 11.9403142844368 | 400.689 | 439.675 select c2, sum(c1) from fperf1 group by c2 | 3 | 363.2299 | 4.29278180851687 | 354.815 | 369.739 select c3, avg(c1), sum(c2) from fperf1 group by c3 | 5 | 454.4478 | 3.98680590057494 | 447.248 | 457.955 select c4, avg(c1), sum(c2) from fperf1 group by c4 | 10 | 501.0197 | 5.26951028823454 | 491.726 | 508.574 select c5, avg(c1), sum(c2) from fperf1 group by c5 | 100 | 490.0783 | 5.64261263462349 | 480.78 | 496.662 select c6, avg(c1), sum(c2) from fperf1 group by c6 | 1000 | 582.6842 | 9.9196984474776 | 564.425 | 592.69 select c1%1, avg(c1), sum(c2) from fperf1 group by c1%1 | 1 | 901.1682 | 9.58382273302904 | 888.383 | 923.386 select c1%10, avg(c1), sum(c2) from fperf1 group by c1%10 | 10 | 1032.1959 | 6.89087326268629 | 1018.598 | 1045.423 select c1%100, avg(c1), sum(c2) from fperf1 group by c1%100 | 100 | 1076.3834 | 11.1022883947539 | 1061.305 | 1093.892 select c1%1000, avg(c1), sum(c2) from fperf1 group by c1%1000 | 1000 | 1113.6001 | 11.2143472634172 | 1092.863 | 1133.007 select c1%10000, avg(c1), sum(c2) from fperf1 group by c1%10000 | 10000 | 1182.1374 | 32.5953859659133 | 1148.961 | 1231.296 select c1%100000, avg(c1), sum(c2) from fperf1 group by c1%100000 | 100000 | 1467.1811 | 14.3535175437048 | 1443.95 | 1485.645 select c1%1000000, avg(c1), sum(c2) from fperf1 group by c1%1000000 | 1000000 | 5466.2306 | 633.367848489717 | 5127.339 | 7248.381 (14 rows) === WITHOUT PATCH === query | rows | avg_exe_time | std_dev_exe_time | min_exe_time | max_exe_time ---------------------------------------------------------------------+---------+--------------+------------------+--------------+-------------- select count(*) from fperf1 | 1 | 1674.5339 | 27.1549108570754 | 1637.345 | 1725.057 select c2, avg(c1) from fperf1 group by c2 having count(*) < 333334 | 2 | 2467.8368 | 22.606929678949 | 2437.674 | 2506.438 select c2, sum(c1) from fperf1 group by c2 | 3 | 2387.39 | 34.3686766983568 | 2350.396 | 2444.313 select c3, avg(c1), sum(c2) from fperf1 group by c3 | 5 | 2702.3344 | 28.0312843452488 | 2665.317 | 2746.862 select c4, avg(c1), sum(c2) from fperf1 group by c4 | 10 | 2850.9818 | 42.5758532759606 | 2813.562 | 2946.991 select c5, avg(c1), sum(c2) from fperf1 group by c5 | 100 | 3519.9419 | 34.0792701064716 | 3473.709 | 3583.043 select c6, avg(c1), sum(c2) from fperf1 group by c6 | 1000 | 3030.4009 | 27.6855914248393 | 3008.713 | 3094.22 select c1%1, avg(c1), sum(c2) from fperf1 group by c1%1 | 1 | 2577.7204 | 32.2177922141319 | 2551.266 | 2644.744 select c1%10, avg(c1), sum(c2) from fperf1 group by c1%10 | 10 | 2615.7951 | 19.8623889434851 | 2592.613 | 2646.696 select c1%100, avg(c1), sum(c2) from fperf1 group by c1%100 | 100 | 2611.5906 | 26.5091683017013 | 2578.586 | 2664.936 select c1%1000, avg(c1), sum(c2) from fperf1 group by c1%1000 | 1000 | 2624.3797 | 18.1034474559238 | 2602.988 | 2665.453 select c1%10000, avg(c1), sum(c2) from fperf1 group by c1%10000 | 10000 | 2702.8801 | 26.5523021159207 | 2663.27 | 2742.34 select c1%100000, avg(c1), sum(c2) from fperf1 group by c1%100000 | 100000 | 3019.5939 | 34.1965254942112 | 2978.507 | 3076.033 select c1%1000000, avg(c1), sum(c2) from fperf1 group by c1%1000000 | 1000000 | 4555.6334 | 46.5314774013736 | 4496.797 | 4647.332 (14 rows)
drop foreign table fperf1; drop user mapping for current_user server srv; drop server srv; drop table perf1; drop extension postgres_fdw; create extension postgres_fdw; create table perf1( c1 int primary key, c2 int not null, c3 int, c4 text, c5 timestamptz, c6 char(10) ); insert into perf1 select id, id % 3, id % 5, to_char(id%10, 'FM0000000'), '1970-01-01'::timestamptz + ((id % 100) || ' days')::interval, id % 1000 from generate_series(1, 1000000) id; do $d$ begin execute $$create server srv foreign data wrapper postgres_fdw options (dbname '$$||current_database()||$$', port '$$||current_setting('port')||$$' )$$; end; $d$; create user mapping for current_user server srv; create foreign table fperf1( c1 int, c2 int, c3 int, c4 text, c5 timestamptz, c6 char(10) ) server srv options (table_name 'perf1'); analyze perf1; -- This part is taken from Ashutosh Bapat's script posted for -- partitionwise join and modified per my requirement. -- Table to collect data drop table perf; create table perf(query text, rows bigint, avg_exe_time float, std_dev_exe_time float, min_exe_time float, max_exe_time float); drop function query_execution_stats(num_samples int); create function query_execution_stats(num_samples int) RETURNS void LANGUAGE plpgsql AS $$ DECLARE q record; plan json; e json; avg_exe_time float; std_dev_exe_time float; min_exe_time float; max_exe_time float; BEGIN CREATE TEMPORARY TABLE query_exe_times(exe_time float); FOR q IN select query from querytab LOOP -- Execute query a few times to warm the cache FOR i IN 1 .. num_samples/5 LOOP EXECUTE q.query; END LOOP; TRUNCATE query_exe_times; RAISE NOTICE 'Perf testing query: %', q.query; EXECUTE 'EXPLAIN (analyze, verbose, format json) ' || q.query INTO e; -- RAISE NOTICE 'Remote SQL: %', e->0->'Plan'->'Remote SQL'; FOR i IN 1 .. num_samples LOOP EXECUTE 'EXPLAIN (analyze, format json) ' || q.query INTO plan; INSERT INTO query_exe_times VALUES ((plan->0->'Execution Time')::text::float); -- RAISE NOTICE ' completed % samples', i; END LOOP; SELECT avg(exe_time), stddev(exe_time), min(exe_time), max(exe_time) INTO avg_exe_time, std_dev_exe_time, min_exe_time, max_exe_time FROM query_exe_times; INSERT INTO perf VALUES (q.query, (e->0->'Plan'->'Actual Rows')::text::bigint, avg_exe_time, std_dev_exe_time, min_exe_time, max_exe_time); END LOOP; DROP TABLE query_exe_times; END; $$; --Run queries now drop table querytab; create table querytab(query text); -- Simple count(*) on large table insert into querytab values ('select count(*) from fperf1'); insert into querytab values ('select c2, avg(c1) from fperf1 group by c2 having count(*) < 333334'); insert into querytab values ('select c2, sum(c1) from fperf1 group by c2'); insert into querytab values ('select c3, avg(c1), sum(c2) from fperf1 group by c3'); insert into querytab values ('select c4, avg(c1), sum(c2) from fperf1 group by c4'); insert into querytab values ('select c5, avg(c1), sum(c2) from fperf1 group by c5'); insert into querytab values ('select c6, avg(c1), sum(c2) from fperf1 group by c6'); insert into querytab values ('select c1%1, avg(c1), sum(c2) from fperf1 group by c1%1'); insert into querytab values ('select c1%10, avg(c1), sum(c2) from fperf1 group by c1%10'); insert into querytab values ('select c1%100, avg(c1), sum(c2) from fperf1 group by c1%100'); insert into querytab values ('select c1%1000, avg(c1), sum(c2) from fperf1 group by c1%1000'); insert into querytab values ('select c1%10000, avg(c1), sum(c2) from fperf1 group by c1%10000'); insert into querytab values ('select c1%100000, avg(c1), sum(c2) from fperf1 group by c1%100000'); insert into querytab values ('select c1%1000000, avg(c1), sum(c2) from fperf1 group by c1%1000000'); \set num_samples 10 select query_execution_stats(:num_samples); select * from perf;
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers