Re: [HACKERS] Aggregate Push Down - Performing aggregation on foreign server

Jeevan Chalke Wed, 12 Oct 2016 06:20:14 -0700

On Thu, Sep 8, 2016 at 10:41 PM, Ashutosh Bapat <
ashutosh.ba...@enterprisedb.com> wrote:


>
>
> I think we should try to measure performance gain because of aggregate
> pushdown. The EXPLAIN
> doesn't show actual improvement in the execution times.
>

I did performance testing for aggregate push down and see good performance
with the patch.

Attached is the script I have used to get the performance numbers along with
the results I got with and without patch (pg_agg_push_down_v3.patch).  Also
attached few GNU plots from the readings I got.  These were run on my local
VM having following details:

Linux centos7 3.10.0-327.28.3.el7.x86_64 #1 SMP Thu Aug 18 19:05:49 UTC
2016 x86_64 x86_64 x86_64 GNU/Linux
RAM alloted: 8 GB
CPUs alloted: 8
postgresql.conf is default.

With aggregate push down I see around 12x performance for count(*)
operation.
In another test, I have observed that if number of groups returned from
remote server is same as that of input rows, then aggregate push down
performs
slightly poor which I think is expected. However in all other cases where
number of groups are less than input rows, pushing down aggregate gives
better
performance than performing grouping on the local server.

I did this performance testing on my local setup. I would expected even
better
numbers on a specialized high-end performance machine.  I would be more than
happy if someone does this testing on such high-end machine.

Let me know if you need any help.

Thanks

-- 
Jeevan B Chalke
Principal Software Engineer, Product Development
EnterpriseDB Corporation
The Enterprise PostgreSQL Company

=== WITH PATCH ===

                                query                                |  rows   
| avg_exe_time | std_dev_exe_time | min_exe_time | max_exe_time 
---------------------------------------------------------------------+---------+--------------+------------------+--------------+--------------
 select count(*) from fperf1                                         |       1 
|     141.8686 |  4.4668353500495 |      138.903 |      152.241
 select c2, avg(c1) from fperf1 group by c2 having count(*) < 333334 |       2 
|     405.7661 | 11.9403142844368 |      400.689 |      439.675
 select c2, sum(c1) from fperf1 group by c2                          |       3 
|     363.2299 | 4.29278180851687 |      354.815 |      369.739
 select c3, avg(c1), sum(c2) from fperf1 group by c3                 |       5 
|     454.4478 | 3.98680590057494 |      447.248 |      457.955
 select c4, avg(c1), sum(c2) from fperf1 group by c4                 |      10 
|     501.0197 | 5.26951028823454 |      491.726 |      508.574
 select c5, avg(c1), sum(c2) from fperf1 group by c5                 |     100 
|     490.0783 | 5.64261263462349 |       480.78 |      496.662
 select c6, avg(c1), sum(c2) from fperf1 group by c6                 |    1000 
|     582.6842 |  9.9196984474776 |      564.425 |       592.69
 select c1%1, avg(c1), sum(c2) from fperf1 group by c1%1             |       1 
|     901.1682 | 9.58382273302904 |      888.383 |      923.386
 select c1%10, avg(c1), sum(c2) from fperf1 group by c1%10           |      10 
|    1032.1959 | 6.89087326268629 |     1018.598 |     1045.423
 select c1%100, avg(c1), sum(c2) from fperf1 group by c1%100         |     100 
|    1076.3834 | 11.1022883947539 |     1061.305 |     1093.892
 select c1%1000, avg(c1), sum(c2) from fperf1 group by c1%1000       |    1000 
|    1113.6001 | 11.2143472634172 |     1092.863 |     1133.007
 select c1%10000, avg(c1), sum(c2) from fperf1 group by c1%10000     |   10000 
|    1182.1374 | 32.5953859659133 |     1148.961 |     1231.296
 select c1%100000, avg(c1), sum(c2) from fperf1 group by c1%100000   |  100000 
|    1467.1811 | 14.3535175437048 |      1443.95 |     1485.645
 select c1%1000000, avg(c1), sum(c2) from fperf1 group by c1%1000000 | 1000000 
|    5466.2306 | 633.367848489717 |     5127.339 |     7248.381
(14 rows)


=== WITHOUT PATCH ===

                                query                                |  rows   
| avg_exe_time | std_dev_exe_time | min_exe_time | max_exe_time 
---------------------------------------------------------------------+---------+--------------+------------------+--------------+--------------
 select count(*) from fperf1                                         |       1 
|    1674.5339 | 27.1549108570754 |     1637.345 |     1725.057
 select c2, avg(c1) from fperf1 group by c2 having count(*) < 333334 |       2 
|    2467.8368 |  22.606929678949 |     2437.674 |     2506.438
 select c2, sum(c1) from fperf1 group by c2                          |       3 
|      2387.39 | 34.3686766983568 |     2350.396 |     2444.313
 select c3, avg(c1), sum(c2) from fperf1 group by c3                 |       5 
|    2702.3344 | 28.0312843452488 |     2665.317 |     2746.862
 select c4, avg(c1), sum(c2) from fperf1 group by c4                 |      10 
|    2850.9818 | 42.5758532759606 |     2813.562 |     2946.991
 select c5, avg(c1), sum(c2) from fperf1 group by c5                 |     100 
|    3519.9419 | 34.0792701064716 |     3473.709 |     3583.043
 select c6, avg(c1), sum(c2) from fperf1 group by c6                 |    1000 
|    3030.4009 | 27.6855914248393 |     3008.713 |      3094.22
 select c1%1, avg(c1), sum(c2) from fperf1 group by c1%1             |       1 
|    2577.7204 | 32.2177922141319 |     2551.266 |     2644.744
 select c1%10, avg(c1), sum(c2) from fperf1 group by c1%10           |      10 
|    2615.7951 | 19.8623889434851 |     2592.613 |     2646.696
 select c1%100, avg(c1), sum(c2) from fperf1 group by c1%100         |     100 
|    2611.5906 | 26.5091683017013 |     2578.586 |     2664.936
 select c1%1000, avg(c1), sum(c2) from fperf1 group by c1%1000       |    1000 
|    2624.3797 | 18.1034474559238 |     2602.988 |     2665.453
 select c1%10000, avg(c1), sum(c2) from fperf1 group by c1%10000     |   10000 
|    2702.8801 | 26.5523021159207 |      2663.27 |      2742.34
 select c1%100000, avg(c1), sum(c2) from fperf1 group by c1%100000   |  100000 
|    3019.5939 | 34.1965254942112 |     2978.507 |     3076.033
 select c1%1000000, avg(c1), sum(c2) from fperf1 group by c1%1000000 | 1000000 
|    4555.6334 | 46.5314774013736 |     4496.797 |     4647.332
(14 rows)

drop foreign table fperf1;
drop user mapping for current_user server srv;
drop server srv;
drop table perf1;
drop extension postgres_fdw;

create extension postgres_fdw;

create table perf1(
  c1 int primary key,
  c2 int not null,
  c3 int,
  c4 text,
  c5 timestamptz,
  c6 char(10)
);

insert into perf1 select
  id,
  id % 3,
  id % 5,
  to_char(id%10, 'FM0000000'),
  '1970-01-01'::timestamptz + ((id % 100) || ' days')::interval,
  id % 1000
from generate_series(1, 1000000) id;

do $d$
  begin
    execute $$create server srv foreign data wrapper postgres_fdw
      options (dbname '$$||current_database()||$$',
               port '$$||current_setting('port')||$$'
    )$$;
  end;
$d$;

create user mapping for current_user server srv;

create foreign table fperf1(
  c1 int,
  c2 int,
  c3 int,
  c4 text,
  c5 timestamptz,
  c6 char(10)
) server srv options (table_name 'perf1');

analyze perf1;

-- This part is taken from Ashutosh Bapat's script posted for
-- partitionwise join and modified per my requirement.

-- Table to collect data
drop table perf;
create table perf(query text, rows bigint,
  avg_exe_time float, std_dev_exe_time float,
  min_exe_time float, max_exe_time float);

drop function query_execution_stats(num_samples int);

create function query_execution_stats(num_samples int)
RETURNS void LANGUAGE plpgsql AS $$
DECLARE
  q record;
  plan json;
  e json;
  avg_exe_time float;
  std_dev_exe_time float;
  min_exe_time float;
  max_exe_time float;
BEGIN
  CREATE TEMPORARY TABLE query_exe_times(exe_time float); 

  FOR q IN select query from querytab LOOP
    -- Execute query a few times to warm the cache
    FOR i IN 1 .. num_samples/5 LOOP
      EXECUTE q.query;
    END LOOP;

    TRUNCATE query_exe_times;
    RAISE NOTICE 'Perf testing query: %', q.query;
    EXECUTE 'EXPLAIN (analyze, verbose, format json) ' || q.query INTO e;
--    RAISE NOTICE 'Remote SQL: %', e->0->'Plan'->'Remote SQL';

    FOR i IN 1 .. num_samples LOOP
      EXECUTE 'EXPLAIN (analyze, format json) ' || q.query INTO plan;
      INSERT INTO query_exe_times VALUES ((plan->0->'Execution Time')::text::float);
--      RAISE NOTICE '  completed % samples', i;
    END LOOP;

    SELECT avg(exe_time), stddev(exe_time), min(exe_time), max(exe_time)
      INTO avg_exe_time, std_dev_exe_time, min_exe_time, max_exe_time
      FROM query_exe_times;

    INSERT INTO perf VALUES (q.query, (e->0->'Plan'->'Actual Rows')::text::bigint,
      avg_exe_time, std_dev_exe_time, min_exe_time, max_exe_time);
  END LOOP;

  DROP TABLE query_exe_times;
END;
$$;

--Run queries now
drop table querytab;
create table querytab(query text);

-- Simple count(*) on large table
insert into querytab values ('select count(*) from fperf1');
insert into querytab values ('select c2, avg(c1) from fperf1 group by c2 having count(*) < 333334');
insert into querytab values ('select c2, sum(c1) from fperf1 group by c2');
insert into querytab values ('select c3, avg(c1), sum(c2) from fperf1 group by c3');
insert into querytab values ('select c4, avg(c1), sum(c2) from fperf1 group by c4');
insert into querytab values ('select c5, avg(c1), sum(c2) from fperf1 group by c5');
insert into querytab values ('select c6, avg(c1), sum(c2) from fperf1 group by c6');

insert into querytab values ('select c1%1, avg(c1), sum(c2) from fperf1 group by c1%1');
insert into querytab values ('select c1%10, avg(c1), sum(c2) from fperf1 group by c1%10');
insert into querytab values ('select c1%100, avg(c1), sum(c2) from fperf1 group by c1%100');
insert into querytab values ('select c1%1000, avg(c1), sum(c2) from fperf1 group by c1%1000');
insert into querytab values ('select c1%10000, avg(c1), sum(c2) from fperf1 group by c1%10000');
insert into querytab values ('select c1%100000, avg(c1), sum(c2) from fperf1 group by c1%100000');
insert into querytab values ('select c1%1000000, avg(c1), sum(c2) from fperf1 group by c1%1000000');

\set num_samples 10
select query_execution_stats(:num_samples);
select * from perf;

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] Aggregate Push Down - Performing aggregation on foreign server

Reply via email to