Hi All,
Now that we have join pushdown support in postgres_fdw, the sort pushdown
mechanism for base relations can be extended to work for pushed-down joins as
well. PFA the patch for the same.

The code to find the useful set of pathkeys and then generate a path for each
list of pathkeys has been moved into a function which is called for both base
relations and join relations while creating their respective paths. The useful
pathkeys are the same as for a base relation, i.e. root->query_pathkeys and the
pathkeys useful for merge joins, as discussed in [1].
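
In terms of the SQL sent to the remote server, the effect is that the deparsed
query for a pushed-down join can now carry an ORDER BY matching one of those
useful pathkeys, roughly along these lines (a hand-written illustration of the
idea, not the exact deparsed query):

-- pushed-down join between the remote tables behind ft1 and ft2, with the
-- result sorted so that a local merge join can consume it directly
SELECT r1.val, r2.val
  FROM (lt1 r1 INNER JOIN lt2 r2 ON (r1.val = r2.val))
  ORDER BY r1.val ASC NULLS LAST;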

I measured the performance of pushing down the sort for merge joins with the
query SELECT lt1.val, ft1.val, ft2.val FROM lt1 join (ft1 join ft2 on (ft1.val =
ft2.val)) on (lt1.val = ft1.val), where ft1 and ft2 are foreign tables whose
join gets pushed down to the foreign server, and lt1 is the local table.
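
Whether the sort actually gets pushed down is visible in the plan: EXPLAIN
(VERBOSE) shows the Remote SQL for the foreign join scan, and with the patch
applied that remote query is expected to carry an ORDER BY:

-- the ForeignScan's Remote SQL line shows the query sent to the foreign server
EXPLAIN (VERBOSE)
SELECT lt1.val, ft1.val, ft2.val
FROM lt1 join (ft1 join ft2 on (ft1.val = ft2.val)) on (lt1.val = ft1.val);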

Without the patch, the server prefers a local merge join between the foreign
tables, getting the data sorted from the foreign server, followed by a merge
join with the local table. With the patch, it pushes down the foreign join and
also gets the joined data sorted for the local merge join. The execution times
(in milliseconds) measured over 10 runs of the query, with and without the
patch, are:

With patch
 avg_exe_time | std_dev_exe_time | min_exe_time | max_exe_time
--------------+------------------+--------------+--------------
   60310.0369 | 251.075471210925 |    59895.064 |    60746.496

Without patch
 avg_exe_time | std_dev_exe_time | min_exe_time | max_exe_time
--------------+------------------+--------------+--------------
   86396.6001 |  254.30988131848 |    85906.606 |    86742.311

With the patch, the average execution time of the query drops from about 86.4 s
to 60.3 s, a reduction of roughly 30% ((86396.6 - 60310.0) / 86396.6 ≈ 0.30).

The scripts to set up and run the query, and the outputs of running the query
with and without the patch, are attached.


[1]
http://www.postgresql.org/message-id/CAFjFpRfeKHiCmwJ72p4=zvuzrqsau9tbfyw7vwr-5ppvrcb...@mail.gmail.com
-- 
Best Wishes,
Ashutosh Bapat
EnterpriseDB Corporation
The Postgres Database Company

Attachment: pg_join_sort_pd.patch

\set num_samples 10
\set query 'SELECT lt1.val, ft1.val, ft2.val FROM lt1 join (ft1 join ft2 on (ft1.val = ft2.val)) on (lt1.val = ft1.val)'
EXPLAIN (VERBOSE, ANALYSE) :query ;
SELECT avg_exe_time, std_dev_exe_time, min_exe_time, max_exe_time
		FROM query_execution_stats(:'query', :num_samples);
\set num_rows (1000*1000*10)
-- Create local tables (to be pointed to by foreign tables)
DROP TABLE lt1 CASCADE;
CREATE TABLE lt1(val int, val2 int);
INSERT INTO lt1 SELECT i, i FROM (SELECT generate_series(1, :num_rows)) s(i);
CREATE INDEX i_lt1_val ON lt1(val);


DROP TABLE lt2 CASCADE;
CREATE TABLE lt2(val int, val2 int);
INSERT INTO lt2 SELECT i, i FROM (SELECT generate_series(1, :num_rows)) s(i);
CREATE INDEX i_lt2_val ON lt2(val);

DROP TABLE lt3 CASCADE;
CREATE TABLE lt3(val int, val2 int);
INSERT INTO lt3 SELECT i, i FROM (SELECT generate_series(1, :num_rows)) s(i);
CREATE INDEX i_lt3_val ON lt3(val);

DROP EXTENSION postgres_fdw CASCADE;
create extension postgres_fdw;
CREATE SERVER pg_server FOREIGN DATA WRAPPER postgres_fdw OPTIONS (dbname 'postgres');
CREATE USER MAPPING FOR CURRENT_USER SERVER pg_server;
CREATE FOREIGN TABLE ft1 (val int, val2 int) SERVER pg_server OPTIONS (table_name 'lt1',
																use_remote_estimate 'true');
CREATE FOREIGN TABLE ft2 (val int, val2 int) SERVER pg_server OPTIONS (table_name 'lt2',
																use_remote_estimate 'true');

DROP FUNCTION query_execution_stats(query text, num_samples int,
													OUT avg_exe_time float,
													OUT std_dev_exe_time float,
													OUT min_exe_time float,
													OUT max_exe_time float);

CREATE FUNCTION query_execution_stats(query text, num_samples int,
													OUT avg_exe_time float,
													OUT std_dev_exe_time float,
													OUT min_exe_time float,
													OUT max_exe_time float)
RETURNS record LANGUAGE plpgsql AS $$
DECLARE
	plan json;
BEGIN
	CREATE TEMPORARY TABLE query_exe_times(exe_time float); 

	-- Execute query a few times (5% of user specified runs) to warm the cache
	FOR i IN 1 .. num_samples/20 LOOP
		EXECUTE query;
	END LOOP;

	FOR i IN 1 .. num_samples LOOP
		EXECUTE 'EXPLAIN (analyze, format json) ' || query INTO plan;
		INSERT INTO query_exe_times VALUES ((plan->0->'Execution Time')::text::float);
		RAISE NOTICE 'completed % samples', i;
	END LOOP;

	SELECT avg(exe_time), stddev(exe_time), min(exe_time), max(exe_time)
		INTO avg_exe_time, std_dev_exe_time, min_exe_time, max_exe_time
		FROM query_exe_times;

	DROP TABLE query_exe_times;
END;
$$;
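
-- Example invocation of the helper defined above; the query text and sample
-- count here are only illustrative.
SELECT avg_exe_time, std_dev_exe_time, min_exe_time, max_exe_time
	FROM query_execution_stats('SELECT count(*) FROM lt1', 20);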

ANALYZE ft1;
ANALYZE ft2;
ANALYZE lt1;
ANALYZE lt2;
ANALYZE lt3;

Attachment: sort_pd.out.without_patch

Attachment: sort_pd.out.with_patch
