On Wed, Sep 15, 2021 at 5:35 PM Heikki Linnakangas <hlinn...@iki.fi> wrote:
> Thanks, here's another rebase.
>
> - Heikki

I've had a chance to review and test out the v5 patches.

0001 is a useful simplification. Nothing in 0002 stood out as needing
comment.

I've done some performance testing of master versus both patches applied.
The full results and test script are attached, but I'll give a summary
here. A variety of value distributions were tested, with work_mem from 1MB
to 16MB, plus 2GB which will not use external sort at all. I settled on 2
million records for the sort, to have something large enough to work with
but also keep the test time reasonable. That works out to about 130MB on
disk. We have recent improvements to datum sort, so I used both single
values and all values in the SELECT list.

The system was on a Westmere-era Xeon with gcc 4.8. pg_prewarm was run on
the input tables. The raw measurements were reduced to the minimum of five
runs.

I can confirm that sort performance is improved with small values of
work_mem. That was not the motivating reason for the patch, but it's a nice
bonus. Even as high as 16MB work_mem, it's possible some of the 4-6%
differences represent real improvement and not just noise or binary
effects, but it's much more convincing at 4MB and below, with 25-30% faster
with non-datum integer sorts at 1MB work_mem. The nominal regressions seem
within the noise level, with one exception that only showed up in one set
of measurements (-10.89% in the spreadsheet). I'm not sure what to make of
that since it only happens in one combination of factors and nowhere else.

On Sat, Sep 11, 2021 at 4:28 AM Zhihong Yu <z...@yugabyte.com> wrote:
> + * Before PostgreSQL 14, we used the polyphase merge algorithm (Knuth's
> + * Algorithm 5.4.2D),
>
> I think the above 'Before PostgreSQL 14' should be 'Before PostgreSQL 15'
now that PostgreSQL 14 has been released.
>
> +static int64
> +merge_read_buffer_size(int64 avail_mem, int nInputTapes, int nInputRuns,
> +                      int maxOutputTapes)
>
> For memory to allocate, I think uint64 can be used (instead of int64).

int64 is used elsewhere in this file, and I see no reason to do otherwise.

Aside from s/14/15/ for the version as noted above, I've marked it Ready
for Committer.

--
John Naylor
EDB: http://www.enterprisedb.com
set -e

ROWS=$1


function log {
	echo `date +%s` [`date +'%Y-%m-%d %H:%M:%S'`] $1
}

function create_tables {

	./inst/bin/psql > /dev/null  <<EOF
-- tables for source data
DROP TABLE IF EXISTS data_int;
CREATE TABLE data_int (a INT);
INSERT INTO  data_int SELECT i FROM generate_series(1, $ROWS) s(i);

-- tables used for the actual testing
CREATE TABLE IF NOT EXISTS int_test         (a INT, b TEXT);
CREATE TABLE IF NOT EXISTS txt_test         (a TEXT, b TEXT);
EOF

}

function truncate_tables {

	log "truncating tables"

	./inst/bin/psql > /dev/null  <<EOF
TRUNCATE TABLE int_test;
TRUNCATE TABLE txt_test;
EOF

}

function vacuum_analyze {

	log "analyzing"

	./inst/bin/psql > /dev/null  <<EOF
VACUUM ANALYZE;
CHECKPOINT;
EOF

}

function prewarm {
	log "prewarming buffers"

	./inst/bin/psql > /dev/null  <<EOF
SELECT pg_prewarm(oid) FROM pg_class WHERE oid > 16384 AND relkind = 'r';
EOF

}

################# load test tables

function load_random {

	truncate_tables

	log "loading random"

	./inst/bin/psql > /dev/null  <<EOF
SET work_mem = '2GB';

-- this needs randomization
INSERT INTO int_test SELECT a, md5(a::text) FROM data_int ORDER BY random();

-- these already are random
INSERT INTO txt_test SELECT md5(a::text), md5((a+1)::text) FROM data_int;
EOF

	prewarm

	vacuum_analyze

}

function load_sorted {

	truncate_tables

	log "loading sorted"

	./inst/bin/psql > /dev/null  <<EOF
SET work_mem = '2GB';

INSERT INTO int_test SELECT a, md5(a::text) FROM data_int ORDER BY 1;
INSERT INTO txt_test SELECT md5(a::text), md5((a+1)::text) FROM data_int ORDER BY 1;
EOF

	prewarm

	vacuum_analyze

}

function load_almost_sorted {

	truncate_tables

	log "loading almost sorted"

	./inst/bin/psql > /dev/null  <<EOF
SET work_mem = '2GB';

INSERT INTO int_test SELECT a, b FROM (
    SELECT a, md5(a::text) AS b, rank() OVER (ORDER BY a) AS r FROM data_int
) foo ORDER BY r + (100 * random());

INSERT INTO txt_test SELECT a, b FROM (
    SELECT md5(a::text) AS a, md5(((a+1))::text) AS b, rank() OVER (ORDER BY md5(a::text)) AS r FROM data_int
) foo ORDER BY r + (100 * random());

EOF

	prewarm

	vacuum_analyze

}

function load_reversed {

	truncate_tables

	log "loading reversed"

	./inst/bin/psql > /dev/null  <<EOF
SET work_mem = '2GB';

INSERT INTO int_test SELECT a, md5(a::text) FROM data_int ORDER BY 1 DESC;
INSERT INTO txt_test SELECT md5(a::text), md5((a+1)::text) FROM data_int ORDER BY 1 DESC;
EOF

	prewarm

	vacuum_analyze

}

function load_organ_pipe {

	truncate_tables

	log "loading organ pipe"

	./inst/bin/psql > /dev/null  <<EOF
SET work_mem = '2GB';

WITH c AS (SELECT count(*)/2 AS half from data_int)
INSERT INTO int_test
(SELECT a, md5(a::text) FROM data_int ORDER BY 1 ASC  LIMIT (SELECT half from c))
UNION ALL
(SELECT a, md5(a::text) FROM data_int ORDER BY 1 DESC LIMIT (SELECT half from c));

WITH c AS (SELECT count(*)/2 AS half from data_int)
INSERT INTO txt_test
(SELECT md5(a::text), md5((a+1)::text) FROM data_int ORDER BY 1 ASC  LIMIT (SELECT half from c))
UNION ALL
(SELECT md5(a::text), md5((a+1)::text) FROM data_int ORDER BY 1 DESC LIMIT (SELECT half from c));

EOF

	prewarm

	vacuum_analyze

}


function load_0_1 {

	truncate_tables

	log "loading 0 1"

	./inst/bin/psql > /dev/null  <<EOF
SET work_mem = '2GB';

INSERT INTO int_test SELECT round(random()), md5(a::text) FROM data_int;
INSERT INTO txt_test SELECT 
	case when a > 0.5 
	then 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
	else 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb'
	end
FROM data_int;
EOF

	prewarm

	vacuum_analyze

}

################# run

function run_query {

	times=""
	group=$1
	wmem=$2
	workers=$3
	query=$4

	echo "--" `date` [`date +%s`] >> explains.log
	echo "-- $group rows=$ROWS work_mem=$wmem workers=$workers" >> explains.log

	./inst/bin/psql >> explains.log 2>&1 <<EOF
SET trace_sort=on;
SET work_mem='$wmem';
SET max_parallel_workers_per_gather=$workers;
explain $query
EOF

	s=`date +%s`

	for i in `seq 1 5`; do

		/usr/bin/time -f '%e' -o 'query.time' ./inst/bin/psql > /dev/null <<EOF
\pset pager off
\o /dev/null
SET trace_sort=on;
SET work_mem='$wmem';
SET max_parallel_workers_per_gather=$workers;
COPY ($query) TO '/dev/null'
EOF

		x=`cat query.time`
		times="$times\t$x"

	done

	e=`date +%s`

	echo -e "$group\t$ROWS\t$wmem\t$workers\t$s\t$e\t$query\t$times" >> results.csv
}


function run_queries {

	for wm in '1MB' '2MB' '4MB' '8MB' '16MB' '2GB'; do

		log "running queries work_mem=$wm noparallel"

		run_query $1 $wm 0 'SELECT a FROM int_test ORDER BY 1 OFFSET '$ROWS''
		run_query $1 $wm 0 'SELECT * FROM int_test ORDER BY 1 OFFSET '$ROWS''

		run_query $1 $wm 0 'SELECT a FROM txt_test ORDER BY 1 OFFSET '$ROWS''
		run_query $1 $wm 0 'SELECT * FROM txt_test ORDER BY 1 OFFSET '$ROWS''

	done

}

create_tables

log "sort benchmark $ROWS"

load_random
run_queries "random"

load_sorted
run_queries "sorted"

load_almost_sorted
run_queries "almost_sorted"

load_reversed
run_queries "reversed"

load_organ_pipe
run_queries "organ_pipe"

load_0_1
run_queries "0_1"

Attachment: kway-merge-test-1.ods
Description: application/vnd.oasis.opendocument.spreadsheet

Reply via email to