On 2018-04-06 20:08, Alexander Korotkov wrote:
[0001-Covering-v15.patch]
After some more testing I notice there is also a down-side/slow-down to
this patch that is not so bad but more than negligible, and I don't
think it has been mentioned (but I may have missed something in this
thread that's now been running for 1.5 year, not to mention the
tangential btree-thread(s)).
I attach my test-program, which compares master (this morning) with
covered_indexes (warning: it takes a while to generate the used tables).
The test tables are created as:
create table $t (c1 int, c2 int, c3 int, c4 int);
insert into $t (select x, 2*x, 3*x, 4 from generate_series(1,
$rowcount) as x);
create unique index ${t}uniqueinclude_idx on $t using btree (c1, c2)
include (c3, c4);
or for HEAD, just:
create unique index ${t}unique_idx on $t using btree (c1, c2);
Here is typical output (edited a bit to prevent email-mangling):
test1:
-- explain analyze select c1, c2 from nt0___100000000 where c1 < 10000
-- 250x
unpatched 6511: 100M rows Execution Time: (normal/normal) 98 % exec
avg: 2.44
patched 6976: 100M rows Execution Time: (covered/normal) 108 % exec
avg: 2.67
test1 patched /
unpatched: 109.49 %
test4:
-- explain analyze select c1, c2 from nt0___100000000 where c1 < 10000
and c3 < 20
unpatched 6511: 100M rows Execution Time: (normal/normal) 95 % exec
avg: 1.56
patched 6976: 100M rows Execution Time: (covered/normal) 60 % exec
avg: 0.95
test4 patched /
unpatched: 60.83 %
So the main good thing is that 60%, a good improvement -- but that ~109%
(a slow-down) is also quite repeatable.
(there are a more goodies from the patch (like improved insert-speed)
but I just wanted to draw attention to this particular slow-down too)
I took all timings from explain analyze versions of the statements, on
the assumption that that would be quite comparable to 'normal' querying.
(please let me know if that introduces error).
# \dti+ nt0___1*
List of relations
Schema | Name | Type | Owner |
Table | Size
--------+----------------------------------+-------+----------+-----------------+--------
public | nt0___100000000 | table | aardvark |
| 4224 MB
public | nt0___100000000uniqueinclude_idx | index | aardvark |
nt0___100000000 | 3004 MB
(for what it's worth, I'm in favor of getting this patch into v11
although I can't say I followed the technical details too much)
thanks,
Erik Rijkers
#!/bin/env perl
#!/opt/perl-5.26/bin/perl
use strict;
use warnings;
use DBI;
use Time::HiRes qw/tv_interval gettimeofday/;
use Getopt::Long;
$| = 1;
our $PGPORT_VANILLA = 6511;
our $PGPORT_COVERING_INDEXES = 6976;
our $SQL_REPEAT = 251;
main();
exit;
sub size_unit {
1_000_000
}
sub main {
my $size = 100; # rowcount in millions; this $size variable determines the table used
GetOptions ("size=i" => \$size) or die("Error in command line arguments\n");
my $dbh_patched = connectdb_covering_indexes();
my $dbh_vanilla = connectdb_vanilla();
my $port_patched = check_debug_state( $dbh_patched );
my $port_vanilla = check_debug_state( $dbh_vanilla );
# create tables on patched instance
for my $n (1, 10, 100) { # , 250 ) {
my $rowcount = $n * size_unit();
create_tables($dbh_patched, $port_patched, $rowcount, my $overwrite = 0);
}
# create tables on vanilla instance
for my $n (1, 10, 100) { # , 250 ) {
my $rowcount = $n * size_unit();
create_tables($dbh_vanilla, $port_vanilla, $rowcount, my $overwrite = 0);
}
# print sprintf("-- Perl %vd\n", $^V)
# , "-- ", $dbh_vanilla->selectrow_arrayref( "select version()" )->[0], "\n"
# , "-- ", $dbh_patched->selectrow_arrayref( "select version()" )->[0], "\n" ;
my $c1 = 10000; ## 5000 + int(rand(5000)) + 1;
my $c3 = 20; ## 20 + int(rand( 30)) + 1;
# $c1 = 5000; ## 5000 + int(rand(5000)) + 1;
# $c3 = 20; ## 20 + int(rand( 30)) + 1;
# enable to vary WHERE-clause a little bit:
if (0) {
$c1 = 5000 + int(rand(5000)) + 1;
$c3 = 20 + int(rand( 30)) + 1;
}
my $vanilla = test1($dbh_vanilla, $port_vanilla, $size, $c1);
my $patched = test1($dbh_patched, $port_patched, $size, $c1);
print " "x84, sprintf( "%6.2f %% <- test1, patched / unpatched\n", ((average($patched) * 100) / average($vanilla)) );
# test2($dbh_vanilla, $port_vanilla, $size, $c1);
# test2($dbh_patched, $port_patched, $size, $c1);
# test3($dbh_vanilla, $port_vanilla, $size, $c1, $c3);
# test3($dbh_patched, $port_patched, $size, $c1, $c3);
$vanilla = test4($dbh_vanilla, $port_vanilla, $size, $c1, $c3);
$patched = test4($dbh_patched, $port_patched, $size, $c1, $c3);
print " "x84, sprintf( "%6.2f %% <- test4, patched / unpatched\n", ((average($patched) * 100) / average($vanilla)) );
print "\n";
}
sub test1 {
my ($dbh, $port, $size, $c1) = @_;
my @cond = (); # conditions
push @cond, 'c1 < ' . $c1;
my $rowcount = $size * size_unit();
my ($ot, $nt) = rowcount_to_tables($rowcount);
my $sql_ot = sql_select_only_key_columns($ot, \@cond);
my $sql_nt = sql_select_only_key_columns($nt, \@cond);
my $sql_display = substr($$sql_nt, index($$sql_nt, "select"));
my ($od,$op,$oe) = run_sql_series($dbh, $sql_ot, $SQL_REPEAT);
my ($nd,$np,$ne) = run_sql_series($dbh, $sql_nt, $SQL_REPEAT);
display_result($port, $rowcount, $od, $op, $oe, $nd, $np, $ne, $sql_display);
$ne;
}
sub test2 {
my ($dbh, $port, $size, $c1) = @_;
my @cond = (); # conditions
push @cond, 'c1 < ' . $c1;
my $rowcount = $size * size_unit();
my ($ot, $nt) = rowcount_to_tables($rowcount);
my $sql_ot = sql_select_all_columns($ot, \@cond);
my $sql_nt = sql_select_all_columns($nt, \@cond);
my $sql_display = substr($$sql_nt, index($$sql_nt, "select"));
my ($od,$op,$oe) = run_sql_series($dbh, $sql_ot, $SQL_REPEAT);
my ($nd,$np,$ne) = run_sql_series($dbh, $sql_nt, $SQL_REPEAT);
display_result($port, $rowcount, $od, $op, $oe, $nd, $np, $ne, $sql_display);
$ne;
}
sub test3 {
my ($dbh, $port, $size, $c1, $c3) = @_;
my @cond = (); # conditions
push @cond, 'c1 < ' . $c1;
push @cond, 'c3 < ' . $c3;
my $rowcount = $size * size_unit();
my ($ot, $nt) = rowcount_to_tables($rowcount);
my $sql_ot = sql_select_all_columns_query_both_key_and_included_col($ot, \@cond);
my $sql_nt = sql_select_all_columns_query_both_key_and_included_col($nt, \@cond);
my $sql_display = substr($$sql_nt, index($$sql_nt, "select"));
my ($od,$op,$oe) = run_sql_series($dbh, $sql_ot, $SQL_REPEAT);
my ($nd,$np,$ne) = run_sql_series($dbh, $sql_nt, $SQL_REPEAT);
display_result($port, $rowcount, $od, $op, $oe, $nd, $np, $ne, $sql_display);
$ne;
}
sub test4 {
my ($dbh, $port, $size, $c1, $c3) = @_;
my @cond = (); # conditions
push @cond, 'c1 < ' . $c1;
push @cond, 'c3 < ' . $c3;
my $rowcount = $size * size_unit();
my ($ot, $nt) = rowcount_to_tables($rowcount);
my $sql_ot = sql_select_only_key_columns_query_both_key_and_included_col($ot, \@cond);
my $sql_nt = sql_select_only_key_columns_query_both_key_and_included_col($nt, \@cond);
my $sql_display = substr($$sql_nt, index($$sql_nt, "select"));
my ($od,$op,$oe) = run_sql_series($dbh, $sql_ot, $SQL_REPEAT);
my ($nd,$np,$ne) = run_sql_series($dbh, $sql_nt, $SQL_REPEAT);
display_result($port, $rowcount, $od, $op, $oe, $nd, $np, $ne, $sql_display);
$ne;
}
sub display_result {
my ($port, $rowcount, $od, $op, $oe, $nd, $np, $ne, $sql_display) = @_;
print sprintf( "%9s %4d: %3sM rows "
. "Execution Time: "
. ($port == $PGPORT_VANILLA ? " (normal/normal)" : "(covered/normal)" )
. " %3.0f %% "
# . "Planning: %3.0f %% "
# . "(perl says %3.0f %%)"
. " exec avg:%5.2f"
. (" "x10)
. "%-70s"
. " -- %dx"
. "\n"
, ($port == $PGPORT_VANILLA ? "unpatched" : "patched" )
, $port
, $rowcount / 1_000_000
, (average($ne) * 100) / average($oe) # Pg reported 'Executing Time'
# , (average($np) * 100) / average($op) # Pg reported 'Planning Time'
# , (average($nd) * 100) / average($od) # perl Time::HiRes
, average($ne)
, $sql_display
, $SQL_REPEAT - 1
) ;
}
sub average {
my ($rd) = @_;
my $total = 0;
for my $elt (@$rd) {
$total += $elt;
}
($total / scalar(@$rd));
}
sub sql_select_only_key_columns {
my ($table, $rcond) = @_;
my @cols = ("c1", "c2");
sql_select($table, \@cols, $rcond);
# my $where = "where " . join(" and ", @$rcond);
# \( "explain (analyze,verbose,buffers,costs) select c1, c2 from $table $where" );
}
sub sql_select_all_columns {
my ($table, $rcond) = @_;
my @cols = ("*");
sql_select($table, \@cols, $rcond);
# my $where = "where " . join(" and ", @$rcond);
# \( "explain (analyze,verbose,buffers,costs) select * from $table $where" );
}
sub sql_select_all_columns_query_both_key_and_included_col {
# * Select all columns. Query on both key and included columns.
# * 1st uses IndexScan to recheck qual on c3 and get c3,c4.
# * 2nd uses IndexOnlyScan. It's faster. */
my ($table, $rcond) = @_;
my @cols = ("*");
sql_select($table, \@cols, $rcond);
## explain analyze select * from $oldt where c1<10000 and c3<20;
# my $where = "where " . join(" and ", @$rcond);
# \( "explain (analyze,verbose,buffers,costs) select * from $table $where" )
}
sub sql_select_only_key_columns_query_both_key_and_included_col {
# * Select only key columns. Query on both key and included columns.
# * 1st uses IndexScan to recheck qual on c3.
# * 2nd uses IndexOnlyScan. It's faster.
my ($table, $rcond) = @_;
my @cols = ("c1", "c2");
sql_select($table, \@cols, $rcond);
## explain analyze select c1, c2 from $oldt where c1<10000 and c3<20;
# my $where = "where " . join(" and ", @$rcond);
# \( "explain (analyze,verbose,buffers,costs) select c1, c2 from $table $where" )
}
sub sql_select {
my ($table, $rcols, $rcond) = @_;
my $slist = join(", ", @$rcols);
my $where = "where " . join(" and ", @$rcond);
# \( "explain (analyze,verbose,buffers,costs) select $slist from $table $where" );
\( "explain (analyze) select $slist from $table $where" );
}
sub run {
my ($dbh, $repeat, $sql) = @_;
my ($d,$p,$e) = run_sql_series($dbh, $sql, $repeat);
# display_result($rowcount, $od, $op, $oe, $nd, $np, $ne);
}
sub run_sql_series {
my ($dbh, $sql, $repeat) = @_;
my @d = (); # duration
my @p = (); # planning time
my @e = (); # executing time
# my $rowcount = $size * size_unit();
for my $x (0 .. $repeat) {
next if ($x == 0);
my ($d, $p, $e) = run_sql($dbh, $sql);
# push @d, $d; # duration (perl Time::HiRes)
# push @p, $p; # Planning Time
push @e, $e; # Executing Time
}
(\@d, \@p, \@e);
}
sub run_sql {
my ($dbh, $rsql) = @_;
my ($pt, $et);
my ($is, $ios) = (0, 0);
my $print = 0;
my $t0 = [gettimeofday];
my $sth = $dbh->prepare( $$rsql );
my $rc = $sth->execute();
while (my $rrow = $sth->fetchrow_arrayref) {
if ($print) {print $rrow->[0], "\n"; }
if ( $rrow->[0] =~ /^Planning Time: ([0-9.]+) ms/ ) {$pt = $1; }
elsif( $rrow->[0] =~ /^Execution Time: ([0-9.]+) ms/ ) {$et = $1; }
# if ( $rrow->[0] =~ /^Index Only Scan using / ) { $ios ++ }
# if ( $rrow->[0] =~ /^Index Scan using/ ) { $is ++ }
}
if ($print) { print "\n"; }
my $d = tv_interval($t0, [gettimeofday]);
($d, $pt, $et);
}
sub rowcount_to_tables {
my ($rowcount) = @_;
my $name_num = sprintf( "%12d", $rowcount );
$name_num =~ s{ }{_}g;;
my $oldt = "ot0" . $name_num;
my $newt = "nt0" . $name_num;
($oldt, $newt); # return the 2 table names
}
sub create_tables {
my ($dbh, $port, $rowcount, $overwrite) = @_;
my $name_num = sprintf( "%12d", $rowcount );
$name_num =~ s{ }{_}g;;
# table names:
my $oldt = "ot0" . $name_num;
my $newt = "nt0" . $name_num;
my $cnt_o = $dbh->selectrow_arrayref( "select count(*) from pg_class where relname = '$oldt'" )->[0];
my $cnt_n = $dbh->selectrow_arrayref( "select count(*) from pg_class where relname = '$newt'" )->[0];
if ($cnt_o == 1 && $cnt_n == 1) {
if ($overwrite) {
$dbh->do("drop table if exists $oldt;");
$dbh->do("drop table if exists $newt;");
}
else {
return 0;
}
}
printf( "port %5d create table for %3d rows -> %s\n", $port, $rowcount, $newt);
$dbh->do("
create table $oldt (c1 int, c2 int, c3 int, c4 int);
insert into $oldt (select x, 2*x, 3*x, 4 from generate_series(1, $rowcount) as x);
create unique index ${oldt}unique_idx on $oldt using btree (c1, c2);
");
$dbh->do("vacuum analyze $oldt;");
$dbh->do("
create table $newt (c1 int, c2 int, c3 int, c4 int);
insert into $newt (select x, 2*x, 3*x, 4 from generate_series(1, $rowcount) as x);
");
if ( $port == $PGPORT_COVERING_INDEXES) {
$dbh->do("create unique index ${newt}uniqueinclude_idx on $newt using btree (c1, c2) include (c3, c4)");
}
else {
$dbh->do("create unique index ${newt}unique_idx on $newt using btree (c1, c2)");
}
$dbh->do("vacuum analyze $newt;");
}
sub check_debug_state {
my ($dbh) = @_;
my $debug = $dbh->selectrow_arrayref( "select current_setting('debug_assertions')")->[0];
my $port = $dbh->selectrow_arrayref( "select current_setting('port')" )->[0];
if ($debug ne 'off') {
die "Bailing out (during performance tests, debug_assertions should be off)\n";
}
$port;
}
sub connectdb_covering_indexes {
my $dbh ;
my $port = $PGPORT_COVERING_INDEXES;
my $dsn = "dbi:Pg:dbname=postgres;port=$port;application_name=test_include2.pl;";
eval {
$dbh = DBI->connect($dsn, undef, undef, {RaiseError=>1, PrintError=>0, } ) ;
};
if ($@) {
print "error while connecting to the database on port [$port] - $!\n";
exit(1);
}
$dbh;
}
sub connectdb_vanilla {
my $dbh ;
my $port = $PGPORT_VANILLA;
my $dsn = "dbi:Pg:dbname=postgres;port=$port;application_name=test_include2.pl;";
eval {
$dbh = DBI->connect($dsn, undef, undef, {RaiseError=>1, PrintError=>0, } ) ;
};
if ($@) {
print "error while connecting to the database on port [$port] - $!\n";
exit(1);
}
$dbh;
}