Re: WIP: Covering + unique indexes. (the good and the bad)

Erik Rijkers Sat, 07 Apr 2018 04:59:02 -0700

On 2018-04-06 20:08, Alexander Korotkov wrote:


[0001-Covering-v15.patch]

After some more testing I notice there is also a down-side/slow-down tothis patch that is not so bad but more than negligible, and I don'tthink it has been mentioned (but I may have missed something in thisthread that's now been running for 1.5 year, not to mention thetangential btree-thread(s)).

I attach my test-program, which compares master (this morning) withcovered_indexes (warning: it takes a while to generate the used tables).


The test tables are created as:
  create table $t (c1 int, c2 int, c3 int, c4 int);

insert into $t (select x, 2*x, 3*x, 4 from generate_series(1,$rowcount) as x);create unique index ${t}uniqueinclude_idx on $t using btree (c1, c2)include (c3, c4);


or for HEAD, just:
  create unique index ${t}unique_idx on $t using btree (c1, c2);


Here is typical output (edited a bit to prevent email-mangling):

test1:

-- explain analyze select c1, c2 from nt0___100000000 where c1 < 10000-- 250xunpatched 6511: 100M rows Execution Time: (normal/normal) 98 % execavg: 2.44patched 6976: 100M rows Execution Time: (covered/normal) 108 % execavg: 2.67test1 patched /unpatched: 109.49 %


test4:

-- explain analyze select c1, c2 from nt0___100000000 where c1 < 10000and c3 < 20unpatched 6511: 100M rows Execution Time: (normal/normal) 95 % execavg: 1.56patched 6976: 100M rows Execution Time: (covered/normal) 60 % execavg: 0.95test4 patched /unpatched: 60.83 %

So the main good thing is that 60%, a good improvement -- but that ~109%(a slow-down) is also quite repeatable.

(there are a more goodies from the patch (like improved insert-speed)but I just wanted to draw attention to this particular slow-down too)

I took all timings from explain analyze versions of the statements, onthe assumption that that would be quite comparable to 'normal' querying.(please let me know if that introduces error).



# \dti+ nt0___1*
                                           List of relations

--------+----------------------------------+-------+----------+-----------------+--------

public | nt0___100000000 | table | aardvark || 4224 MBpublic | nt0___100000000uniqueinclude_idx | index | aardvark |nt0___100000000 | 3004 MB

(for what it's worth, I'm in favor of getting this patch into v11although I can't say I followed the technical details too much)



thanks,


Erik Rijkers

#!/bin/env perl
#!/opt/perl-5.26/bin/perl
use strict;
use warnings;
use DBI;
use Time::HiRes qw/tv_interval gettimeofday/;
use Getopt::Long;
$| = 1;
our $PGPORT_VANILLA          =  6511;
our $PGPORT_COVERING_INDEXES =  6976;
our $SQL_REPEAT = 251;

main();
exit;

sub size_unit { 
  1_000_000
}
sub main {

  my $size   = 100;     #    rowcount in millions; this $size variable determines the table used

  GetOptions ("size=i" => \$size)   or die("Error in command line arguments\n");

  my $dbh_patched  = connectdb_covering_indexes();
  my $dbh_vanilla  = connectdb_vanilla();
  my $port_patched = check_debug_state( $dbh_patched  );
  my $port_vanilla = check_debug_state( $dbh_vanilla );

  # create tables on patched instance
  for my $n (1, 10, 100) { # , 250 ) {
    my $rowcount = $n * size_unit();
    create_tables($dbh_patched, $port_patched, $rowcount, my $overwrite = 0);
  }

  # create tables on vanilla instance
  for my $n (1, 10, 100) { # , 250 ) {
    my $rowcount = $n * size_unit();
    create_tables($dbh_vanilla, $port_vanilla, $rowcount, my $overwrite = 0);
  }

# print sprintf("-- Perl %vd\n", $^V)
#    , "-- ", $dbh_vanilla->selectrow_arrayref( "select version()" )->[0], "\n"
#    , "-- ", $dbh_patched->selectrow_arrayref( "select version()" )->[0], "\n" ;

  my $c1     = 10000; ##  5000 + int(rand(5000)) + 1;
  my $c3     =    20; ##   20 + int(rand(  30)) + 1;

  #  $c1     =  5000; ##  5000 + int(rand(5000)) + 1;
  #  $c3     =    20; ##   20 + int(rand(  30)) + 1;

  # enable to vary WHERE-clause a little bit:
  if (0) {
     $c1     = 5000 + int(rand(5000)) + 1;
     $c3     =   20 + int(rand(  30)) + 1;
  }
  

  my $vanilla = test1($dbh_vanilla, $port_vanilla, $size, $c1);
  my $patched = test1($dbh_patched, $port_patched, $size, $c1);
  print " "x84, sprintf( "%6.2f %%  <- test1, patched / unpatched\n",  ((average($patched) * 100) / average($vanilla)) );

# test2($dbh_vanilla, $port_vanilla, $size, $c1);
# test2($dbh_patched, $port_patched, $size, $c1);

# test3($dbh_vanilla, $port_vanilla, $size, $c1, $c3);
# test3($dbh_patched, $port_patched, $size, $c1, $c3);

  $vanilla = test4($dbh_vanilla, $port_vanilla, $size, $c1, $c3);
  $patched = test4($dbh_patched, $port_patched, $size, $c1, $c3);
  print " "x84, sprintf( "%6.2f %%  <- test4, patched / unpatched\n",  ((average($patched) * 100) / average($vanilla)) );

  print "\n";

}
sub test1 {
  my ($dbh, $port, $size, $c1) = @_;
  my @cond = ();  # conditions
  push @cond, 'c1 < ' . $c1;
  my $rowcount = $size * size_unit();
  my ($ot, $nt) = rowcount_to_tables($rowcount);
  my $sql_ot = sql_select_only_key_columns($ot, \@cond);
  my $sql_nt = sql_select_only_key_columns($nt, \@cond);
  my $sql_display = substr($$sql_nt, index($$sql_nt, "select"));
  my ($od,$op,$oe) = run_sql_series($dbh, $sql_ot, $SQL_REPEAT);
  my ($nd,$np,$ne) = run_sql_series($dbh, $sql_nt, $SQL_REPEAT);
  display_result($port, $rowcount, $od, $op, $oe, $nd, $np, $ne, $sql_display);
  $ne;
}
sub test2 {
  my ($dbh, $port, $size, $c1) = @_;
  my @cond = ();  # conditions
  push @cond, 'c1 < ' . $c1;
  my $rowcount = $size * size_unit();
  my ($ot, $nt) = rowcount_to_tables($rowcount);
  my $sql_ot = sql_select_all_columns($ot, \@cond);
  my $sql_nt = sql_select_all_columns($nt, \@cond);
  my $sql_display = substr($$sql_nt, index($$sql_nt, "select"));
  my ($od,$op,$oe) = run_sql_series($dbh, $sql_ot, $SQL_REPEAT);
  my ($nd,$np,$ne) = run_sql_series($dbh, $sql_nt, $SQL_REPEAT);
  display_result($port, $rowcount, $od, $op, $oe, $nd, $np, $ne, $sql_display);
  $ne;
}
sub test3 {
  my ($dbh, $port, $size, $c1, $c3) = @_;
  my @cond = ();  # conditions
  push @cond, 'c1 < ' . $c1;
  push @cond, 'c3 < ' . $c3;
  my $rowcount = $size * size_unit();
  my ($ot, $nt) = rowcount_to_tables($rowcount);
  my $sql_ot = sql_select_all_columns_query_both_key_and_included_col($ot, \@cond);
  my $sql_nt = sql_select_all_columns_query_both_key_and_included_col($nt, \@cond);
  my $sql_display = substr($$sql_nt, index($$sql_nt, "select"));
  my ($od,$op,$oe) = run_sql_series($dbh, $sql_ot, $SQL_REPEAT);
  my ($nd,$np,$ne) = run_sql_series($dbh, $sql_nt, $SQL_REPEAT);
  display_result($port, $rowcount, $od, $op, $oe, $nd, $np, $ne, $sql_display);
  $ne;
}
sub test4 {
  my ($dbh, $port, $size, $c1, $c3) = @_;
  my @cond = ();  # conditions
  push @cond, 'c1 < ' . $c1;
  push @cond, 'c3 < ' . $c3;
  my $rowcount = $size * size_unit();
  my ($ot, $nt) = rowcount_to_tables($rowcount);
  my $sql_ot = sql_select_only_key_columns_query_both_key_and_included_col($ot, \@cond);
  my $sql_nt = sql_select_only_key_columns_query_both_key_and_included_col($nt, \@cond);
  my $sql_display = substr($$sql_nt, index($$sql_nt, "select"));
  my ($od,$op,$oe) = run_sql_series($dbh, $sql_ot, $SQL_REPEAT);
  my ($nd,$np,$ne) = run_sql_series($dbh, $sql_nt, $SQL_REPEAT);
  display_result($port, $rowcount, $od, $op, $oe, $nd, $np, $ne, $sql_display);
  $ne;
}
sub display_result {

  my ($port, $rowcount, $od, $op, $oe, $nd, $np, $ne, $sql_display) = @_;

  print sprintf( "%9s %4d: %3sM rows "
               . "Execution Time: "
               . ($port == $PGPORT_VANILLA ? " (normal/normal)" : "(covered/normal)" ) 
               . " %3.0f %%   "
           #   . "Planning: %3.0f %%   "
           #   . "(perl says %3.0f %%)"
               . " exec avg:%5.2f"
               . (" "x10)
               . "%-70s"
               . "  -- %dx"
               
               . "\n"
                      , ($port == $PGPORT_VANILLA ? "unpatched" : "patched" )
                      , $port
                      , $rowcount / 1_000_000
                      , (average($ne) * 100) / average($oe)  # Pg reported 'Executing Time'
            #         , (average($np) * 100) / average($op)  # Pg reported 'Planning Time'
            #         , (average($nd) * 100) / average($od)  # perl Time::HiRes
                      , average($ne)
                      , $sql_display
                      , $SQL_REPEAT - 1
                ) ;

}
sub average {
  my ($rd) = @_;
  my $total = 0;
  for my $elt (@$rd) { 
    $total += $elt;
  }
  ($total / scalar(@$rd));
}
sub sql_select_only_key_columns {
  my ($table, $rcond) = @_;
  my @cols = ("c1", "c2");
  sql_select($table, \@cols, $rcond);
#  my $where = "where " . join(" and ", @$rcond);
#  \( "explain (analyze,verbose,buffers,costs) select c1, c2 from $table $where" );
}
sub sql_select_all_columns {
  my ($table, $rcond) = @_;
  my @cols = ("*");
  sql_select($table, \@cols, $rcond);
#  my $where = "where " . join(" and ", @$rcond);
#  \( "explain (analyze,verbose,buffers,costs) select * from $table $where" );
}
sub sql_select_all_columns_query_both_key_and_included_col {

# * Select all columns. Query on both key and included columns.
# * 1st uses IndexScan to recheck qual on c3 and get c3,c4.
# * 2nd uses IndexOnlyScan. It's faster.  */

  my ($table, $rcond) = @_;
  my @cols = ("*");
  sql_select($table, \@cols, $rcond);
## explain analyze select * from $oldt where c1<10000 and c3<20;
 # my $where = "where " . join(" and ", @$rcond);
 # \( "explain (analyze,verbose,buffers,costs) select * from $table $where" )
}
sub sql_select_only_key_columns_query_both_key_and_included_col {
# * Select only key columns. Query on both key and included columns.
# * 1st uses IndexScan to recheck qual on c3.
# * 2nd uses IndexOnlyScan. It's faster.

  my ($table, $rcond) = @_;
  my @cols = ("c1", "c2");
  sql_select($table, \@cols, $rcond);
## explain analyze select c1, c2 from $oldt where c1<10000 and c3<20;
#  my $where = "where " . join(" and ", @$rcond);
#  \( "explain (analyze,verbose,buffers,costs) select c1, c2 from $table $where" )
}
sub sql_select {
  my ($table, $rcols, $rcond) = @_;
  my $slist = join(", ", @$rcols);
  my $where = "where " . join(" and ", @$rcond);
# \( "explain (analyze,verbose,buffers,costs) select $slist from $table $where" );
  \( "explain (analyze) select $slist from $table $where" );
}
sub run {
  my ($dbh, $repeat, $sql) = @_;
  my ($d,$p,$e) = run_sql_series($dbh, $sql, $repeat);
# display_result($rowcount, $od, $op, $oe, $nd, $np, $ne);
}
sub run_sql_series {
  my ($dbh, $sql, $repeat) = @_;
  my @d = (); # duration
  my @p = (); # planning time
  my @e = (); # executing time
# my $rowcount = $size * size_unit(); 
  for my $x (0 .. $repeat) {
    next if ($x == 0);
    my ($d, $p, $e) = run_sql($dbh, $sql);
  # push @d, $d;  #  duration (perl Time::HiRes) 
  # push @p, $p;  #  Planning Time
    push @e, $e;  #  Executing Time
  }
  (\@d, \@p, \@e);
}
sub run_sql {
  my ($dbh, $rsql) = @_;
  my ($pt, $et);
  my ($is, $ios) = (0, 0);
  my $print = 0;
  my $t0 = [gettimeofday];
  my $sth = $dbh->prepare( $$rsql );
  my $rc  = $sth->execute();
  while (my $rrow = $sth->fetchrow_arrayref) {
     if ($print) {print $rrow->[0], "\n"; }
     if   ( $rrow->[0] =~ /^Planning Time: ([0-9.]+) ms/  ) {$pt = $1; }
     elsif( $rrow->[0] =~ /^Execution Time: ([0-9.]+) ms/ ) {$et = $1; }
  #  if   ( $rrow->[0] =~ /^Index Only Scan using /  ) { $ios ++ }
  #  if   ( $rrow->[0] =~ /^Index Scan using/        ) { $is  ++ }
  }
  if ($print) { print "\n"; }
  my $d = tv_interval($t0, [gettimeofday]);
  ($d, $pt, $et);
}
sub rowcount_to_tables {
  my ($rowcount) = @_;
  my $name_num = sprintf( "%12d", $rowcount );
  $name_num =~ s{ }{_}g;;
  my $oldt = "ot0" . $name_num;
  my $newt = "nt0" . $name_num;
  ($oldt, $newt); # return the 2 table names
}
sub create_tables {

  my ($dbh, $port, $rowcount, $overwrite) = @_;

  my $name_num = sprintf( "%12d", $rowcount );
  $name_num =~ s{ }{_}g;;

  # table names:
  my $oldt = "ot0" . $name_num;
  my $newt = "nt0" . $name_num;

  my $cnt_o = $dbh->selectrow_arrayref( "select count(*) from pg_class where relname = '$oldt'" )->[0];
  my $cnt_n = $dbh->selectrow_arrayref( "select count(*) from pg_class where relname = '$newt'" )->[0];

  if ($cnt_o == 1 && $cnt_n == 1) {
    if ($overwrite) {
      $dbh->do("drop table if exists $oldt;");
      $dbh->do("drop table if exists $newt;");
    }
    else {
      return 0;
    }
  }

  printf( "port %5d  create table for %3d rows -> %s\n", $port, $rowcount, $newt);

  $dbh->do("
    create table $oldt (c1 int, c2 int, c3 int, c4 int);
    insert into $oldt (select x, 2*x, 3*x, 4 from generate_series(1, $rowcount) as x);
    create unique index ${oldt}unique_idx on $oldt using btree (c1, c2);
  ");
  $dbh->do("vacuum analyze $oldt;");

  $dbh->do("
    create table $newt (c1 int, c2 int, c3 int, c4 int);
    insert into $newt (select x, 2*x, 3*x, 4 from generate_series(1, $rowcount) as x);
  ");
  if ( $port == $PGPORT_COVERING_INDEXES) {
    $dbh->do("create unique index ${newt}uniqueinclude_idx on $newt using btree (c1, c2) include (c3, c4)");
  }
  else {
    $dbh->do("create unique index ${newt}unique_idx on $newt using btree (c1, c2)");
  }
  $dbh->do("vacuum analyze $newt;");
}
sub check_debug_state {
  my ($dbh) = @_;
  my $debug = $dbh->selectrow_arrayref( "select current_setting('debug_assertions')")->[0];
  my $port  = $dbh->selectrow_arrayref( "select current_setting('port')"            )->[0];
  if ($debug ne 'off') {
      die "Bailing out (during performance tests, debug_assertions should be off)\n"; 
  }
  $port;
}
sub connectdb_covering_indexes {
  my $dbh ;
  my $port = $PGPORT_COVERING_INDEXES;
  my $dsn = "dbi:Pg:dbname=postgres;port=$port;application_name=test_include2.pl;";
  eval {
    $dbh = DBI->connect($dsn, undef, undef, {RaiseError=>1, PrintError=>0, } ) ; 
  };
  if ($@) {
    print "error while connecting to the database on port [$port] - $!\n";
    exit(1);
  }
  $dbh;
}
sub connectdb_vanilla {
  my $dbh ;
  my $port = $PGPORT_VANILLA;
  my $dsn = "dbi:Pg:dbname=postgres;port=$port;application_name=test_include2.pl;";
  eval {
    $dbh = DBI->connect($dsn, undef, undef, {RaiseError=>1, PrintError=>0, } ) ; 
  };
  if ($@) {
    print "error while connecting to the database on port [$port] - $!\n";
    exit(1);
  }
  $dbh;
}

Re: WIP: Covering + unique indexes. (the good and the bad)

Reply via email to