> On Jul 20, 2021, at 11:28 AM, Tomas Vondra <[email protected]>
> wrote:
>
> Tomas Vondra
> EnterpriseDB: http://www.enterprisedb.com
> The Enterprise PostgreSQL Company
> <0001-Handling-Expr-op-Expr-clauses-in-extended-stats-20210720.patch>
Hi Tomas,
I tested this patch against master looking for types of clauses that uniformly
get worse with the patch applied. I found some.
The tests are too large to attach, but the scripts that generate them are not.
To perform the tests:
git checkout master
perl ./gentest.pl > src/test/regress/sql/gentest.sql
cat /dev/null > src/test/regress/expected/gentest.out
echo "test: gentest" >> src/test/regress/parallel_schedule
./configure && make && make check
cp src/test/regress/results/gentest.out
src/test/regress/expected/gentest.out
patch -p 1 <
0001-Handling-Expr-op-Expr-clauses-in-extended-stats-20210720.patch
make check
cat src/test/regress/regression.diffs | perl ./check.pl
This shows patterns of conditions that get worse, such as:
better:0, worse:80: A < B and A <> A or not A < A
better:0, worse:80: A < B and not A <= A or A <= A
better:0, worse:80: A < B or A = A
better:0, worse:80: A < B or A = A or not A >= A
better:0, worse:80: A < B or A >= A
better:0, worse:80: A < B or A >= A and not A <> A
better:0, worse:80: A < B or not A < A
better:0, worse:80: A < B or not A <> A
better:0, worse:80: A < B or not A <> A or A <= A
better:0, worse:80: A < B or not A >= A or not A < A
It seems things get worse when the conditions contain a column compared against
itself. I suspect that is being handled incorrectly.
#!/usr/bin/perl
use strict;
use warnings;
my ($query, $where, $before, $after, @before, @after);
my $better = 0;
my $worse = 0;
my %where = ();
my %gripe;
while (<>)
{
if (/from check_estimated_rows\('(.*)'\)/)
{
$query = $1;
if ($query =~ m/where (.*)$/)
{
$where = $1;
my %columns = map { $_ => 1 } ($where =~ m/(column_\d+)/g);
my @normal = ('A'..'Z');
my @columns = sort keys %columns;
for my $i (0..$#columns)
{
my $old = $columns[$i];
my $new = $normal[$i];
$where =~ s/\b$old\b/$new/g;
}
}
}
elsif (m/^-(\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(\d+))\s*$/)
{
($before, @before) = ($1, $2, $3, $4);
}
elsif (m/^\+(\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(\d+))\s*$/)
{
($after, @after) = ($1, $2, $3, $4);
$where{$where}->{better} ||= 0;
$where{$where}->{worse} ||= 0;
# Don't count the difference as meaningful unless we're more than 5 better or worse than before
if ($after[2] > 5 + $before[2])
{
$worse++;
$where{$where}->{worse}++;
}
elsif ($after[2] + 5 < $before[2])
{
$better++;
$where{$where}->{better}++;
}
if (!exists $gripe{$where} && $where{$where}->{better} == 0 && $where{$where}->{worse} > 50)
{
print "TERRIBLE:\n";
print "\tQUERY: $query\n";
print "\tBEFORE: $before\n";
print "\tAFTER: $after\n\n";
$gripe{$query} = 1;
}
}
}
foreach my $where (sort keys %where)
{
if ($where{$where}->{better} < $where{$where}->{worse})
{
print("better:", $where{$where}->{better}, ", worse:", $where{$where}->{worse}, ": $where\n");
}
}
print("\n\nTOTAL:\n");
print("\tbetter: $better\n");
print("\tworse: $worse\n");
#!/usr/bin/perl
use strict;
use warnings;
our ($tblnum, $tblname, $colnum, $colname);
# Generate the where clauses to be used on all tables
our (%wherepattern1, %wherepattern2, %wherepattern3);
my @ops = (">", "<", ">=", "<=", "=", "<>");
my @conj = ("and", "or", "and not", "or not");
for (1..100)
{
my $op1 = $ops[int(rand(@ops))];
my $op2 = $ops[int(rand(@ops))];
my $op3 = $ops[int(rand(@ops))];
my $conj1 = $conj[int(rand(@conj))];
my $conj2 = $conj[int(rand(@conj))];
$wherepattern1{"\%s $op1 \%s"} = 1;
$wherepattern2{"\%s $op1 \%s $conj1 \%s $op2 \%s"} = 1;
$wherepattern3{"\%s $op1 \%s $conj1 \%s $op2 \%s $conj2 \%s $op3 \%s"} = 1;
}
sub next_table {
$tblnum++;
$tblname = "table_$tblnum";
$colnum = 0;
$colname = "column_$colnum";
}
sub next_column {
$colnum++;
$colname = "column_$colnum";
}
for my $colcnt (2..10)
{
next_table();
print("CREATE TABLE $tblname (\n");
for (1..$colcnt-1)
{
next_column();
print("\t$colname INTEGER,\n");
}
next_column();
print("\t$colname INTEGER\n);\n");
print("INSERT INTO $tblname (SELECT ",
join(", ", map { "gs-$_" } (1..$colcnt)),
" FROM generate_series(1,100) gs);\n");
print("VACUUM FREEZE $tblname;\n");
for my $colmax (2..$colcnt)
{
print("CREATE STATISTICS ${tblname}_stats_${colmax} ON ",
join(", ", map { "column_$_" } (1..$colmax)),
" FROM $tblname;\n");
}
print("ANALYZE $tblname;\n");
}
# Restart the table sequence
$tblnum = 0;
for my $colcnt (2..10)
{
next_table();
for (1..100)
{
my $a = sprintf("column_%d", 1+int(rand($colcnt)));
my $b = sprintf("column_%d", 1+int(rand($colcnt)));
my $c = sprintf("column_%d", 1+int(rand($colcnt)));
foreach my $where1 (keys %wherepattern1)
{
my $whereclause1 = sprintf($where1, $a, $b);
print("
select actual, estimated, abs(actual - estimated) AS misestimate
from check_estimated_rows('select * from $tblname where $whereclause1');");
}
foreach my $where2 (keys %wherepattern2)
{
my $whereclause1 = sprintf($where2, $a, $b, $a, $c);
print("
select actual, estimated, abs(actual - estimated) AS misestimate
from check_estimated_rows('select * from $tblname where $whereclause1');");
}
foreach my $where3 (keys %wherepattern3)
{
my $whereclause1 = sprintf($where3, $a, $b, $a, $c, $c, $a);
print("
select actual, estimated, abs(actual - estimated) AS misestimate
from check_estimated_rows('select * from $tblname where $whereclause1');");
}
}
}
—
Mark Dilger
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company