I've recovered from messing up.

<snip>
> Please let me have a bit of time to diagnose this. 

I was completely messed up and walking on the wrong way. I looked
into the vacuum for UPDATEs, not DELETE's so it's quite resonable
to have such results.

The renewed test script attached shows the verbose output of
vacuum after the deletes. I had following output from it.

# I belive this runs for you..

| INFO: "t": found 989999 removable, 110 nonremovable row
|       versions in 6308 out of 10829 pages

On such a case of partially-scanned, lazy_scan_heap() tries to
estimate resulting num_tuples in vac_estimate_reltuples()
assuming the uniformity of tuple density, which failes for such a
a strong imbalance made by bulk updates.

Do you find any differences between what you will have and the
following I had?

| $ ./vactest.sh
| ### test1 ratio = 0.4
| INFO:  vacuuming "public.t"
| INFO:  "t": removed 399999 row versions in 2549 pages
| INFO:  "t": found 399999 removable, 194 nonremovable row versions in 2549 out 
of 10829 pages
| DETAIL:  0 dead row versions cannot be removed yet.
| There were 0 unused item pointers.
| 0 pages are entirely empty.
| CPU 0.00s/0.04u sec elapsed 0.04 sec.
| ### test1 ratio = 0.99
| INFO:  vacuuming "public.t"
| INFO:  "t": removed 989999 row versions in 6308 pages
| INFO:  "t": found 989999 removable, 110 nonremovable row versions in 6308 out 
of 10829 pages
| DETAIL:  0 dead row versions cannot be removed yet.
| There were 93 unused item pointers.
| 0 pages are entirely empty.
| CPU 0.00s/0.11u sec elapsed 0.24 sec.
| INFO:  "t": truncated 10829 to 6370 pages
| DETAIL:  CPU 0.00s/0.00u sec elapsed 0.01 sec.
| ### test1 ratio = 1.00
| INFO:  vacuuming "public.t"
| INFO:  "t": removed 999999 row versions in 6371 pages
| INFO:  "t": found 999999 removable, 1 nonremovable row versions in 6371 out 
of 10829 pages
| DETAIL:  0 dead row versions cannot be removed yet.
| There were 93 unused item pointers.
| 0 pages are entirely empty.
| CPU 0.00s/0.11u sec elapsed 0.20 sec.
| INFO:  "t": truncated 10829 to 6370 pages
| DETAIL:  CPU 0.01s/0.00u sec elapsed 0.27 sec.
|  # | del% | ## | pages | n_live_tup | tups est | tups real |  est/real  | 
bufs 
| 
---+------+----+-------+------------+----------+-----------+------------+------
|  1 |  0.4 |  1 | 10829 |     600001 |   764808 |    600001 |      1.275 | 2549
|  1 | 0.99 |  1 |  6370 |      10001 |   417600 |     10001 |     41.756 | 6308
|  1 |    1 |  1 |  6370 |     411673 |   411673 |         1 | 411673.000 | 6371
| (3 rows)

regards,

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
#! /bin/bash

dbname="postgres"
function insert_result() {
        psql ${dbname} -f - > /dev/null <<EOF
insert into result select $1, $2, $3, c.relpages, s.n_live_tup, c.reltuples, 
(select count(*) from t), reltuples::float / (select count(*) from t) from 
pg_stat_user_tables s, pg_class c where s.relname = 't' and c.relname = 't';
EOF
}

function vac_with_bufs() {
        local f=`mktemp vactest_XXXX`
        psql ${dbname} -c "vacuum verbose t" 2>>$f 1>/dev/null
        cat $f 1>&2
        cat $f | egrep "INFO: *\"t\": found " | sed -e 's/^.* versions in 
\([0-9]*\) .*$/\1/'
        rm $f
}

function update_result_bufs() {
        local test_no=$1
        local delratio=$2
        local vac_no=$3
        local bufs=$4

        psql ${dbname} -c "update result set bufs=${bufs} where \"#\"=$test_no 
and \"del%\"=$delratio and \"##\"=$vac_no" >/dev/null
}

function store_result() {
        local test_no=$1
        local delratio=$2
        scanned_bufs=`vac_with_bufs`
        insert_result $test_no $delratio 1
        update_result_bufs $test_no $delratio 1 $scanned_bufs
#       scanned_bufs=`vac_with_bufs`
#       insert_result $test_no $delratio 2
#       update_result_bufs $test_no $delratio 2 $scanned_bufs
}


function test1() {
        local delratio=$1

        echo "### test1 ratio = $delratio"
        psql ${dbname} -f - > /dev/null <<EOF
drop table if exists t;
create table t (a int, b int, c int, d int default 0, e int default 0, f int 
default 0);
insert into t (select a, (random() * 100000)::int from generate_series((select 
count(*) from t) + 1, $nrows) a);
update t set b = b + 1 where a <  (select count(*) from t) * 0.7;
vacuum t;
delete from t where a < (select count(*) from t) * $delratio
EOF
        store_result 1 $delratio
}

function test2() {
        local delratio=$1

        echo "### test2 ratio = $delratio"
        psql ${dbname} -f - > /dev/null <<EOF
drop table if exists t;
create table t (a int, b text);
insert into t (select a, 'abcdefg' from generate_series((select count(*) from 
t) + 1, $nrows) a);
update t set b = repeat('abcdefghij', 250) where a <  (select count(*) from t) 
* 0.7;
vacuum t;
delete from t where a < (select count(*) from t) * $delratio;
EOF
        store_result 2 $delratio
}

psql ${dbname} -f - > /dev/null <<EOF
drop table if exists result;
create table result ("#" int, "del%" float, "##" int, pages int, n_live_tup 
int, "tups est" int, "tups real" int, "est/real" numeric(10, 3), bufs int 
default 0);
EOF
nrows=1000000
# test1 0.1
# test1 0.2
# test1 0.3
test1 0.4
# test1 0.5
# test1 0.6
# test1 0.7
# test1 0.8
# test1 0.9
# test1 0.95
test1 0.99
test1 1.00

# nrows=100000
# test2 0.1
# test2 0.2
# test2 0.3
# test2 0.4
# test2 0.5
# test2 0.6
# test2 0.7
# test2 0.8
# test2 0.9
# test2 0.95
# test2 0.99
# test2 1.00

psql ${dbname} -c 'select * from result order by "#", "del%", "##"'
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to