On 07/13/2011 08:17 AM, Tom Lane wrote:
"Kevin Grittner"<kevin.gritt...@wicourts.gov>  writes:
...  Jeff does raise a good point, though -- it seems odd
that WAL-logging of this pruning would need to be synchronous.
Yeah, we need to get to the bottom of that.  If there's enough
shared_buffer space then it shouldn't be.
This thread has gotten long, let me try to compile all the relevant information in one email.

\d test
            Table "lars.test"
    Column    |     Type      | Modifiers
--------------+---------------+-----------
 tenant       | character(15) |
 created_by   | character(15) |
 created_date | date          |
Indexes:
    "i1" btree (tenant)
    "i11" btree (created_by)

-- Table is populated like this:
------------------------------------
create table test(tenant char(15), created_by char(15), created_date date);
insert into test values('x', 'y','2011-6-30');
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test; -- 256k rows
update test set tenant = lpad((random()*10000)::int::text,15,'0'), created_by = lpad((random()*10000)::int::text,15,'0');
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test; -- 32m rows
create index i1 on test(tenant);
create index i11 on test(created_by);
vacuum analyze;

-- I doubt it needs that many rows.

=> SELECT
   'version'::text AS "name",
   version() AS "current_setting"
 UNION ALL
 SELECT
   name,current_setting(name)
 FROM pg_settings
 WHERE NOT source='default' AND NOT name IN
   ('config_file','data_directory','hba_file','ident_file',
   'log_timezone','DateStyle','lc_messages','lc_monetary',
   'lc_numeric','lc_time','timezone_abbreviations',
   'default_text_search_config','application_name',
   'transaction_deferrable','transaction_isolation',
   'transaction_read_only');

name | current_setting
------------------------------+------------------------------------------------------------------------------------------------------------------
version | PostgreSQL 9.1beta2 on x86_64-unknown-linux-gnu, compiled by gcc (GCC) 4.4.4 20100726 (Red Hat 4.4.4-13), 64-bit
 bgwriter_delay               | 10ms
 bgwriter_lru_maxpages        | 1000
 checkpoint_completion_target | 0.9
 checkpoint_segments          | 128
 client_encoding              | UTF8
 effective_cache_size         | 64GB
 lc_collate                   | en_US.UTF-8
 lc_ctype                     | en_US.UTF-8
 log_checkpoints              | on
 log_line_prefix              | %m
 maintenance_work_mem         | 2GB
 max_connections              | 100
 max_stack_depth              | 2MB
 server_encoding              | UTF8
 shared_buffers               | 20GB
 TimeZone                     | UTC
 wal_buffers                  | 16MB
 work_mem                     | 1GB
(19 rows)


-- Now:
----------
=> select count(*) from test where tenant = '000000000000001' and created_date = '2011-6-30';
 count
-------
  3712
(1 row)

=> SELECT c.relname, isdirty, count(*) * 8192 / 1024/1024 AS buffers
FROM pg_buffercache b, pg_class c
WHERE b.relfilenode = pg_relation_filenode(c.oid)
AND b.reldatabase IN (0, (SELECT oid FROM pg_database WHERE datname = current_database()))
GROUP BY c.relname,isdirty
ORDER BY 3 DESC
LIMIT 6;

            relname            | isdirty | buffers
-------------------------------+---------+---------
 test                          | t       |      14
 pg_opclass_oid_index          | f       |       0
 pg_rewrite                    | f       |       0
 i11                           | t       |       0
 pg_rewrite_rel_rulename_index | f       |       0
 pg_constraint                 | f       |       0

-- Just started the server, no nothing else is cached, yet

-- it doesn't matter if that update is executed by the same or another backend. => update test set created_by = '000000000000001' where tenant = '000000000000001';
UPDATE 3712
=> select count(*) from test where tenant = '000000000000001' and created_date = '2011-6-30';
 count
-------
  3712
(1 row)

strace now shows:
-------------------------
Q\0\0\0_select count(*) from test w"..., 8192, 0, NULL, NULL) = 96
gettimeofday({1310579341, 854669}, NULL) = 0
lseek(38, 0, SEEK_END)                  = 187465728
lseek(41, 0, SEEK_END)                  = 115564544
lseek(43, 0, SEEK_END)                  = 101040128
lseek(38, 0, SEEK_END)                  = 187465728
brk(0x1a85000)                          = 0x1a85000
brk(0x1a5d000)                          = 0x1a5d000
lseek(52, 12443648, SEEK_SET)           = 12443648
write(52, "f\320\1\0\1\0\0\0\276\0\0\0\0\340\275\0016\0\0\0S\0\0\0\4\0cY\3\0\0\0"..., 122880) = 122880
fdatasync(52)                           = 0
semop(688135, {{9, 1, 0}}, 1)           = 0
gettimeofday({1310579341, 889163}, NULL) = 0
sendto(5, "\2\0\0\0\350\0\0\0\1@\0\0\2\0\0\0\1\0\0\0\0\0\0\0\376\200\0\0<\0\2\0"..., 232, 0, NULL, 0) = 232 sendto(6, "T\0\0\0\36\0\1count\0\0\0\0\0\0\0\0\0\0\24\0\10\377\377\377\377\0\0D"..., 66, 0, NULL, 0) = 66

-- fd 52 was opened earlier to the WAL file in pg_xlog.

=> select count(*) from test where tenant = '000000000000001' and created_date = '2011-6-30';
  count
-------
  3712
(1 row)

now strace shows:
-------------------------
Q\0\0\0_select count(*) from test w"..., 8192, 0, NULL, NULL) = 96
gettimeofday({1310579380, 862637}, NULL) = 0
lseek(38, 0, SEEK_END)                  = 187465728
lseek(41, 0, SEEK_END)                  = 115564544
lseek(43, 0, SEEK_END)                  = 101040128
lseek(38, 0, SEEK_END)                  = 187465728
gettimeofday({1310579380, 868149}, NULL) = 0
sendto(5, "\2\0\0\0\350\0\0\0\1@\0\0\2\0\0\0\1\0\0\0\0\0\0\0\376\200\0\0@\0\2\0"..., 232, 0, NULL, 0) = 232 sendto(6, "T\0\0\0\36\0\1count\0\0\0\0\0\0\0\0\0\0\24\0\10\377\377\377\377\0\0D"..., 66, 0, NULL, 0) = 66

=> update test set created_by = '000000000000001' where tenant = '000000000000001';
UPDATE 3712
=> select count(*) from test where tenant = '000000000000001' and created_date = '2011-6-30';
  count
-------
  3712
(1 row)

strace again indicates that a WAL log is written by the backend by the select:
---------------------------------------------------------------------------------------
Q\0\0\0_select count(*) from test w"..., 8192, 0, NULL, NULL) = 96
gettimeofday({1310579663, 890641}, NULL) = 0
lseek(38, 0, SEEK_END)                  = 187596800
lseek(41, 0, SEEK_END)                  = 115638272
lseek(43, 0, SEEK_END)                  = 101122048
lseek(38, 0, SEEK_END)                  = 187596800
brk(0x1a85000)                          = 0x1a85000
brk(0x1a5d000)                          = 0x1a5d000
semop(688135, {{3, -1, 0}}, 1)          = 0
lseek(52, 10223616, SEEK_SET)           = 10223616
write(52, "f\320\1\0\1\0\0\0\276\0\0\0\0\0\234\2\16\0\0\0C(\0\0\1\0\0\0~\0\177\0"..., 16384) = 16384
fdatasync(52)                           = 0
gettimeofday({1310579663, 921932}, NULL) = 0
sendto(5, "\2\0\0\0\350\0\0\0\1@\0\0\2\0\0\0\1\0\0\0\0\0\0\0\376\200\0\0?\0\2\0"..., 232, 0, NULL, 0) = 232 sendto(6, "T\0\0\0\36\0\1count\0\0\0\0\0\0\0\0\0\0\24\0\10\377\377\377\377\0\0D"..., 66, 0, NULL, 0) = 66

=> explain (analyze on, buffers on) select count(*) from test where tenant = '000000000000001' and created_date = '2011-6-30';
                                                       QUERY PLAN

--------------------------------------------------------------------------------
-----------------------------------------
Aggregate (cost=12284.65..12284.66 rows=1 width=0) (actual time=9.150..9.150 r
ows=1 loops=1)
   Buffers: shared hit=1976
-> Bitmap Heap Scan on test (cost=91.78..12276.35 rows=3319 width=0) (actua
l time=2.338..6.866 rows=3712 loops=1)
         Recheck Cond: (tenant = '000000000000001'::bpchar)
         Filter: (created_date = '2011-06-30'::date)
         Buffers: shared hit=1976
-> Bitmap Index Scan on i1 (cost=0.00..90.95 rows=3319 width=0) (actu
al time=2.063..2.063 rows=15179 loops=1)
               Index Cond: (tenant = '000000000000001'::bpchar)
               Buffers: shared hit=98
 Total runtime: 9.206 ms
(10 rows)

=> update test set created_by = '000000000000001' where tenant = '000000000000001';
UPDATE 3712
=> explain (analyze on, buffers on) select count(*) from test where tenant = '000000000000001' and created_date = '2011-6-30';
                                                       QUERY PLAN

--------------------------------------------------------------------------------
-----------------------------------------
Aggregate (cost=12284.68..12284.69 rows=1 width=0) (actual time=30.738..30.739
 rows=1 loops=1)
   Buffers: shared hit=2001
-> Bitmap Heap Scan on test (cost=91.78..12276.38 rows=3319 width=0) (actua
l time=2.589..28.361 rows=3712 loops=1)
         Recheck Cond: (tenant = '000000000000001'::bpchar)
         Filter: (created_date = '2011-06-30'::date)
         Buffers: shared hit=2001
-> Bitmap Index Scan on i1 (cost=0.00..90.95 rows=3319 width=0) (actu
al time=2.301..2.301 rows=17123 loops=1)
               Index Cond: (tenant = '000000000000001'::bpchar)
               Buffers: shared hit=107
 Total runtime: 30.785 ms
(10 rows)

----

There seems to be definitely something funky going on. Since created_by is indexed it shouldn't do any HOT logic.

Is there any other information that I can provide? I'm happy to recompile with a patch applied, etc.

Thanks.

-- Lars


--
Sent via pgsql-performance mailing list (pgsql-performance@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-performance

Reply via email to