HI, this is the test case:Use stap track pg_clog/* fsync call.
[root@digoal ~]# cat trc.stp
global f_start[999999]
probe
process("/opt/pgsql/bin/postgres").function("SlruPhysicalWritePage@/opt/soft_bak/postgresql-9.4.4/src/backend/access/transam/slru.c").call
{
f_start[execname(), pid(), tid(), cpu()] = gettimeofday_ms()
printf("%s <- time:%d, pp:%s, par:%s\n", thread_indent(-1),
gettimeofday_ms(), pp(), $$parms$$)
# printf("%s -> time:%d, pp:%s\n", thread_indent(1), f_start[execname(),
pid(), tid(), cpu()], pp() )
}
probe
process("/opt/pgsql/bin/postgres").function("SlruPhysicalWritePage@/opt/soft_bak/postgresql-9.4.4/src/backend/access/transam/slru.c").return
{
t=gettimeofday_ms()
a=execname()
b=cpu()
c=pid()
d=pp()
e=tid()
if (f_start[a,c,e,b]) {
printf("%s <- time:%d, pp:%s, par:%s\n", thread_indent(-1), t -
f_start[a,c,e,b], d, $return$$)
# printf("%s <- time:%d, pp:%s\n", thread_indent(-1), t - f_start[a,c,e,b], d)
}
}
probe
process("/opt/pgsql/bin/postgres").function("OpenTransientFile@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c").call
{
f_start[execname(), pid(), tid(), cpu()] = gettimeofday_ms()
printf("%s <- time:%d, pp:%s, par:%s\n", thread_indent(-1),
gettimeofday_ms(), pp(), $$parms$$)
# printf("%s -> time:%d, pp:%s\n", thread_indent(1), f_start[execname(),
pid(), tid(), cpu()], pp() )
}
probe
process("/opt/pgsql/bin/postgres").function("OpenTransientFile@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c").return
{
t=gettimeofday_ms()
a=execname()
b=cpu()
c=pid()
d=pp()
e=tid()
if (f_start[a,c,e,b]) {
printf("%s <- time:%d, pp:%s, par:%s\n", thread_indent(-1), t -
f_start[a,c,e,b], d, $return$$)
# printf("%s <- time:%d, pp:%s\n", thread_indent(-1), t - f_start[a,c,e,b], d)
}
}
probe
process("/opt/pgsql/bin/postgres").function("pg_fsync@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c").call
{
f_start[execname(), pid(), tid(), cpu()] = gettimeofday_ms()
printf("%s <- time:%d, pp:%s, par:%s\n", thread_indent(-1),
gettimeofday_ms(), pp(), $$parms$$)
# printf("%s -> time:%d, pp:%s\n", thread_indent(1), f_start[execname(),
pid(), tid(), cpu()], pp() )
}
probe
process("/opt/pgsql/bin/postgres").function("pg_fsync@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c").return
{
t=gettimeofday_ms()
a=execname()
b=cpu()
c=pid()
d=pp()
e=tid()
if (f_start[a,c,e,b]) {
printf("%s <- time:%d, pp:%s, par:%s\n", thread_indent(-1), t -
f_start[a,c,e,b], d, $return$$)
# printf("%s <- time:%d, pp:%s\n", thread_indent(-1), t - f_start[a,c,e,b], d)
}
}
Use pgbench like this:
postgres@digoal-> cat 7.sql
select txid_current();
postgres@digoal-> pgbench -M prepared -n -r -P 1 -f ./7.sql -c 1 -j 1 -T 100000
About 32K tps.
progress: 240.0 s, 31164.4 tps, lat 0.031 ms stddev 0.183
progress: 241.0 s, 33243.3 tps, lat 0.029 ms stddev 0.127
progress: 242.0 s, 32567.3 tps, lat 0.030 ms stddev 0.179
progress: 243.0 s, 33656.6 tps, lat 0.029 ms stddev 0.038
progress: 244.0 s, 33948.1 tps, lat 0.029 ms stddev 0.021
progress: 245.0 s, 32996.8 tps, lat 0.030 ms stddev 0.046
progress: 246.0 s, 34156.7 tps, lat 0.029 ms stddev 0.015
progress: 247.0 s, 33259.5 tps, lat 0.029 ms stddev 0.074
progress: 248.0 s, 32979.6 tps, lat 0.030 ms stddev 0.043
progress: 249.0 s, 32892.6 tps, lat 0.030 ms stddev 0.039
progress: 250.0 s, 33090.7 tps, lat 0.029 ms stddev 0.020
progress: 251.0 s, 33238.3 tps, lat 0.029 ms stddev 0.017
progress: 252.0 s, 32341.3 tps, lat 0.030 ms stddev 0.045
progress: 253.0 s, 31999.0 tps, lat 0.030 ms stddev 0.167
progress: 254.0 s, 33332.6 tps, lat 0.029 ms stddev 0.056
progress: 255.0 s, 30394.6 tps, lat 0.032 ms stddev 0.027
progress: 256.0 s, 31862.7 tps, lat 0.031 ms stddev 0.023
progress: 257.0 s, 31574.0 tps, lat 0.031 ms stddev 0.112
Trace the PID:
postgres@digoal-> ps -ewf|grep postgres
postgres 2921 1883 29 09:37 pts/1 00:00:05 pgbench -M prepared -n -r -P 1
-f ./7.sql -c 1 -j 1 -T 100000
postgres 2924 1841 66 09:37 ? 00:00:13 postgres: postgres postgres
[local] SELECT
[root@digoal ~]# stap -vp 5 -DMAXSKIPPED=9999999 -DSTP_NO_OVERLOAD
-DMAXTRYLOCK=100 ./trc.stp -x 2924 >./stap.log 2>&1
0 postgres(2924): -> time:1441503927731,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("SlruPhysicalWritePage@/opt/soft_bak/postgresql-9.4.4/src/backend/access/transam/slru.c:699").call,
par:ctl={.shared=0x7f74a9fe39c0, .do_fsync='\001', .PagePrecedes=0x4b1960,
.Dir="pg_clog"} pageno=12350 slotno=10 fdata=ERROR
31 postgres(2924): -> time:1441503927731,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("OpenTransientFile@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:1710").call,
par:fileName="pg_clog/0181" fileFlags=66 fileMode=384
53 postgres(2924): <- time:0,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("OpenTransientFile@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:1710").return,
par:14
102 postgres(2924): -> time:1441503927731,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("pg_fsync@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:315").call,
par:fd=14
1096 postgres(2924): <- time:1,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("pg_fsync@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:315").return,
par:0
1113 postgres(2924): <- time:1,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("SlruPhysicalWritePage@/opt/soft_bak/postgresql-9.4.4/src/backend/access/transam/slru.c:699").return,
par:'\001'
1105302 postgres(2924): -> time:1441503928836,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("SlruPhysicalWritePage@/opt/soft_bak/postgresql-9.4.4/src/backend/access/transam/slru.c:699").call,
par:ctl={.shared=0x7f74a9fe39c0, .do_fsync='\001', .PagePrecedes=0x4b1960,
.Dir="pg_clog"} pageno=12351 slotno=11 fdata=ERROR
1105329 postgres(2924): -> time:1441503928836,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("OpenTransientFile@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:1710").call,
par:fileName="pg_clog/0181" fileFlags=66 fileMode=384
1105348 postgres(2924): <- time:0,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("OpenTransientFile@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:1710").return,
par:14
1105405 postgres(2924): -> time:1441503928836,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("pg_fsync@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:315").call,
par:fd=14
1106440 postgres(2924): <- time:1,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("pg_fsync@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:315").return,
par:0
1106452 postgres(2924): <- time:1,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("SlruPhysicalWritePage@/opt/soft_bak/postgresql-9.4.4/src/backend/access/transam/slru.c:699").return,
par:'\001'
2087891 postgres(2924): -> time:1441503929819,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("SlruPhysicalWritePage@/opt/soft_bak/postgresql-9.4.4/src/backend/access/transam/slru.c:699").call,
par:ctl={.shared=0x7f74a9fe39c0, .do_fsync='\001', .PagePrecedes=0x4b1960,
.Dir="pg_clog"} pageno=12352 slotno=12 fdata=ERROR
2087917 postgres(2924): -> time:1441503929819,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("OpenTransientFile@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:1710").call,
par:fileName="pg_clog/0182" fileFlags=66 fileMode=384
2087958 postgres(2924): <- time:0,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("OpenTransientFile@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:1710").return,
par:14
2088013 postgres(2924): -> time:1441503929819,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("pg_fsync@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:315").call,
par:fd=14
2089250 postgres(2924): <- time:1,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("pg_fsync@/opt/soft_bak/postgresql-9.4.4/src/backend/storage/file/fd.c:315").return,
par:0
2089265 postgres(2924): <- time:1,
pp:process("/opt/pgsql9.4.4/bin/postgres").function("SlruPhysicalWritePage@/opt/soft_bak/postgresql-9.4.4/src/backend/access/transam/slru.c:699").return,
par:'\001'
fsync interval about 1 sec, every 32KB xacts, per CLOG PAGE.
postgres=# select 1441503928836-1441503927731;
?column?
----------
1105
(1 row)
postgres=# select 1441503929819-1441503928836;
?column?
----------
983
(1 row)
------------------------------------------------------------------发件人:Amit
Kapila <[email protected]>发送时间:2015年9月8日(星期二) 15:37收件人:周正中(德歌)
<[email protected]>抄 送:Andres Freund <[email protected]>,张广舟(明虚)
<[email protected]>,pgsql-hackers
<[email protected]>,范孝剑(康贤) <[email protected]>,曾文旌(义从)
<[email protected]>,窦贤明(执白) <[email protected]>,萧少聪(铁庵)
<[email protected]>,陈新坚(惧留孙) <[email protected]>主 题:Re:
[HACKERS] 答复:[HACKERS] about fsync in CLOG buffer write
On Sun, Sep 6, 2015 at 7:34 AM, 周正中(德歌) <[email protected]> wrote:
>On 2015-09-10 19:39:59 +0800, 张广舟(明虚) wrote:
>> We found there is a fsync call when CLOG buffer
>> is written out in SlruPhysicalWritePage(). It is often called when a backend
>> needs to check transaction status with SimpleLruReadPage().
>That's when there's not enough buffers available some other, and your
>case dirty, needs to be written out.
>You could try increasing the max (32) in CLOGShmemBuffers() further.I think
>increasing
CLOGShmemBuffers have no help for our case, because PG will call pg_fsync once
for every CLOG PAGE(BLOCKSZ) when ExtendCLOG。
Ideally that should not happen every time, CLOG page is written onlywhen there
is no non-dirty CLOG buffer to accommodate a new pageduring ExtendCLOG. I
think it will happen only if there is no flush ofCLOG pages by Checkpoint or by
any other process trying to committhe transaction during last 32 pages worth of
transactions (which is4 (TRANS_PER_BYTE) * 8192 (BLKSIZE) * 32 (CLOG_BUFFERS)).
What kind of workload you are using, can you once try by commentingthe sync
call and see if there is any gain (I understand that is not right,but at least
we will come to know how much we can save by moving sucha sync to checkpoint).
With Regards,
Amit Kapila.
EnterpriseDB: http://www.enterprisedb.com