Hi,
I built a gist index to try to test a theory in some other thread. For that
the indexes need to cover a lot of entries. With gist creating the index took
a long time, which made me strace the index build process.
Which lead me to notice this:
...
openat(AT_FDCWD, "base/16462/17747_fsm", O_RDWR|O_CLOEXEC) = -1 ENOENT (No such
file or directory)
lseek(56, 0, SEEK_END) = 40173568
pwrite64(56,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192,
40173568) = 8192
lseek(56, 0, SEEK_END) = 40181760
openat(AT_FDCWD, "base/16462/17747_fsm", O_RDWR|O_CLOEXEC) = -1 ENOENT (No such
file or directory)
lseek(56, 0, SEEK_END) = 40181760
pwrite64(56,
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 8192,
40181760) = 8192
lseek(56, 0, SEEK_END) = 40189952
lseek(56, 0, SEEK_END) = 40189952
openat(AT_FDCWD, "base/16462/17747_fsm", O_RDWR|O_CLOEXEC) = -1 ENOENT (No such
file or directory)
lseek(56, 0, SEEK_END) = 40189952
...
I.e. for every write we try and fail to open the FSM.
#0 __libc_open64 (file=0x30469c8 "base/16462/17747_fsm", oflag=524290) at
../sysdeps/unix/sysv/linux/open64.c:30
#1 0x0000000000cbe582 in BasicOpenFilePerm (fileName=0x30469c8
"base/16462/17747_fsm", fileFlags=524290, fileMode=384)
at
../../../../../home/andres/src/postgresql/src/backend/storage/file/fd.c:1134
#2 0x0000000000cbf057 in PathNameOpenFilePerm (fileName=0x30469c8
"base/16462/17747_fsm", fileFlags=524290, fileMode=384)
at
../../../../../home/andres/src/postgresql/src/backend/storage/file/fd.c:1620
#3 0x0000000000cbef88 in PathNameOpenFile (fileName=0x30469c8
"base/16462/17747_fsm", fileFlags=2)
at
../../../../../home/andres/src/postgresql/src/backend/storage/file/fd.c:1577
#4 0x0000000000cfeed2 in mdopenfork (reln=0x2fd5af8, forknum=FSM_FORKNUM,
behavior=2)
at
../../../../../home/andres/src/postgresql/src/backend/storage/smgr/md.c:649
#5 0x0000000000cfe20b in mdexists (reln=0x2fd5af8, forknum=FSM_FORKNUM) at
../../../../../home/andres/src/postgresql/src/backend/storage/smgr/md.c:181
#6 0x0000000000d015b3 in smgrexists (reln=0x2fd5af8, forknum=FSM_FORKNUM) at
../../../../../home/andres/src/postgresql/src/backend/storage/smgr/smgr.c:400
#7 0x0000000000cc5078 in fsm_readbuf (rel=0x7f5b87977f38, addr=...,
extend=false)
at
../../../../../home/andres/src/postgresql/src/backend/storage/freespace/freespace.c:571
#8 0x0000000000cc52d3 in fsm_search (rel=0x7f5b87977f38, min_cat=128 '\200')
at
../../../../../home/andres/src/postgresql/src/backend/storage/freespace/freespace.c:690
#9 0x0000000000cc47a5 in GetPageWithFreeSpace (rel=0x7f5b87977f38,
spaceNeeded=4096)
at
../../../../../home/andres/src/postgresql/src/backend/storage/freespace/freespace.c:141
#10 0x0000000000cc5e52 in GetFreeIndexPage (rel=0x7f5b87977f38) at
../../../../../home/andres/src/postgresql/src/backend/storage/freespace/indexfsm.c:40
#11 0x0000000000855d4a in gistNewBuffer (r=0x7f5b87977f38,
heaprel=0x7f5b87979688)
at
../../../../../home/andres/src/postgresql/src/backend/access/gist/gistutil.c:831
#12 0x0000000000844541 in gistplacetopage (rel=0x7f5b87977f38, freespace=819,
giststate=0x2feae68, buffer=67261, itup=0x7ffd3ce86c30, ntup=1, oldoffnum=0,
newblkno=0x0, leftchildbuf=0, splitinfo=0x7ffd3ce86be0,
markfollowright=true, heapRel=0x7f5b87979688, is_build=true)
at
../../../../../home/andres/src/postgresql/src/backend/access/gist/gist.c:353
#13 0x0000000000846263 in gistinserttuples (state=0x7ffd3ce86c90,
stack=0x2fde7e8, giststate=0x2feae68, tuples=0x7ffd3ce86c30, ntup=1,
oldoffnum=0,
leftchild=0, rightchild=0, unlockbuf=false, unlockleftchild=false) at
../../../../../home/andres/src/postgresql/src/backend/access/gist/gist.c:1298
#14 0x00000000008461a7 in gistinserttuple (state=0x7ffd3ce86c90,
stack=0x2fde7e8, giststate=0x2feae68, tuple=0x2fde708, oldoffnum=0)
at
../../../../../home/andres/src/postgresql/src/backend/access/gist/gist.c:1251
#15 0x0000000000845681 in gistdoinsert (r=0x7f5b87977f38, itup=0x2fde708,
freespace=819, giststate=0x2feae68, heapRel=0x7f5b87979688, is_build=true)
at
../../../../../home/andres/src/postgresql/src/backend/access/gist/gist.c:887
#16 0x0000000000848c79 in gistBuildCallback (index=0x7f5b87977f38,
tid=0x2f31f74, values=0x7ffd3ce87060, isnull=0x7ffd3ce87040, tupleIsAlive=true,
state=0x7ffd3ce87340) at
../../../../../home/andres/src/postgresql/src/backend/access/gist/gistbuild.c:863
#17 0x000000000087d605 in heapam_index_build_range_scan
(heapRelation=0x7f5b87979688, indexRelation=0x7f5b87977f38,
indexInfo=0x2fd9f50, allow_sync=true,
anyvisible=false, progress=true, start_blockno=0, numblocks=4294967295,
callback=0x848b6b <gistBuildCallback>, callback_state=0x7ffd3ce87340,
scan=0x2f31f18) at
../../../../../home/andres/src/postgresql/src/backend/access/heap/heapam_handler.c:1706
#18 0x0000000000847996 in table_index_build_scan (table_rel=0x7f5b87979688,
index_rel=0x7f5b87977f38, index_info=0x2fd9f50, allow_sync=true, progress=true,
callback=0x848b6b <gistBuildCallback>, callback_state=0x7ffd3ce87340,
scan=0x0)
at
../../../../../home/andres/src/postgresql/src/include/access/tableam.h:1794
#19 0x0000000000847da8 in gistbuild (heap=0x7f5b87979688, index=0x7f5b87977f38,
indexInfo=0x2fd9f50)
at
../../../../../home/andres/src/postgresql/src/backend/access/gist/gistbuild.c:313
#20 0x000000000094c945 in index_build (heapRelation=0x7f5b87979688,
indexRelation=0x7f5b87977f38, indexInfo=0x2fd9f50, isreindex=false,
parallel=true)
at
../../../../../home/andres/src/postgresql/src/backend/catalog/index.c:3021
#21 0x000000000094970f in index_create (heapRelation=0x7f5b87979688,
indexRelationName=0x2f2f798 "foo_i_idx1", indexRelationId=17747,
parentIndexRelid=0,
parentConstraintId=0, relFileNumber=0, indexInfo=0x2fd9f50,
indexColNames=0x2f2fc60, accessMethodId=783, tableSpaceId=0,
collationIds=0x2f32340,
opclassIds=0x2f32358, opclassOptions=0x2f32370, coloptions=0x2f32388,
stattargets=0x0, reloptions=0, flags=0, constr_flags=0,
allow_system_table_mods=false, is_internal=false,
constraintId=0x7ffd3ce876f4)
at
../../../../../home/andres/src/postgresql/src/backend/catalog/index.c:1275
The reason we reopen over and over is that we close the file in mdexist():
/*
* Close it first, to ensure that we notice if the fork has been
unlinked
* since we opened it. As an optimization, we can skip that in
recovery,
* which already closes relations when dropping them.
*/
if (!InRecovery)
mdclose(reln, forknum);
We call smgrexists as part of this code:
static Buffer
fsm_readbuf(Relation rel, FSMAddress addr, bool extend)
...
/*
* If we haven't cached the size of the FSM yet, check it first. Also
* recheck if the requested block seems to be past end, since our cached
* value might be stale. (We send smgr inval messages on truncation,
but
* not on extension.)
*/
if (reln->smgr_cached_nblocks[FSM_FORKNUM] == InvalidBlockNumber ||
blkno >= reln->smgr_cached_nblocks[FSM_FORKNUM])
{
/* Invalidate the cache so smgrnblocks asks the kernel. */
reln->smgr_cached_nblocks[FSM_FORKNUM] = InvalidBlockNumber;
if (smgrexists(reln, FSM_FORKNUM))
smgrnblocks(reln, FSM_FORKNUM);
else
reln->smgr_cached_nblocks[FSM_FORKNUM] = 0;
}
Because we set the size to 0 if the the fork doesn't exist, we'll reenter
during the next call, and then do the same thing again.
I don't think this is a huge performance issue or anything, but somehow it
seems indicative of something being "wrong".
It seems likely we encounter this issue not just with gist, but I haven't
checked yet.
Greetings,
Andres Freund