On Tue, Apr 13, 2021 at 04:17:39PM +0000, Tom Lane wrote:
> Avoid improbable PANIC during heap_update.
>
> heap_update needs to clear any existing "all visible" flag on
> the old tuple's page (and on the new page too, if different).
> Per coding rules, to do this it must acquire pin on the appropriate
> visibility-map page while not holding exclusive buffer lock;
> which creates a race condition since someone else could set the
> flag whenever we're not holding the buffer lock. The code is
> supposed to handle that by re-checking the flag after acquiring
> buffer lock and retrying if it became set. However, one code
> path through heap_update itself, as well as one in its subroutine
> RelationGetBufferForTuple, failed to do this. The end result,
> in the unlikely event that a concurrent VACUUM did set the flag
> while we're transiently not holding lock, is a non-recurring
> "PANIC: wrong buffer passed to visibilitymap_clear" failure.
>
Hi,
This doesn't look as improbable because I saw it at least 3 times with
v15beta4.
The first time I thought it was my fault, then I tried with a commit on
september 25 (didn't remember which exactly but that doesn't seems too
relevant).
Finally I saw it again in a build with TRACE_VISIBILITYMAP defined (the
same commit).
But I haven't see it anymore on rc1. Anyway I'm attaching the backtrace
(this is from the build with TRACE_VISIBILITYMAP), the query that was
running at the time was (no changes were made to quad_poly_tbl table
nor any indexes were added to this table):
"""
update public.quad_poly_tbl set
id = public.quad_poly_tbl.id
returning
public.quad_poly_tbl.id as c0
"""
--
Jaime Casanova
Director de Servicios Profesionales
SystemGuards - Consultores de PostgreSQL
#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
set = {__val = {4194304, 139906277533872, 139906277529216,
139906277533872, 139906275702592, 0, 895, 94675498488722, 140729237540432,
281470681751485, 0, 0, 0, 0, 0, 0}}
pid = <optimized out>
tid = <optimized out>
ret = <optimized out>
#1 0x00007f3e77dfc535 in __GI_abort () at abort.c:79
save_stage = 1
act = {__sigaction_handler = {sa_handler = 0x3ff0000000000000,
sa_sigaction = 0x3ff0000000000000}, sa_mask = {__val = {0,
4609753056924675352, 0, 4728779608772575232, 0, 140729237540784,
94675496496005, 94675512751464, 94675512749920, 12884901888,
94675512751464, 94675512751440, 94675512749920, 140729237540832,
139906277529216, 139906277511840}}, sa_flags = 2011471149,
sa_restorer = 0x0}
sigs = {__val = {32, 0 <repeats 15 times>}}
#2 0x0000561b5b5335c7 in errfinish (filename=0x561b5b5c7180 "visibilitymap.c",
lineno=156,
funcname=0x561b5b5c74a0 <__func__.12294> "visibilitymap_clear") at
elog.c:675
edata = 0x561b5b8a8960 <errordata>
elevel = 23
oldcontext = 0x561b5d786c00
econtext = 0x0
__func__ = "errfinish"
#3 0x0000561b5af09ef7 in visibilitymap_clear (rel=0x561b5da3eb48, heapBlk=49,
buf=0, flags=3 '\003') at visibilitymap.c:156
__errno_location = <optimized out>
mapBlock = 0
mapByte = 12
mapOffset = 2
mask = 12 '\f'
map = 0x7f3e70fc7af8 "\200\326\r"
cleared = false
__func__ = "visibilitymap_clear"
#4 0x0000561b5aee6ea7 in heap_update (relation=0x561b5da3eb48,
otid=0x7ffe1436942a, newtup=0x561b5cdcf350, cid=0, crosscheck=0x0, wait=true,
tmfd=0x7ffe14369460, lockmode=0x7ffe14369394) at heapam.c:3913
result = TM_Ok
xid = 906880
hot_attrs = 0x561b5cdcf740
key_attrs = 0x0
id_attrs = 0x0
interesting_attrs = 0x561b5cdcf718
modified_attrs = 0x0
lp = 0x7f3e74a0fc18
oldtup = {t_len = 264, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 49},
ip_posid = 1}, t_tableOid = 17323, t_data = 0x7f3e74a11af8}
heaptup = 0x561b5cdcf350
old_key_tuple = 0x0
old_key_copied = false
page = 0x7f3e74a0fc00 "\215"
block = 49
mxact_status = MultiXactStatusNoKeyUpdate
buffer = 11159
newbuf = 3698
vmbuffer = 0
vmbuffer_new = 0
need_toast = false
newtupsize = 264
pagefree = 112
have_tuple_lock = false
iscombo = false
use_hot_update = false
key_intact = true
all_visible_cleared = true
all_visible_cleared_new = false
checked_lockers = false
locker_remains = false
id_has_external = false
xmax_new_tuple = 0
xmax_old_tuple = 906880
infomask_old_tuple = 0
infomask2_old_tuple = 0
infomask_new_tuple = 2048
infomask2_new_tuple = 0
__func__ = "heap_update"
#5 0x0000561b5aef562b in heapam_tuple_update (relation=0x561b5da3eb48,
otid=0x7ffe1436942a, slot=0x561b5d788a58, cid=0, snapshot=0x561b5c51be80,
crosscheck=0x0, wait=true, tmfd=0x7ffe14369460, lockmode=0x7ffe14369394,
update_indexes=0x7ffe14369391) at heapam_handler.c:327
shouldFree = false
tuple = 0x561b5cdcf350
result = 32766
#6 0x0000561b5b186b50 in table_tuple_update (rel=0x561b5da3eb48,
otid=0x7ffe1436942a, slot=0x561b5d788a58, cid=0, snapshot=0x561b5c51be80,
crosscheck=0x0, wait=true, tmfd=0x7ffe14369460, lockmode=0x7ffe14369394,
update_indexes=0x7ffe14369391)
at ../../../src/include/access/tableam.h:1511
No locals.
#7 0x0000561b5b18996d in ExecUpdateAct (context=0x7ffe14369430,
resultRelInfo=0x561b5d7871a8, tupleid=0x7ffe1436942a, oldtuple=0x0,
slot=0x561b5d788a58, canSetTag=true, updateCxt=0x7ffe14369390) at
nodeModifyTable.c:1971
estate = 0x561b5d786d20
resultRelationDesc = 0x561b5da3eb48
partition_constraint_failed = false
result = 1568173344
#8 0x0000561b5b189e96 in ExecUpdate (context=0x7ffe14369430,
resultRelInfo=0x561b5d7871a8, tupleid=0x7ffe1436942a, oldtuple=0x0,
slot=0x561b5d788a58, canSetTag=true) at nodeModifyTable.c:2185
estate = 0x561b5d786d20
resultRelationDesc = 0x561b5da3eb48
updateCxt = {updated = false, updateIndexes = false, crossPartUpdate =
false, lockmode = LockTupleNoKeyExclusive}
recheckIndexes = 0x0
result = 22043
__func__ = "ExecUpdate"
#9 0x0000561b5b18c865 in ExecModifyTable (pstate=0x561b5d786f90) at
nodeModifyTable.c:3737
node = 0x561b5d786f90
context = {mtstate = 0x561b5d786f90, epqstate = 0x561b5d787078, estate
= 0x561b5d786d20, planSlot = 0x561b5d7878e8,
GetUpdateNewTuple = 0x561b5b187d1d <internalGetUpdateNewTuple>,
relaction = 0x0, tmfd = {ctid = {ip_blkid = {bi_hi = 49287,
bi_lo = 23320}, ip_posid = 22043}, xmax = 1561021592, cmax =
22043, traversed = 4}, cpUpdateRetrySlot = 0x100000010,
cpUpdateReturningSlot = 0x561b5d0b4c98}
estate = 0x561b5d786d20
operation = CMD_UPDATE
resultRelInfo = 0x561b5d7871a8
subplanstate = 0x561b5d787458
slot = 0x561b5d788a58
oldSlot = 0x561b5d788940
tuple_ctid = {ip_blkid = {bi_hi = 0, bi_lo = 49}, ip_posid = 1}
oldtupdata = {t_len = 1561021608, t_self = {ip_blkid = {bi_hi = 22043,
bi_lo = 0}, ip_posid = 1}, t_tableOid = 1, t_data = 0x7ffe14369490}
oldtuple = 0x0
tupleid = 0x7ffe1436942a
proute = 0x0
relinfos = 0x0
lc = 0x561b5d787f48
__func__ = "ExecModifyTable"
#10 0x0000561b5b140cd3 in ExecProcNode (node=0x561b5d786f90) at
../../../src/include/executor/executor.h:259
No locals.
#11 0x0000561b5b143992 in ExecutePlan (estate=0x561b5d786d20,
planstate=0x561b5d786f90, use_parallel_mode=false, operation=CMD_UPDATE,
sendTuples=true, numberTuples=0, direction=ForwardScanDirection,
dest=0x561b5c535b28, execute_once=true) at execMain.c:1636
slot = 0x561b5d787f48
current_tuple_count = 1471
#12 0x0000561b5b141379 in standard_ExecutorRun (queryDesc=0x561b5c535bc0,
direction=ForwardScanDirection, count=0, execute_once=true)
at execMain.c:363
estate = 0x561b5d786d20
operation = CMD_UPDATE
dest = 0x561b5c535b28
sendTuples = true
oldcontext = 0x561b5c5358d0
__func__ = "standard_ExecutorRun"
#13 0x0000561b5b14118f in ExecutorRun (queryDesc=0x561b5c535bc0,
direction=ForwardScanDirection, count=0, execute_once=true) at execMain.c:307
No locals.
#14 0x0000561b5b391d88 in ProcessQuery (plan=0x561b6046f6a0,
sourceText=0x561b5c4eb8a0 "update public.quad_poly_tbl set \n id =
public.quad_poly_tbl.id\nreturning \n public.quad_poly_tbl.id as c0",
params=0x0, queryEnv=0x0, dest=0x561b5c535b28, qc=0x7ffe14369710) at
pquery.c:160
queryDesc = 0x561b5c535bc0
#15 0x0000561b5b393761 in PortalRunMulti (portal=0x561b5f832120,
isTopLevel=true, setHoldSnapshot=true, dest=0x561b5c535b28,
altdest=0x561b5b86da00 <donothingDR>, qc=0x7ffe14369710) at pquery.c:1277
pstmt = 0x561b6046f6a0
stmtlist_item__state = {l = 0x561b5cfce990, i = 0}
active_snapshot_set = true
stmtlist_item = 0x561b5cfce9a8
#16 0x0000561b5b3932a5 in FillPortalStore (portal=0x561b5f832120,
isTopLevel=true) at pquery.c:1026
treceiver = 0x561b5c535b28
qc = {commandTag = CMDTAG_UNKNOWN, nprocessed = 0}
__func__ = "FillPortalStore"
#17 0x0000561b5b392c4b in PortalRun (portal=0x561b5f832120,
count=9223372036854775807, isTopLevel=true, run_once=true, dest=0x561b5cfce9e8,
altdest=0x561b5cfce9e8, qc=0x7ffe143698e0) at pquery.c:763
_save_exception_stack = 0x7ffe143699e0
_save_context_stack = 0x0
_local_sigjmp_buf = {{__jmpbuf = {0, 2860372577420250015,
94675489186000, 140729237544704, 0, 0, 2860372577606896543,
8392611472902654879},
__mask_was_saved = 0, __saved_mask = {__val = {18728835850,
94675524184528, 128, 94675512768384, 94675513071824, 140729237542976,
16256, 94675513072368, 94675524184400, 140729237543008,
94675496542566, 109, 112, 94675524184664, 112, 339122288}}}}
_do_rethrow = false
result = false
nprocessed = 94675524184552
saveTopTransactionResourceOwner = 0x561b5c573850
saveTopTransactionContext = 0x561b5c51bc90
saveActivePortal = 0x0
saveResourceOwner = 0x561b5c573850
savePortalContext = 0x0
saveMemoryContext = 0x561b5c51bc90
__func__ = "PortalRun"
#18 0x0000561b5b38c548 in exec_simple_query (
query_string=0x561b5c4eb8a0 "update public.quad_poly_tbl set \n id =
public.quad_poly_tbl.id\nreturning \n public.quad_poly_tbl.id as c0")
at postgres.c:1250
snapshot_set = true
per_parsetree_context = 0x0
plantree_list = 0x561b5cfce990
parsetree = 0x561b5c4ec8f0
commandTag = CMDTAG_UPDATE
qc = {commandTag = CMDTAG_UNKNOWN, nprocessed = 0}
querytree_list = 0x561b5c4ed358
portal = 0x561b5f832120
receiver = 0x561b5cfce9e8
format = 0
parsetree_item__state = {l = 0x561b5c4ec928, i = 0}
dest = DestRemote
oldcontext = 0x561b5c51bc90
parsetree_list = 0x561b5c4ec928
parsetree_item = 0x561b5c4ec940
save_log_statement_stats = false
was_logged = true
use_implicit_block = false
msec_str =
"\240\270N\\\033V\000\000j\000\000\000\006\000\000\000\060\231\066\024\376\177\000\000\062\001\034[j\000\000"
__func__ = "exec_simple_query"
#19 0x0000561b5b390d39 in PostgresMain (dbname=0x561b5c5163e0 "regression",
username=0x561b5c5163b8 "jcasanov") at postgres.c:4581
query_string = 0x561b5c4eb8a0 "update public.quad_poly_tbl set \n id =
public.quad_poly_tbl.id\nreturning \n public.quad_poly_tbl.id as c0"
firstchar = 81
input_message = {
data = 0x561b5c4eb8a0 "update public.quad_poly_tbl set \n id =
public.quad_poly_tbl.id\nreturning \n public.quad_poly_tbl.id as c0",
len = 107, maxlen = 1024, cursor = 107}
local_sigjmp_buf = {{__jmpbuf = {0, 2860372577472678815,
94675489186000, 140729237544704, 0, 0, 2860372577384598431,
8392611473924361119},
__mask_was_saved = 1, __saved_mask = {__val = {4194304, 1, 15679,
15680, 979, 18446744073709551536, 0, 0, 139906276254835, 3904, 0,
140729237543680, 94675489186000, 140729237544704,
94675496494014, 15616}}}}
send_ready_for_query = false
idle_in_transaction_timeout_enabled = false
idle_session_timeout_enabled = false
__func__ = "PostgresMain"
#20 0x0000561b5b2c8a02 in BackendRun (port=0x561b5c51a640) at postmaster.c:4504
No locals.
#21 0x0000561b5b2c834e in BackendStartup (port=0x561b5c51a640) at
postmaster.c:4232
bn = 0x561b5c50d830
pid = 0
__func__ = "BackendStartup"
#22 0x0000561b5b2c4793 in ServerLoop () at postmaster.c:1806
port = 0x561b5c51a640
i = 2
rmask = {fds_bits = {256, 0 <repeats 15 times>}}
selres = 1
now = 1664163286
readmask = {fds_bits = {448, 0 <repeats 15 times>}}
nSockets = 9
last_lockfile_recheck_time = 1664163252
last_touch_time = 1664163132
__func__ = "ServerLoop"
#23 0x0000561b5b2c3fe2 in PostmasterMain (argc=3, argv=0x561b5c4e4eb0) at
postmaster.c:1478
opt = -1
status = 0
userDoption = 0x561b5c5088a0 "data1"
listen_addr_saved = true
i = 64
output_config_variable = 0x0
__func__ = "PostmasterMain"
#24 0x0000561b5b1c33da in main (argc=3, argv=0x561b5c4e4eb0) at main.c:202
do_check_root = true