On Tue, Apr 13, 2021 at 04:17:39PM +0000, Tom Lane wrote:
> Avoid improbable PANIC during heap_update.
> 
> heap_update needs to clear any existing "all visible" flag on
> the old tuple's page (and on the new page too, if different).
> Per coding rules, to do this it must acquire pin on the appropriate
> visibility-map page while not holding exclusive buffer lock;
> which creates a race condition since someone else could set the
> flag whenever we're not holding the buffer lock.  The code is
> supposed to handle that by re-checking the flag after acquiring
> buffer lock and retrying if it became set.  However, one code
> path through heap_update itself, as well as one in its subroutine
> RelationGetBufferForTuple, failed to do this.  The end result,
> in the unlikely event that a concurrent VACUUM did set the flag
> while we're transiently not holding lock, is a non-recurring
> "PANIC: wrong buffer passed to visibilitymap_clear" failure.
> 

Hi,

This doesn't look as improbable because I saw it at least 3 times with
v15beta4.

The first time I thought it was my fault, then I tried with a commit on
september 25 (didn't remember which exactly but that doesn't seems too
relevant).
Finally I saw it again in a build with TRACE_VISIBILITYMAP defined (the
same commit).

But I haven't see it anymore on rc1. Anyway I'm attaching the backtrace
(this is from the build with TRACE_VISIBILITYMAP), the query that was 
running at the time was (no changes were made to quad_poly_tbl table 
nor any indexes were added to this table):

"""
update public.quad_poly_tbl set
  id = public.quad_poly_tbl.id
returning
  public.quad_poly_tbl.id as c0
"""

-- 
Jaime Casanova
Director de Servicios Profesionales
SystemGuards - Consultores de PostgreSQL
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
        set = {__val = {4194304, 139906277533872, 139906277529216, 
139906277533872, 139906275702592, 0, 895, 94675498488722, 140729237540432, 
            281470681751485, 0, 0, 0, 0, 0, 0}}
        pid = <optimized out>
        tid = <optimized out>
        ret = <optimized out>
#1  0x00007f3e77dfc535 in __GI_abort () at abort.c:79
        save_stage = 1
        act = {__sigaction_handler = {sa_handler = 0x3ff0000000000000, 
sa_sigaction = 0x3ff0000000000000}, sa_mask = {__val = {0, 
              4609753056924675352, 0, 4728779608772575232, 0, 140729237540784, 
94675496496005, 94675512751464, 94675512749920, 12884901888, 
              94675512751464, 94675512751440, 94675512749920, 140729237540832, 
139906277529216, 139906277511840}}, sa_flags = 2011471149, 
          sa_restorer = 0x0}
        sigs = {__val = {32, 0 <repeats 15 times>}}
#2  0x0000561b5b5335c7 in errfinish (filename=0x561b5b5c7180 "visibilitymap.c", 
lineno=156, 
    funcname=0x561b5b5c74a0 <__func__.12294> "visibilitymap_clear") at 
elog.c:675
        edata = 0x561b5b8a8960 <errordata>
        elevel = 23
        oldcontext = 0x561b5d786c00
        econtext = 0x0
        __func__ = "errfinish"
#3  0x0000561b5af09ef7 in visibilitymap_clear (rel=0x561b5da3eb48, heapBlk=49, 
buf=0, flags=3 '\003') at visibilitymap.c:156
        __errno_location = <optimized out>
        mapBlock = 0
        mapByte = 12
        mapOffset = 2
        mask = 12 '\f'
        map = 0x7f3e70fc7af8 "\200\326\r"
        cleared = false
        __func__ = "visibilitymap_clear"
#4  0x0000561b5aee6ea7 in heap_update (relation=0x561b5da3eb48, 
otid=0x7ffe1436942a, newtup=0x561b5cdcf350, cid=0, crosscheck=0x0, wait=true, 
    tmfd=0x7ffe14369460, lockmode=0x7ffe14369394) at heapam.c:3913
        result = TM_Ok
        xid = 906880
        hot_attrs = 0x561b5cdcf740
        key_attrs = 0x0
        id_attrs = 0x0
        interesting_attrs = 0x561b5cdcf718
        modified_attrs = 0x0
        lp = 0x7f3e74a0fc18
        oldtup = {t_len = 264, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 49}, 
ip_posid = 1}, t_tableOid = 17323, t_data = 0x7f3e74a11af8}
        heaptup = 0x561b5cdcf350
        old_key_tuple = 0x0
        old_key_copied = false
        page = 0x7f3e74a0fc00 "\215"
        block = 49
        mxact_status = MultiXactStatusNoKeyUpdate
        buffer = 11159
        newbuf = 3698
        vmbuffer = 0
        vmbuffer_new = 0
        need_toast = false
        newtupsize = 264
        pagefree = 112
        have_tuple_lock = false
        iscombo = false
        use_hot_update = false
        key_intact = true
        all_visible_cleared = true
        all_visible_cleared_new = false
        checked_lockers = false
        locker_remains = false
        id_has_external = false
        xmax_new_tuple = 0
        xmax_old_tuple = 906880
        infomask_old_tuple = 0
        infomask2_old_tuple = 0
        infomask_new_tuple = 2048
        infomask2_new_tuple = 0
        __func__ = "heap_update"
#5  0x0000561b5aef562b in heapam_tuple_update (relation=0x561b5da3eb48, 
otid=0x7ffe1436942a, slot=0x561b5d788a58, cid=0, snapshot=0x561b5c51be80, 
    crosscheck=0x0, wait=true, tmfd=0x7ffe14369460, lockmode=0x7ffe14369394, 
update_indexes=0x7ffe14369391) at heapam_handler.c:327
        shouldFree = false
        tuple = 0x561b5cdcf350
        result = 32766
#6  0x0000561b5b186b50 in table_tuple_update (rel=0x561b5da3eb48, 
otid=0x7ffe1436942a, slot=0x561b5d788a58, cid=0, snapshot=0x561b5c51be80, 
    crosscheck=0x0, wait=true, tmfd=0x7ffe14369460, lockmode=0x7ffe14369394, 
update_indexes=0x7ffe14369391)
    at ../../../src/include/access/tableam.h:1511
No locals.
#7  0x0000561b5b18996d in ExecUpdateAct (context=0x7ffe14369430, 
resultRelInfo=0x561b5d7871a8, tupleid=0x7ffe1436942a, oldtuple=0x0, 
    slot=0x561b5d788a58, canSetTag=true, updateCxt=0x7ffe14369390) at 
nodeModifyTable.c:1971
        estate = 0x561b5d786d20
        resultRelationDesc = 0x561b5da3eb48
        partition_constraint_failed = false
        result = 1568173344
#8  0x0000561b5b189e96 in ExecUpdate (context=0x7ffe14369430, 
resultRelInfo=0x561b5d7871a8, tupleid=0x7ffe1436942a, oldtuple=0x0, 
    slot=0x561b5d788a58, canSetTag=true) at nodeModifyTable.c:2185
        estate = 0x561b5d786d20
        resultRelationDesc = 0x561b5da3eb48
        updateCxt = {updated = false, updateIndexes = false, crossPartUpdate = 
false, lockmode = LockTupleNoKeyExclusive}
        recheckIndexes = 0x0
        result = 22043
        __func__ = "ExecUpdate"
#9  0x0000561b5b18c865 in ExecModifyTable (pstate=0x561b5d786f90) at 
nodeModifyTable.c:3737
        node = 0x561b5d786f90
        context = {mtstate = 0x561b5d786f90, epqstate = 0x561b5d787078, estate 
= 0x561b5d786d20, planSlot = 0x561b5d7878e8, 
          GetUpdateNewTuple = 0x561b5b187d1d <internalGetUpdateNewTuple>, 
relaction = 0x0, tmfd = {ctid = {ip_blkid = {bi_hi = 49287, 
                bi_lo = 23320}, ip_posid = 22043}, xmax = 1561021592, cmax = 
22043, traversed = 4}, cpUpdateRetrySlot = 0x100000010, 
          cpUpdateReturningSlot = 0x561b5d0b4c98}
        estate = 0x561b5d786d20
        operation = CMD_UPDATE
        resultRelInfo = 0x561b5d7871a8
        subplanstate = 0x561b5d787458
        slot = 0x561b5d788a58
        oldSlot = 0x561b5d788940
        tuple_ctid = {ip_blkid = {bi_hi = 0, bi_lo = 49}, ip_posid = 1}
        oldtupdata = {t_len = 1561021608, t_self = {ip_blkid = {bi_hi = 22043, 
bi_lo = 0}, ip_posid = 1}, t_tableOid = 1, t_data = 0x7ffe14369490}
        oldtuple = 0x0
        tupleid = 0x7ffe1436942a
        proute = 0x0
        relinfos = 0x0
        lc = 0x561b5d787f48
        __func__ = "ExecModifyTable"
#10 0x0000561b5b140cd3 in ExecProcNode (node=0x561b5d786f90) at 
../../../src/include/executor/executor.h:259
No locals.
#11 0x0000561b5b143992 in ExecutePlan (estate=0x561b5d786d20, 
planstate=0x561b5d786f90, use_parallel_mode=false, operation=CMD_UPDATE, 
    sendTuples=true, numberTuples=0, direction=ForwardScanDirection, 
dest=0x561b5c535b28, execute_once=true) at execMain.c:1636
        slot = 0x561b5d787f48
        current_tuple_count = 1471
#12 0x0000561b5b141379 in standard_ExecutorRun (queryDesc=0x561b5c535bc0, 
direction=ForwardScanDirection, count=0, execute_once=true)
    at execMain.c:363
        estate = 0x561b5d786d20
        operation = CMD_UPDATE
        dest = 0x561b5c535b28
        sendTuples = true
        oldcontext = 0x561b5c5358d0
        __func__ = "standard_ExecutorRun"
#13 0x0000561b5b14118f in ExecutorRun (queryDesc=0x561b5c535bc0, 
direction=ForwardScanDirection, count=0, execute_once=true) at execMain.c:307
No locals.
#14 0x0000561b5b391d88 in ProcessQuery (plan=0x561b6046f6a0, 
    sourceText=0x561b5c4eb8a0 "update public.quad_poly_tbl set \n  id = 
public.quad_poly_tbl.id\nreturning \n  public.quad_poly_tbl.id as c0", 
    params=0x0, queryEnv=0x0, dest=0x561b5c535b28, qc=0x7ffe14369710) at 
pquery.c:160
        queryDesc = 0x561b5c535bc0
#15 0x0000561b5b393761 in PortalRunMulti (portal=0x561b5f832120, 
isTopLevel=true, setHoldSnapshot=true, dest=0x561b5c535b28, 
    altdest=0x561b5b86da00 <donothingDR>, qc=0x7ffe14369710) at pquery.c:1277
        pstmt = 0x561b6046f6a0
        stmtlist_item__state = {l = 0x561b5cfce990, i = 0}
        active_snapshot_set = true
        stmtlist_item = 0x561b5cfce9a8
#16 0x0000561b5b3932a5 in FillPortalStore (portal=0x561b5f832120, 
isTopLevel=true) at pquery.c:1026
        treceiver = 0x561b5c535b28
        qc = {commandTag = CMDTAG_UNKNOWN, nprocessed = 0}
        __func__ = "FillPortalStore"
#17 0x0000561b5b392c4b in PortalRun (portal=0x561b5f832120, 
count=9223372036854775807, isTopLevel=true, run_once=true, dest=0x561b5cfce9e8, 
    altdest=0x561b5cfce9e8, qc=0x7ffe143698e0) at pquery.c:763
        _save_exception_stack = 0x7ffe143699e0
        _save_context_stack = 0x0
        _local_sigjmp_buf = {{__jmpbuf = {0, 2860372577420250015, 
94675489186000, 140729237544704, 0, 0, 2860372577606896543, 
8392611472902654879}, 
            __mask_was_saved = 0, __saved_mask = {__val = {18728835850, 
94675524184528, 128, 94675512768384, 94675513071824, 140729237542976, 
                16256, 94675513072368, 94675524184400, 140729237543008, 
94675496542566, 109, 112, 94675524184664, 112, 339122288}}}}
        _do_rethrow = false
        result = false
        nprocessed = 94675524184552
        saveTopTransactionResourceOwner = 0x561b5c573850
        saveTopTransactionContext = 0x561b5c51bc90
        saveActivePortal = 0x0
        saveResourceOwner = 0x561b5c573850
        savePortalContext = 0x0
        saveMemoryContext = 0x561b5c51bc90
        __func__ = "PortalRun"
#18 0x0000561b5b38c548 in exec_simple_query (
    query_string=0x561b5c4eb8a0 "update public.quad_poly_tbl set \n  id = 
public.quad_poly_tbl.id\nreturning \n  public.quad_poly_tbl.id as c0")
    at postgres.c:1250
        snapshot_set = true
        per_parsetree_context = 0x0
        plantree_list = 0x561b5cfce990
        parsetree = 0x561b5c4ec8f0
        commandTag = CMDTAG_UPDATE
        qc = {commandTag = CMDTAG_UNKNOWN, nprocessed = 0}
        querytree_list = 0x561b5c4ed358
        portal = 0x561b5f832120
        receiver = 0x561b5cfce9e8
        format = 0
        parsetree_item__state = {l = 0x561b5c4ec928, i = 0}
        dest = DestRemote
        oldcontext = 0x561b5c51bc90
        parsetree_list = 0x561b5c4ec928
        parsetree_item = 0x561b5c4ec940
        save_log_statement_stats = false
        was_logged = true
        use_implicit_block = false
        msec_str = 
"\240\270N\\\033V\000\000j\000\000\000\006\000\000\000\060\231\066\024\376\177\000\000\062\001\034[j\000\000"
        __func__ = "exec_simple_query"
#19 0x0000561b5b390d39 in PostgresMain (dbname=0x561b5c5163e0 "regression", 
username=0x561b5c5163b8 "jcasanov") at postgres.c:4581
        query_string = 0x561b5c4eb8a0 "update public.quad_poly_tbl set \n  id = 
public.quad_poly_tbl.id\nreturning \n  public.quad_poly_tbl.id as c0"
        firstchar = 81
        input_message = {
          data = 0x561b5c4eb8a0 "update public.quad_poly_tbl set \n  id = 
public.quad_poly_tbl.id\nreturning \n  public.quad_poly_tbl.id as c0", 
          len = 107, maxlen = 1024, cursor = 107}
        local_sigjmp_buf = {{__jmpbuf = {0, 2860372577472678815, 
94675489186000, 140729237544704, 0, 0, 2860372577384598431, 
8392611473924361119}, 
            __mask_was_saved = 1, __saved_mask = {__val = {4194304, 1, 15679, 
15680, 979, 18446744073709551536, 0, 0, 139906276254835, 3904, 0, 
                140729237543680, 94675489186000, 140729237544704, 
94675496494014, 15616}}}}
        send_ready_for_query = false
        idle_in_transaction_timeout_enabled = false
        idle_session_timeout_enabled = false
        __func__ = "PostgresMain"
#20 0x0000561b5b2c8a02 in BackendRun (port=0x561b5c51a640) at postmaster.c:4504
No locals.
#21 0x0000561b5b2c834e in BackendStartup (port=0x561b5c51a640) at 
postmaster.c:4232
        bn = 0x561b5c50d830
        pid = 0
        __func__ = "BackendStartup"
#22 0x0000561b5b2c4793 in ServerLoop () at postmaster.c:1806
        port = 0x561b5c51a640
        i = 2
        rmask = {fds_bits = {256, 0 <repeats 15 times>}}
        selres = 1
        now = 1664163286
        readmask = {fds_bits = {448, 0 <repeats 15 times>}}
        nSockets = 9
        last_lockfile_recheck_time = 1664163252
        last_touch_time = 1664163132
        __func__ = "ServerLoop"
#23 0x0000561b5b2c3fe2 in PostmasterMain (argc=3, argv=0x561b5c4e4eb0) at 
postmaster.c:1478
        opt = -1
        status = 0
        userDoption = 0x561b5c5088a0 "data1"
        listen_addr_saved = true
        i = 64
        output_config_variable = 0x0
        __func__ = "PostmasterMain"
#24 0x0000561b5b1c33da in main (argc=3, argv=0x561b5c4e4eb0) at main.c:202
        do_check_root = true

Reply via email to