On Wed, 27 Aug 2025 14:44:55 +0100 Dean Rasheed <dean.a.rash...@gmail.com> wrote:
> On Sun, 24 Aug 2025 at 18:34, Yugo Nagata <nag...@sraoss.co.jp> wrote: > > > > I confirmed this issue by executing the following query concurrently > > in three transactions. (With only two transactions, the issue does not > > occur.) > > Yes, I think 3 transactions are required to reproduce this (2 separate > concurrent updates). > > > I don't completely understand how this race condition occurs, > > but I believe the bug is due to the misuse of TM_FailureData > > returned by table_tuple_lock in ExecMergeMatched(). > > > > Currently, TM_FailureData.ctid is used as a reference to the > > latest version of oldtuple, but this is not always correct. > > Instead, the tupleid passed to table_tuple_lock should be used. > > > > I've attached a patch to fix this. > > Thanks. That makes sense. > > I think we also should update the isolation tests to test this. > Attached is an update to the merge-match-recheck isolation test, doing > so. As you found, it doesn't always seem to fail with the unpatched > code (though I didn't look to see why), but with your patch, it always > passes. Thank you for your suggestion and the test patch. The test looks good to me, so I’ve attached an updated patch including it. Regards, Yugo Nagata -- Yugo Nagata <nag...@sraoss.co.jp>
>From a5490eaf22e3c6d0145c4daf3272a7c0bbd6dc6f Mon Sep 17 00:00:00 2001 From: Yugo Nagata <nag...@sraoss.co.jp> Date: Mon, 25 Aug 2025 01:57:25 +0900 Subject: [PATCH v2] Fix misuse of TM_FailureData.ctid in ExecMergeMatched Previously, the ctid field of TM_FailureData returned by table_tuple_lock() was used as a reference to the latest version of oldtuple, but this was not always correct. Instead, the tupleid passed to table_tuple_lock() should be used. This misuse caused incorrect UPDATE results when more than two MERGE commands were executed concurrently. --- src/backend/executor/nodeModifyTable.c | 7 +- .../expected/merge-match-recheck.out | 175 +++++++++++++++++- .../isolation/specs/merge-match-recheck.spec | 18 ++ 3 files changed, 196 insertions(+), 4 deletions(-) diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 7c6c2c1f6e4..ea1457ce3bf 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -3450,12 +3450,13 @@ lmerge_matched: if (ItemPointerIsValid(&lockedtid)) UnlockTuple(resultRelInfo->ri_RelationDesc, &lockedtid, InplaceUpdateTupleLock); - LockTuple(resultRelInfo->ri_RelationDesc, &context->tmfd.ctid, + LockTuple(resultRelInfo->ri_RelationDesc, tupleid, InplaceUpdateTupleLock); - lockedtid = context->tmfd.ctid; + lockedtid = *tupleid; } + if (!table_tuple_fetch_row_version(resultRelationDesc, - &context->tmfd.ctid, + tupleid, SnapshotAny, resultRelInfo->ri_oldTupleSlot)) elog(ERROR, "failed to fetch the target tuple"); diff --git a/src/test/isolation/expected/merge-match-recheck.out b/src/test/isolation/expected/merge-match-recheck.out index 90300f1db5a..5f91b221a21 100644 --- a/src/test/isolation/expected/merge-match-recheck.out +++ b/src/test/isolation/expected/merge-match-recheck.out @@ -1,4 +1,4 @@ -Parsed test spec with 2 sessions +Parsed test spec with 3 sessions starting permutation: update1 merge_status c2 select1 c1 step update1: UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1; @@ -271,6 +271,179 @@ key|balance|status|val step c1: COMMIT; +starting permutation: update1 merge_bal c2 select1 c1 +step update1: UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1; +step merge_bal: + MERGE INTO target t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3'; + <waiting ...> +step c2: COMMIT; +step merge_bal: <... completed> +step select1: SELECT * FROM target; +key|balance|status|val +---+-------+------+------------------------------ + 1| 680|s1 |setup updated by update1 when2 +(1 row) + +step c1: COMMIT; + +starting permutation: update1_pa merge_bal_pa c2 select1_pa c1 +step update1_pa: UPDATE target_pa t SET balance = balance + 10, val = t.val || ' updated by update1_pa' WHERE t.key = 1; +step merge_bal_pa: + MERGE INTO target_pa t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3'; + <waiting ...> +step c2: COMMIT; +step merge_bal_pa: <... completed> +step select1_pa: SELECT * FROM target_pa; +key|balance|status|val +---+-------+------+--------------------------------- + 1| 680|s1 |setup updated by update1_pa when2 +(1 row) + +step c1: COMMIT; + +starting permutation: update1_tg merge_bal_tg c2 select1_tg c1 +s2: NOTICE: Update: (1,160,s1,setup) -> (1,170,s1,"setup updated by update1_tg") +step update1_tg: UPDATE target_tg t SET balance = balance + 10, val = t.val || ' updated by update1_tg' WHERE t.key = 1; +step merge_bal_tg: + WITH t AS ( + MERGE INTO target_tg t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3' + RETURNING t.* + ) + SELECT * FROM t; + <waiting ...> +step c2: COMMIT; +s1: NOTICE: Update: (1,170,s1,"setup updated by update1_tg") -> (1,680,s1,"setup updated by update1_tg when2") +step merge_bal_tg: <... completed> +key|balance|status|val +---+-------+------+--------------------------------- + 1| 680|s1 |setup updated by update1_tg when2 +(1 row) + +step select1_tg: SELECT * FROM target_tg; +key|balance|status|val +---+-------+------+--------------------------------- + 1| 680|s1 |setup updated by update1_tg when2 +(1 row) + +step c1: COMMIT; + +starting permutation: update1 b3 update1b merge_bal c2 c3 select1 c1 +step update1: UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1; +step b3: BEGIN ISOLATION LEVEL READ COMMITTED; +step update1b: UPDATE target t SET balance = balance - 100, val = t.val || ' updated by update1b' WHERE t.key = 1; <waiting ...> +step merge_bal: + MERGE INTO target t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3'; + <waiting ...> +step c2: COMMIT; +step update1b: <... completed> +step c3: COMMIT; +step merge_bal: <... completed> +step select1: SELECT * FROM target; +key|balance|status|val +---+-------+------+-------------------------------------------------- + 1| 140|s1 |setup updated by update1 updated by update1b when1 +(1 row) + +step c1: COMMIT; + +starting permutation: update1_pa b3 update1b_pa merge_bal_pa c2 c3 select1_pa c1 +step update1_pa: UPDATE target_pa t SET balance = balance + 10, val = t.val || ' updated by update1_pa' WHERE t.key = 1; +step b3: BEGIN ISOLATION LEVEL READ COMMITTED; +step update1b_pa: UPDATE target_pa t SET balance = balance - 100, val = t.val || ' updated by update1b_pa' WHERE t.key = 1; <waiting ...> +step merge_bal_pa: + MERGE INTO target_pa t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3'; + <waiting ...> +step c2: COMMIT; +step update1b_pa: <... completed> +step c3: COMMIT; +step merge_bal_pa: <... completed> +step select1_pa: SELECT * FROM target_pa; +key|balance|status|val +---+-------+------+-------------------------------------------------------- + 1| 140|s1 |setup updated by update1_pa updated by update1b_pa when1 +(1 row) + +step c1: COMMIT; + +starting permutation: update1_tg b3 update1b_tg merge_bal_tg c2 c3 select1_tg c1 +s2: NOTICE: Update: (1,160,s1,setup) -> (1,170,s1,"setup updated by update1_tg") +step update1_tg: UPDATE target_tg t SET balance = balance + 10, val = t.val || ' updated by update1_tg' WHERE t.key = 1; +step b3: BEGIN ISOLATION LEVEL READ COMMITTED; +step update1b_tg: UPDATE target_tg t SET balance = balance - 100, val = t.val || ' updated by update1b_tg' WHERE t.key = 1; <waiting ...> +step merge_bal_tg: + WITH t AS ( + MERGE INTO target_tg t + USING (SELECT 1 as key) s + ON s.key = t.key + WHEN MATCHED AND balance < 100 THEN + UPDATE SET balance = balance * 2, val = t.val || ' when1' + WHEN MATCHED AND balance < 200 THEN + UPDATE SET balance = balance * 4, val = t.val || ' when2' + WHEN MATCHED AND balance < 300 THEN + UPDATE SET balance = balance * 8, val = t.val || ' when3' + RETURNING t.* + ) + SELECT * FROM t; + <waiting ...> +step c2: COMMIT; +s3: NOTICE: Update: (1,170,s1,"setup updated by update1_tg") -> (1,70,s1,"setup updated by update1_tg updated by update1b_tg") +step update1b_tg: <... completed> +step c3: COMMIT; +s1: NOTICE: Update: (1,70,s1,"setup updated by update1_tg updated by update1b_tg") -> (1,140,s1,"setup updated by update1_tg updated by update1b_tg when1") +step merge_bal_tg: <... completed> +key|balance|status|val +---+-------+------+-------------------------------------------------------- + 1| 140|s1 |setup updated by update1_tg updated by update1b_tg when1 +(1 row) + +step select1_tg: SELECT * FROM target_tg; +key|balance|status|val +---+-------+------+-------------------------------------------------------- + 1| 140|s1 |setup updated by update1_tg updated by update1b_tg when1 +(1 row) + +step c1: COMMIT; + starting permutation: update1 merge_delete c2 select1 c1 step update1: UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1; step merge_delete: diff --git a/src/test/isolation/specs/merge-match-recheck.spec b/src/test/isolation/specs/merge-match-recheck.spec index 15226e40c9e..6eba77aca13 100644 --- a/src/test/isolation/specs/merge-match-recheck.spec +++ b/src/test/isolation/specs/merge-match-recheck.spec @@ -146,6 +146,7 @@ setup BEGIN ISOLATION LEVEL READ COMMITTED; } step "update1" { UPDATE target t SET balance = balance + 10, val = t.val || ' updated by update1' WHERE t.key = 1; } +step "update1_pa" { UPDATE target_pa t SET balance = balance + 10, val = t.val || ' updated by update1_pa' WHERE t.key = 1; } step "update1_tg" { UPDATE target_tg t SET balance = balance + 10, val = t.val || ' updated by update1_tg' WHERE t.key = 1; } step "update2" { UPDATE target t SET status = 's2', val = t.val || ' updated by update2' WHERE t.key = 1; } step "update2_tg" { UPDATE target_tg t SET status = 's2', val = t.val || ' updated by update2_tg' WHERE t.key = 1; } @@ -158,6 +159,13 @@ step "update_bal1_pa" { UPDATE target_pa t SET balance = 50, val = t.val || ' up step "update_bal1_tg" { UPDATE target_tg t SET balance = 50, val = t.val || ' updated by update_bal1_tg' WHERE t.key = 1; } step "c2" { COMMIT; } +session "s3" +step "b3" { BEGIN ISOLATION LEVEL READ COMMITTED; } +step "update1b" { UPDATE target t SET balance = balance - 100, val = t.val || ' updated by update1b' WHERE t.key = 1; } +step "update1b_pa" { UPDATE target_pa t SET balance = balance - 100, val = t.val || ' updated by update1b_pa' WHERE t.key = 1; } +step "update1b_tg" { UPDATE target_tg t SET balance = balance - 100, val = t.val || ' updated by update1b_tg' WHERE t.key = 1; } +step "c3" { COMMIT; } + # merge_status sees concurrently updated row and rechecks WHEN conditions, but recheck passes and final status = 's2' permutation "update1" "merge_status" "c2" "select1" "c1" permutation "update1_tg" "merge_status_tg" "c2" "select1_tg" "c1" @@ -179,6 +187,16 @@ permutation "update_bal1" "merge_bal" "c2" "select1" "c1" permutation "update_bal1_pa" "merge_bal_pa" "c2" "select1_pa" "c1" permutation "update_bal1_tg" "merge_bal_tg" "c2" "select1_tg" "c1" +# merge_bal sees concurrently updated row and rechecks WHEN conditions, recheck passes, so final balance = 680 +permutation "update1" "merge_bal" "c2" "select1" "c1" +permutation "update1_pa" "merge_bal_pa" "c2" "select1_pa" "c1" +permutation "update1_tg" "merge_bal_tg" "c2" "select1_tg" "c1" + +# merge_bal sees row concurrently updated twice and rechecks WHEN conditions, recheck fails, so final balance = 140 +permutation "update1" "b3" "update1b" "merge_bal" "c2" "c3" "select1" "c1" +permutation "update1_pa" "b3" "update1b_pa" "merge_bal_pa" "c2" "c3" "select1_pa" "c1" +permutation "update1_tg" "b3" "update1b_tg" "merge_bal_tg" "c2" "c3" "select1_tg" "c1" + # merge_delete sees concurrently updated row and rechecks WHEN conditions, but recheck passes and row is deleted permutation "update1" "merge_delete" "c2" "select1" "c1" permutation "update1_tg" "merge_delete_tg" "c2" "select1_tg" "c1" -- 2.43.0