Re: Postgres stucks in deadlock detection

Konstantin Knizhnik Fri, 13 Apr 2018 06:44:09 -0700

Updated patch is attached.

On 05.04.2018 11:03, Konstantin Knizhnik wrote:

Hi,
Thank for your feedback.
On 04.04.2018 21:15, Andres Freund wrote:
Hi,

On 2018-04-04 11:54:14 +0300, Konstantin Knizhnik wrote:
Several times we and our customers are suffered from the problem that
Postgres got stuck in deadlock detection.
One scenario is YCSB workload with Zipf's distribution when many clients are
trying to update the same record and compete for it's lock.
Another scenario is large number of clients performing inserts in the same
table. In this case the source of the problem is relation extension lock.
In both cases number of connection is large enough: several hundreds.
Have you ever observed that in the field? This sounds more artificial
than real to me.
Yes, it was real customer's problem. And it is not so difficult toreproduce it: you just need to have system with larger number of cores(in our case 72 physical cores)
and spawn 1000 clients which performs normal inserts to the same table:


cat test.sql
begin;
insert into test select i, md5(i::text) from generate_series(1,1000) AS i;
end;

pgbench -U postgres test -c 1000 -j 36 -T 3600 -P 1 -f test.sql


With deadlock detection timeout equal to 100msec it happens immediately.
With default 1 second deadlock timeout it will take about 12 minutesafter which the following messages can be found in the log:
2018-03-23 20:25:27.287 MSK [41469] STATEMENT: insert into test selecti, md5(i::text) from generate_series(1,1000) AS i;2018-03-23 20:25:27.287 MSK [41707] LOG: process 41707 acquiredExclusiveLock on extension of relation 16387 of database 16384 after61294.863 ms2018-03-23 20:25:27.287 MSK [41707] STATEMENT: insert into test selecti, md5(i::text) from generate_series(1,1000) AS i;2018-03-23 20:25:27.303 MSK [42251] LOG: process 42251 still waitingfor ExclusiveLock on extension of relation 16387 of database 16384after 1121.274 ms2018-03-23 20:25:27.303 MSK [42251] DETAIL: Process holding the lock:41707. Wait queue: 41577, 41880, 41482, 41649, 42275, 42237, 42117,41430, 42121, 42091, 41507, 42265, 41738, 42018, 41490, 42048, 42269,42108, 41843, 42213, 42134, 42341, 42083, 41763, 41536, 41980, 41793,41878, 42065, 42152, 42193, 42022, 42103, 42354, 41849, 42120, 41520,41593, 42020, 42123, 42012, 42235, 42242, 41982, 41661, 41734, 42385,41756, 41733, 41415, 41874, 42161, 41749, 41431, 42175, 42100, 42222,41435, 41762, 42352, 41840, 41879, 42348, 41445, 42088, 42187, 41992,42327, 42258, 41586, 42034, 41851, 41440, 42192, 41726, 41471, 42185,41607, 41844, 42016, 41868, 41397, 41932, 42343, 41545, 41690, 41549,41684, 42304, 42105, 41551, 41580, 41728, 42172, 41898, 41560, 41411,41657, 41444, 42170, 41481, 42241, 42263, 41884, 42014, 41608, 42289,42191, 42067, 42011, 41959, 41681, 41768, 42194, 42090, 41527, 41655,41638, 41458, 41552, 41446, 41403, 41666, 42021, 41614, 41542, 41588,41937, 42008, 42280, 42071, 41390, 41483...
At this moment TPS drops to zero and it is even not possible to loginto Postgres.
Increasing deadlock timeout to one minute still doesn't help:
2018-04-02 17:16:59 MSK [644699] [6-1]db=bp,user=bp,app=[unknown],client=172.29.224.48 LOG: process 644699acquired ExclusiveLock on extension of relation 23554 of database16385 after 585248.495 ms2018-04-02 17:16:59 MSK [629624]: [6-1]db=bp,user=bp,app=[unknown],client=172.29.224.64 LOG: process 629624acquired ExclusiveLock on extension of relation 23554 of database16385 after 585132.924 ms2018-04-02 17:16:59 MSK [646297]: [6-1]db=bp,user=bp,app=[unknown],client=172.29.224.57 LOG: process 646297acquired ExclusiveLock on extension of relation 23554 of database16385 after 584982.438 ms2018-04-02 17:16:59 MSK [629264]: [6-1]db=bp,user=bp,app=[unknown],client=172.29.224.66 LOG: process 629264acquired ExclusiveLock on extension of relation 23554 of database16385 after 584959.859 ms
And this is the backtrace of backend which blocks the systems:

#0  0x00007fde34a73576 in do_futex_wait.constprop () from /lib64/libpthread.so.0
#1  0x00007fde34a73668 in __new_sem_wait_slow.constprop.0 () from 
/lib64/libpthread.so.0
#2  0x0000000000686b02 in PGSemaphoreLock (sema=0x7fcdc7748618) at pg_sema.c:310
#3  0x00000000006ec0d4 in LWLockAcquire (lock=0x7fcdc774ee80, 
mode=mode@entry=LW_EXCLUSIVE) at lwlock.c:1233
#4  0x00000000006e99c5 in CheckDeadLock () at proc.c:1671
#5  ProcSleep (locallock=locallock@entry=0x118a158, 
lockMethodTable=lockMethodTable@entry=0x9538a0 <default_lockmethod>) at 
proc.c:1261
#6  0x00000000006e49ef in WaitOnLock (locallock=locallock@entry=0x118a158, 
owner=owner@entry=0x12490f0) at lock.c:1702
#7  0x00000000006e5c0b in LockAcquireExtended 
(locktag=locktag@entry=0x7ffc5b5f7d10, lockmode=lockmode@entry=7, 
sessionLock=sessionLock@entry=0'\000', dontWait=dontWait@entry=0'\000',
     reportMemoryError=reportMemoryError@entry=1'\001') at lock.c:998
#8  0x00000000006e6221 in LockAcquire (locktag=locktag@entry=0x7ffc5b5f7d10, 
lockmode=lockmode@entry=7, sessionLock=sessionLock@entry=0'\000', 
dontWait=dontWait@entry=0'\000') at lock.c:688
#9  0x00000000006e3d7c in LockRelationForExtension 
(relation=relation@entry=0x7fcdc73ecb68, lockmode=lockmode@entry=7) at 
lmgr.c:340
#10 0x00000000004b6ce4 in RelationGetBufferForTuple 
(relation=relation@entry=0x7fcdc73ecb68, len=72, 
otherBuffer=otherBuffer@entry=0, options=options@entry=0, 
bistate=bistate@entry=0x0,
     vmbuffer=vmbuffer@entry=0x7ffc5b5f7dc0, vmbuffer_other=0x0) at hio.c:526
#11 0x00000000004b04db in heap_insert (relation=relation@entry=0x7fcdc73ecb68, 
tup=tup@entry=0x12768f8, cid=<optimized out>, options=options@entry=0, 
bistate=bistate@entry=0x0) at heapam.c:2418
#12 0x00000000005fce33 in ExecInsert (canSetTag=1'\001', estate=0x1263148, 
onconflict=<optimized out>, arbiterIndexes=0x0, planSlot=0x12638c8, 
slot=0x12638c8, mtstate=0x1263498)
     at nodeModifyTable.c:513
#13 ExecModifyTable (pstate=0x1263498) at nodeModifyTable.c:1781
#14 0x00000000005e0a5a in ExecProcNode (node=0x1263498) at 
../../../src/include/executor/executor.h:250
#15 ExecutePlan (execute_once=<optimized out>, dest=0x1260e18, direction=<optimized out>, 
numberTuples=0, sendTuples=<optimized out>, operation=CMD_INSERT, use_parallel_mode=<optimized 
out>,
     planstate=0x1263498, estate=0x1263148) at execMain.c:1722
#16 standard_ExecutorRun (queryDesc=0x1291628, direction=<optimized out>, count=0, 
execute_once=<optimized out>) at execMain.c:363
#17 0x00000000006fc972 in ProcessQuery (plan=<optimized out>, sourceText=0x1236598"insert into test select i, md5(i::text) from generate_series(1,1000)AS i;", params=0x0, queryEnv=0x0,
     dest=0x1260e18, completionTag=0x7ffc5b5f81d0 "") at pquery.c:161
#18 0x00000000006fcbb0 in PortalRunMulti (portal=portal@entry=0x1183888, 
isTopLevel=isTopLevel@entry=1'\001', 
setHoldSnapshot=setHoldSnapshot@entry=0'\000', dest=dest@entry=0x1260e18,
     altdest=altdest@entry=0x1260e18, completionTag=completionTag@entry=0x7ffc5b5f81d0 
"") at pquery.c:1286
#19 0x00000000006fd651 in PortalRun (portal=portal@entry=0x1183888, 
count=count@entry=9223372036854775807, isTopLevel=isTopLevel@entry=1'\001', 
run_once=run_once@entry=1'\001',
     dest=dest@entry=0x1260e18, altdest=altdest@entry=0x1260e18, 
completionTag=0x7ffc5b5f81d0 "") at pquery.c:799
#20 0x00000000006f974f in exec_simple_query (query_string=0x1236598"insert into test select i, md5(i::text) from generate_series(1,1000)AS i;") at postgres.c:1099
#21 0x00000000006fb594 in PostgresMain (argc=<optimized out>, argv=argv@entry=0x11920e8, 
dbname=<optimized out>, username=<optimized out>) at postgres.c:4088
#22 0x0000000000479887 in BackendRun (port=0x11862b0) at postmaster.c:4405
#23 BackendStartup (port=0x11862b0) at postmaster.c:4077
#24 ServerLoop () at postmaster.c:1755
#25 0x0000000000694fec in PostmasterMain (argc=argc@entry=3, 
argv=argv@entry=0x1165250) at postmaster.c:1363
#26 0x000000000047aca9 in main (argc=3, argv=0x1165250) at main.c:228
So, I see three possible ways to fix this problem:
1. Yury Sololov's patch with two phase deadlock check
2. Avoid concurrent deadlock detection
3. Avoid concurrent deadlock detection + let CheckDeadLock detect all
deadlocks, not only those in which current transaction is involved.
4) Don't do anything about deadlock detection, because this is just a
    symptom five steps removed from the cause.

We've to pay attention to fixing actual problems, rather than purely
benchmark ones. Complexity has a significant price.
Sorry, I once again has to repeat that it was real customer's problemand the bgbench scenario described above is just result of ourattempts to reproduce and investigate the problem.Moreover - this is not the first we have faced with this problem withpostgres stalled because of cascade of deadlock detection timeoutsexpiration.
I do not think that deadlock detection is just "a symptom".
It has quite simple explanation: deadlock detection tries toexclusively obtain all partition locks.So if under high load deadlock timeout is expired and deadlockdetection is launched, then it cause avalanche of lock conflicts whichcause expiration of deadlock timeouts in other backends,which also try to perform deadlock detection and compete for thislocks... And finally system is stalled.
The proposed patch fixes the problem: there are still some periodicfall downs of performance but it never drops to zero and it alwayspossible to login to the system.So I can not agree with you that deadlock detection is "just a symptomfive steps removed from the cause".
Certainly, it is possible to find some other solution of the problem,for example do not use heavy weight locks for relation extension.There is huge discussion about it and patch at commitfest with norecent activity and obscure perspectives:
https://www.postgresql.org/message-id/flat/CA%2BTgmobuP%2BQoJgaj2HWz5VoDgVA0KP7UGtTREY%2BPeghbPtWZLg%40mail.gmail.com#ca+tgmobup+qojgaj2hwz5vodgva0kp7ugttrey+peghbptw...@mail.gmail.com
But once again please notice that the same problem takes place notonly with relation extension locks.
Please read description of the problem in the Yury Sokolov's patch:

https://commitfest.postgresql.org/18/1602/
In this case there are only updates and tuple locks but still thesource of the problem is deadlock detection and simplest workaround isjust to increase deadlock timeout.
If you want, I can provide you access at our system where allnecessary infrastructure is already install and you can easilyreproduce the problem and investigate it yourself.But if something looks like a duck, swims like a duck, and quacks likea duck, then it probably /is/ a duck;)
It seems to be obvious the obtaining all partition locks in CheckDeadLock:

    /*
* Acquire exclusive lock on the entire shared lock datastructures. Must
     * grab LWLocks in partition-number order to avoid LWLock deadlock.
     *
     * Note that the deadlock check interrupt had better not be enabled
* anywhere that this process itself holds lock partition locks,else this
     * will wait forever.  Also note that LWLockAcquire creates a critical
     * section, so that this routine cannot be interrupted by cancel/die
     * interrupts.
     */
    for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
        LWLockAcquire(LockHashPartitionLockByIndex(i), LW_EXCLUSIVE);
can cause huge contention with other backends in which in turn cancause expiration of deadlock timeout in them and escalation of theproblem.
From the other side, the fact that any of this

a) increasing deadlock timeout
b) avoid concurrent execution of deadlock check
c) perform first stage of deadlock check under shared lock
have very noticeable effect on the observed behavior and some of themallow to completely eliminate the problem,makes me think that deadlock detection is not a symptom but the mainsource of the problem.
And the last note. I know that 1000 connections is too large forPostgres. But:
1. In customer's case the problem is reproduced even with 100 connections.
2. At modern SMPs systems with hundreds of cores running hundreds ofbackends is not something exotic/stupid.
And here we are faced with the problem which Postgres has never hasbefore:
1. Too large snapshot and long time needed for its calculation
2. High contention for procarray locks
3. Very inefficient memory usage because of local caches and preparedstatements.
...
At such systems we are faced with many new bottlenecks and we reallyneed to address them.I hope that nobody think that Postgres should normally work only atnotebooks and quad core desktops?
--
Konstantin Knizhnik
Postgres Professional:http://www.postgrespro.com
The Russian Postgres Company


--
Konstantin Knizhnik
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 6f30e08..e2c6274 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -86,6 +86,8 @@ static LOCALLOCK *lockAwaited = NULL;
 
 static DeadLockState deadlock_state = DS_NOT_YET_CHECKED;
 
+static bool inside_deadlock_check = false;
+
 /* Is a deadlock check pending? */
 static volatile sig_atomic_t got_deadlock_timeout;
 
@@ -187,6 +189,7 @@ InitProcGlobal(void)
 	ProcGlobal->checkpointerLatch = NULL;
 	pg_atomic_init_u32(&ProcGlobal->procArrayGroupFirst, INVALID_PGPROCNO);
 	pg_atomic_init_u32(&ProcGlobal->clogGroupFirst, INVALID_PGPROCNO);
+	pg_atomic_init_flag(&ProcGlobal->activeDeadlockCheck);
 
 	/*
 	 * Create and initialize all the PGPROC structures we'll need.  There are
@@ -763,6 +766,14 @@ ProcReleaseLocks(bool isCommit)
 {
 	if (!MyProc)
 		return;
+
+	/* Release deadlock detection flag is backend was interrupted inside deadlock check */
+	if (inside_deadlock_check)
+	{
+		pg_atomic_clear_flag(&ProcGlobal->activeDeadlockCheck);
+		inside_deadlock_check = false;
+	}
+
 	/* If waiting, get off wait queue (should only be needed after error) */
 	LockErrorCleanup();
 	/* Release standard locks, including session-level if aborting */
@@ -1666,6 +1677,18 @@ CheckDeadLock(void)
 {
 	int			i;
 
+
+	/*
+	 * Ensure that only one backend is checking for deadlock.
+	 * Otherwise under high load cascade of deadlock timeout expirations can cause stuck of Postgres.
+	 */
+	if (!pg_atomic_test_set_flag(&ProcGlobal->activeDeadlockCheck))
+	{
+		enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
+		return;
+	}
+	inside_deadlock_check = true;
+
 	/*
 	 * Acquire exclusive lock on the entire shared lock data structures. Must
 	 * grab LWLocks in partition-number order to avoid LWLock deadlock.
@@ -1741,6 +1764,9 @@ CheckDeadLock(void)
 check_done:
 	for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
 		LWLockRelease(LockHashPartitionLockByIndex(i));
+
+	pg_atomic_clear_flag(&ProcGlobal->activeDeadlockCheck);
+	inside_deadlock_check = false;
 }
 
 /*
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 5c19a61..04de570 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -267,6 +267,8 @@ typedef struct PROC_HDR
 	int			startupProcPid;
 	/* Buffer id of the buffer that Startup process waits for pin on, or -1 */
 	int			startupBufferPinWaitBufId;
+	/* Deadlock detection is in progress */
+	pg_atomic_flag activeDeadlockCheck;
 } PROC_HDR;
 
 extern PGDLLIMPORT PROC_HDR *ProcGlobal;

Re: Postgres stucks in deadlock detection

Reply via email to