There's another class of parallel worker core dumps when testing master
with sqlsmith.  In these cases, the following assertion fails for all
workers simultaneously:

TRAP: FailedAssertion("!(mqh->mqh_partial_bytes <= nbytes)", File: "shm_mq.c", Line: 386)

The backtrace of the controlling process is always in
ExecShutdownGatherWorkers.  The queries always work fine on re-running,
so I guess there is some race condition on worker shutdown?  Backtraces
below.

regards
andreas

Core was generated by `postgres: bgworker: parallel worker for PID 30525       
'.
Program terminated with signal SIGABRT, Aborted.
#0  0x00007f5a3df91067 in __GI_raise (sig=sig@entry=6) at 
../nptl/sysdeps/unix/sysv/linux/raise.c:56
56      ../nptl/sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0  0x00007f5a3df91067 in __GI_raise (sig=sig@entry=6) at 
../nptl/sysdeps/unix/sysv/linux/raise.c:56
#1  0x00007f5a3df92448 in __GI_abort () at abort.c:89
#2  0x00000000007eabe1 in ExceptionalCondition 
(conditionName=conditionName@entry=0x984e10 "!(mqh->mqh_partial_bytes <= 
nbytes)", errorType=errorType@entry=0x82a75d "FailedAssertion", 
fileName=fileName@entry=0x984b8c "shm_mq.c", lineNumber=lineNumber@entry=386) 
at assert.c:54
#3  0x00000000006d8042 in shm_mq_sendv (mqh=0x25f17b8, 
iov=iov@entry=0x7ffc6352af00, iovcnt=iovcnt@entry=1, nowait=<optimized out>) at 
shm_mq.c:386
#4  0x00000000006d807d in shm_mq_send (mqh=<optimized out>, nbytes=<optimized 
out>, data=<optimized out>, nowait=<optimized out>) at shm_mq.c:327
#5  0x00000000005d96b9 in ExecutePlan (dest=0x25f1850, direction=<optimized 
out>, numberTuples=0, sendTuples=<optimized out>, operation=CMD_SELECT, 
use_parallel_mode=<optimized out>, planstate=0x2612da8, estate=0x2612658) at 
execMain.c:1596
#6  standard_ExecutorRun (queryDesc=0x261a660, direction=<optimized out>, 
count=0) at execMain.c:338
#7  0x00000000005dc7cf in ParallelQueryMain (seg=<optimized out>, 
toc=0x7f5a3ea6c000) at execParallel.c:735
#8  0x00000000004e617b in ParallelWorkerMain (main_arg=<optimized out>) at 
parallel.c:1035
#9  0x0000000000683862 in StartBackgroundWorker () at bgworker.c:726
#10 0x000000000068e9a2 in do_start_bgworker (rw=0x2590760) at postmaster.c:5531
#11 maybe_start_bgworker () at postmaster.c:5706
#12 0x000000000046cbba in ServerLoop () at postmaster.c:1762
#13 0x000000000069081e in PostmasterMain (argc=argc@entry=4, 
argv=argv@entry=0x256d580) at postmaster.c:1298
#14 0x000000000046d80d in main (argc=4, argv=0x256d580) at main.c:228
(gdb) attach 30525
0x00007f5a3e044e33 in __epoll_wait_nocancel () at 
../sysdeps/unix/syscall-template.S:81
81      ../sysdeps/unix/syscall-template.S: No such file or directory.
(gdb) bt
#0  0x00007f5a3e044e33 in __epoll_wait_nocancel () at 
../sysdeps/unix/syscall-template.S:81
#1  0x00000000006d1b4e in WaitEventSetWaitBlock (nevents=1, 
occurred_events=0x7ffc6352aec0, cur_timeout=-1, set=0x44251c0) at latch.c:981
#2  WaitEventSetWait (set=set@entry=0x44251c0, timeout=timeout@entry=-1, 
occurred_events=occurred_events@entry=0x7ffc6352aec0, nevents=nevents@entry=1) 
at latch.c:935
#3  0x00000000006d1f96 in WaitLatchOrSocket (latch=0x7f5a3d898494, 
wakeEvents=wakeEvents@entry=1, sock=sock@entry=-1, timeout=timeout@entry=-1) at 
latch.c:347
#4  0x00000000006d205d in WaitLatch (latch=<optimized out>, 
wakeEvents=wakeEvents@entry=1, timeout=timeout@entry=-1) at latch.c:302
#5  0x00000000004e6d64 in WaitForParallelWorkersToFinish (pcxt=0x442d4e8) at 
parallel.c:537
#6  0x00000000005dcf84 in ExecParallelFinish (pei=0x441cab8) at 
execParallel.c:541
#7  0x00000000005eeead in ExecShutdownGatherWorkers (node=node@entry=0x3e3a070) 
at nodeGather.c:416
#8  0x00000000005ef389 in ExecShutdownGather (node=0x3e3a070) at 
nodeGather.c:430
#9  0x00000000005dd03d in ExecShutdownNode (node=0x3e3a070) at 
execProcnode.c:807
#10 0x000000000061ad73 in planstate_tree_walker (planstate=0x3e361a8, 
walker=0x5dd010 <ExecShutdownNode>, context=0x0) at nodeFuncs.c:3442
#11 0x000000000061ad73 in planstate_tree_walker (planstate=0xf323c30, 
walker=0x5dd010 <ExecShutdownNode>, context=0x0) at nodeFuncs.c:3442
#12 0x000000000061ad73 in planstate_tree_walker (planstate=0xf323960, 
walker=0x5dd010 <ExecShutdownNode>, context=0x0) at nodeFuncs.c:3442
#13 0x00000000005d96da in ExecutePlan (dest=0xb826868, direction=<optimized 
out>, numberTuples=0, sendTuples=<optimized out>, operation=CMD_SELECT, 
use_parallel_mode=<optimized out>, planstate=0xf323960, estate=0xf322b28) at 
execMain.c:1576
#14 standard_ExecutorRun (queryDesc=0xddca888, direction=<optimized out>, 
count=0) at execMain.c:338
#15 0x00000000006f6e88 in PortalRunSelect (portal=portal@entry=0x258ccc8, 
forward=forward@entry=1 '\001', count=0, count@entry=9223372036854775807, 
dest=dest@entry=0xb826868) at pquery.c:946
#16 0x00000000006f83ae in PortalRun (portal=0x258ccc8, 
count=9223372036854775807, isTopLevel=<optimized out>, dest=0xb826868, 
altdest=0xb826868, completionTag=0x7ffc6352b3d0 "") at pquery.c:787
#17 0x00000000006f5c63 in exec_simple_query (query_string=<optimized out>) at 
postgres.c:1094
#18 PostgresMain (argc=39374024, argv=0x25ed130, dbname=0x256e480 "regression", 
username=0x25ed308 "0\321^\002") at postgres.c:4059
#19 0x000000000046c8b2 in BackendRun (port=0x25935d0) at postmaster.c:4258
#20 BackendStartup (port=0x25935d0) at postmaster.c:3932
#21 ServerLoop () at postmaster.c:1690
#22 0x000000000069081e in PostmasterMain (argc=argc@entry=4, 
argv=argv@entry=0x256d580) at postmaster.c:1298
#23 0x000000000046d80d in main (argc=4, argv=0x256d580) at main.c:228


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to