Hi,

We have observed a random server crash (FailedAssertion), while running few
tests at our end. Stack-trace is attached.

By looking at the stack-trace, and as discussed it with my team members;
what we have observed that in SearchCatCacheList(), we are incrementing
refcount and then decrementing it at the end. However for some reason, if
we are in TRY() block (where we increment the refcount), and hit with any
interrupt, we failed to decrement the refcount due to which later we get
assertion failure.

To mimic the scenario, I have added a sleep in SearchCatCacheList() as
given below:

diff --git a/src/backend/utils/cache/catcache.c
b/src/backend/utils/cache/catcache.c
index e7e8e3b..eb6d4b5 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -1520,6 +1520,9 @@ SearchCatCacheList(CatCache *cache,
            hashValue = CatalogCacheComputeTupleHashValue(cache, ntp);
            hashIndex = HASH_INDEX(hashValue, cache->cc_nbuckets);

+           elog(INFO, "Sleeping for 0.1 seconds.");
+           pg_usleep(100000L); /* 0.1 seconds */
+
            bucket = &cache->cc_bucket[hashIndex];
            dlist_foreach(iter, bucket)
            {

And then followed these steps to get a server crash:

-- Terminal 1
DROP TYPE typ;
DROP FUNCTION func(x int);

CREATE TYPE typ AS (X VARCHAR(50), Y INT);

CREATE OR REPLACE FUNCTION func(x int) RETURNS int AS $$
DECLARE
  rec typ;
  var2 numeric;
BEGIN
  RAISE NOTICE 'Function Called.';
  REC.X := 'Hello';
  REC.Y := 0;

  IF (rec.Y + var2) = 0 THEN
    RAISE NOTICE 'Check Pass';
  END IF;

  RETURN 1;
END;
$$ LANGUAGE plpgsql;

SELECT pg_backend_pid();

SELECT func(1);

-- Terminal 2, should be run in parallel when SELECT func(1) is in progress
in terminal 1.
SELECT pg_terminate_backend(<pid of backend obtained in terminal 1>);


I thought it worth posting here to get others attention.

I have observed this on the master branch, but can also be reproducible on
back-branches.

Thanks
-- 
Jeevan Chalke
Principal Software Engineer, Product Development
EnterpriseDB Corporation
The Enterprise PostgreSQL Company
Program terminated with signal 6, Aborted.
#0  0x00007fdff78951d7 in raise () from /lib64/libc.so.6
Missing separate debuginfos, use: debuginfo-install 
audit-libs-2.6.5-3.el7_3.1.x86_64 cyrus-sasl-lib-2.1.26-20.el7_2.x86_64 
glibc-2.17-157.el7_3.1.x86_64 keyutils-libs-1.5.8-3.el7.x86_64 
krb5-libs-1.14.1-27.el7_3.x86_64 libcap-ng-0.7.5-4.el7.x86_64 
libcom_err-1.42.9-9.el7.x86_64 libgcc-4.8.5-11.el7.x86_64 
libicu-50.1.2-15.el7.x86_64 libselinux-2.5-6.el7.x86_64 
libstdc++-4.8.5-11.el7.x86_64 nspr-4.13.1-1.0.el7_3.x86_64 
nss-3.28.2-1.6.el7_3.x86_64 nss-softokn-freebl-3.16.2.3-14.4.el7.x86_64 
nss-util-3.28.2-1.1.el7_3.x86_64 openldap-2.4.40-13.el7.x86_64 
openssl-libs-1.0.1e-60.el7_3.1.x86_64 pam-1.1.8-18.el7.x86_64 
pcre-8.32-15.el7_2.1.x86_64 zlib-1.2.7-17.el7.x86_64
(gdb) bt
#0  0x00007fdff78951d7 in raise () from /lib64/libc.so.6
#1  0x00007fdff78968c8 in abort () from /lib64/libc.so.6
#2  0x00000000009d43ff in ExceptionalCondition (conditionName=0xc02255 
"!(ct->refcount == 0)", errorType=0xc020b4 "FailedAssertion", 
    fileName=0xc02060 "catcache.c", lineNumber=567) at assert.c:54
#3  0x00000000009b59e0 in AtEOXact_CatCache (isCommit=0 '\000') at 
catcache.c:567
#4  0x0000000000538a8d in AbortTransaction () at xact.c:2613
#5  0x000000000053a95e in AbortOutOfAnyTransaction () at xact.c:4271
#6  0x00000000009e8e99 in ShutdownPostgres (code=1, arg=0) at postinit.c:1146
#7  0x00000000008456e1 in shmem_exit (code=1) at ipc.c:228
#8  0x00000000008455d5 in proc_exit_prepare (code=1) at ipc.c:185
#9  0x0000000000845543 in proc_exit (code=1) at ipc.c:102
#10 0x00000000009d4bae in errfinish (dummy=0) at elog.c:543
#11 0x00000000008761b4 in ProcessInterrupts () at postgres.c:2882
#12 0x00000000009d4bea in errfinish (dummy=0) at elog.c:565
#13 0x00000000009d7097 in elog_finish (elevel=17, fmt=0xc02678 "Sleeping for 
0.1 seconds.") at elog.c:1378
#14 0x00000000009b765b in SearchCatCacheList (cache=0x27636d0, nkeys=1, 
v1=11604184, v2=0, v3=0, v4=0) at catcache.c:1523
#15 0x00000000009cbd20 in SearchSysCacheList (cacheId=37, nkeys=1, 
key1=11604184, key2=0, key3=0, key4=0) at syscache.c:1338
#16 0x000000000056e22e in OpernameGetCandidates (names=0x27f3b08, oprkind=98 
'b', missing_schema_ok=0 '\000') at namespace.c:1576
#17 0x00000000005f09a6 in oper (pstate=0x27f3ec0, opname=0x27f3b08, ltypeId=23, 
rtypeId=1700, noError=0 '\000', location=14) at parse_oper.c:414
#18 0x00000000005f132d in make_op (pstate=0x27f3ec0, opname=0x27f3b08, 
ltree=0x27f40f0, rtree=0x27f4128, last_srf=0x0, location=14) at parse_oper.c:778
#19 0x00000000005e5e8d in transformAExprOp (pstate=0x27f3ec0, a=0x27f3a60) at 
parse_expr.c:933
#20 0x00000000005e454e in transformExprRecurse (pstate=0x27f3ec0, 
expr=0x27f3a60) at parse_expr.c:217
#21 0x00000000005e5e44 in transformAExprOp (pstate=0x27f3ec0, a=0x27f3b78) at 
parse_expr.c:930
#22 0x00000000005e454e in transformExprRecurse (pstate=0x27f3ec0, 
expr=0x27f3b78) at parse_expr.c:217
#23 0x00000000005e4293 in transformExpr (pstate=0x27f3ec0, expr=0x27f3b78, 
exprKind=EXPR_KIND_SELECT_TARGET) at parse_expr.c:156
#24 0x00000000005f8b09 in transformTargetEntry (pstate=0x27f3ec0, 
node=0x27f3b78, expr=0x0, exprKind=EXPR_KIND_SELECT_TARGET, colname=0x0, 
resjunk=0 '\000')
    at parse_target.c:103
#25 0x00000000005f8cd1 in transformTargetList (pstate=0x27f3ec0, 
targetlist=0x27f3cd8, exprKind=EXPR_KIND_SELECT_TARGET) at parse_target.c:191
#26 0x00000000005a221e in transformSelectStmt (pstate=0x27f3ec0, 
stmt=0x27f3d10) at analyze.c:1214
#27 0x00000000005a0796 in transformStmt (pstate=0x27f3ec0, parseTree=0x27f3d10) 
at analyze.c:297
#28 0x00000000005a06c4 in transformOptionalSelectInto (pstate=0x27f3ec0, 
parseTree=0x27f3d10) at analyze.c:242
#29 0x00000000005a0582 in transformTopLevelStmt (pstate=0x27f3ec0, 
parseTree=0x27f3e28) at analyze.c:192
#30 0x0000000000872f73 in pg_analyze_and_rewrite_params (parsetree=0x27f3e28, 
query_string=0x27e6df8 "SELECT (rec.Y + var2) = 0", 
    parserSetup=0x7fdfe338e7c6 <plpgsql_parser_setup>, 
parserSetupArg=0x27e6d60, queryEnv=0x0) at postgres.c:700
#31 0x00000000006f9867 in _SPI_prepare_plan (src=0x27e6df8 "SELECT (rec.Y + 
var2) = 0", plan=0x7fff82089fd0) at spi.c:1792
#32 0x00000000006f7333 in SPI_prepare_params (src=0x27e6df8 "SELECT (rec.Y + 
var2) = 0", parserSetup=0x7fdfe338e7c6 <plpgsql_parser_setup>, 
    parserSetupArg=0x27e6d60, cursorOptions=256) at spi.c:549
#33 0x00007fdfe3397cf6 in exec_prepare_plan (estate=0x7fff8208a390, 
expr=0x27e6d60, cursorOptions=256) at pl_exec.c:3571
#34 0x00007fdfe339b19f in exec_eval_expr (estate=0x7fff8208a390, 
expr=0x27e6d60, isNull=0x7fff8208a127 "", rettype=0x7fff8208a0f4, 
rettypmod=0x7fff8208a0f0)
    at pl_exec.c:5181
#35 0x00007fdfe339b114 in exec_eval_boolean (estate=0x7fff8208a390, 
expr=0x27e6d60, isNull=0x7fff8208a127 "") at pl_exec.c:5153
#36 0x00007fdfe33944e2 in exec_stmt_if (estate=0x7fff8208a390, stmt=0x27e6f50) 
at pl_exec.c:1826
#37 0x00007fdfe3393dab in exec_stmt (estate=0x7fff8208a390, stmt=0x27e6f50) at 
pl_exec.c:1571
#38 0x00007fdfe3393c41 in exec_stmts (estate=0x7fff8208a390, stmts=0x27e67f8) 
at pl_exec.c:1522
#39 0x00007fdfe3393ae7 in exec_stmt_block (estate=0x7fff8208a390, 
block=0x27e7110) at pl_exec.c:1460
#40 0x00007fdfe3391b52 in plpgsql_exec_function (func=0x2748e70, 
fcinfo=0x27b6328, simple_eval_estate=0x0) at pl_exec.c:465
#41 0x00007fdfe338c13f in plpgsql_call_handler (fcinfo=0x27b6328) at 
pl_handler.c:258
#42 0x00000000006ad7c9 in ExecInterpExpr (state=0x27b6240, econtext=0x27b5ef0, 
isnull=0x7fff8208a807 "") at execExprInterp.c:650
#43 0x00000000006e6231 in ExecEvalExprSwitchContext (state=0x27b6240, 
econtext=0x27b5ef0, isNull=0x7fff8208a807 "")
    at ../../../src/include/executor/executor.h:307
#44 0x00000000006e629a in ExecProject (projInfo=0x27b6238) at 
../../../src/include/executor/executor.h:341
#45 0x00000000006e6462 in ExecResult (pstate=0x27b5dd8) at nodeResult.c:136
#46 0x00000000006bd465 in ExecProcNodeFirst (node=0x27b5dd8) at 
execProcnode.c:430
#47 0x00000000006b6168 in ExecProcNode (node=0x27b5dd8) at 
../../../src/include/executor/executor.h:249
#48 0x00000000006b887d in ExecutePlan (estate=0x27b5bc0, planstate=0x27b5dd8, 
use_parallel_mode=0 '\000', operation=CMD_SELECT, sendTuples=1 '\001', 
    numberTuples=0, direction=ForwardScanDirection, dest=0x278f148, 
execute_once=1 '\001') at execMain.c:1693
#49 0x00000000006b670a in standard_ExecutorRun (queryDesc=0x273b770, 
direction=ForwardScanDirection, count=0, execute_once=1 '\001') at 
execMain.c:362
#50 0x00000000006b6536 in ExecutorRun (queryDesc=0x273b770, 
direction=ForwardScanDirection, count=0, execute_once=1 '\001') at 
execMain.c:305
#51 0x00000000008797d4 in PortalRunSelect (portal=0x27c7170, forward=1 '\001', 
count=0, dest=0x278f148) at pquery.c:932
#52 0x0000000000879473 in PortalRun (portal=0x27c7170, 
count=9223372036854775807, isTopLevel=1 '\001', run_once=1 '\001', 
dest=0x278f148, altdest=0x278f148, 
    completionTag=0x7fff8208ab80 "") at pquery.c:773
#53 0x0000000000873663 in exec_simple_query (query_string=0x2793270 "SELECT 
func(-1);") at postgres.c:1099
#54 0x000000000087781b in PostgresMain (argc=1, argv=0x273f780, 
dbname=0x27136d0 "postgres", username=0x273f5e8 "jeevan") at postgres.c:4090
#55 0x00000000007dbfaa in BackendRun (port=0x2737cd0) at postmaster.c:4357
#56 0x00000000007db732 in BackendStartup (port=0x2737cd0) at postmaster.c:4029
#57 0x00000000007d7d2a in ServerLoop () at postmaster.c:1753
#58 0x00000000007d7369 in PostmasterMain (argc=3, argv=0x2711540) at 
postmaster.c:1361
#59 0x00000000007169f0 in main (argc=3, argv=0x2711540) at main.c:228
(gdb) 

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to