A production instance crashed like so.

< 2023-07-20 05:41:36.557 MDT  >LOG:  server process (PID 106001) was 
terminated by signal 11: Segmentation fault

[168417.649549] postmaster[106001]: segfault at 12 ip 00000000008f1d40 sp 
00007ffe885502e0 error 4 in postgres[400000+7dc000]

Core was generated by `postgres: main main 127.0.0.1(51936) SELECT            '.

(gdb) bt
#0  CachedPlanIsSimplyValid (plansource=0x2491398, plan=0x0, owner=0x130a198) 
at plancache.c:1441
#1  0x00007fcab2e93d15 in exec_eval_simple_expr (rettypmod=0x7ffe88550374, 
rettype=0x7ffe88550370, isNull=0x7ffe8855036f, result=<synthetic pointer>, 
expr=0x2489dc8, estate=0x7ffe885507f0) at pl_exec.c:6067
#2  exec_eval_expr (estate=0x7ffe885507f0, expr=0x2489dc8, 
isNull=0x7ffe8855036f, rettype=0x7ffe88550370, rettypmod=0x7ffe88550374) at 
pl_exec.c:5696
#3  0x00007fcab2e97d42 in exec_assign_expr (estate=estate@entry=0x7ffe885507f0, 
target=0x16250f8, expr=0x2489dc8) at pl_exec.c:5028
#4  0x00007fcab2e98675 in exec_stmt_assign (stmt=0x2489d80, stmt=0x2489d80, 
estate=0x7ffe885507f0) at pl_exec.c:2155
#5  exec_stmts (estate=estate@entry=0x7ffe885507f0, stmts=0x2489ad8) at 
pl_exec.c:2019
#6  0x00007fcab2e9b672 in exec_stmt_block (estate=estate@entry=0x7ffe885507f0, 
block=block@entry=0x248af48) at pl_exec.c:1942
#7  0x00007fcab2e9b6cb in exec_toplevel_block 
(estate=estate@entry=0x7ffe885507f0, block=0x248af48) at pl_exec.c:1633
#8  0x00007fcab2e9bf84 in plpgsql_exec_function (func=func@entry=0x1035388, 
fcinfo=fcinfo@entry=0x40a76b8, simple_eval_estate=simple_eval_estate@entry=0x0, 
simple_eval_resowner=simple_eval_resowner@entry=0x0,
    procedure_resowner=procedure_resowner@entry=0x0, atomic=atomic@entry=true) 
at pl_exec.c:622
#9  0x00007fcab2ea7454 in plpgsql_call_handler (fcinfo=0x40a76b8) at 
pl_handler.c:277
#10 0x0000000000658307 in ExecAggPlainTransByRef (setno=<optimized out>, 
aggcontext=<optimized out>, pergroup=0x41a3358, pertrans=<optimized out>, 
aggstate=<optimized out>) at execExprInterp.c:4379
#11 ExecInterpExpr (state=0x40a7870, econtext=0x1843a58, isnull=<optimized 
out>) at execExprInterp.c:1770
#12 0x0000000000670282 in ExecEvalExprSwitchContext (isNull=0x7ffe88550b47, 
econtext=<optimized out>, state=<optimized out>) at 
../../../src/include/executor/executor.h:341
#13 advance_aggregates (aggstate=<optimized out>, aggstate=<optimized out>) at 
nodeAgg.c:824
#14 agg_fill_hash_table (aggstate=0x1843630) at nodeAgg.c:2544
#15 ExecAgg (pstate=0x1843630) at nodeAgg.c:2154
#16 0x0000000000662c58 in ExecProcNodeInstr (node=0x1843630) at 
execProcnode.c:480
#17 0x000000000067c7f3 in ExecProcNode (node=0x1843630) at 
../../../src/include/executor/executor.h:259
#18 ExecHashJoinImpl (parallel=false, pstate=0x1843390) at nodeHashjoin.c:268
#19 ExecHashJoin (pstate=0x1843390) at nodeHashjoin.c:621
#20 0x0000000000662c58 in ExecProcNodeInstr (node=0x1843390) at 
execProcnode.c:480
#21 0x000000000067c7f3 in ExecProcNode (node=0x1843390) at 
../../../src/include/executor/executor.h:259
#22 ExecHashJoinImpl (parallel=false, pstate=0x18430f0) at nodeHashjoin.c:268
#23 ExecHashJoin (pstate=0x18430f0) at nodeHashjoin.c:621
#24 0x0000000000662c58 in ExecProcNodeInstr (node=0x18430f0) at 
execProcnode.c:480
#25 0x000000000068de76 in ExecProcNode (node=0x18430f0) at 
../../../src/include/executor/executor.h:259
#26 ExecSort (pstate=0x1842ee0) at nodeSort.c:149
#27 0x0000000000662c58 in ExecProcNodeInstr (node=0x1842ee0) at 
execProcnode.c:480
#28 0x000000000066ced9 in ExecProcNode (node=0x1842ee0) at 
../../../src/include/executor/executor.h:259
#29 fetch_input_tuple (aggstate=aggstate@entry=0x1842908) at nodeAgg.c:563
#30 0x00000000006701b2 in agg_retrieve_direct (aggstate=0x1842908) at 
nodeAgg.c:2346
#31 ExecAgg (pstate=0x1842908) at nodeAgg.c:2161
#32 0x0000000000662c58 in ExecProcNodeInstr (node=0x1842908) at 
execProcnode.c:480
#33 0x000000000065be12 in ExecProcNode (node=0x1842908) at 
../../../src/include/executor/executor.h:259
#34 ExecutePlan (execute_once=<optimized out>, dest=0x5fb39f8, 
direction=<optimized out>, numberTuples=0, sendTuples=true, 
operation=CMD_SELECT, use_parallel_mode=<optimized out>, planstate=0x1842908, 
estate=0x1842298)
    at execMain.c:1636
#35 standard_ExecutorRun (queryDesc=0x2356ab8, direction=<optimized out>, 
count=0, execute_once=<optimized out>) at execMain.c:363
#36 0x00007fcab32bd67d in pgss_ExecutorRun (queryDesc=0x2356ab8, 
direction=ForwardScanDirection, count=0, execute_once=<optimized out>) at 
pg_stat_statements.c:1010
#37 0x00007fcab30b7781 in explain_ExecutorRun (queryDesc=0x2356ab8, 
direction=ForwardScanDirection, count=0, execute_once=<optimized out>) at 
auto_explain.c:320
#38 0x00000000007d010e in PortalRunSelect (portal=portal@entry=0xf79598, 
forward=forward@entry=true, count=0, count@entry=9223372036854775807, 
dest=dest@entry=0x5fb39f8) at pquery.c:924
#39 0x00000000007d15bf in PortalRun (portal=<optimized out>, 
count=9223372036854775807, isTopLevel=<optimized out>, run_once=<optimized 
out>, dest=0x5fb39f8, altdest=0x5fb39f8, qc=0x7ffe885512d0) at pquery.c:768
#40 0x00000000007cd417 in exec_simple_query (
    query_string=0x15ee788 "-- report: thresrept: None: E-UTRAN/eNB KPIs: 
['2023-07-19 2023-07-20', '0 23', '4', '{\"report_type\": \"hourly\", 
\"busy_hour_granularity\": \"\", \"metric\": 
\"ue_context_abnorm_rel_enb_all_pct\"}']\n--BEGIN SQ"...) at postgres.c:1250
#41 0x00000000007cda17 in PostgresMain (dbname=<optimized out>, 
username=<optimized out>) at postgres.c:4593
#42 0x00000000004937f7 in BackendRun (port=<optimized out>, port=<optimized 
out>) at postmaster.c:4511
#43 BackendStartup (port=0xf19cb0) at postmaster.c:4239
#44 ServerLoop () at postmaster.c:1806
#45 0x000000000074ba0d in PostmasterMain (argc=argc@entry=3, 
argv=argv@entry=0xef1300) at postmaster.c:1478
#46 0x0000000000494666 in main (argc=3, argv=0xef1300) at main.c:202

(gdb) p *plansource
$1 = {magic = 195726186, raw_parse_tree = 0x24914a8, query_string = 0x2491950 
"c := array_upper(x, 1)", commandTag = CMDTAG_SELECT, param_types = 0x0, 
num_params = 0, parserSetup = 0x7fcab2e8efb0 <plpgsql_parser_setup>, 
  parserSetupArg = 0x2489dc8, cursor_options = 0, fixed_result = false, 
resultDesc = 0x2491980, context = 0x2491280, query_list = 0x2491bb8, 
relationOids = 0x0, invalItems = 0x0, search_path = 0x2491f68, 
  query_context = 0x2491aa0, rewriteRoleId = 16714, rewriteRowSecurity = true, 
dependsOnRLS = false, gplan = 0x0, is_oneshot = false, is_complete = true, 
is_saved = true, is_valid = true, generation = 30, node = {
    prev = 0x248d7b0, next = 0x24930a0}, generic_cost = 0.012500000000000001, 
total_custom_cost = 0, num_custom_plans = 0, num_generic_plans = 30}

Note that the prior query seems to have timed out in the same / similar plpgsql
statement:

65/138 E-UTRAN/eNB KPIs: e-UTRAN: hourly: _ON_AIR_FILTER_=Any Status: failed to 
run: ERROR:  canceling statement due to statement timeout
CONTEXT:  PL/pgSQL assignment "c := array_upper(x, 1)"
PL/pgSQL function array_add(anycompatiblearray,anycompatiblearray) line 12 at 
assignment

65/138 E-UTRAN/eNB KPIs: e-UTRAN: hourly: _ON_AIR_FILTER_=Off-Air: failed to 
run: connection to server at "database" (127.0.0.1), port 5432 failed: FATAL:  
the database system is in recovery mode

I haven't tried yet, but the crash is probably not consistently reproducible -
it would've run the same report yesterday and the day before, etc.

For some reason, gdb doesn't show source code in "list".  I think I've fixed
this before, but now cannot remember how.

-- 
Justin


Reply via email to