[ https://issues.apache.org/jira/browse/HAWQ-1618?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16494743#comment-16494743 ]
Hongxu Ma commented on HAWQ-1618: --------------------------------- This issue has already been fixed in GPDB: https://github.com/greenplum-db/gpdb/commit/f0a0a593bde8cf0c9b9bbc79061a09f7164b54f7#diff-b37908413e016b54a45d549cf0539121 We should port this fix to HAWQ. > Segment panic at workfile_mgr_close_file() when transaction ROLLBACK > -------------------------------------------------------------------- > > Key: HAWQ-1618 > URL: https://issues.apache.org/jira/browse/HAWQ-1618 > Project: Apache HAWQ > Issue Type: Bug > Components: Query Execution > Reporter: Hongxu Ma > Assignee: Hongxu Ma > Priority: Major > Fix For: 2.4.0.0-incubating > > > Log: > {code} > 2018-05-23 15:49:14.843058 > UTC,"user","db",p179799,th401824032,"172.31.6.17","6935",2018-05-23 15:47:39 > UTC,1260445558,con25148,cmd7,seg21,slice82,,x1260445558,sx1,"ERROR","25M01","*canceling > MPP operation*",,,,,,"INSERT INTO ... > 2018-05-23 15:49:15.253671 UTC,,,p179799,th0,,,2018-05-23 15:47:39 > UTC,0,con25148,cmd7,seg21,slice82,,,,"PANIC","XX000","Unexpected internal > error: Segment process r > eceived signal SIGSEGV",,,,,,,0,,,,"1 0x8ce2a3 postgres gp_backtrace + 0xa3 > 2 0x8ce491 postgres <symbol not found> + 0x8ce491 > 3 0x7f2d147ae7e0 libpthread.so.0 <symbol not found> + 0x147ae7e0 > 4 0x91f4ad postgres workfile_mgr_close_file + 0xd > 5 0x90bc84 postgres <symbol not found> + 0x90bc84 > 6 0x4e6b60 postgres AbortTransaction + 0x240 > 7 0x4e75c5 postgres AbortCurrentTransaction + 0x25 > 8 0x7ed81a postgres PostgresMain + 0x6ea > 9 0x7a0c50 postgres <symbol not found> + 0x7a0c50 > 10 0x7a3a19 postgres PostmasterMain + 0x759 > 11 0x4a5309 postgres main + 0x519 > 12 0x7f2d13cead1d libc.so.6 __libc_start_main + 0xfd > 13 0x4a5389 postgres <symbol not found> + 0x4a5389" > {code} > > Core stack: > {code} > (gdb) bt > #0 0x00007f2d147ae6ab in raise () from libpthread.so.0 > #1 0x00000000008ce552 in SafeHandlerForSegvBusIll (postgres_signal_arg=11, > processName=<optimized out>) at elog.c:4573 > #2 <signal 
handler called> > #3 *workfile_mgr_close_file* (work_set=0x0, file=0x7f2ce96d2de0, > canReportError=canReportError@entry=0 '\000') at workfile_file.c:129 > #4 0x000000000090bc84 in *ntuplestore_cleanup* (fNormal=0 '\000', > canReportError=0 '\000', ts=0x21f4810) at tuplestorenew.c:654 > #5 XCallBack_NTS (event=event@entry=XACT_EVENT_ABORT, > nts=nts@entry=0x21f4810) at tuplestorenew.c:674 > #6 0x00000000004e6b60 in CallXactCallbacksOnce (event=<optimized out>) at > xact.c:3660 > #7 AbortTransaction () at xact.c:2871 > #8 0x00000000004e75c5 in AbortCurrentTransaction () at xact.c:3377 > #9 0x00000000007ed81a in PostgresMain (argc=<optimized out>, argv=<optimized > out>, argv@entry=0x182c900, username=0x17ddcd0 "user") at postgres.c:4648 > #10 0x00000000007a0c50 in BackendRun (port=0x17cfb10) at postmaster.c:5915 > #11 BackendStartup (port=0x17cfb10) at postmaster.c:5484 > #12 ServerLoop () at postmaster.c:2163 > #13 0x00000000007a3a19 in PostmasterMain (argc=<optimized out>, > argv=<optimized out>) at postmaster.c:1454 > #14 0x00000000004a5309 in main (argc=9, argv=0x1785d10) at main.c:226 > {code} > > Repro: > {code} > # create test table > drop table if exists testsisc; > create table testsisc (i1 int, i2 int, i3 int, i4 int); > insert into testsisc select i, i % 1000, i % 100000, i % 75 from > generate_series(0,19999) i; > drop table if exists to_insert_into; > create table to_insert_into as > with ctesisc as > (select count(i1) as c1,i3 as c2 from testsisc group by i3) > select t1.c1 as c11, t1.c2 as c12, t2.c1 as c21, t2.c2 as c22 > from ctesisc as t1, ctesisc as t2 > where t1.c1 = t2.c2 > limit 10; > # run a long time query > begin; > set gp_simex_run=on; > set gp_cte_sharing=on; > insert into to_insert_into > with ctesisc as > (select count(i1) as c1,i3 as c2 from testsisc group by i3) > select * > from ctesisc as t1, ctesisc as t2 > where t1.c1 = t2.c2; > commit; > {code} > Kill one segment process when the second query is running. 
Then you will find a > panic log in the segment log. > -- This message was sent by Atlassian JIRA (v7.6.3#76005)