[ https://issues.apache.org/jira/browse/HAWQ-1640?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
TaoJIn updated HAWQ-1640: ------------------------- Description: When a client (such as pgbouncer, jdbc, zeppelin) is connected to HAWQ and executes a long query, if the client connection is interrupted before the query finishes, the server process will not exit until an hour later. This issue happened in HAWQ 2.3.0.0-incubating. Setting the parameter gp_interconnect_transmit_timeout to 600 (default 3600) reduces the time to 10 minutes. While the query was running, we could see its status in pg_stat_activity, but after it finished we could only see the process id in pg_locks and in the OS process list. We could see some error logs as below: $ tailf hawq-2018-07-04_063514.csv|grep p294 2018-07-04 08:13:29.595365 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58896,con19,cmd32,seg-1,,,x58896,sx1,"LOG","00000","ConnID 5. Returned resource to resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",951, 2018-07-04 08:13:29.595555 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58896,con19,cmd32,seg-1,,,x58896,sx1,"LOG","00000","ConnID 5. Unregistered from HAWQ resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",661, 2018-07-04 08:15:58.706458 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd34,seg-1,,,x58903,sx1,"LOG","00000","ConnID 6. Registered in HAWQ resource manager (By OID)",,,,,,"select * from cppayorderproduct",0,,"rmcomm_QD2RM.c",609, 2018-07-04 08:15:58.706640 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd34,seg-1,,,x58903,sx1,"LOG","00000","ConnID 6. 
Acquired resource from resource manager, (256 MB, 0.062500 CORE) x 18.",,,,,,"select * from cppayorderproduct",0,,"rmcomm_QD2RM.c",868, 2018-07-04 09:04:56.190873 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","08006","could not send data to client: Connection reset by peer",,,,,,"select * from cppayorderproduct",0,,"pqcomm.c",1413, 2018-07-04 09:04:56.192347 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"FATAL","08006","connection to client lost",,,,,,"select * from cppayorderproduct",0,,"postgres.c",3606, 2018-07-04 10:04:56.306412 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_consume meets error, connection is bad.",,,,,,,0,,,, 2018-07-04 10:04:56.306535 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","dispmgt_thread_func_run(): fail to consume data. 
Will exit and clean up.",,,,,,,0,,,, 2018-07-04 10:04:56.309663 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.312741 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.315364 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.317885 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.320411 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.322998 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.327342 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.330034 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.332656 
UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.335257 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.337972 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.340634 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.343785 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.346309 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.350458 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.352760 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.354846 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 
UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.354996 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","dispatcher thinks centos7-datanode3.centos7_hawq_network is down.",,,,,,,0,,"rmcomm_QD2RM.c",1209, 2018-07-04 10:04:56.355020 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","dispatcher sends 1 failed host(s) to resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",1213, 2018-07-04 10:04:56.355398 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","succeed in sending failed host to resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",1232, 2018-07-04 10:04:56.356559 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","ConnID 6. Returned resource to resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",951, 2018-07-04 10:04:56.356869 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","ConnID 6. 
Unregistered from HAWQ resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",661, 2018-07-04 10:04:56.356892 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,0,con19,cmd35,seg-1,,,,,"LOG","00000","clean up communication to resource manager now.",,,,,,,0,,"rmcomm_QD2RM.c",460, 2018-07-04 10:04:56.379198 UTC,"dev","hdb",p294,th998237952,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,0,con19,cmd35,seg-1,,,,,"LOG","00000","generateResourceRefreshHeartBeat exits.",,,,,,,0,,,, Attached is the server process backtrace. was: When a client (such as pgbouncer, jdbc, zeppelin) is connected to HAWQ and executes a long query, if the client connection is interrupted before the query finishes, the server process will not exit until an hour later. This issue happened in HAWQ 2.3.0.0-incubating. Setting the parameter gp_interconnect_transmit_timeout to 600 (default 3600) reduces the time to 10 minutes. While the query was running, we could see its status in pg_stat_activity, but after it finished we could only see the process id in pg_locks and in the OS process list. We could see some error logs as below: $ tailf hawq-2018-07-04_063514.csv|grep p294 2018-07-04 08:13:29.595365 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58896,con19,cmd32,seg-1,,,x58896,sx1,"LOG","00000","ConnID 5. Returned resource to resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",951, 2018-07-04 08:13:29.595555 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58896,con19,cmd32,seg-1,,,x58896,sx1,"LOG","00000","ConnID 5. Unregistered from HAWQ resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",661, 2018-07-04 08:15:58.706458 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd34,seg-1,,,x58903,sx1,"LOG","00000","ConnID 6. 
Registered in HAWQ resource manager (By OID)",,,,,,"select * from cppayorderproduct",0,,"rmcomm_QD2RM.c",609, 2018-07-04 08:15:58.706640 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd34,seg-1,,,x58903,sx1,"LOG","00000","ConnID 6. Acquired resource from resource manager, (256 MB, 0.062500 CORE) x 18.",,,,,,"select * from cppayorderproduct",0,,"rmcomm_QD2RM.c",868, 2018-07-04 09:04:56.190873 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","08006","could not send data to client: Connection reset by peer",,,,,,"select * from cppayorderproduct",0,,"pqcomm.c",1413, 2018-07-04 09:04:56.192347 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"FATAL","08006","connection to client lost",,,,,,"select * from cppayorderproduct",0,,"postgres.c",3606, 2018-07-04 10:04:56.306412 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_consume meets error, connection is bad.",,,,,,,0,,,, 2018-07-04 10:04:56.306535 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","dispmgt_thread_func_run(): fail to consume data. 
Will exit and clean up.",,,,,,,0,,,, 2018-07-04 10:04:56.309663 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.312741 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.315364 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.317885 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.320411 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.322998 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.327342 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.330034 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.332656 
UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.335257 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.337972 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.340634 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.343785 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.346309 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.350458 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.352760 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.354846 UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 06:37:28 
UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, 2018-07-04 10:04:56.354996 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","dispatcher thinks centos7-datanode3.centos7_hawq_network is down.",,,,,,,0,,"rmcomm_QD2RM.c",1209, 2018-07-04 10:04:56.355020 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","dispatcher sends 1 failed host(s) to resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",1213, 2018-07-04 10:04:56.355398 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","succeed in sending failed host to resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",1232, 2018-07-04 10:04:56.356559 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","ConnID 6. Returned resource to resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",951, 2018-07-04 10:04:56.356869 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","ConnID 6. 
Unregistered from HAWQ resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",661, 2018-07-04 10:04:56.356892 UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,0,con19,cmd35,seg-1,,,,,"LOG","00000","clean up communication to resource manager now.",,,,,,,0,,"rmcomm_QD2RM.c",460, 2018-07-04 10:04:56.379198 UTC,"dev","hdb",p294,th998237952,"172.17.10.148","63974",2018-07-04 06:37:28 UTC,0,con19,cmd35,seg-1,,,,,"LOG","00000","generateResourceRefreshHeartBeat exits.",,,,,,,0,,,, [backtrace of the server process |https://imgur.com/Qm3QnDA] > process not exit after query finished immediately while client connection lost > ------------------------------------------------------------------------------ > > Key: HAWQ-1640 > URL: https://issues.apache.org/jira/browse/HAWQ-1640 > Project: Apache HAWQ > Issue Type: Bug > Components: Core > Reporter: TaoJIn > Assignee: Radar Lei > Priority: Major > Fix For: 2.3.0.0-incubating > > Attachments: gdb backtrace.jpeg > > > When a client (such as pgbouncer, jdbc, zeppelin) is connected to HAWQ and > executes a long query, if the client connection is interrupted before the query > finishes, the server process will not exit until an hour later. > This issue happened in HAWQ 2.3.0.0-incubating. > Setting the parameter gp_interconnect_transmit_timeout to 600 (default 3600) > reduces the time to 10 minutes. > While the query was running, we could see its status in > pg_stat_activity, but after it finished we could only see the process id > in pg_locks and in the OS process list. > We could see some error logs as below: > $ tailf hawq-2018-07-04_063514.csv|grep p294 > 2018-07-04 08:13:29.595365 > UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 > 06:37:28 UTC,58896,con19,cmd32,seg-1,,,x58896,sx1,"LOG","00000","ConnID > 5. 
Returned resource to resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",951, > 2018-07-04 08:13:29.595555 > UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 > 06:37:28 UTC,58896,con19,cmd32,seg-1,,,x58896,sx1,"LOG","00000","ConnID > 5. Unregistered from HAWQ resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",661, > 2018-07-04 08:15:58.706458 > UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 > 06:37:28 UTC,58903,con19,cmd34,seg-1,,,x58903,sx1,"LOG","00000","ConnID > 6. Registered in HAWQ resource manager (By OID)",,,,,,"select * from > cppayorderproduct",0,,"rmcomm_QD2RM.c",609, > 2018-07-04 08:15:58.706640 > UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 > 06:37:28 UTC,58903,con19,cmd34,seg-1,,,x58903,sx1,"LOG","00000","ConnID > 6. Acquired resource from resource manager, (256 MB, 0.062500 CORE) x > 18.",,,,,,"select * from cppayorderproduct",0,,"rmcomm_QD2RM.c",868, > 2018-07-04 09:04:56.190873 > UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 > 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","08006","could > not send data to client: Connection reset by peer",,,,,,"select * from > cppayorderproduct",0,,"pqcomm.c",1413, > 2018-07-04 09:04:56.192347 > UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"FATAL","08006","connection to > client lost",,,,,,"select * from cppayorderproduct",0,,"postgres.c",3606, > 2018-07-04 10:04:56.306412 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_consume meets error, connection is bad.",,,,,,,0,,,, > 2018-07-04 10:04:56.306535 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","dispmgt_thread_func_run(): > > fail to consume data. 
Will exit and clean up.",,,,,,,0,,,, > 2018-07-04 10:04:56.309663 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 2018-07-04 10:04:56.312741 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 2018-07-04 10:04:56.315364 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 2018-07-04 10:04:56.317885 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 2018-07-04 10:04:56.320411 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 2018-07-04 10:04:56.322998 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 2018-07-04 10:04:56.327342 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 2018-07-04 10:04:56.330034 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 
2018-07-04 10:04:56.332656 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 2018-07-04 10:04:56.335257 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 2018-07-04 10:04:56.337972 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 2018-07-04 10:04:56.340634 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 2018-07-04 10:04:56.343785 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 2018-07-04 10:04:56.346309 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 2018-07-04 10:04:56.350458 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 2018-07-04 10:04:56.352760 > UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 2018-07-04 10:04:56.354846 > 
UTC,"dev","hdb",p294,th1627412224,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","function > executormgr_cancel calling executormgr_catch_error",,,,,,,0,,,, > 2018-07-04 10:04:56.354996 > UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","dispatcher > thinks centos7-datanode3.centos7_hawq_network is > down.",,,,,,,0,,"rmcomm_QD2RM.c",1209, > 2018-07-04 10:04:56.355020 > UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","dispatcher sends > 1 failed host(s) to resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",1213, > 2018-07-04 10:04:56.355398 > UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 > 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","succeed > in sending failed host to resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",1232, > 2018-07-04 10:04:56.356559 > UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 > 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","ConnID > 6. Returned resource to resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",951, > 2018-07-04 10:04:56.356869 > UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 > 06:37:28 UTC,58903,con19,cmd35,seg-1,,,x58903,sx1,"LOG","00000","ConnID > 6. 
Unregistered from HAWQ resource manager.",,,,,,,0,,"rmcomm_QD2RM.c",661, > 2018-07-04 10:04:56.356892 > UTC,"dev","hdb",p294,th1628359104,"172.17.10.148","63974",2018-07-04 > 06:37:28 UTC,0,con19,cmd35,seg-1,,,,,"LOG","00000","clean up > communication to resource manager now.",,,,,,,0,,"rmcomm_QD2RM.c",460, > 2018-07-04 10:04:56.379198 > UTC,"dev","hdb",p294,th998237952,"172.17.10.148","63974",2018-07-04 > 06:37:28 > UTC,0,con19,cmd35,seg-1,,,,,"LOG","00000","generateResourceRefreshHeartBeat > exits.",,,,,,,0,,,, > Attached is the server process backtrace. -- This message was sent by Atlassian JIRA (v7.6.3#76005)