[ https://issues.apache.org/jira/browse/HAWQ-1117?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Devin Jia updated HAWQ-1117: ---------------------------- Description: after i upgrade hawq to 2.0.1 and build, the hawq cluster can't start. 1.configure and build: {quote} ./configure --prefix=/opt/hawq-build --enable-depend --enable-cassert --enable-debug make && make install {quote} 2. start error: {quote} [gpadmin@hmaster pg_log]$ more /home/gpadmin/hawq-data-directory/masterdd/pg_log/hawq-2016-10-20_133056.csv 2016-10-20 13:30:56.549712 CST,"gpadmin","template1",p3279,th-266811104,"[local]",,2016-10-20 13:30:56 CST,0,,,seg-10000,,,,,"FATAL","57P03","the database system is in recovery mode",,,,,,, 0,,"postmaster.c",2656, 2016-10-20 13:30:56.556630 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","database system was interrupted at 2016-10-20 13:22:51 CST",,,,,,,0,,"xlog.c",6229, 2016-10-20 13:30:56.558414 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","checkpoint record is at 0/857ED8",,,,,,,0,,"xlog.c",6306, 2016-10-20 13:30:56.558464 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","redo record is at 0/857ED8; undo record is at 0/0; shutdown TRUE",,,,,,,0,,"xlog.c",6340, 2016-10-20 13:30:56.558495 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","next transaction ID: 0/963; next OID: 10896",,,,,,,0,,"xlog.c",6344, 2016-10-20 13:30:56.558522 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","next MultiXactId: 1; next MultiXactOffset: 0",,,,,,,0,,"xlog.c",6347, 2016-10-20 13:30:56.558559 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","database system was not properly shut down; automatic recovery in progress",,,,,,,0,,"xlog.c",6436, 2016-10-20 13:30:56.563303 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","record with zero length at 0/857F28",,,,,,,0,,"xlog.c",4110, 2016-10-20 13:30:56.563348 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","no record for redo after checkpoint, skip redo and proceed for recovery pass",,,,,,,0,,"xlog.c",6500, 2016-10-20 13:30:56.563411 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","end of transaction log location is 0/857F28",,,,,,,0,,"xlog.c",6584, 2016-10-20 13:30:56.568795 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Finished startup pass 1. Proceeding to startup crash recovery passes 2 and 3.",,,,,,,0,,"xlog.c",681 8, 2016-10-20 13:30:56.580641 CST,,,p3281,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Finished startup crash recovery pass 2",,,,,,,0,,"xlog.c",6989, 2016-10-20 13:30:56.595325 CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","recovery restart point at 0/857ED8",,,,,"xlog redo checkpoint: redo 0/857ED8; undo 0/0; tli 1; xid 0/ 963; oid 10896; multi 1; offset 0; shutdown REDO PASS 3 @ 0/857ED8; LSN 0/857F28: prev 0/857E88; xid 0: XLOG - checkpoint: redo 0/857ED8; undo 0/0; tli 1; xid 0/963; oid 10896; multi 1; offset 0; shutdown",,0,,"xlog.c",8331, 2016-10-20 13:30:56.595390 CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","record with zero length at 0/857F28",,,,,,,0,,"xlog.c",4110, 2016-10-20 13:30:56.595477 CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Oldest active transaction from prepared transactions 963",,,,,,,0,,"xlog.c",5998, 2016-10-20 13:30:56.603266 CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","database system is ready",,,,,,,0,,"xlog.c",6024, 2016-10-20 13:30:56.603314 CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build dev) on x86_64-unknown-linux -gnu, compiled by GCC gcc (GCC) 4.8.2 20140120 (Red Hat 4.8.2-15) compiled on Oct 20 2016 12:27:04 (with assert checking)",,,,,,,0,,"xlog.c",6034, 2016-10-20 13:30:56.607520 CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Finished startup crash recovery pass 3",,,,,,,0,,"xlog.c",7133, 2016-10-20 13:30:56.632316 CST,,,p3283,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Finished startup integrity checking",,,,,,,0,,"xlog.c",7161, 2016-10-20 13:30:56.645485 CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"LOG","00000","Resource manager starts accepting resource request. Listening normal socket port 5437. Total list ened 1 FDs.",,,,,,,0,,"resourcemanager.c",2495, 2016-10-20 13:30:56.645622 CST,,,p3277,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Wait for HAWQ RM -1",,,,,,,0,,"resourcemanager.c",421, 2016-10-20 13:30:56.645632 CST,,,p3277,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","HAWQ :: Received signal notification that HAWQ RM works now.",,,,,,,0,,"resourcemanager.c",429, 2016-10-20 13:30:56.645645 CST,,,p3277,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build dev) on x86_64-unknown-linux -gnu, compiled by GCC gcc (GCC) 4.8.2 20140120 (Red Hat 4.8.2-15) compiled on Oct 20 2016 12:27:47 (with assert checking)",,,,,,,0,,"postmaster.c",3694, 2016-10-20 13:30:56.645654 CST,,,p3277,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","database system is ready to accept connections","PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build dev) on x86_64-unknown-linux-gnu, compiled by GCC gcc (GCC) 4.8.2 20140120 (Red Hat 4.8.2-15) compiled on Oct 20 2016 12:27:47 (with assert checking)",,,,,,0,,"postmast er.c",3701, 2016-10-20 13:30:56.647413 CST,"gpadmin","template1",p3291,th-266811104,"[local]",,2016-10-20 13:30:56 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:30:56.647501 CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"WARNING","01000","Fail to connect database when cleanup segment configuration catalog table, error code: 1, FAT AL: invalid command-line arguments for server process","HINT: Try ""postgres --help"" for more information.",,,,,,0,,"resourcepool.c",418, 2016-10-20 13:30:56.648990 CST,"gpadmin","template1",p3292,th-266811104,"[local]",,2016-10-20 13:30:56 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:30:56.650024 CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"WARNING","01000","Fail to connect database when cleanup segment history catalog table, error code: 1, FATAL: i nvalid command-line arguments for server process","HINT: Try ""postgres --help"" for more information.",,,,,,0,,"resourcepool.c",486, {color:red}2016-10-20 13:30:56.651081 CST,"gpadmin","template1",p3293,th-266811104,"[local]",,2016-10-20 13:30:56 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, {color} 2016-10-20 13:30:56.651301 CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"WARNING","01000","Fail to connect database when add a new row into segment configuration catalog table, error c ode: 1, FATAL: invalid command-line arguments for server process","HINT: Try ""postgres --help"" for more information.",,,,,,0,,"resourcepool.c",821, 2016-10-20 13:30:56.652237 CST,"gpadmin","template1",p3294,th-266811104,"[local]",,2016-10-20 13:30:56 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:30:56.652626 CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"WARNING","01000","Resource manager failed to connect database when loadingrole specifications from pg_authid, e rror code 1, reason: FATAL: invalid command-line arguments for server process","HINT: Try ""postgres --help"" for more information.",,,,,,0,,"resourcemanager.c",1168, 2016-10-20 13:30:56.652642 CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"LOG","00000","Fail to load queue and user definition.",,,,,,,0,,"resourcemanager.c",1130, 2016-10-20 13:30:56.740128 CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"FATAL","XX000","Fail to load queue and user definition. (resourcemanager.c:496)",,"Process 3290 will wait for g p_debug_linger=120 seconds before termination. Note that its locks and other resources will not be released until then.",,,,,0,,"resourcemanager.c",496,"Stack trace: 1 0x9bf143 postgres errstart + 0x39f 2 0x9c16e4 postgres elog_finish + 0x119 3 0xa7c91c postgres ResManagerMainServer2ndPhase + 0x1c1 4 0xa7c436 postgres ResManagerMain + 0x567 5 0xa7c63e postgres ResManagerProcessStartup + 0x162 6 0x8834a3 postgres <symbol not found> + 0x8834a3 7 0x883e9c postgres <symbol not found> + 0x883e9c 8 0x8802a5 postgres <symbol not found> + 0x8802a5 9 0x87f35f postgres PostmasterMain + 0xf50 10 0x79ccab postgres main + 0x367 11 0x3b7781ed5d libc.so.6 __libc_start_main + 0xfd 12 0x4bcb59 postgres <symbol not found> + 0x4bcb59 " 2016-10-20 13:30:57.556199 CST,"gpadmin","template1",p3297,th-266811104,"[local]",,2016-10-20 13:30:57 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:30:58.562193 CST,"gpadmin","template1",p3298,th-266811104,"[local]",,2016-10-20 13:30:58 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:30:59.569291 CST,"gpadmin","template1",p3299,th-266811104,"[local]",,2016-10-20 13:30:59 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:31:00.575481 CST,"gpadmin","template1",p3300,th-266811104,"[local]",,2016-10-20 13:31:00 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:31:01.581725 CST,"gpadmin","template1",p3301,th-266811104,"[local]",,2016-10-20 13:31:01 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:31:02.592724 CST,"gpadmin","template1",p3302,th-266811104,"[local]",,2016-10-20 13:31:02 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:31:03.599184 CST,"gpadmin","template1",p3303,th-266811104,"[local]",,2016-10-20 13:31:03 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, --More--(7%) {quote} 3.the os process list {quote} [gpadmin@hmaster pg_log]$ ps -ef|grep postgres gpadmin 3277 1 0 13:30 ? 00:00:01 /opt/hawq-build/bin/postgres -D /home/gpadmin/hawq-data-directory/masterdd -i -M master -p 5432 --silent-mode=true gpadmin 3278 3277 0 13:30 ? 00:00:00 postgres: port 5432, master logger process gpadmin 3284 3277 0 13:30 ? 00:00:00 postgres: port 5432, stats collector process gpadmin 3285 3277 0 13:30 ? 00:00:00 postgres: port 5432, writer process gpadmin 3286 3277 0 13:30 ? 00:00:00 postgres: port 5432, checkpoint process gpadmin 3287 3277 0 13:30 ? 00:00:00 postgres: port 5432, seqserver process gpadmin 3288 3277 0 13:30 ? 00:00:00 postgres: port 5432, WAL Send Server process gpadmin 3289 3277 0 13:30 ? 00:00:00 postgres: port 5432, DFS Metadata Cache process {color:red}gpadmin 3797 3277 0 13:38 ? 00:00:00 postgres: port 5432, master resource manager con8 error exit in 0m 25s {color} gpadmin 3905 3215 0 13:40 pts/0 00:00:00 grep postgres {quote} was: after i upgrade hawq to 2.0.1 and build, the hawq cluster can't start. 1.configure and build: {quote} ./configure --prefix=/opt/hawq-build --enable-depend --enable-cassert --enable-debug make && make install {quote} 2. start error: {quote} [gpadmin@hmaster pg_log]$ more /home/gpadmin/hawq-data-directory/masterdd/pg_log/hawq-2016-10-20_133056.csv 2016-10-20 13:30:56.549712 CST,"gpadmin","template1",p3279,th-266811104,"[local]",,2016-10-20 13:30:56 CST,0,,,seg-10000,,,,,"FATAL","57P03","the database system is in recovery mode",,,,,,, 0,,"postmaster.c",2656, 2016-10-20 13:30:56.556630 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","database system was interrupted at 2016-10-20 13:22:51 CST",,,,,,,0,,"xlog.c",6229, 2016-10-20 13:30:56.558414 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","checkpoint record is at 0/857ED8",,,,,,,0,,"xlog.c",6306, 2016-10-20 13:30:56.558464 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","redo record is at 0/857ED8; undo record is at 0/0; shutdown TRUE",,,,,,,0,,"xlog.c",6340, 2016-10-20 13:30:56.558495 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","next transaction ID: 0/963; next OID: 10896",,,,,,,0,,"xlog.c",6344, 2016-10-20 13:30:56.558522 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","next MultiXactId: 1; next MultiXactOffset: 0",,,,,,,0,,"xlog.c",6347, 2016-10-20 13:30:56.558559 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","database system was not properly shut down; automatic recovery in progress",,,,,,,0,,"xlog.c",6436, 2016-10-20 13:30:56.563303 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","record with zero length at 0/857F28",,,,,,,0,,"xlog.c",4110, 2016-10-20 13:30:56.563348 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","no record for redo after checkpoint, skip redo and proceed for recovery pass",,,,,,,0,,"xlog.c",6500, 2016-10-20 13:30:56.563411 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","end of transaction log location is 0/857F28",,,,,,,0,,"xlog.c",6584, 2016-10-20 13:30:56.568795 CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Finished startup pass 1. Proceeding to startup crash recovery passes 2 and 3.",,,,,,,0,,"xlog.c",681 8, 2016-10-20 13:30:56.580641 CST,,,p3281,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Finished startup crash recovery pass 2",,,,,,,0,,"xlog.c",6989, 2016-10-20 13:30:56.595325 CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","recovery restart point at 0/857ED8",,,,,"xlog redo checkpoint: redo 0/857ED8; undo 0/0; tli 1; xid 0/ 963; oid 10896; multi 1; offset 0; shutdown REDO PASS 3 @ 0/857ED8; LSN 0/857F28: prev 0/857E88; xid 0: XLOG - checkpoint: redo 0/857ED8; undo 0/0; tli 1; xid 0/963; oid 10896; multi 1; offset 0; shutdown",,0,,"xlog.c",8331, 2016-10-20 13:30:56.595390 CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","record with zero length at 0/857F28",,,,,,,0,,"xlog.c",4110, 2016-10-20 13:30:56.595477 CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Oldest active transaction from prepared transactions 963",,,,,,,0,,"xlog.c",5998, 2016-10-20 13:30:56.603266 CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","database system is ready",,,,,,,0,,"xlog.c",6024, 2016-10-20 13:30:56.603314 CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build dev) on x86_64-unknown-linux -gnu, compiled by GCC gcc (GCC) 4.8.2 20140120 (Red Hat 4.8.2-15) compiled on Oct 20 2016 12:27:04 (with assert checking)",,,,,,,0,,"xlog.c",6034, 2016-10-20 13:30:56.607520 CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Finished startup crash recovery pass 3",,,,,,,0,,"xlog.c",7133, 2016-10-20 13:30:56.632316 CST,,,p3283,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Finished startup integrity checking",,,,,,,0,,"xlog.c",7161, 2016-10-20 13:30:56.645485 CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"LOG","00000","Resource manager starts accepting resource request. Listening normal socket port 5437. Total list ened 1 FDs.",,,,,,,0,,"resourcemanager.c",2495, 2016-10-20 13:30:56.645622 CST,,,p3277,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Wait for HAWQ RM -1",,,,,,,0,,"resourcemanager.c",421, 2016-10-20 13:30:56.645632 CST,,,p3277,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","HAWQ :: Received signal notification that HAWQ RM works now.",,,,,,,0,,"resourcemanager.c",429, 2016-10-20 13:30:56.645645 CST,,,p3277,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build dev) on x86_64-unknown-linux -gnu, compiled by GCC gcc (GCC) 4.8.2 20140120 (Red Hat 4.8.2-15) compiled on Oct 20 2016 12:27:47 (with assert checking)",,,,,,,0,,"postmaster.c",3694, 2016-10-20 13:30:56.645654 CST,,,p3277,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","database system is ready to accept connections","PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build dev) on x86_64-unknown-linux-gnu, compiled by GCC gcc (GCC) 4.8.2 20140120 (Red Hat 4.8.2-15) compiled on Oct 20 2016 12:27:47 (with assert checking)",,,,,,0,,"postmast er.c",3701, 2016-10-20 13:30:56.647413 CST,"gpadmin","template1",p3291,th-266811104,"[local]",,2016-10-20 13:30:56 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:30:56.647501 CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"WARNING","01000","Fail to connect database when cleanup segment configuration catalog table, error code: 1, FAT AL: invalid command-line arguments for server process","HINT: Try ""postgres --help"" for more information.",,,,,,0,,"resourcepool.c",418, 2016-10-20 13:30:56.648990 CST,"gpadmin","template1",p3292,th-266811104,"[local]",,2016-10-20 13:30:56 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:30:56.650024 CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"WARNING","01000","Fail to connect database when cleanup segment history catalog table, error code: 1, FATAL: i nvalid command-line arguments for server process","HINT: Try ""postgres --help"" for more information.",,,,,,0,,"resourcepool.c",486, {color:red}2016-10-20 13:30:56.651081 CST,"gpadmin","template1",p3293,th-266811104,"[local]",,2016-10-20 13:30:56 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, {color} 2016-10-20 13:30:56.651301 CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"WARNING","01000","Fail to connect database when add a new row into segment configuration catalog table, error c ode: 1, FATAL: invalid command-line arguments for server process","HINT: Try ""postgres --help"" for more information.",,,,,,0,,"resourcepool.c",821, 2016-10-20 13:30:56.652237 CST,"gpadmin","template1",p3294,th-266811104,"[local]",,2016-10-20 13:30:56 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:30:56.652626 CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"WARNING","01000","Resource manager failed to connect database when loadingrole specifications from pg_authid, e rror code 1, reason: FATAL: invalid command-line arguments for server process","HINT: Try ""postgres --help"" for more information.",,,,,,0,,"resourcemanager.c",1168, 2016-10-20 13:30:56.652642 CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"LOG","00000","Fail to load queue and user definition.",,,,,,,0,,"resourcemanager.c",1130, 2016-10-20 13:30:56.740128 CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"FATAL","XX000","Fail to load queue and user definition. (resourcemanager.c:496)",,"Process 3290 will wait for g p_debug_linger=120 seconds before termination. Note that its locks and other resources will not be released until then.",,,,,0,,"resourcemanager.c",496,"Stack trace: 1 0x9bf143 postgres errstart + 0x39f 2 0x9c16e4 postgres elog_finish + 0x119 3 0xa7c91c postgres ResManagerMainServer2ndPhase + 0x1c1 4 0xa7c436 postgres ResManagerMain + 0x567 5 0xa7c63e postgres ResManagerProcessStartup + 0x162 6 0x8834a3 postgres <symbol not found> + 0x8834a3 7 0x883e9c postgres <symbol not found> + 0x883e9c 8 0x8802a5 postgres <symbol not found> + 0x8802a5 9 0x87f35f postgres PostmasterMain + 0xf50 10 0x79ccab postgres main + 0x367 11 0x3b7781ed5d libc.so.6 __libc_start_main + 0xfd 12 0x4bcb59 postgres <symbol not found> + 0x4bcb59 " 2016-10-20 13:30:57.556199 CST,"gpadmin","template1",p3297,th-266811104,"[local]",,2016-10-20 13:30:57 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:30:58.562193 CST,"gpadmin","template1",p3298,th-266811104,"[local]",,2016-10-20 13:30:58 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:30:59.569291 CST,"gpadmin","template1",p3299,th-266811104,"[local]",,2016-10-20 13:30:59 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:31:00.575481 CST,"gpadmin","template1",p3300,th-266811104,"[local]",,2016-10-20 13:31:00 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:31:01.581725 CST,"gpadmin","template1",p3301,th-266811104,"[local]",,2016-10-20 13:31:01 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:31:02.592724 CST,"gpadmin","template1",p3302,th-266811104,"[local]",,2016-10-20 13:31:02 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, 2016-10-20 13:31:03.599184 CST,"gpadmin","template1",p3303,th-266811104,"[local]",,2016-10-20 13:31:03 CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for server proce ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, --More--(7%) {quote} 3.the os process list {quote} [gpadmin@hmaster pg_log]$ ps -ef|grep postgres gpadmin 3277 1 0 13:30 ? 00:00:01 /opt/hawq-build/bin/postgres -D /home/gpadmin/hawq-data-directory/masterdd -i -M master -p 5432 --silent-mode=true gpadmin 3278 3277 0 13:30 ? 00:00:00 postgres: port 5432, master logger process gpadmin 3284 3277 0 13:30 ? 00:00:00 postgres: port 5432, stats collector process gpadmin 3285 3277 0 13:30 ? 00:00:00 postgres: port 5432, writer process gpadmin 3286 3277 0 13:30 ? 00:00:00 postgres: port 5432, checkpoint process gpadmin 3287 3277 0 13:30 ? 00:00:00 postgres: port 5432, seqserver process gpadmin 3288 3277 0 13:30 ? 00:00:00 postgres: port 5432, WAL Send Server process gpadmin 3289 3277 0 13:30 ? 00:00:00 postgres: port 5432, DFS Metadata Cache process gpadmin 3797 3277 0 13:38 ? 00:00:00 postgres: port 5432, master resource manager con8 error exit in 0m 25s gpadmin 3905 3215 0 13:40 pts/0 00:00:00 grep postgres {quote} > can't start hawq cluster > ------------------------- > > Key: HAWQ-1117 > URL: https://issues.apache.org/jira/browse/HAWQ-1117 > Project: Apache HAWQ > Issue Type: Bug > Components: Core > Reporter: Devin Jia > Assignee: Lei Chang > > after i upgrade hawq to 2.0.1 and build, the hawq cluster can't start. > 1.configure and build: > {quote} > ./configure --prefix=/opt/hawq-build --enable-depend --enable-cassert > --enable-debug > make && make install > {quote} > 2. start error: > {quote} > [gpadmin@hmaster pg_log]$ more > /home/gpadmin/hawq-data-directory/masterdd/pg_log/hawq-2016-10-20_133056.csv > 2016-10-20 13:30:56.549712 > CST,"gpadmin","template1",p3279,th-266811104,"[local]",,2016-10-20 13:30:56 > CST,0,,,seg-10000,,,,,"FATAL","57P03","the database system is in recovery > mode",,,,,,, > 0,,"postmaster.c",2656, > 2016-10-20 13:30:56.556630 > CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","database system > was interrupted at 2016-10-20 13:22:51 CST",,,,,,,0,,"xlog.c",6229, > 2016-10-20 13:30:56.558414 > CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","checkpoint > record is at 0/857ED8",,,,,,,0,,"xlog.c",6306, > 2016-10-20 13:30:56.558464 > CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","redo record is > at 0/857ED8; undo record is at 0/0; shutdown TRUE",,,,,,,0,,"xlog.c",6340, > 2016-10-20 13:30:56.558495 > CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","next transaction > ID: 0/963; next OID: 10896",,,,,,,0,,"xlog.c",6344, > 2016-10-20 13:30:56.558522 > CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","next > MultiXactId: 1; next MultiXactOffset: 0",,,,,,,0,,"xlog.c",6347, > 2016-10-20 13:30:56.558559 > CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","database system > was not properly shut down; automatic recovery in > progress",,,,,,,0,,"xlog.c",6436, > 2016-10-20 13:30:56.563303 > CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","record with zero > length at 0/857F28",,,,,,,0,,"xlog.c",4110, > 2016-10-20 13:30:56.563348 > CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","no record for > redo after checkpoint, skip redo and proceed for recovery > pass",,,,,,,0,,"xlog.c",6500, > 2016-10-20 13:30:56.563411 > CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","end of > transaction log location is 0/857F28",,,,,,,0,,"xlog.c",6584, > 2016-10-20 13:30:56.568795 > CST,,,p3280,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Finished startup > pass 1. Proceeding to startup crash recovery passes 2 and > 3.",,,,,,,0,,"xlog.c",681 > 8, > 2016-10-20 13:30:56.580641 > CST,,,p3281,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Finished startup > crash recovery pass 2",,,,,,,0,,"xlog.c",6989, > 2016-10-20 13:30:56.595325 > CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","recovery restart > point at 0/857ED8",,,,,"xlog redo checkpoint: redo 0/857ED8; undo 0/0; tli 1; > xid 0/ > 963; oid 10896; multi 1; offset 0; shutdown > REDO PASS 3 @ 0/857ED8; LSN 0/857F28: prev 0/857E88; xid 0: XLOG - > checkpoint: redo 0/857ED8; undo 0/0; tli 1; xid 0/963; oid 10896; multi 1; > offset 0; shutdown",,0,,"xlog.c",8331, > 2016-10-20 13:30:56.595390 > CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","record with zero > length at 0/857F28",,,,,,,0,,"xlog.c",4110, > 2016-10-20 13:30:56.595477 > CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Oldest active > transaction from prepared transactions 963",,,,,,,0,,"xlog.c",5998, > 2016-10-20 13:30:56.603266 > CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","database system > is ready",,,,,,,0,,"xlog.c",6024, > 2016-10-20 13:30:56.603314 > CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","PostgreSQL > 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build dev) on > x86_64-unknown-linux > -gnu, compiled by GCC gcc (GCC) 4.8.2 20140120 (Red Hat 4.8.2-15) compiled on > Oct 20 2016 12:27:04 (with assert checking)",,,,,,,0,,"xlog.c",6034, > 2016-10-20 13:30:56.607520 > CST,,,p3282,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Finished startup > crash recovery pass 3",,,,,,,0,,"xlog.c",7133, > 2016-10-20 13:30:56.632316 > CST,,,p3283,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Finished startup > integrity checking",,,,,,,0,,"xlog.c",7161, > 2016-10-20 13:30:56.645485 > CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"LOG","00000","Resource > manager starts accepting resource request. Listening normal socket port 5437. > Total list > ened 1 FDs.",,,,,,,0,,"resourcemanager.c",2495, > 2016-10-20 13:30:56.645622 > CST,,,p3277,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","Wait for HAWQ RM > -1",,,,,,,0,,"resourcemanager.c",421, > 2016-10-20 13:30:56.645632 > CST,,,p3277,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","HAWQ :: Received > signal notification that HAWQ RM works now.",,,,,,,0,,"resourcemanager.c",429, > 2016-10-20 13:30:56.645645 > CST,,,p3277,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","PostgreSQL > 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build dev) on > x86_64-unknown-linux > -gnu, compiled by GCC gcc (GCC) 4.8.2 20140120 (Red Hat 4.8.2-15) compiled on > Oct 20 2016 12:27:47 (with assert checking)",,,,,,,0,,"postmaster.c",3694, > 2016-10-20 13:30:56.645654 > CST,,,p3277,th-266811104,,,,0,,,seg-10000,,,,,"LOG","00000","database system > is ready to accept connections","PostgreSQL 8.2.15 (Greenplum Database 4.2.0 > build 1) > (HAWQ 2.0.1.0 build dev) on x86_64-unknown-linux-gnu, compiled by GCC gcc > (GCC) 4.8.2 20140120 (Red Hat 4.8.2-15) compiled on Oct 20 2016 12:27:47 > (with assert checking)",,,,,,0,,"postmast > er.c",3701, > 2016-10-20 13:30:56.647413 > CST,"gpadmin","template1",p3291,th-266811104,"[local]",,2016-10-20 13:30:56 > CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for > server proce > ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, > 2016-10-20 13:30:56.647501 > CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"WARNING","01000","Fail to > connect database when cleanup segment configuration catalog table, error > code: 1, FAT > AL: invalid command-line arguments for server process","HINT: Try > ""postgres --help"" for more information.",,,,,,0,,"resourcepool.c",418, > 2016-10-20 13:30:56.648990 > CST,"gpadmin","template1",p3292,th-266811104,"[local]",,2016-10-20 13:30:56 > CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for > server proce > ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, > 2016-10-20 13:30:56.650024 > CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"WARNING","01000","Fail to > connect database when cleanup segment history catalog table, error code: 1, > FATAL: i > nvalid command-line arguments for server process","HINT: Try ""postgres > --help"" for more information.",,,,,,0,,"resourcepool.c",486, > {color:red}2016-10-20 13:30:56.651081 > CST,"gpadmin","template1",p3293,th-266811104,"[local]",,2016-10-20 13:30:56 > CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for > server proce > ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, > {color} > 2016-10-20 13:30:56.651301 > CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"WARNING","01000","Fail to > connect database when add a new row into segment configuration catalog table, > error c > ode: 1, FATAL: invalid command-line arguments for server process","HINT: > Try ""postgres --help"" for more information.",,,,,,0,,"resourcepool.c",821, > 2016-10-20 13:30:56.652237 > CST,"gpadmin","template1",p3294,th-266811104,"[local]",,2016-10-20 13:30:56 > CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for > server proce > ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, > 2016-10-20 13:30:56.652626 > CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"WARNING","01000","Resource > manager failed to connect database when loadingrole specifications from > pg_authid, e > rror code 1, reason: FATAL: invalid command-line arguments for server > process","HINT: Try ""postgres --help"" for more > information.",,,,,,0,,"resourcemanager.c",1168, > 2016-10-20 13:30:56.652642 > CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"LOG","00000","Fail to load > queue and user definition.",,,,,,,0,,"resourcemanager.c",1130, > 2016-10-20 13:30:56.740128 > CST,,,p3290,th-266811104,,,,0,con4,,seg-10000,,,,,"FATAL","XX000","Fail to > load queue and user definition. (resourcemanager.c:496)",,"Process 3290 will > wait for g > p_debug_linger=120 seconds before termination. > Note that its locks and other resources will not be released until > then.",,,,,0,,"resourcemanager.c",496,"Stack trace: > 1 0x9bf143 postgres errstart + 0x39f > 2 0x9c16e4 postgres elog_finish + 0x119 > 3 0xa7c91c postgres ResManagerMainServer2ndPhase + 0x1c1 > 4 0xa7c436 postgres ResManagerMain + 0x567 > 5 0xa7c63e postgres ResManagerProcessStartup + 0x162 > 6 0x8834a3 postgres <symbol not found> + 0x8834a3 > 7 0x883e9c postgres <symbol not found> + 0x883e9c > 8 0x8802a5 postgres <symbol not found> + 0x8802a5 > 9 0x87f35f postgres PostmasterMain + 0xf50 > 10 0x79ccab postgres main + 0x367 > 11 0x3b7781ed5d libc.so.6 __libc_start_main + 0xfd > 12 0x4bcb59 postgres <symbol not found> + 0x4bcb59 > " > 2016-10-20 13:30:57.556199 > CST,"gpadmin","template1",p3297,th-266811104,"[local]",,2016-10-20 13:30:57 > CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for > server proce > ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, > 2016-10-20 13:30:58.562193 > CST,"gpadmin","template1",p3298,th-266811104,"[local]",,2016-10-20 13:30:58 > CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for > server proce > ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, > 2016-10-20 13:30:59.569291 > CST,"gpadmin","template1",p3299,th-266811104,"[local]",,2016-10-20 13:30:59 > CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for > server proce > ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, > 2016-10-20 13:31:00.575481 > CST,"gpadmin","template1",p3300,th-266811104,"[local]",,2016-10-20 13:31:00 > CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for > server proce > ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, > 2016-10-20 13:31:01.581725 > CST,"gpadmin","template1",p3301,th-266811104,"[local]",,2016-10-20 13:31:01 > CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for > server proce > ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, > 2016-10-20 13:31:02.592724 > CST,"gpadmin","template1",p3302,th-266811104,"[local]",,2016-10-20 13:31:02 > CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for > server proce > ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, > 2016-10-20 13:31:03.599184 > CST,"gpadmin","template1",p3303,th-266811104,"[local]",,2016-10-20 13:31:03 > CST,0,,,seg-10000,,,,,"FATAL","42601","invalid command-line arguments for > server proce > ss",,"Try ""postgres --help"" for more information.",,,,,0,,"postgres.c",4326, > --More--(7%) > {quote} > 3.the os process list > {quote} > [gpadmin@hmaster pg_log]$ ps -ef|grep postgres > gpadmin 3277 1 0 13:30 ? 00:00:01 > /opt/hawq-build/bin/postgres -D /home/gpadmin/hawq-data-directory/masterdd -i > -M master -p 5432 --silent-mode=true > gpadmin 3278 3277 0 13:30 ? 00:00:00 postgres: port 5432, > master logger process > > gpadmin 3284 3277 0 13:30 ? 00:00:00 postgres: port 5432, stats > collector process > > gpadmin 3285 3277 0 13:30 ? 00:00:00 postgres: port 5432, > writer process > > gpadmin 3286 3277 0 13:30 ? 00:00:00 postgres: port 5432, > checkpoint process > > gpadmin 3287 3277 0 13:30 ? 00:00:00 postgres: port 5432, > seqserver process > > gpadmin 3288 3277 0 13:30 ? 00:00:00 postgres: port 5432, WAL > Send Server process > > gpadmin 3289 3277 0 13:30 ? 00:00:00 postgres: port 5432, DFS > Metadata Cache process > > {color:red}gpadmin 3797 3277 0 13:38 ? 00:00:00 postgres: port > 5432, master resource manager con8 error exit in 0m 25s > {color} > gpadmin 3905 3215 0 13:40 pts/0 00:00:00 grep postgres > {quote} -- This message was sent by Atlassian JIRA (v6.3.4#6332)