[ https://issues.apache.org/jira/browse/HBASE-27552?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
chaijunjie updated HBASE-27552: ------------------------------- Description: 2023-01-05 19:56:41,385 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=102, ppid=97, state=SUCCESS; OpenRegionProcedure 1903713b7f970a75db1e7a0e72da21d7, server=node-master2mesq,21302,1672817611868 | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,385 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=103, ppid=96, state=SUCCESS; OpenRegionProcedure 6695b9c5ad80249bc43830ddc5259487, server=node-master2mesq,21302,1672817611868 | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,402 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=106, ppid=82, state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; TransitRegionStateProcedure table=ImportTable1, region=050bcf6e15ddd079d750992bbfb53163, ASSIGN | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,403 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=107, ppid=82, state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; TransitRegionStateProcedure table=hbase:hindex, region=6789443c0a98d2b34f891ae60878aac3, ASSIGN | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,404 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=108, ppid=82, state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; TransitRegionStateProcedure table=hbase:acl, region=96a2ec5ea797e6847188c965f8c78ce1, ASSIGN | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,404 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=109, ppid=82, state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; 
TransitRegionStateProcedure table=ImportTable1, region=24e0cb0a958d242976a790ff435d24b5, ASSIGN | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,405 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=110, ppid=82, state=RUNNABLE:REGION_STATE_TRANSITION_OPEN; TransitRegionStateProcedure table=ImportTable1, region=a2e7b85420a3cf98fc731ad93f7129a2, ASSIGN | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) *2023-01-05 19:56:41,405 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=111, ppid=82, state=RUNNABLE:REGION_STATE_TRANSITION_OPEN; TransitRegionStateProcedure table=hbase:namespace, region=9be1542260fa8af4a712ddda322b7b6f, ASSIGN | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343)* 2023-01-05 19:56:41,406 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=112, ppid=82, state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; TransitRegionStateProcedure table=hbase:rsgroup, region=eaf1531c6cc0738027def0b4d4615b5f, ASSIGN | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,406 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=125, ppid=95, state=SUCCESS; OpenRegionProcedure 85301e5c14a8c3e5ba31822d7db0a6fc, server=node-master3mpye,21302,1672817640502 | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,407 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=126, ppid=96, state=SUCCESS; OpenRegionProcedure 6695b9c5ad80249bc43830ddc5259487, server=node-master3mpye,21302,1672817640502 | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,408 | ERROR | 
master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=127, ppid=94, state=SUCCESS; OpenRegionProcedure 448b88d503d4e31c47b80ac10d8ef6a4, server=node-master3mpye,21302,1672817640502 | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:57:26,250 | INFO | PEWorker-13 | hdfs://hacluster/hbase/WALs/node-master2mesq,21302,1672919282543-splitting dir is empty, no logs to split. | org.apache.hadoop.hbase.master.SplitLogManager.getFileList(SplitLogManager.java:171) 2023-01-05 19:57:26,250 | INFO | PEWorker-13 | node-master2mesq,21302,1672919282543 WAL count=0, meta=false | org.apache.hadoop.hbase.master.SplitWALManager.getWALsToSplit(SplitWALManager.java:106) *2023-01-05 19:57:27,068 | WARN | master/node-master3MPYe:21300:becomeActiveMaster | hbase:namespace,,1672387265579.9be1542260fa8af4a712ddda322b7b6f. is NOT online; state=\{9be1542260fa8af4a712ddda322b7b6f state=ABNORMALLY_CLOSED, ts=1672919843989, server=node-master1ficj,21302,1672820444411}; ServerCrashProcedures=true. Master startup cannot progress, in holding-pattern until region onlined. 
| org.apache.hadoop.hbase.master.HMaster.isRegionOnline(HMaster.java:1264)* 2023-01-05 19:57:27,227 | INFO | PEWorker-13 | Initialized subprocedures=[ {pid=606, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=ImportTable1, region=8d29da5ef730c7a003cafb0be8981674, ASSIGN} , {pid=607, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=hbase:hindex, region=6789443c0a98d2b34f891ae60878aac3, ASSIGN} , {pid=608, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=ImportTable1, region=3117c3b77e4cf7a9f7ae9fa1aec87f08, ASSIGN} , {pid=609, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=hbase:acl, region=96a2ec5ea797e6847188c965f8c78ce1, ASSIGN} , {pid=610, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=ImportTable1, region=9051d9e17c5b811c09471616044ed8be, ASSIGN} ] | org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execProcedure(ProcedureExecutor.java:1683) 2023-01-05 19:57:27,235 | INFO | PEWorker-14 | Took xlock for pid=606, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=ImportTable1, region=8d29da5ef730c7a003cafb0be8981674, ASSIGN | org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) 2023-01-05 19:57:27,236 | INFO | PEWorker-4 | Took xlock for pid=607, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=hbase:hindex, region=6789443c0a98d2b34f891ae60878aac3, ASSIGN | org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) 2023-01-05 19:57:27,242 | INFO | PEWorker-16 | Took xlock for pid=609, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; 
TransitRegionStateProcedure table=hbase:acl, region=96a2ec5ea797e6847188c965f8c78ce1, ASSIGN | org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) 2023-01-05 19:57:27,245 | INFO | PEWorker-13 | Took xlock for pid=610, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=ImportTable1, region=9051d9e17c5b811c09471616044ed8be, ASSIGN | org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) 2023-01-05 19:57:27,249 | INFO | PEWorker-15 | Took xlock for pid=608, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=ImportTable1, region=3117c3b77e4cf7a9f7ae9fa1aec87f08, ASSIGN | org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) 2023-01-05 19:57:27,250 | INFO | PEWorker-14 | Setting lastHost as the region location node-master2mesq,21302,1672919282543 | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) 2023-01-05 19:57:27,252 | INFO | PEWorker-14 | Starting pid=606, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; TransitRegionStateProcedure table=ImportTable1, region=8d29da5ef730c7a003cafb0be8981674, ASSIGN; state=OPEN, location=node-master2mesq,21302,1672919282543; forceNewPlan=false, retain=true | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) 2023-01-05 19:57:27,254 | INFO | PEWorker-4 | Setting lastHost as the region location node-master2mesq,21302,1672919282543 | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) 2023-01-05 19:57:27,257 | INFO | PEWorker-4 | Starting pid=607, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, 
locked=true; TransitRegionStateProcedure table=hbase:hindex, region=6789443c0a98d2b34f891ae60878aac3, ASSIGN; state=OPEN, location=node-master2mesq,21302,1672919282543; forceNewPlan=false, retain=true | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) 2023-01-05 19:57:27,266 | INFO | PEWorker-16 | Setting lastHost as the region location node-master2mesq,21302,1672919282543 | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) 2023-01-05 19:57:27,266 | INFO | PEWorker-13 | Setting lastHost as the region location node-master2mesq,21302,1672919282543 | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) 2023-01-05 19:57:27,267 | INFO | PEWorker-16 | Starting pid=609, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; TransitRegionStateProcedure table=hbase:acl, region=96a2ec5ea797e6847188c965f8c78ce1, ASSIGN; state=OPEN, location=node-master2mesq,21302,1672919282543; forceNewPlan=false, retain=true | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) 2023-01-05 19:57:27,267 | INFO | PEWorker-13 | Starting pid=610, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; TransitRegionStateProcedure table=ImportTable1, region=9051d9e17c5b811c09471616044ed8be, ASSIGN; state=OPEN, location=node-master2mesq,21302,1672919282543; forceNewPlan=false, retain=true | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) 2023-01-05 19:57:27,271 | INFO | PEWorker-15 | Setting lastHost as the region location node-master2mesq,21302,1672919282543 | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) 2023-01-05 19:57:27,271 
| INFO | PEWorker-15 | Starting pid=608, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; TransitRegionStateProcedure table=ImportTable1, region=3117c3b77e4cf7a9f7ae9fa1aec87f08, ASSIGN; state=OPEN, location=node-master2mesq,21302,1672919282543; forceNewPlan=false, retain=true | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) 2023-01-05 19:57:27,280 | INFO | PEWorker-19 | pid=606 updating hbase:meta row=8d29da5ef730c7a003cafb0be8981674, regionState=OPENING, regionLocation=node-master1ficj,21302,1672919746624 | org.apache.hadoop.hbase.master.assignment.RegionStateStore.updateUserRegionLocation(RegionStateStore.java:219) *The hbase:namespace region is in ABNORMALLY_CLOSED state and the proc is corrupt, so hbase:namespace is never assigned and HMaster initialization fails* *I think we should process the regions in ABNORMALLY_CLOSED state in org.apache.hadoop.hbase.master.assignment.AssignmentManager#processOfflineRegions? 
or check proc in org.apache.hadoop.hbase.master.HMaster#waitForNamespaceOnline?* was: 2023-01-05 19:56:41,385 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=102, ppid=97, state=SUCCESS; OpenRegionProcedure 1903713b7f970a75db1e7a0e72da21d7, server=node-master2mesq,21302,1672817611868 | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,385 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=103, ppid=96, state=SUCCESS; OpenRegionProcedure 6695b9c5ad80249bc43830ddc5259487, server=node-master2mesq,21302,1672817611868 | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,402 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=106, ppid=82, state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; TransitRegionStateProcedure table=ImportTable1, region=050bcf6e15ddd079d750992bbfb53163, ASSIGN | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,403 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=107, ppid=82, state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; TransitRegionStateProcedure table=hbase:hindex, region=6789443c0a98d2b34f891ae60878aac3, ASSIGN | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,404 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=108, ppid=82, state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; TransitRegionStateProcedure table=hbase:acl, region=96a2ec5ea797e6847188c965f8c78ce1, ASSIGN | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,404 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=109, ppid=82, state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; 
TransitRegionStateProcedure table=ImportTable1, region=24e0cb0a958d242976a790ff435d24b5, ASSIGN | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,405 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=110, ppid=82, state=RUNNABLE:REGION_STATE_TRANSITION_OPEN; TransitRegionStateProcedure table=ImportTable1, region=a2e7b85420a3cf98fc731ad93f7129a2, ASSIGN | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) *2023-01-05 19:56:41,405 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=111, ppid=82, state=RUNNABLE:REGION_STATE_TRANSITION_OPEN; TransitRegionStateProcedure table=hbase:namespace, region=9be1542260fa8af4a712ddda322b7b6f, ASSIGN | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343)* 2023-01-05 19:56:41,406 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=112, ppid=82, state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; TransitRegionStateProcedure table=hbase:rsgroup, region=eaf1531c6cc0738027def0b4d4615b5f, ASSIGN | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,406 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=125, ppid=95, state=SUCCESS; OpenRegionProcedure 85301e5c14a8c3e5ba31822d7db0a6fc, server=node-master3mpye,21302,1672817640502 | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,407 | ERROR | master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=126, ppid=96, state=SUCCESS; OpenRegionProcedure 6695b9c5ad80249bc43830ddc5259487, server=node-master3mpye,21302,1672817640502 | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:56:41,408 | ERROR | 
master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=127, ppid=94, state=SUCCESS; OpenRegionProcedure 448b88d503d4e31c47b80ac10d8ef6a4, server=node-master3mpye,21302,1672817640502 | org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) 2023-01-05 19:57:26,250 | INFO | PEWorker-13 | hdfs://hacluster/hbase/WALs/node-master2mesq,21302,1672919282543-splitting dir is empty, no logs to split. | org.apache.hadoop.hbase.master.SplitLogManager.getFileList(SplitLogManager.java:171) 2023-01-05 19:57:26,250 | INFO | PEWorker-13 | node-master2mesq,21302,1672919282543 WAL count=0, meta=false | org.apache.hadoop.hbase.master.SplitWALManager.getWALsToSplit(SplitWALManager.java:106) *2023-01-05 19:57:27,068 | WARN | master/node-master3MPYe:21300:becomeActiveMaster | hbase:namespace,,1672387265579.9be1542260fa8af4a712ddda322b7b6f. is NOT online; state=\{9be1542260fa8af4a712ddda322b7b6f state=ABNORMALLY_CLOSED, ts=1672919843989, server=node-master1ficj,21302,1672820444411}; ServerCrashProcedures=true. Master startup cannot progress, in holding-pattern until region onlined. 
| org.apache.hadoop.hbase.master.HMaster.isRegionOnline(HMaster.java:1264)* 2023-01-05 19:57:27,227 | INFO | PEWorker-13 | Initialized subprocedures=[ {pid=606, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=ImportTable1, region=8d29da5ef730c7a003cafb0be8981674, ASSIGN} , {pid=607, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=hbase:hindex, region=6789443c0a98d2b34f891ae60878aac3, ASSIGN} , {pid=608, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=ImportTable1, region=3117c3b77e4cf7a9f7ae9fa1aec87f08, ASSIGN} , {pid=609, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=hbase:acl, region=96a2ec5ea797e6847188c965f8c78ce1, ASSIGN} , {pid=610, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=ImportTable1, region=9051d9e17c5b811c09471616044ed8be, ASSIGN} ] | org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execProcedure(ProcedureExecutor.java:1683) 2023-01-05 19:57:27,235 | INFO | PEWorker-14 | Took xlock for pid=606, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=ImportTable1, region=8d29da5ef730c7a003cafb0be8981674, ASSIGN | org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) 2023-01-05 19:57:27,236 | INFO | PEWorker-4 | Took xlock for pid=607, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=hbase:hindex, region=6789443c0a98d2b34f891ae60878aac3, ASSIGN | org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) 2023-01-05 19:57:27,242 | INFO | PEWorker-16 | Took xlock for pid=609, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; 
TransitRegionStateProcedure table=hbase:acl, region=96a2ec5ea797e6847188c965f8c78ce1, ASSIGN | org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) 2023-01-05 19:57:27,245 | INFO | PEWorker-13 | Took xlock for pid=610, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=ImportTable1, region=9051d9e17c5b811c09471616044ed8be, ASSIGN | org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) 2023-01-05 19:57:27,249 | INFO | PEWorker-15 | Took xlock for pid=608, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; TransitRegionStateProcedure table=ImportTable1, region=3117c3b77e4cf7a9f7ae9fa1aec87f08, ASSIGN | org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) 2023-01-05 19:57:27,250 | INFO | PEWorker-14 | Setting lastHost as the region location node-master2mesq,21302,1672919282543 | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) 2023-01-05 19:57:27,252 | INFO | PEWorker-14 | Starting pid=606, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; TransitRegionStateProcedure table=ImportTable1, region=8d29da5ef730c7a003cafb0be8981674, ASSIGN; state=OPEN, location=node-master2mesq,21302,1672919282543; forceNewPlan=false, retain=true | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) 2023-01-05 19:57:27,254 | INFO | PEWorker-4 | Setting lastHost as the region location node-master2mesq,21302,1672919282543 | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) 2023-01-05 19:57:27,257 | INFO | PEWorker-4 | Starting pid=607, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, 
locked=true; TransitRegionStateProcedure table=hbase:hindex, region=6789443c0a98d2b34f891ae60878aac3, ASSIGN; state=OPEN, location=node-master2mesq,21302,1672919282543; forceNewPlan=false, retain=true | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) 2023-01-05 19:57:27,266 | INFO | PEWorker-16 | Setting lastHost as the region location node-master2mesq,21302,1672919282543 | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) 2023-01-05 19:57:27,266 | INFO | PEWorker-13 | Setting lastHost as the region location node-master2mesq,21302,1672919282543 | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) 2023-01-05 19:57:27,267 | INFO | PEWorker-16 | Starting pid=609, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; TransitRegionStateProcedure table=hbase:acl, region=96a2ec5ea797e6847188c965f8c78ce1, ASSIGN; state=OPEN, location=node-master2mesq,21302,1672919282543; forceNewPlan=false, retain=true | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) 2023-01-05 19:57:27,267 | INFO | PEWorker-13 | Starting pid=610, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; TransitRegionStateProcedure table=ImportTable1, region=9051d9e17c5b811c09471616044ed8be, ASSIGN; state=OPEN, location=node-master2mesq,21302,1672919282543; forceNewPlan=false, retain=true | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) 2023-01-05 19:57:27,271 | INFO | PEWorker-15 | Setting lastHost as the region location node-master2mesq,21302,1672919282543 | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) 2023-01-05 19:57:27,271 
| INFO | PEWorker-15 | Starting pid=608, ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; TransitRegionStateProcedure table=ImportTable1, region=3117c3b77e4cf7a9f7ae9fa1aec87f08, ASSIGN; state=OPEN, location=node-master2mesq,21302,1672919282543; forceNewPlan=false, retain=true | org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) 2023-01-05 19:57:27,280 | INFO | PEWorker-19 | pid=606 updating hbase:meta row=8d29da5ef730c7a003cafb0be8981674, regionState=OPENING, regionLocation=node-master1ficj,21302,1672919746624 | org.apache.hadoop.hbase.master.assignment.RegionStateStore.updateUserRegionLocation(RegionStateStore.java:219) *The hbase:namespace is in ABNORMALLY_CLOSED state and the the proc is corrupt, then the hbase:namespace never assigned and HMaster Initialization failed* > HMaster can not finish Initialization when hbase:namespace is in > ABNORMALLY_CLOSED state and the proc corrupt > ------------------------------------------------------------------------------------------------------------- > > Key: HBASE-27552 > URL: https://issues.apache.org/jira/browse/HBASE-27552 > Project: HBase > Issue Type: Bug > Components: proc-v2 > Affects Versions: 2.4.14 > Reporter: chaijunjie > Priority: Major > > 2023-01-05 19:56:41,385 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=102, ppid=97, > state=SUCCESS; OpenRegionProcedure 1903713b7f970a75db1e7a0e72da21d7, > server=node-master2mesq,21302,1672817611868 | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 2023-01-05 19:56:41,385 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=103, ppid=96, > state=SUCCESS; OpenRegionProcedure 6695b9c5ad80249bc43830ddc5259487, > server=node-master2mesq,21302,1672817611868 | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 
2023-01-05 19:56:41,402 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=106, ppid=82, > state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; > TransitRegionStateProcedure table=ImportTable1, > region=050bcf6e15ddd079d750992bbfb53163, ASSIGN | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 2023-01-05 19:56:41,403 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=107, ppid=82, > state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; > TransitRegionStateProcedure table=hbase:hindex, > region=6789443c0a98d2b34f891ae60878aac3, ASSIGN | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 2023-01-05 19:56:41,404 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=108, ppid=82, > state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; > TransitRegionStateProcedure table=hbase:acl, > region=96a2ec5ea797e6847188c965f8c78ce1, ASSIGN | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 2023-01-05 19:56:41,404 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=109, ppid=82, > state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; > TransitRegionStateProcedure table=ImportTable1, > region=24e0cb0a958d242976a790ff435d24b5, ASSIGN | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 2023-01-05 19:56:41,405 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=110, ppid=82, > state=RUNNABLE:REGION_STATE_TRANSITION_OPEN; TransitRegionStateProcedure > table=ImportTable1, region=a2e7b85420a3cf98fc731ad93f7129a2, ASSIGN | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > *2023-01-05 19:56:41,405 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=111, ppid=82, > 
state=RUNNABLE:REGION_STATE_TRANSITION_OPEN; TransitRegionStateProcedure > table=hbase:namespace, region=9be1542260fa8af4a712ddda322b7b6f, ASSIGN | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343)* > 2023-01-05 19:56:41,406 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=112, ppid=82, > state=WAITING:REGION_STATE_TRANSITION_CONFIRM_OPENED; > TransitRegionStateProcedure table=hbase:rsgroup, > region=eaf1531c6cc0738027def0b4d4615b5f, ASSIGN | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 2023-01-05 19:56:41,406 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=125, ppid=95, > state=SUCCESS; OpenRegionProcedure 85301e5c14a8c3e5ba31822d7db0a6fc, > server=node-master3mpye,21302,1672817640502 | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 2023-01-05 19:56:41,407 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=126, ppid=96, > state=SUCCESS; OpenRegionProcedure 6695b9c5ad80249bc43830ddc5259487, > server=node-master3mpye,21302,1672817640502 | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > 2023-01-05 19:56:41,408 | ERROR | > master/node-master3MPYe:21300:becomeActiveMaster | Corrupt pid=127, ppid=94, > state=SUCCESS; OpenRegionProcedure 448b88d503d4e31c47b80ac10d8ef6a4, > server=node-master3mpye,21302,1672817640502 | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.handleCorrupted(ProcedureExecutor.java:343) > > 2023-01-05 19:57:26,250 | INFO | PEWorker-13 | > hdfs://hacluster/hbase/WALs/node-master2mesq,21302,1672919282543-splitting > dir is empty, no logs to split. 
| > org.apache.hadoop.hbase.master.SplitLogManager.getFileList(SplitLogManager.java:171) > 2023-01-05 19:57:26,250 | INFO | PEWorker-13 | > node-master2mesq,21302,1672919282543 WAL count=0, meta=false | > org.apache.hadoop.hbase.master.SplitWALManager.getWALsToSplit(SplitWALManager.java:106) > *2023-01-05 19:57:27,068 | WARN | > master/node-master3MPYe:21300:becomeActiveMaster | > hbase:namespace,,1672387265579.9be1542260fa8af4a712ddda322b7b6f. is NOT > online; state=\{9be1542260fa8af4a712ddda322b7b6f state=ABNORMALLY_CLOSED, > ts=1672919843989, server=node-master1ficj,21302,1672820444411}; > ServerCrashProcedures=true. Master startup cannot progress, in > holding-pattern until region onlined. | > org.apache.hadoop.hbase.master.HMaster.isRegionOnline(HMaster.java:1264)* > 2023-01-05 19:57:27,227 | INFO | PEWorker-13 | Initialized subprocedures=[ > {pid=606, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=ImportTable1, > region=8d29da5ef730c7a003cafb0be8981674, ASSIGN} > , > {pid=607, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=hbase:hindex, > region=6789443c0a98d2b34f891ae60878aac3, ASSIGN} > , > {pid=608, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=ImportTable1, > region=3117c3b77e4cf7a9f7ae9fa1aec87f08, ASSIGN} > , > {pid=609, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=hbase:acl, > region=96a2ec5ea797e6847188c965f8c78ce1, ASSIGN} > , > {pid=610, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=ImportTable1, > region=9051d9e17c5b811c09471616044ed8be, ASSIGN} > ] | > org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execProcedure(ProcedureExecutor.java:1683) > 2023-01-05 19:57:27,235 | INFO | PEWorker-14 | Took xlock for pid=606, > ppid=603, 
state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=ImportTable1, > region=8d29da5ef730c7a003cafb0be8981674, ASSIGN | > org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) > 2023-01-05 19:57:27,236 | INFO | PEWorker-4 | Took xlock for pid=607, > ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=hbase:hindex, > region=6789443c0a98d2b34f891ae60878aac3, ASSIGN | > org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) > 2023-01-05 19:57:27,242 | INFO | PEWorker-16 | Took xlock for pid=609, > ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=hbase:acl, > region=96a2ec5ea797e6847188c965f8c78ce1, ASSIGN | > org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) > 2023-01-05 19:57:27,245 | INFO | PEWorker-13 | Took xlock for pid=610, > ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=ImportTable1, > region=9051d9e17c5b811c09471616044ed8be, ASSIGN | > org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) > 2023-01-05 19:57:27,249 | INFO | PEWorker-15 | Took xlock for pid=608, > ppid=603, state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE; > TransitRegionStateProcedure table=ImportTable1, > region=3117c3b77e4cf7a9f7ae9fa1aec87f08, ASSIGN | > org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler.waitRegions(MasterProcedureScheduler.java:727) > 2023-01-05 19:57:27,250 | INFO | PEWorker-14 | Setting lastHost as the > region location node-master2mesq,21302,1672919282543 | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) > 2023-01-05 
19:57:27,252 | INFO | PEWorker-14 | Starting pid=606, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; > TransitRegionStateProcedure table=ImportTable1, > region=8d29da5ef730c7a003cafb0be8981674, ASSIGN; state=OPEN, > location=node-master2mesq,21302,1672919282543; forceNewPlan=false, > retain=true | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) > 2023-01-05 19:57:27,254 | INFO | PEWorker-4 | Setting lastHost as the region > location node-master2mesq,21302,1672919282543 | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) > 2023-01-05 19:57:27,257 | INFO | PEWorker-4 | Starting pid=607, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; > TransitRegionStateProcedure table=hbase:hindex, > region=6789443c0a98d2b34f891ae60878aac3, ASSIGN; state=OPEN, > location=node-master2mesq,21302,1672919282543; forceNewPlan=false, > retain=true | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) > 2023-01-05 19:57:27,266 | INFO | PEWorker-16 | Setting lastHost as the > region location node-master2mesq,21302,1672919282543 | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) > 2023-01-05 19:57:27,266 | INFO | PEWorker-13 | Setting lastHost as the > region location node-master2mesq,21302,1672919282543 | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) > 2023-01-05 19:57:27,267 | INFO | PEWorker-16 | Starting pid=609, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; > TransitRegionStateProcedure table=hbase:acl, > region=96a2ec5ea797e6847188c965f8c78ce1, ASSIGN; state=OPEN, > location=node-master2mesq,21302,1672919282543; 
forceNewPlan=false, > retain=true | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) > 2023-01-05 19:57:27,267 | INFO | PEWorker-13 | Starting pid=610, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; > TransitRegionStateProcedure table=ImportTable1, > region=9051d9e17c5b811c09471616044ed8be, ASSIGN; state=OPEN, > location=node-master2mesq,21302,1672919282543; forceNewPlan=false, > retain=true | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) > 2023-01-05 19:57:27,271 | INFO | PEWorker-15 | Setting lastHost as the > region location node-master2mesq,21302,1672919282543 | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:197) > 2023-01-05 19:57:27,271 | INFO | PEWorker-15 | Starting pid=608, ppid=603, > state=RUNNABLE:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, locked=true; > TransitRegionStateProcedure table=ImportTable1, > region=3117c3b77e4cf7a9f7ae9fa1aec87f08, ASSIGN; state=OPEN, > location=node-master2mesq,21302,1672919282543; forceNewPlan=false, > retain=true | > org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure.queueAssign(TransitRegionStateProcedure.java:201) > 2023-01-05 19:57:27,280 | INFO | PEWorker-19 | pid=606 updating hbase:meta > row=8d29da5ef730c7a003cafb0be8981674, regionState=OPENING, > regionLocation=node-master1ficj,21302,1672919746624 | > org.apache.hadoop.hbase.master.assignment.RegionStateStore.updateUserRegionLocation(RegionStateStore.java:219) > > *The hbase:namespace region is in ABNORMALLY_CLOSED state and the proc is > corrupt, so hbase:namespace is never assigned and HMaster initialization > fails* > > *I think we should process the regions in ABNORMALLY_CLOSED state in > org.apache.hadoop.hbase.master.assignment.AssignmentManager#processOfflineRegions? 
> or check proc in > org.apache.hadoop.hbase.master.HMaster#waitForNamespaceOnline?* -- This message was sent by Atlassian Jira (v8.20.10#820010)