[ https://issues.apache.org/jira/browse/HDFS-12749?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16691789#comment-16691789 ]
Hadoop QA commented on HDFS-12749: ---------------------------------- | (x) *{color:red}-1 overall{color}* | \\ \\ || Vote || Subsystem || Runtime || Comment || | {color:blue}0{color} | {color:blue} reexec {color} | {color:blue} 0m 21s{color} | {color:blue} Docker mode activated. {color} | || || || || {color:brown} Prechecks {color} || | {color:green}+1{color} | {color:green} @author {color} | {color:green} 0m 0s{color} | {color:green} The patch does not contain any @author tags. {color} | | {color:green}+1{color} | {color:green} test4tests {color} | {color:green} 0m 0s{color} | {color:green} The patch appears to include 1 new or modified test files. {color} | || || || || {color:brown} trunk Compile Tests {color} || | {color:red}-1{color} | {color:red} mvninstall {color} | {color:red} 5m 3s{color} | {color:red} root in trunk failed. {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 1m 4s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} checkstyle {color} | {color:green} 0m 51s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 1m 1s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} shadedclient {color} | {color:green} 12m 41s{color} | {color:green} branch has no errors when building and testing our client artifacts. {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 1m 59s{color} | {color:green} trunk passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 0m 47s{color} | {color:green} trunk passed {color} | || || || || {color:brown} Patch Compile Tests {color} || | {color:green}+1{color} | {color:green} mvninstall {color} | {color:green} 1m 2s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} compile {color} | {color:green} 0m 52s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javac {color} | {color:green} 0m 52s{color} | {color:green} the patch passed {color} | | {color:orange}-0{color} | {color:orange} checkstyle {color} | {color:orange} 0m 43s{color} | {color:orange} hadoop-hdfs-project/hadoop-hdfs: The patch generated 2 new + 47 unchanged - 0 fixed = 49 total (was 47) {color} | | {color:green}+1{color} | {color:green} mvnsite {color} | {color:green} 0m 56s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} whitespace {color} | {color:green} 0m 0s{color} | {color:green} The patch has no whitespace issues. {color} | | {color:green}+1{color} | {color:green} shadedclient {color} | {color:green} 11m 48s{color} | {color:green} patch has no errors when building and testing our client artifacts. {color} | | {color:green}+1{color} | {color:green} findbugs {color} | {color:green} 1m 57s{color} | {color:green} the patch passed {color} | | {color:green}+1{color} | {color:green} javadoc {color} | {color:green} 0m 44s{color} | {color:green} the patch passed {color} | || || || || {color:brown} Other Tests {color} || | {color:red}-1{color} | {color:red} unit {color} | {color:red}214m 29s{color} | {color:red} hadoop-hdfs in the patch failed. {color} | | {color:green}+1{color} | {color:green} asflicense {color} | {color:green} 0m 34s{color} | {color:green} The patch does not generate ASF License warnings. {color} | | {color:black}{color} | {color:black} {color} | {color:black}256m 30s{color} | {color:black} {color} | \\ \\ || Reason || Tests || | Failed junit tests | hadoop.hdfs.protocol.datatransfer.sasl.TestSaslDataTransfer | | | hadoop.hdfs.server.namenode.TestAuditLoggerWithCommands | | | hadoop.hdfs.server.blockmanagement.TestUnderReplicatedBlocks | | | hadoop.hdfs.server.namenode.TestReconstructStripedBlocks | | | hadoop.hdfs.server.namenode.TestFileContextAcl | | | hadoop.hdfs.TestClientReportBadBlock | | | hadoop.hdfs.TestReconstructStripedFileWithRandomECPolicy | | | hadoop.hdfs.TestDFSStripedOutputStream | | | hadoop.hdfs.TestReservedRawPaths | | | hadoop.hdfs.server.mover.TestMover | | | hadoop.hdfs.server.sps.TestExternalStoragePolicySatisfier | | | hadoop.hdfs.server.namenode.TestFsck | | | hadoop.hdfs.server.namenode.snapshot.TestINodeFileUnderConstructionWithSnapshot | | | hadoop.hdfs.TestUnsetAndChangeDirectoryEcPolicy | | | hadoop.hdfs.server.namenode.TestDiskspaceQuotaUpdate | | | hadoop.hdfs.server.namenode.metrics.TestNameNodeMetrics | | | hadoop.hdfs.server.namenode.TestBackupNode | | | hadoop.hdfs.client.impl.TestClientBlockVerification | | | hadoop.hdfs.tools.TestViewFSStoragePolicyCommands | | | hadoop.hdfs.server.blockmanagement.TestBlockTokenWithDFS | | | hadoop.hdfs.server.namenode.TestNamenodeRetryCache | | | hadoop.hdfs.server.namenode.TestReencryptionWithKMS | | | hadoop.hdfs.server.namenode.snapshot.TestRenameWithSnapshots | | | hadoop.hdfs.server.namenode.TestCheckpoint | | | hadoop.hdfs.server.datanode.TestDataNodeReconfiguration | | | hadoop.hdfs.client.impl.TestBlockReaderLocal | | | hadoop.hdfs.TestAppendSnapshotTruncate | | | hadoop.hdfs.server.namenode.TestFSImage | | | hadoop.hdfs.TestFileStatus | | | hadoop.hdfs.TestErasureCodingMultipleRacks | | | hadoop.hdfs.web.TestWebHdfsTimeouts | | | hadoop.hdfs.TestDistributedFileSystemWithECFile | | | hadoop.hdfs.TestParallelUnixDomainRead | | | hadoop.hdfs.TestFileAppend4 | | | hadoop.hdfs.TestDFSStripedOutputStreamWithFailureWithRandomECPolicy | | | hadoop.hdfs.shortcircuit.TestShortCircuitLocalRead | | | hadoop.hdfs.TestErasureCodingPolicies | | | hadoop.hdfs.server.namenode.TestNamenodeCapacityReport | | | hadoop.hdfs.server.namenode.TestAddBlockRetry | | | hadoop.hdfs.TestSafeMode | | | hadoop.hdfs.server.namenode.snapshot.TestOpenFilesWithSnapshot | | | hadoop.hdfs.TestDFSStripedOutputStreamWithFailure | | | hadoop.hdfs.server.datanode.TestIncrementalBlockReports | | | hadoop.hdfs.TestErasureCodingPoliciesWithRandomECPolicy | | | hadoop.hdfs.TestDFSStripedOutputStreamWithRandomECPolicy | | | hadoop.hdfs.TestFileCreation | | | hadoop.hdfs.TestEncryptionZonesWithHA | | | hadoop.hdfs.server.namenode.sps.TestStoragePolicySatisfierWithStripedFile | | | hadoop.hdfs.TestEncryptionZonesWithKMS | | | hadoop.hdfs.TestMiniDFSCluster | | | hadoop.hdfs.server.namenode.TestAuditLogs | | | hadoop.hdfs.TestDFSShell | | | hadoop.hdfs.TestFileAppend2 | | | hadoop.hdfs.TestDatanodeRegistration | | | hadoop.hdfs.server.namenode.TestFSEditLogLoader | | | hadoop.hdfs.TestDFSStorageStateRecovery | | | hadoop.hdfs.TestFileChecksum | | | hadoop.hdfs.server.namenode.TestCacheDirectives | | | hadoop.hdfs.TestDFSInputStream | | | hadoop.hdfs.client.impl.TestBlockReaderFactory | | | hadoop.hdfs.TestHDFSFileSystemContract | | | hadoop.hdfs.TestFileChecksumCompositeCrc | | | hadoop.hdfs.TestRead | | | hadoop.hdfs.TestReadStripedFileWithMissingBlocks | | | hadoop.hdfs.server.namenode.snapshot.TestSnapshotDeletion | | | hadoop.hdfs.TestDatanodeLayoutUpgrade | | | hadoop.hdfs.server.namenode.TestNameEditsConfigs | | | hadoop.hdfs.server.namenode.TestNameNodeStatusMXBean | | | hadoop.hdfs.qjournal.server.TestJournalNodeSync | | | hadoop.hdfs.server.namenode.ha.TestRetryCacheWithHA | | | hadoop.hdfs.web.TestWebHdfsWithRestCsrfPreventionFilter | | | hadoop.hdfs.tools.TestDFSAdmin | | | hadoop.hdfs.TestEncryptedTransfer | | | hadoop.hdfs.server.blockmanagement.TestBlockTokenWithDFSStriped | | | hadoop.hdfs.server.blockmanagement.TestBlocksWithNotEnoughRacks | | | hadoop.hdfs.server.diskbalancer.command.TestDiskBalancerCommand | | | hadoop.hdfs.server.namenode.TestAddOverReplicatedStripedBlocks | | | hadoop.hdfs.server.namenode.TestFSImageWithSnapshot | | | hadoop.hdfs.server.namenode.snapshot.TestRandomOpsWithSnapshots | | | hadoop.hdfs.client.impl.TestBlockReaderLocalLegacy | | | hadoop.hdfs.server.namenode.snapshot.TestAclWithSnapshot | | | hadoop.hdfs.TestDFSInotifyEventInputStream | | | hadoop.hdfs.TestErasureCodingExerciseAPIs | | | hadoop.hdfs.TestDFSStripedInputStream | | | hadoop.hdfs.TestDistributedFileSystemWithECFileWithRandomECPolicy | | | hadoop.hdfs.TestReconstructStripedFile | | | hadoop.hdfs.server.namenode.TestAddStripedBlocks | | | hadoop.hdfs.server.namenode.TestFavoredNodesEndToEnd | | | hadoop.hdfs.server.namenode.snapshot.TestSnapshot | | | hadoop.hdfs.TestErasureCodingPolicyWithSnapshotWithRandomECPolicy | | | hadoop.hdfs.server.namenode.TestNameNodeRecovery | | | hadoop.hdfs.TestReplication | | | hadoop.hdfs.TestFileAppend3 | | | hadoop.hdfs.TestErasureCodeBenchmarkThroughput | | | hadoop.hdfs.server.namenode.TestFSDirectory | | | hadoop.hdfs.TestPread | | | hadoop.hdfs.TestDatanodeDeath | | | hadoop.hdfs.server.namenode.snapshot.TestSnapRootDescendantDiff | | | hadoop.hdfs.server.namenode.TestQuotaByStorageType | | | hadoop.hdfs.server.balancer.TestBalancerRPCDelay | | | hadoop.hdfs.TestWriteRead | | | hadoop.hdfs.TestLeaseRecoveryStriped | | | hadoop.hdfs.TestErasureCodingPolicyWithSnapshot | | | hadoop.hdfs.TestFileAppend | | | hadoop.hdfs.tools.offlineEditsViewer.TestOfflineEditsViewer | | | hadoop.hdfs.server.namenode.TestDefaultBlockPlacementPolicy | | | hadoop.hdfs.TestReadStripedFileWithDecoding | | | hadoop.hdfs.TestDecommission | | | hadoop.hdfs.qjournal.client.TestQuorumJournalManager | | | hadoop.hdfs.TestAbandonBlock | | | hadoop.hdfs.server.blockmanagement.TestOverReplicatedBlocks | | | hadoop.hdfs.qjournal.client.TestQJMWithFaults | | | hadoop.hdfs.TestReadStripedFileWithDNFailure | | | hadoop.hdfs.server.namenode.snapshot.TestSnapshotBlocksMap | | | hadoop.hdfs.TestSetrepDecreasing | | | hadoop.hdfs.server.namenode.snapshot.TestSnapshotDiffReport | | | hadoop.hdfs.qjournal.client.TestQuorumCall | | | hadoop.hdfs.TestCrcCorruption | | | hadoop.hdfs.TestDecommissionWithStriped | | | hadoop.hdfs.server.namenode.TestBlockPlacementPolicyRackFaultTolerant | | | hadoop.hdfs.web.TestWebHDFSAcl | | | hadoop.hdfs.server.balancer.TestBalancer | | | hadoop.hdfs.server.namenode.TestCommitBlockWithInvalidGenStamp | | | hadoop.hdfs.TestBlockStoragePolicy | | | hadoop.hdfs.TestLeaseRecovery2 | | | hadoop.hdfs.TestEncryptionZones | | | hadoop.hdfs.tools.offlineImageViewer.TestOfflineImageViewerWithStripedBlocks | | | hadoop.hdfs.server.blockmanagement.TestBlockManager | | | hadoop.hdfs.TestDFSStripedInputStreamWithRandomECPolicy | | | hadoop.cli.TestHDFSCLI | | | hadoop.hdfs.server.namenode.TestReencryption | | | hadoop.hdfs.server.namenode.TestPersistentStoragePolicySatisfier | | | hadoop.hdfs.server.namenode.TestEditLogRace | \\ \\ || Subsystem || Report/Notes || | Docker | Client=17.05.0-ce Server=17.05.0-ce Image:yetus/hadoop:8f97d6f | | JIRA Issue | HDFS-12749 | | JIRA Patch URL | https://issues.apache.org/jira/secure/attachment/12925261/HDFS-12749-trunk.005.patch | | Optional Tests | dupname asflicense compile javac javadoc mvninstall mvnsite unit shadedclient findbugs checkstyle | | uname | Linux 70d97ca088fe 4.4.0-138-generic #164-Ubuntu SMP Tue Oct 2 17:16:02 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux | | Build tool | maven | | Personality | /testptch/patchprocess/precommit/personality/provided.sh | | git revision | trunk / 5a7ca6a | | maven | version: Apache Maven 3.3.9 | | Default Java | 1.8.0_181 | | mvninstall | https://builds.apache.org/job/PreCommit-HDFS-Build/25556/artifact/out/branch-mvninstall-root.txt | | findbugs | v3.1.0-RC1 | | checkstyle | https://builds.apache.org/job/PreCommit-HDFS-Build/25556/artifact/out/diff-checkstyle-hadoop-hdfs-project_hadoop-hdfs.txt | | unit | https://builds.apache.org/job/PreCommit-HDFS-Build/25556/artifact/out/patch-unit-hadoop-hdfs-project_hadoop-hdfs.txt | | Test Results | https://builds.apache.org/job/PreCommit-HDFS-Build/25556/testReport/ | | Max. process+thread count | 2620 (vs. ulimit of 10000) | | modules | C: hadoop-hdfs-project/hadoop-hdfs U: hadoop-hdfs-project/hadoop-hdfs | | Console output | https://builds.apache.org/job/PreCommit-HDFS-Build/25556/console | | Powered by | Apache Yetus 0.8.0 http://yetus.apache.org | This message was automatically generated. > DN may not send block report to NN after NN restart > --------------------------------------------------- > > Key: HDFS-12749 > URL: https://issues.apache.org/jira/browse/HDFS-12749 > Project: Hadoop HDFS > Issue Type: Bug > Components: datanode > Affects Versions: 2.7.1, 2.8.3, 2.7.5, 3.0.0, 2.9.1 > Reporter: TanYuxin > Assignee: He Xiaoqiao > Priority: Major > Attachments: HDFS-12749-branch-2.7.002.patch, > HDFS-12749-trunk.003.patch, HDFS-12749-trunk.004.patch, > HDFS-12749-trunk.005.patch, HDFS-12749.001.patch > > > Now our cluster have thousands of DN, millions of files and blocks. When NN > restart, NN's load is very high. > After NN restart,DN will call BPServiceActor#reRegister method to register. > But register RPC will get a IOException since NN is busy dealing with Block > Report. The exception is caught at BPServiceActor#processCommand. > Next is the caught IOException: > {code:java} > WARN org.apache.hadoop.hdfs.server.datanode.DataNode: Error processing > datanode Command > java.io.IOException: Failed on local exception: java.io.IOException: > java.net.SocketTimeoutException: 60000 millis timeout while waiting for > channel to be ready for read. ch : java.nio.channels.SocketChannel[connected > local=/DataNode_IP:Port remote=NameNode_Host/IP:Port]; Host Details : local > host is: "DataNode_Host/Datanode_IP"; destination host is: > "NameNode_Host":Port; > at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:773) > at org.apache.hadoop.ipc.Client.call(Client.java:1474) > at org.apache.hadoop.ipc.Client.call(Client.java:1407) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:229) > at com.sun.proxy.$Proxy13.registerDatanode(Unknown Source) > at > org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB.registerDatanode(DatanodeProtocolClientSideTranslatorPB.java:126) > at > org.apache.hadoop.hdfs.server.datanode.BPServiceActor.register(BPServiceActor.java:793) > at > org.apache.hadoop.hdfs.server.datanode.BPServiceActor.reRegister(BPServiceActor.java:926) > at > org.apache.hadoop.hdfs.server.datanode.BPOfferService.processCommandFromActor(BPOfferService.java:604) > at > org.apache.hadoop.hdfs.server.datanode.BPServiceActor.processCommand(BPServiceActor.java:898) > at > org.apache.hadoop.hdfs.server.datanode.BPServiceActor.offerService(BPServiceActor.java:711) > at > org.apache.hadoop.hdfs.server.datanode.BPServiceActor.run(BPServiceActor.java:864) > at java.lang.Thread.run(Thread.java:745) > {code} > The un-catched IOException breaks BPServiceActor#register, and the Block > Report can not be sent immediately. > {code} > /** > * Register one bp with the corresponding NameNode > * <p> > * The bpDatanode needs to register with the namenode on startup in order > * 1) to report which storage it is serving now and > * 2) to receive a registrationID > * > * issued by the namenode to recognize registered datanodes. > * > * @param nsInfo current NamespaceInfo > * @see FSNamesystem#registerDatanode(DatanodeRegistration) > * @throws IOException > */ > void register(NamespaceInfo nsInfo) throws IOException { > // The handshake() phase loaded the block pool storage > // off disk - so update the bpRegistration object from that info > DatanodeRegistration newBpRegistration = bpos.createRegistration(); > LOG.info(this + " beginning handshake with NN"); > while (shouldRun()) { > try { > // Use returned registration from namenode with updated fields > newBpRegistration = bpNamenode.registerDatanode(newBpRegistration); > newBpRegistration.setNamespaceInfo(nsInfo); > bpRegistration = newBpRegistration; > break; > } catch(EOFException e) { // namenode might have just restarted > LOG.info("Problem connecting to server: " + nnAddr + " :" > + e.getLocalizedMessage()); > sleepAndLogInterrupts(1000, "connecting to server"); > } catch(SocketTimeoutException e) { // namenode is busy > LOG.info("Problem connecting to server: " + nnAddr); > sleepAndLogInterrupts(1000, "connecting to server"); > } > } > > LOG.info("Block pool " + this + " successfully registered with NN"); > bpos.registrationSucceeded(this, bpRegistration); > // random short delay - helps scatter the BR from all DNs > scheduler.scheduleBlockReport(dnConf.initialBlockReportDelay); > } > {code} > But NameNode has processed registerDatanode successfully, so it won't ask DN > to re-register again -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org