[ 
https://issues.apache.org/jira/browse/HDFS-15095?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17009935#comment-17009935
 ] 

Ahmed Hussein commented on HDFS-15095:
--------------------------------------

Stack trace of failed tests from second submission:

*1. TestRedudantBlocks.testProcessOverReplicatedAndRedudantBlock*

{code:bash}
[INFO] Running org.apache.hadoop.hdfs.server.namenode.TestRedudantBlocks
[ERROR] Tests run: 1, Failures: 1, Errors: 0, Skipped: 0, Time elapsed: 10.046 
s <<< FAILURE! - in org.apache.hadoop.hdfs.server.namenode.TestRedudantBlocks
[ERROR] 
testProcessOverReplicatedAndRedudantBlock(org.apache.hadoop.hdfs.server.namenode.TestRedudantBlocks)
  Time elapsed: 9.966 s  <<< FAILURE!
java.lang.AssertionError: expected:<5> but was:<4>
        at org.junit.Assert.fail(Assert.java:88)
        at org.junit.Assert.failNotEquals(Assert.java:834)
        at org.junit.Assert.assertEquals(Assert.java:645)
        at org.junit.Assert.assertEquals(Assert.java:631)
        at 
org.apache.hadoop.hdfs.server.namenode.TestRedudantBlocks.testProcessOverReplicatedAndRedudantBlock(TestRedudantBlocks.java:138)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at 
org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
        at 
org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
        at 
org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
        at 
org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
        at 
org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
        at 
org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
        at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
        at 
org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
        at 
org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
        at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
        at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
        at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
        at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
        at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
        at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
        at 
org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
        at 
org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
        at 
org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
        at 
org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
        at 
org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:384)
        at 
org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:345)
        at 
org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:126)
        at 
org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:418)
{code}

*2. TestBlockStatsMXBean.testStorageTypeStatsWhenStorageFailed*

{code:bash}
[ERROR] Tests run: 3, Failures: 1, Errors: 0, Skipped: 0, Time elapsed: 25.87 s 
<<< FAILURE! - in 
org.apache.hadoop.hdfs.server.blockmanagement.TestBlockStatsMXBean
[ERROR] 
testStorageTypeStatsWhenStorageFailed(org.apache.hadoop.hdfs.server.blockmanagement.TestBlockStatsMXBean)
  Time elapsed: 16.192 s  <<< FAILURE!
java.lang.AssertionError: expected:<6> but was:<3>
        at org.junit.Assert.fail(Assert.java:88)
        at org.junit.Assert.failNotEquals(Assert.java:834)
        at org.junit.Assert.assertEquals(Assert.java:645)
        at org.junit.Assert.assertEquals(Assert.java:631)
        at 
org.apache.hadoop.hdfs.server.blockmanagement.TestBlockStatsMXBean.testStorageTypeStatsWhenStorageFailed(TestBlockStatsMXBean.java:213)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at 
org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
        at 
org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
        at 
org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
        at 
org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
        at 
org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
        at 
org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
        at 
org.junit.internal.runners.statements.FailOnTimeout$CallableStatement.call(FailOnTimeout.java:298)
        at 
org.junit.internal.runners.statements.FailOnTimeout$CallableStatement.call(FailOnTimeout.java:292)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at java.lang.Thread.run(Thread.java:748)

{code}

*3. TestNameNodeMXBean.testDecommissioningNodes*

{code:java}
[ERROR] Tests run: 12, Failures: 1, Errors: 0, Skipped: 0, Time elapsed: 38.265 
s <<< FAILURE! - in org.apache.hadoop.hdfs.server.namenode.TestNameNodeMXBean
[ERROR] 
testDecommissioningNodes(org.apache.hadoop.hdfs.server.namenode.TestNameNodeMXBean)
  Time elapsed: 5.345 s  <<< FAILURE!
org.junit.ComparisonFailure: 
expected:<...0,"lastBlockReport":[0]},"127.0.0.1:40905":...> but 
was:<...0,"lastBlockReport":[107569]},"127.0.0.1:40905":...>
        at org.junit.Assert.assertEquals(Assert.java:115)
        at org.junit.Assert.assertEquals(Assert.java:144)
        at 
org.apache.hadoop.hdfs.server.namenode.TestNameNodeMXBean.testDecommissioningNodes(TestNameNodeMXBean.java:369)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at 
org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
        at 
org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
        at 
org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
        at 
org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
        at 
org.junit.internal.runners.statements.FailOnTimeout$CallableStatement.call(FailOnTimeout.java:298)
        at 
org.junit.internal.runners.statements.FailOnTimeout$CallableStatement.call(FailOnTimeout.java:292)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at java.lang.Thread.run(Thread.java:748)

{code}


> Fix accidental comment in flaky test TestDecommissioningStatus
> --------------------------------------------------------------
>
>                 Key: HDFS-15095
>                 URL: https://issues.apache.org/jira/browse/HDFS-15095
>             Project: Hadoop HDFS
>          Issue Type: Bug
>          Components: hdfs
>            Reporter: Ahmed Hussein
>            Assignee: Ahmed Hussein
>            Priority: Major
>         Attachments: HDFS-15095.001.patch, HDFS-15095.002.patch
>
>
> There are some old Jiras suggesting that {{testDecommissionStatus}} is 
> flaky.
>  * HDFS-12188
>  * HDFS-9599
>  * HDFS-9950
>  * HDFS-10755
> However, the HDFS-14854 fix accidentally commented out one of the checks in 
> {{TestDecommissioningStatus.testDecommissionStatus()}}. This Jira will 
> restore the commented-out code and add a blocking queue to make the test 
> case deterministic.
> My intuition is that the monitor task launched by AdminManager may not have 
> enough time to act before we start verifying the status. I suggest forcing the 
> main thread to block until the node is added to the blocked node.
>   
>   



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to