[
https://issues.apache.org/jira/browse/HDFS-15762?focusedWorklogId=533837&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-533837
]
ASF GitHub Bot logged work on HDFS-15762:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 10/Jan/21 04:23
Start Date: 10/Jan/21 04:23
Worklog Time Spent: 10m
Work Description: touchida commented on a change in pull request #2598:
URL: https://github.com/apache/hadoop/pull/2598#discussion_r554038392
##########
File path:
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMultipleNNPortQOP.java
##########
@@ -251,62 +252,48 @@ public void testMultipleNNPortOverwriteDownStream()
throws Exception {
clientConf.set(HADOOP_RPC_PROTECTION, "privacy");
FileSystem fsPrivacy = FileSystem.get(uriPrivacyPort, clientConf);
doTest(fsPrivacy, PATH1);
- for (int i = 0; i < 2; i++) {
- DataNode dn = dataNodes.get(i);
- SaslDataTransferClient saslClient = dn.getSaslClient();
- String qop = null;
- // It may take some time for the qop to populate
- // to all DNs, check in a loop.
- for (int trial = 0; trial < 10; trial++) {
- qop = saslClient.getTargetQOP();
- if (qop != null) {
- break;
- }
- Thread.sleep(100);
- }
- assertEquals("auth", qop);
- }
+ long count = dataNodes.stream()
+ .map(dn -> dn.getSaslClient().getTargetQOP())
+ .filter(Objects::nonNull).filter(qop -> qop.equals("auth")).count();
+ // For each data pipeline, targetQOPs of sasl clients in the first two
+ // datanodes become equal to auth.
+ // Note that it is not necessarily the case for all datanodes,
+ // since a datanode may be always at the last position in pipelines.
+ assertTrue("At least two qops should be auth", count >= 2);
Review comment:
@amahussein Thanks for your review!
Yes, I did.
But I don't think we need to retry it here, since the test already makes
sure that the blocks have been replicated.
https://github.com/apache/hadoop/blob/443a77fbe8dc29e4b36cac7b935405d70bc79158/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMultipleNNPortQOP.java#L305-L307
IIUC, the qops are populated during SASL negotiation, which happens before the
datanodes send packets (i.e., before replication completes).
https://github.com/apache/hadoop/blob/443a77fbe8dc29e4b36cac7b935405d70bc79158/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java#L414
It seems that the author of the description thought the flakiness of this test
was caused by insufficient sleep time, but I believe that is not the case.
https://issues.apache.org/jira/browse/HDFS-15148?focusedCommentId=17026282&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-17026282
I also considered waiting for replication inside TestMultipleNNPortQOP#doTest,
since FileSystemTestHelper.createFile returning successfully does not
necessarily imply that replication has completed.
https://github.com/apache/hadoop/blob/443a77fbe8dc29e4b36cac7b935405d70bc79158/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMultipleNNPortQOP.java#L301
But I'm not sure we really need this.
May I ask your opinion?
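For reference, a minimal sketch of what waiting for replication could look
like; it relies on `DFSTestUtil.waitReplication`, and the wrapper method name
and the replication factor are my assumptions, so please treat it as an
illustration rather than part of this patch:
```java
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSTestUtil;

// Hypothetical variant of doTest that blocks until the created file is
// fully replicated before any qop assertions run.
private void doTestAndWaitForReplication(FileSystem fs, Path path)
    throws Exception {
  doTest(fs, path);
  // Replication factor 3 is assumed here to match the 3-datanode mini cluster.
  DFSTestUtil.waitReplication(fs, path, (short) 3);
}
```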
##########
File path:
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMultipleNNPortQOP.java
##########
@@ -251,62 +252,48 @@ public void testMultipleNNPortOverwriteDownStream()
throws Exception {
clientConf.set(HADOOP_RPC_PROTECTION, "privacy");
FileSystem fsPrivacy = FileSystem.get(uriPrivacyPort, clientConf);
doTest(fsPrivacy, PATH1);
- for (int i = 0; i < 2; i++) {
- DataNode dn = dataNodes.get(i);
- SaslDataTransferClient saslClient = dn.getSaslClient();
- String qop = null;
- // It may take some time for the qop to populate
- // to all DNs, check in a loop.
- for (int trial = 0; trial < 10; trial++) {
- qop = saslClient.getTargetQOP();
- if (qop != null) {
- break;
- }
- Thread.sleep(100);
- }
- assertEquals("auth", qop);
- }
+ long count = dataNodes.stream()
+ .map(dn -> dn.getSaslClient().getTargetQOP())
+ .filter(Objects::nonNull).filter(qop -> qop.equals("auth")).count();
+ // For each data pipeline, targetQOPs of sasl clients in the first two
+ // datanodes become equal to auth.
+ // Note that it is not necessarily the case for all datanodes,
+ // since a datanode may be always at the last position in pipelines.
+ assertTrue("At least two qops should be auth", count >= 2);
clientConf.set(HADOOP_RPC_PROTECTION, "integrity");
FileSystem fsIntegrity = FileSystem.get(uriIntegrityPort, clientConf);
+ // Reset targetQOPs to null to clear the last state.
+ resetTargetQOP(dataNodes);
doTest(fsIntegrity, PATH2);
- for (int i = 0; i < 2; i++) {
- DataNode dn = dataNodes.get(i);
- SaslDataTransferClient saslClient = dn.getSaslClient();
- String qop = null;
- for (int trial = 0; trial < 10; trial++) {
- qop = saslClient.getTargetQOP();
- if (qop != null) {
- break;
- }
- Thread.sleep(100);
- }
- assertEquals("auth", qop);
- }
+ count = dataNodes.stream().map(dn -> dn.getSaslClient().getTargetQOP())
+ .filter(Objects::nonNull).filter(qop -> qop.equals("auth")).count();
+ assertTrue("At least two qops should be auth", count >= 2);
clientConf.set(HADOOP_RPC_PROTECTION, "authentication");
FileSystem fsAuth = FileSystem.get(uriAuthPort, clientConf);
+ // Reset negotiatedQOPs to null to clear the last state.
+ resetNegotiatedQOP(dataNodes);
doTest(fsAuth, PATH3);
- for (int i = 0; i < 3; i++) {
- DataNode dn = dataNodes.get(i);
- SaslDataTransferServer saslServer = dn.getSaslServer();
- String qop = null;
- for (int trial = 0; trial < 10; trial++) {
- qop = saslServer.getNegotiatedQOP();
- if (qop != null) {
- break;
- }
- Thread.sleep(100);
- }
- assertEquals("auth", qop);
- }
+ count = dataNodes.stream()
+ .map(dn -> dn.getSaslServer().getNegotiatedQOP())
+ .filter(Objects::nonNull).filter(qop -> qop.equals("auth")).count();
+ assertEquals("All qops should be auth", 3, count);
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
+ private static void resetTargetQOP(List<DataNode> dns) {
Review comment:
The reason the reset steps were added is that targetQOP and negotiatedQOP
have already been populated to auth by the preceding `doTest(fsPrivacy, PATH1)`.
Without the reset, the second and third qop assertions would therefore pass
regardless of whether the overwriting by `doTest(fsIntegrity, PATH2)` and
`doTest(fsAuth, PATH3)` actually works.
The not-null check in the original code is only there to confirm that a qop
has been updated, but I think it's unnecessary, as explained in
https://github.com/apache/hadoop/pull/2598/files#r554038392.
> Is there a better way to be more specific for each step?
Sorry, but I do not understand your question well :bow:
If you mean that `count >= 2` is ambiguous, then I agree.
Unfortunately, I don't currently know how to obtain the order of the datanodes
in a pipeline.
If that is possible, we could assert on the qops of exactly the first two
datanodes in each pipeline.
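For context, a sketch of what the reset helpers in this revision look like.
`resetTargetQOP` uses the `@VisibleForTesting` setter added in this PR; the
`setNegotiatedQOP` call on the server side is an assumption, since that setter
is not shown in these hunks:
```java
import java.util.List;
import org.apache.hadoop.hdfs.server.datanode.DataNode;

// Clear the client-side targetQOP on every datanode so the next doTest
// call has to populate it again.
private static void resetTargetQOP(List<DataNode> dns) {
  dns.forEach(dn -> dn.getSaslClient().setTargetQOP(null));
}

// Server-side counterpart; assumes SaslDataTransferServer exposes a
// corresponding setNegotiatedQOP setter (not shown in these hunks).
private static void resetNegotiatedQOP(List<DataNode> dns) {
  dns.forEach(dn -> dn.getSaslServer().setNegotiatedQOP(null));
}
```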
##########
File path:
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java
##########
@@ -405,6 +405,11 @@ public String getTargetQOP() {
return targetQOP;
}
+ @VisibleForTesting
+ public void setTargetQOP(String targetQOP) {
Review comment:
I totally agree with you.
Let me find a better way.
##########
File path:
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMultipleNNPortQOP.java
##########
@@ -251,62 +252,48 @@ public void testMultipleNNPortOverwriteDownStream()
throws Exception {
clientConf.set(HADOOP_RPC_PROTECTION, "privacy");
FileSystem fsPrivacy = FileSystem.get(uriPrivacyPort, clientConf);
doTest(fsPrivacy, PATH1);
- for (int i = 0; i < 2; i++) {
- DataNode dn = dataNodes.get(i);
- SaslDataTransferClient saslClient = dn.getSaslClient();
- String qop = null;
- // It may take some time for the qop to populate
- // to all DNs, check in a loop.
- for (int trial = 0; trial < 10; trial++) {
- qop = saslClient.getTargetQOP();
- if (qop != null) {
- break;
- }
- Thread.sleep(100);
- }
- assertEquals("auth", qop);
- }
+ long count = dataNodes.stream()
+ .map(dn -> dn.getSaslClient().getTargetQOP())
+ .filter(Objects::nonNull).filter(qop -> qop.equals("auth")).count();
+ // For each data pipeline, targetQOPs of sasl clients in the first two
+ // datanodes become equal to auth.
+ // Note that it is not necessarily the case for all datanodes,
+ // since a datanode may be always at the last position in pipelines.
+ assertTrue("At least two qops should be auth", count >= 2);
clientConf.set(HADOOP_RPC_PROTECTION, "integrity");
FileSystem fsIntegrity = FileSystem.get(uriIntegrityPort, clientConf);
+ // Reset targetQOPs to null to clear the last state.
+ resetTargetQOP(dataNodes);
doTest(fsIntegrity, PATH2);
- for (int i = 0; i < 2; i++) {
- DataNode dn = dataNodes.get(i);
- SaslDataTransferClient saslClient = dn.getSaslClient();
- String qop = null;
- for (int trial = 0; trial < 10; trial++) {
- qop = saslClient.getTargetQOP();
- if (qop != null) {
- break;
- }
- Thread.sleep(100);
- }
- assertEquals("auth", qop);
- }
+ count = dataNodes.stream().map(dn -> dn.getSaslClient().getTargetQOP())
+ .filter(Objects::nonNull).filter(qop -> qop.equals("auth")).count();
+ assertTrue("At least two qops should be auth", count >= 2);
clientConf.set(HADOOP_RPC_PROTECTION, "authentication");
FileSystem fsAuth = FileSystem.get(uriAuthPort, clientConf);
+ // Reset negotiatedQOPs to null to clear the last state.
+ resetNegotiatedQOP(dataNodes);
doTest(fsAuth, PATH3);
- for (int i = 0; i < 3; i++) {
- DataNode dn = dataNodes.get(i);
- SaslDataTransferServer saslServer = dn.getSaslServer();
- String qop = null;
- for (int trial = 0; trial < 10; trial++) {
- qop = saslServer.getNegotiatedQOP();
- if (qop != null) {
- break;
- }
- Thread.sleep(100);
- }
- assertEquals("auth", qop);
- }
+ count = dataNodes.stream()
+ .map(dn -> dn.getSaslServer().getNegotiatedQOP())
+ .filter(Objects::nonNull).filter(qop -> qop.equals("auth")).count();
+ assertEquals("All qops should be auth", 3, count);
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
+ private static void resetTargetQOP(List<DataNode> dns) {
Review comment:
In the end, I decided not to reset the qops (fa7ada7). I now think the
original author's intention was to verify that the inter-DN qop never changes
to `auth-conf` or `auth-int` across **consecutive** client calls with various
qops.
Moreover, the reset is not related to the flakiness at all.
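Purely as an illustration of that reading, an assertion along these lines
(the helper name is hypothetical; it reuses the existing `getTargetQOP`
accessor) could be run after each `doTest` call to check that the inter-DN
qop never escalates:
```java
import java.util.List;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import static org.junit.Assert.assertEquals;

// Hypothetical check: no datanode's inter-DN client qop may escalate
// beyond "auth", regardless of the client-facing qop used by doTest.
private static void assertNoQopEscalation(List<DataNode> dns) {
  for (DataNode dn : dns) {
    String qop = dn.getSaslClient().getTargetQOP();
    // qop may still be null for a datanode that never acted as a pipeline head.
    if (qop != null) {
      assertEquals("Inter-DN QOP must stay at auth", "auth", qop);
    }
  }
}
```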
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 533837)
Time Spent: 2h 20m (was: 2h 10m)
> TestMultipleNNPortQOP#testMultipleNNPortOverwriteDownStream fails
> intermittently
> --------------------------------------------------------------------------------
>
> Key: HDFS-15762
> URL: https://issues.apache.org/jira/browse/HDFS-15762
> Project: Hadoop HDFS
> Issue Type: Sub-task
> Reporter: Toshihiko Uchida
> Assignee: Toshihiko Uchida
> Priority: Minor
> Labels: flaky-test, pull-request-available
> Attachments: PR2585#1-TestMultipleNNPortQOP-output.txt
>
> Time Spent: 2h 20m
> Remaining Estimate: 0h
>
> This unit test failed in https://github.com/apache/hadoop/pull/2585 due to an
> AssertionError.
> {code}
> java.lang.AssertionError: expected:<auth> but was:<null>
>   at org.junit.Assert.fail(Assert.java:88)
>   at org.junit.Assert.failNotEquals(Assert.java:834)
>   at org.junit.Assert.assertEquals(Assert.java:118)
>   at org.junit.Assert.assertEquals(Assert.java:144)
>   at org.apache.hadoop.hdfs.TestMultipleNNPortQOP.testMultipleNNPortOverwriteDownStream(TestMultipleNNPortQOP.java:267)
>   at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>   at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>   at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>   at java.lang.reflect.Method.invoke(Method.java:498)
>   at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
>   at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
>   at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
>   at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
>   at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
>   at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
>   at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
>   at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
>   at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
>   at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
>   at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
>   at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
>   at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
>   at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
>   at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
>   at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
>   at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
>   at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
>   at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
>   at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
>   at org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:384)
>   at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:345)
>   at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:126)
>   at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:418)
> {code}
> The failure occurred at the following assertion.
> {code}
> doTest(fsPrivacy, PATH1);
> for (int i = 0; i < 2; i++) {
>   DataNode dn = dataNodes.get(i);
>   SaslDataTransferClient saslClient = dn.getSaslClient();
>   String qop = null;
>   // It may take some time for the qop to populate
>   // to all DNs, check in a loop.
>   for (int trial = 0; trial < 10; trial++) {
>     qop = saslClient.getTargetQOP();
>     if (qop != null) {
>       break;
>     }
>     Thread.sleep(100);
>   }
>   assertEquals("auth", qop);
> }
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]