ayushtkn commented on code in PR #5582:
URL: https://github.com/apache/hadoop/pull/5582#discussion_r1174450298
##########
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java:
##########
@@ -262,13 +262,22 @@ private static DirectoryListing getListing(FSDirectory fsd, INodesInPath iip,
           needLocation, false);
       listingCnt++;
       if (listing[i] instanceof HdfsLocatedFileStatus) {
-          // Once we hit lsLimit locations, stop.
+        // Once we hit lsLimit locations, stop.
           // This helps to prevent excessively large response payloads.
-          // Approximate #locations with locatedBlockCount() * repl_factor
           LocatedBlocks blks =
               ((HdfsLocatedFileStatus)listing[i]).getLocatedBlocks();
-          locationBudget -= (blks == null) ? 0 :
-              blks.locatedBlockCount() * listing[i].getReplication();
+        if (blks != null) {
+          ErasureCodingPolicy ecPolicy =
+              listing[i].getErasureCodingPolicy();
+          if (ecPolicy != null) {
Review Comment:
   In case we set replicated as the EC policy, does this come back null here as well?
   If not, add ```&& !ecPolicy.isReplicationPolicy()```
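   For illustration, a minimal sketch of the guarded branch being suggested — assuming `ErasureCodingPolicy.isReplicationPolicy()` returns true for the built-in replication policy:
   ```java
   // Sketch: count striped locations only for a genuine EC policy; fall back
   // to the replication approximation otherwise, including when the file's
   // policy is the built-in replication policy.
   if (ecPolicy != null && !ecPolicy.isReplicationPolicy()) {
     // One internal block per data/parity unit in each block group.
     locationBudget -= blks.locatedBlockCount() *
         (ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits());
   } else {
     // Approximate #locations with locatedBlockCount() * repl_factor.
     locationBudget -=
         blks.locatedBlockCount() * listing[i].getReplication();
   }
   ```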
##########
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java:
##########
@@ -674,6 +679,57 @@ public void testClearStatistics() throws Exception {
     }
   }
 
+  @Test
+  public void testGetListingLimit() throws Exception {
+    final Configuration conf = getTestConfiguration();
+    conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, 9);
+    final MiniDFSCluster cluster =
+        new MiniDFSCluster.Builder(conf).numDataNodes(9).build();
+    try {
+      cluster.waitActive();
+      ErasureCodingPolicy ecPolicy = StripedFileTestUtil.getDefaultECPolicy();
+      final DistributedFileSystem fs = cluster.getFileSystem();
+      fs.dfs = spy(fs.dfs);
+      fs.enableErasureCodingPolicy(ecPolicy.getName());
+      Path dir1 = new Path("/testRep");
+      Path dir2 = new Path("/testEC");
+      fs.mkdirs(dir1);
+      fs.mkdirs(dir2);
+      fs.setErasureCodingPolicy(dir2, ecPolicy.getName());
+      for (int i = 0; i < 3; i++) {
+        DFSTestUtil.createFile(fs, new Path(dir1, String.valueOf(i)),
+            20 * 1024L, (short) 3, 1);
+        DFSTestUtil.createStripedFile(cluster, new Path(dir2,
+            String.valueOf(i)), dir2, 1, 1, false);
+      }
+
+      RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(dir1);
+      int total = 0;
+      while (iter.hasNext()) {
+        iter.next();
+        ++total;
+      }
+      assertEquals(3, total);
+      Mockito.verify(fs.dfs, Mockito.times(1)).listPaths(anyString(), any(),
+          anyBoolean());
+
+      iter = fs.listLocatedStatus(dir2);
+      total = 0;
+      while (iter.hasNext()) {
+        iter.next();
+        ++total;
+      }
Review Comment:
can use
```
List<LocatedFileStatus> str =
RemoteIterators.toList(fs.listLocatedStatus(dir2));
assertThat(str).hasSize(3);
```
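   (`RemoteIterators` here is presumably `org.apache.hadoop.util.functional.RemoteIterators`, whose `toList()` drains a `RemoteIterator` into a `List`, and `assertThat` the AssertJ assertion; the test would need the corresponding imports.)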
##########
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java:
##########
@@ -262,13 +262,22 @@ private static DirectoryListing getListing(FSDirectory fsd, INodesInPath iip,
           needLocation, false);
       listingCnt++;
       if (listing[i] instanceof HdfsLocatedFileStatus) {
-          // Once we hit lsLimit locations, stop.
+        // Once we hit lsLimit locations, stop.
           // This helps to prevent excessively large response payloads.
-          // Approximate #locations with locatedBlockCount() * repl_factor
           LocatedBlocks blks =
               ((HdfsLocatedFileStatus)listing[i]).getLocatedBlocks();
-          locationBudget -= (blks == null) ? 0 :
-              blks.locatedBlockCount() * listing[i].getReplication();
+        if (blks != null) {
+          ErasureCodingPolicy ecPolicy =
+              listing[i].getErasureCodingPolicy();
+          if (ecPolicy != null) {
+            locationBudget -= blks.locatedBlockCount() *
+                (ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits());
+          } else {
+            // Approximate #locations with locatedBlockCount() * repl_factor
Review Comment:
   This line applies to both erasure-coded and replicated files. For an EC file with RS-6-3-1024k on a cluster with only 6 datanodes, the write still succeeds, but the block group spans only those 6 nodes, so the location count is an approximation there too. Since the comment holds for both EC and replicated files, pull it up above the if/else.
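   One way to read the suggestion, as a sketch with the shared approximation comment hoisted above the branch (variable names taken from the diff above):
   ```java
   // Approximate #locations with locatedBlockCount() * per-block location
   // count; this is an approximation for EC files too, since a block group
   // may span fewer nodes than numDataUnits + numParityUnits.
   if (ecPolicy != null) {
     locationBudget -= blks.locatedBlockCount() *
         (ecPolicy.getNumDataUnits() + ecPolicy.getNumParityUnits());
   } else {
     locationBudget -=
         blks.locatedBlockCount() * listing[i].getReplication();
   }
   ```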
##########
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java:
##########
@@ -674,6 +679,57 @@ public void testClearStatistics() throws Exception {
     }
   }
 
+  @Test
+  public void testGetListingLimit() throws Exception {
+    final Configuration conf = getTestConfiguration();
+    conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, 9);
+    final MiniDFSCluster cluster =
+        new MiniDFSCluster.Builder(conf).numDataNodes(9).build();
+    try {
+      cluster.waitActive();
+      ErasureCodingPolicy ecPolicy = StripedFileTestUtil.getDefaultECPolicy();
+      final DistributedFileSystem fs = cluster.getFileSystem();
+      fs.dfs = spy(fs.dfs);
+      fs.enableErasureCodingPolicy(ecPolicy.getName());
Review Comment:
   Isn't the default EC policy always enabled?
##########
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java:
##########
@@ -674,6 +679,57 @@ public void testClearStatistics() throws Exception {
     }
   }
 
+  @Test
+  public void testGetListingLimit() throws Exception {
+    final Configuration conf = getTestConfiguration();
+    conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, 9);
+    final MiniDFSCluster cluster =
+        new MiniDFSCluster.Builder(conf).numDataNodes(9).build();
+    try {
+      cluster.waitActive();
Review Comment:
   Use try-with-resources for the cluster.
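   A minimal sketch of that shape, relying on `MiniDFSCluster` implementing `AutoCloseable`:
   ```java
   // The cluster is shut down automatically when the try block exits,
   // so the explicit finally/shutdown() can go away.
   try (MiniDFSCluster cluster =
       new MiniDFSCluster.Builder(conf).numDataNodes(9).build()) {
     cluster.waitActive();
     // ... rest of the test body unchanged ...
   }
   ```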
##########
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java:
##########
@@ -674,6 +679,57 @@ public void testClearStatistics() throws Exception {
     }
   }
 
+  @Test
+  public void testGetListingLimit() throws Exception {
+    final Configuration conf = getTestConfiguration();
+    conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, 9);
+    final MiniDFSCluster cluster =
+        new MiniDFSCluster.Builder(conf).numDataNodes(9).build();
+    try {
+      cluster.waitActive();
+      ErasureCodingPolicy ecPolicy = StripedFileTestUtil.getDefaultECPolicy();
+      final DistributedFileSystem fs = cluster.getFileSystem();
+      fs.dfs = spy(fs.dfs);
+      fs.enableErasureCodingPolicy(ecPolicy.getName());
+      Path dir1 = new Path("/testRep");
+      Path dir2 = new Path("/testEC");
+      fs.mkdirs(dir1);
+      fs.mkdirs(dir2);
+      fs.setErasureCodingPolicy(dir2, ecPolicy.getName());
+      for (int i = 0; i < 3; i++) {
+        DFSTestUtil.createFile(fs, new Path(dir1, String.valueOf(i)),
+            20 * 1024L, (short) 3, 1);
+        DFSTestUtil.createStripedFile(cluster, new Path(dir2,
+            String.valueOf(i)), dir2, 1, 1, false);
+      }
+
+      RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(dir1);
+      int total = 0;
+      while (iter.hasNext()) {
+        iter.next();
+        ++total;
+      }
+      assertEquals(3, total);
Review Comment:
can use
```
List<LocatedFileStatus> str =
RemoteIterators.toList(fs.listLocatedStatus(dir1));
assertThat(str).hasSize(3);
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]