[ 
https://issues.apache.org/jira/browse/HDDS-1461?focusedWorklogId=243208&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-243208
 ]

ASF GitHub Bot logged work on HDDS-1461:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 16/May/19 09:16
            Start Date: 16/May/19 09:16
    Worklog Time Spent: 10m 
      Work Description: mukul1987 commented on pull request #782: HDDS-1461. Optimize listStatus api in OzoneFileSystem
URL: https://github.com/apache/hadoop/pull/782#discussion_r284612628
 
 

 ##########
 File path: 
hadoop-ozone/ozonefs/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneFileSystem.java
 ##########
 @@ -494,130 +491,33 @@ private boolean o3Exists(final Path f) throws IOException {
     }
   }
 
-  private class ListStatusIterator extends OzoneListingIterator {
-    // _fileStatuses_ maintains a list of file(s) which is either the input
-    // path itself or a child of the input directory path.
-    private List<FileStatus> fileStatuses = new ArrayList<>(LISTING_PAGE_SIZE);
-    // _subDirStatuses_ maintains a list of sub-dirs of the input directory
-    // path.
-    private Map<Path, FileStatus> subDirStatuses =
-        new HashMap<>(LISTING_PAGE_SIZE);
-    private Path f; // the input path
-
-    ListStatusIterator(Path f) throws IOException {
-      super(f);
-      this.f = f;
-    }
+  @Override
+  public FileStatus[] listStatus(Path f) throws IOException {
+    incrementCounter(Statistic.INVOCATION_LIST_STATUS);
+    statistics.incrementReadOps(1);
+    LOG.trace("listStatus() path:{}", f);
+    int numEntries = LISTING_PAGE_SIZE;
+    LinkedList<OzoneFileStatus> statuses = new LinkedList<>();
+    List<OzoneFileStatus> tmpStatus;
+    String startKey = "";
 
-    /**
-     * Add the key to the listStatus result if the key corresponds to the
-     * input path or is an immediate child of the input path.
-     *
-     * @param key key to be processed
-     * @return always returns true
-     * @throws IOException
-     */
-    @Override
-    boolean processKey(String key) throws IOException {
-      Path keyPath = new Path(OZONE_URI_DELIMITER + key);
-      if (key.equals(getPathKey())) {
-        if (pathIsDirectory()) {
-          // if input path is a directory, we add the sub-directories and
-          // files under this directory.
-          return true;
-        } else {
-          addFileStatus(keyPath);
-          return true;
-        }
-      }
-      // Left with only subkeys now
-      // We add only the immediate child files and sub-dirs i.e. we go only
-      // upto one level down the directory tree structure.
-      if (pathToKey(keyPath.getParent()).equals(pathToKey(f))) {
-        // This key is an immediate child. Can be file or directory
-        if (key.endsWith(OZONE_URI_DELIMITER)) {
-          // Key is a directory
-          addSubDirStatus(keyPath);
+    do {
+      tmpStatus = adapter.listStatus(pathToKey(f), false, startKey, numEntries);
+      if (!tmpStatus.isEmpty()) {
+        if (startKey.isEmpty()) {
+          statuses.addAll(tmpStatus);
         } else {
-          addFileStatus(keyPath);
-        }
-      } else {
-        // This key is not the immediate child of the input directory. So we
-        // traverse the parent tree structure of this key until we get the
-        // immediate child of the input directory.
-        Path immediateChildPath = getImmediateChildPath(keyPath.getParent());
-        if (immediateChildPath != null) {
-          addSubDirStatus(immediateChildPath);
+          statuses.addAll(tmpStatus.subList(1, tmpStatus.size()));
         }
+        startKey = pathToKey(statuses.getLast().getPath());
       }
-      return true;
-    }
+    } while (tmpStatus.size() == numEntries);
 
 Review comment:
   Please add a comment here to explain this line.
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


Issue Time Tracking
-------------------

    Worklog Id:     (was: 243208)
    Time Spent: 1.5h  (was: 1h 20m)

> Optimize listStatus api in OzoneFileSystem
> ------------------------------------------
>
>                 Key: HDDS-1461
>                 URL: https://issues.apache.org/jira/browse/HDDS-1461
>             Project: Hadoop Distributed Data Store
>          Issue Type: Sub-task
>          Components: Ozone Filesystem, Ozone Manager
>            Reporter: Lokesh Jain
>            Assignee: Lokesh Jain
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 1.5h
>  Remaining Estimate: 0h
>
> Currently, listStatus makes multiple getFileStatus calls. This can be 
> optimized by converting them into a single RPC call for listStatus.
> Also, listStatus currently has to traverse a directory recursively in order to 
> list its immediate children. This happens because in OzoneManager all the 
> metadata is stored in RocksDB sorted by key name. This Jira also aims to fix 
> this by using the seek API provided by RocksDB.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to