Github user adeneche commented on a diff in the pull request:

    https://github.com/apache/drill/pull/270#discussion_r45417976
  
    --- Diff: 
exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java 
---
    @@ -102,77 +92,33 @@ public boolean containsDirectories(DrillFileSystem fs) 
throws IOException {
       }
     
       public FileSelection minusDirectories(DrillFileSystem fs) throws 
IOException {
    -    Stopwatch timer = new Stopwatch();
    -    timer.start();
    -    init(fs);
    -    List<FileStatus> newList = Lists.newArrayList();
    -    for (FileStatus p : statuses) {
    -      if (p.isDirectory()) {
    -        List<FileStatus> statuses = fs.list(true, p.getPath());
    -        for (FileStatus s : statuses) {
    -          newList.add(s);
    -        }
    -      } else {
    -        newList.add(p);
    -      }
    -    }
    -    logger.info("FileSelection.minusDirectories() took {} ms, numFiles: 
{}",
    -        timer.elapsed(TimeUnit.MILLISECONDS), newList.size());
    -    return new FileSelection(newList, selectionRoot);
    -  }
    -
    -  public FileStatus getFirstPath(DrillFileSystem fs) throws IOException {
    -    init(fs);
    -    return statuses.get(0);
    -  }
    -
    -  public List<String> getAsFiles() {
    -    if (!files.isEmpty()) {
    -      return files;
    -    }
    -    if (statuses == null) {
    -      return Collections.emptyList();
    +    final List<FileStatus> statuses = getStatuses(fs);
    +    final int total = statuses.size();
    +    final Path[] paths = new Path[total];
    +    for (int i=0; i<total; i++) {
    +      paths[i] = statuses.get(i).getPath();
         }
    -    List<String> files = Lists.newArrayList();
    -    for (FileStatus s : statuses) {
    -      files.add(s.getPath().toString());
    -    }
    -    return files;
    -  }
    -
    -  private void init(DrillFileSystem fs) throws IOException {
    -    Stopwatch timer = new Stopwatch();
    -    timer.start();
    -    if (files != null && statuses == null) {
    -      statuses = Lists.newArrayList();
    -      for (String p : files) {
    -        statuses.add(fs.getFileStatus(new Path(p)));
    +    final List<FileStatus> allStats = fs.list(true, paths);
    +    final List<FileStatus> nonDirectories = 
Lists.newArrayList(Iterables.filter(allStats, new Predicate<FileStatus>() {
    +      @Override
    +      public boolean apply(@Nullable FileStatus status) {
    +        return !status.isDirectory();
           }
    -    }
    -    logger.info("FileSelection.init() took {} ms, numFiles: {}",
    -        timer.elapsed(TimeUnit.MILLISECONDS), statuses == null ? 0 : 
statuses.size());
    -  }
    +    }));
     
    -  public List<FileStatus> getFileStatusList(DrillFileSystem fs) throws 
IOException {
    -    init(fs);
    -    return statuses;
    +    return create(nonDirectories, null, selectionRoot);
       }
     
    -  /**
    -   * Return the parquet table metadata that may have been read
    -   * from a metadata cache file during creation of this file selection.
    -   * It will always be null for non-parquet files and null for cases
    -   * where no metadata cache was created.
    -   */
    -  public ParquetTableMetadata_v1 getParquetMetadata() {
    -    return parquetMeta;
    +  public FileStatus getFirstPath(DrillFileSystem fs) throws IOException {
    +    return getStatuses(fs).get(0);
       }
     
    -  private static String commonPath(FileStatus... paths) {
    +  private static String commonPath(List<FileStatus> statuses) {
    --- End diff --
    
    This method assumes statuses is not null. This is not always the case; see 
my comment below.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Reply via email to