[
https://issues.apache.org/jira/browse/DRILL-2618?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15014768#comment-15014768
]
ASF GitHub Bot commented on DRILL-2618:
---------------------------------------
Github user adeneche commented on a diff in the pull request:
https://github.com/apache/drill/pull/270#discussion_r45417976
--- Diff:
exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
---
@@ -102,77 +92,33 @@ public boolean containsDirectories(DrillFileSystem fs)
throws IOException {
}
public FileSelection minusDirectories(DrillFileSystem fs) throws
IOException {
- Stopwatch timer = new Stopwatch();
- timer.start();
- init(fs);
- List<FileStatus> newList = Lists.newArrayList();
- for (FileStatus p : statuses) {
- if (p.isDirectory()) {
- List<FileStatus> statuses = fs.list(true, p.getPath());
- for (FileStatus s : statuses) {
- newList.add(s);
- }
- } else {
- newList.add(p);
- }
- }
- logger.info("FileSelection.minusDirectories() took {} ms, numFiles:
{}",
- timer.elapsed(TimeUnit.MILLISECONDS), newList.size());
- return new FileSelection(newList, selectionRoot);
- }
-
- public FileStatus getFirstPath(DrillFileSystem fs) throws IOException {
- init(fs);
- return statuses.get(0);
- }
-
- public List<String> getAsFiles() {
- if (!files.isEmpty()) {
- return files;
- }
- if (statuses == null) {
- return Collections.emptyList();
+ final List<FileStatus> statuses = getStatuses(fs);
+ final int total = statuses.size();
+ final Path[] paths = new Path[total];
+ for (int i=0; i<total; i++) {
+ paths[i] = statuses.get(i).getPath();
}
- List<String> files = Lists.newArrayList();
- for (FileStatus s : statuses) {
- files.add(s.getPath().toString());
- }
- return files;
- }
-
- private void init(DrillFileSystem fs) throws IOException {
- Stopwatch timer = new Stopwatch();
- timer.start();
- if (files != null && statuses == null) {
- statuses = Lists.newArrayList();
- for (String p : files) {
- statuses.add(fs.getFileStatus(new Path(p)));
+ final List<FileStatus> allStats = fs.list(true, paths);
+ final List<FileStatus> nonDirectories =
Lists.newArrayList(Iterables.filter(allStats, new Predicate<FileStatus>() {
+ @Override
+ public boolean apply(@Nullable FileStatus status) {
+ return !status.isDirectory();
}
- }
- logger.info("FileSelection.init() took {} ms, numFiles: {}",
- timer.elapsed(TimeUnit.MILLISECONDS), statuses == null ? 0 :
statuses.size());
- }
+ }));
- public List<FileStatus> getFileStatusList(DrillFileSystem fs) throws
IOException {
- init(fs);
- return statuses;
+ return create(nonDirectories, null, selectionRoot);
}
- /**
- * Return the parquet table metadata that may have been read
- * from a metadata cache file during creation of this file selection.
- * It will always be null for non-parquet files and null for cases
- * where no metadata cache was created.
- */
- public ParquetTableMetadata_v1 getParquetMetadata() {
- return parquetMeta;
+ public FileStatus getFirstPath(DrillFileSystem fs) throws IOException {
+ return getStatuses(fs).get(0);
}
- private static String commonPath(FileStatus... paths) {
+ private static String commonPath(List<FileStatus> statuses) {
--- End diff --
This method assumes that statuses is not null. That is not always the case; see
my comment below.
> BasicFormatMatcher calls getFirstPath(...) without checking # of paths is not
> zero
> ----------------------------------------------------------------------------------
>
> Key: DRILL-2618
> URL: https://issues.apache.org/jira/browse/DRILL-2618
> Project: Apache Drill
> Issue Type: Bug
> Components: Storage - Other
> Reporter: Daniel Barclay (Drill)
> Assignee: Deneche A. Hakim
> Fix For: 1.4.0
>
>
> {{BasicFormatMatcher.isReadable(...)}} calls {{getFirstPath(...)}} without
> checking that there is at least one path. This can cause an
> IndexOutOfBoundsException.
> To reproduce, create an empty directory {{/tmp/CaseInsensitiveColumnNames}}
> and run
> {{exec/java-exec/src/test/java/org/apache/drill/TestExampleQueries.java}}.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)