IMPALA-5042: Use a HashSet instead of ArrayList for O(1) lookups. Testing: Ran the metadata perf benchmark. No regressions and found good gains in the following cases.
100K-PARTITIONS-1M-FILES-CUSTOM-05-QUERY-AFTER-INVALIDATE ~81.3% 100K-PARTITIONS-1M-FILES-CUSTOM-07-REFRESH ~81.3% 100K-PARTITIONS-1M-FILES-CUSTOM-10-REFRESH-AFTER-ADD-PARTITION ~81.7% Change-Id: Ia9eccfe853583a0b78a5280f1b9525ce97f88cb5 Reviewed-on: http://gerrit.cloudera.org:8080/6319 Reviewed-by: Alex Behm <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/6dff9066 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/6dff9066 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/6dff9066 Branch: refs/heads/master Commit: 6dff90661c07241794de5c24f4f27e7712dca82c Parents: 6951030 Author: Bharath Vissapragada <[email protected]> Authored: Wed Mar 8 11:10:27 2017 -0800 Committer: Impala Public Jenkins <[email protected]> Committed: Fri Mar 17 10:20:57 2017 +0000 ---------------------------------------------------------------------- fe/src/main/java/org/apache/impala/catalog/HdfsTable.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6dff9066/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java index 3e51cba..30241b0 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java @@ -718,7 +718,7 @@ public class HdfsTable extends Table { // separately. // TODO: We can still do some advanced optimization by grouping all the partition // directories under the same ancestor path up the tree. 
- List<Path> dirsToLoad = Lists.newArrayList(tblLocation); + Set<Path> dirsToLoad = Sets.newHashSet(tblLocation); if (msTbl.getPartitionKeysSize() == 0) { Preconditions.checkArgument(msPartitions == null || msPartitions.isEmpty()); @@ -839,7 +839,7 @@ public class HdfsTable extends Table { * and filtering only the paths from 'partsByPath'. Also loads the disk IDs * corresponding to these block locations. */ - private void loadMetadataAndDiskIds(List<Path> locations, + private void loadMetadataAndDiskIds(Set<Path> locations, HashMap<Path, List<HdfsPartition>> partsByPath) { LOG.info(String.format( "Loading file and block metadata for %s partitions from %s paths: %s",
