Repository: incubator-impala Updated Branches: refs/heads/master a5044a30c -> 3426a0495
IMPALA-4788: Use HashSet in RECOVER PARTITIONS duplicate checks RECOVER PARTITIONS needs to avoid recovering partitions that are already in HMS. Before this patch, that check was done by making a list of the existing partitions and searching in that list for each path found in the search for partitions eligible for recovery. This patch changes the container to a HashSet for performance reasons. Change-Id: I4b9b6f8eb85f854e8c0896c18a231cebe32b4678 Reviewed-on: http://gerrit.cloudera.org:8080/5745 Reviewed-by: Alex Behm <[email protected]> Reviewed-by: Marcel Kornacker <[email protected]> Tested-by: Jim Apple <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/6cf3efdf Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/6cf3efdf Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/6cf3efdf Branch: refs/heads/master Commit: 6cf3efdfec7cb9245b2b8ce2196e9629f4fe3f89 Parents: a5044a3 Author: Jim Apple <[email protected]> Authored: Thu Jan 19 09:45:35 2017 -0800 Committer: Jim Apple <[email protected]> Committed: Thu Jan 19 22:57:00 2017 +0000 ---------------------------------------------------------------------- fe/src/main/java/org/apache/impala/catalog/HdfsTable.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6cf3efdf/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java index 857216f..a6d0f47 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java @@ -1615,7 +1615,7 @@ public class HdfsTable extends Table { 
* partition key column. */ public List<List<String>> getPathsWithoutPartitions() throws CatalogException { - List<List<LiteralExpr>> existingPartitions = new ArrayList<List<LiteralExpr>>(); + HashSet<List<LiteralExpr>> existingPartitions = new HashSet<List<LiteralExpr>>(); // Get the list of partition values of existing partitions in Hive Metastore. for (HdfsPartition partition: partitionMap_.values()) { if (partition.isDefaultPartition()) continue; @@ -1643,7 +1643,7 @@ public class HdfsTable extends Table { * type compatibility check. Also these partitions are not already part of the table. */ private void getAllPartitionsNotInHms(Path path, List<String> partitionKeys, - List<List<LiteralExpr>> existingPartitions, + HashSet<List<LiteralExpr>> existingPartitions, List<List<String>> partitionsNotInHms) throws IOException { FileSystem fs = path.getFileSystem(CONF); // Check whether the base directory exists. @@ -1671,7 +1671,7 @@ public class HdfsTable extends Table { */ private void getAllPartitionsNotInHms(Path path, List<String> partitionKeys, int depth, FileSystem fs, List<String> partitionValues, - List<LiteralExpr> partitionExprs, List<List<LiteralExpr>> existingPartitions, + List<LiteralExpr> partitionExprs, HashSet<List<LiteralExpr>> existingPartitions, List<List<String>> partitionsNotInHms) throws IOException { if (depth == partitionKeys.size()) { if (existingPartitions.contains(partitionExprs)) {
