Repository: drill Updated Branches: refs/heads/master 0686bc23e -> eeaea7a8a
DRILL-3746: Get Hive partition values from MetaStore instead of from parsing the partition location path 1) Added a partition with a custom location to the test Hive table. Existing partition tests now work after the fix. 2) Enabled a test that was previously disabled due to a bug in the interpreter code, which has since been fixed. This closes #151 Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/eeaea7a8 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/eeaea7a8 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/eeaea7a8 Branch: refs/heads/master Commit: eeaea7a8a71a5d6cd5f99358f0a0ea175eb8071a Parents: 0686bc2 Author: vkorukanti <venki.koruka...@gmail.com> Authored: Wed Sep 9 17:42:45 2015 -0700 Committer: vkorukanti <venki.koruka...@gmail.com> Committed: Thu Sep 10 09:02:02 2015 -0700 ---------------------------------------------------------------------- .../planner/sql/HivePartitionDescriptor.java | 12 +++----- .../exec/planner/sql/HivePartitionLocation.java | 31 +++++++------------- .../drill/exec/TestHivePartitionPruning.java | 4 --- .../apache/drill/exec/hive/TestHiveStorage.java | 11 +++++++ .../exec/store/hive/HiveTestDataGenerator.java | 9 ++++++ 5 files changed, 34 insertions(+), 33 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/eeaea7a8/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/HivePartitionDescriptor.java ---------------------------------------------------------------------- diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/HivePartitionDescriptor.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/HivePartitionDescriptor.java index fa70755..0328af0 100644 --- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/HivePartitionDescriptor.java +++ 
b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/HivePartitionDescriptor.java @@ -47,7 +47,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; public class HivePartitionDescriptor implements PartitionDescriptor { private final Map<String, Integer> partitionMap = new HashMap<>(); - private final int MAX_NESTED_SUBDIRS; + private final int numPartitionLevels; private final DrillScanRel scanRel; private final String defaultPartitionValue; private final DrillBuf managedBuffer; @@ -62,7 +62,7 @@ public class HivePartitionDescriptor implements PartitionDescriptor { partitionMap.put(wrapper.name, i); i++; } - MAX_NESTED_SUBDIRS = i; + numPartitionLevels = i; } @Override @@ -77,7 +77,7 @@ public class HivePartitionDescriptor implements PartitionDescriptor { @Override public int getMaxHierarchyLevel() { - return MAX_NESTED_SUBDIRS; + return numPartitionLevels; } public String getBaseTableLocation() { @@ -110,12 +110,8 @@ public class HivePartitionDescriptor implements PartitionDescriptor { public List<PartitionLocation> getPartitions() { List<PartitionLocation> partitions = new LinkedList<>(); HiveReadEntry origEntry = ((HiveScan) scanRel.getGroupScan()).hiveReadEntry; - List<String> allFileLocations = new LinkedList<>(); for (Partition partition: origEntry.getPartitions()) { - allFileLocations.add(partition.getSd().getLocation()); - } - for (String file: allFileLocations) { - partitions.add(new HivePartitionLocation(MAX_NESTED_SUBDIRS, getBaseTableLocation(), file)); + partitions.add(new HivePartitionLocation(partition.getValues(), partition.getSd().getLocation())); } return partitions; } http://git-wip-us.apache.org/repos/asf/drill/blob/eeaea7a8/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/HivePartitionLocation.java ---------------------------------------------------------------------- diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/HivePartitionLocation.java 
b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/HivePartitionLocation.java index 8a99026..49e3361 100644 --- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/HivePartitionLocation.java +++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/planner/sql/HivePartitionLocation.java @@ -17,34 +17,23 @@ */ package org.apache.drill.exec.planner.sql; +import com.google.common.collect.ImmutableList; import org.apache.drill.exec.planner.PartitionLocation; +import java.util.List; + public class HivePartitionLocation implements PartitionLocation { private final String partitionLocation; - private final String[] partitionValue; - // The path names passed in to this class are already sanitised and use the forward slash as the separator - private static final String fileSeparator = "/"; - public HivePartitionLocation(int max, String baseTableLocation, String entireLocation) { - this.partitionLocation = entireLocation; - partitionValue = new String[max]; - int start = partitionLocation.indexOf(baseTableLocation) + baseTableLocation.length(); - String postPath = entireLocation.substring(start); - if (postPath.length() == 0) { - return; - } - if (postPath.startsWith(fileSeparator)) { - postPath = postPath.substring(postPath.indexOf(fileSeparator) + 1); - } - String[] mostDirs = postPath.split(fileSeparator); - assert mostDirs.length <= max; - for (int i = 0; i < mostDirs.length; i++) { - this.partitionValue[i] = mostDirs[i].substring(mostDirs[i].indexOf("=") + 1); - } + private final List<String> partitionValues; + + public HivePartitionLocation(final List<String> partitionValues, final String partitionLocation) { + this.partitionValues = ImmutableList.copyOf(partitionValues); + this.partitionLocation = partitionLocation; } @Override public String getPartitionValue(int index) { - assert index < partitionValue.length; - return partitionValue[index]; + assert index < partitionValues.size(); + return 
partitionValues.get(index); } @Override http://git-wip-us.apache.org/repos/asf/drill/blob/eeaea7a8/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHivePartitionPruning.java ---------------------------------------------------------------------- diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHivePartitionPruning.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHivePartitionPruning.java index 436f4fd..f0b4bdc 100644 --- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHivePartitionPruning.java +++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/TestHivePartitionPruning.java @@ -116,14 +116,10 @@ public class TestHivePartitionPruning extends HiveTestBase { public void selectFromPartitionedTableWithNullPartitions() throws Exception { final String query = "SELECT count(*) nullCount FROM hive.partition_pruning_test " + "WHERE c IS NULL OR d IS NULL OR e IS NULL"; - - /** Currently there is an issue with interpreter based partition pruning where some functions on partitions don't - * work. IS NULL is one of those functions. 
final String plan = getPlanInString("EXPLAIN PLAN FOR " + query, OPTIQ_FORMAT); // Check and make sure that Filter is not present in the plan assertFalse(plan.contains("Filter")); - */ testBuilder() .sqlQuery(query) http://git-wip-us.apache.org/repos/asf/drill/blob/eeaea7a8/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java ---------------------------------------------------------------------- diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java index 3f74091..9211af6 100644 --- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java +++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java @@ -207,4 +207,15 @@ public class TestHiveStorage extends HiveTestBase { .baselineValues("5", " key_5") .go(); } + + + @Test // DRILL-3746 + public void readFromPartitionWithCustomLocation() throws Exception { + testBuilder() + .sqlQuery("SELECT count(*) as cnt FROM hive.partition_pruning_test WHERE c=99 AND d=98 AND e=97") + .unOrdered() + .baselineColumns("cnt") + .baselineValues(1L) + .go(); + } } http://git-wip-us.apache.org/repos/asf/drill/blob/eeaea7a8/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java ---------------------------------------------------------------------- diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java index 3be846d..34a7ed6 100644 --- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java +++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java @@ -312,6 +312,15 @@ public class HiveTestDataGenerator { 
executeQuery(hiveDriver, "INSERT OVERWRITE TABLE partition_pruning_test PARTITION(c, d, e) " + "SELECT a, b, c, d, e FROM partition_pruning_test_loadtable"); + // Add a partition with custom location + executeQuery(hiveDriver, + String.format("ALTER TABLE partition_pruning_test ADD PARTITION (c=99, d=98, e=97) LOCATION '%s'", + getTempDir("part1"))); + executeQuery(hiveDriver, + String.format("INSERT INTO TABLE partition_pruning_test PARTITION(c=99, d=98, e=97) " + + "SELECT '%s', '%s' FROM kv LIMIT 1", + new Date(System.currentTimeMillis()).toString(), new Timestamp(System.currentTimeMillis()).toString())); + executeQuery(hiveDriver, "DROP TABLE partition_pruning_test_loadtable"); ss.close();