Repository: hive Updated Branches: refs/heads/hive-14535 2014ece97 -> 693b85659
HIVE-16032 : MM tables: encrypted/(minimr?) CLI driver + fetch optimizer => no results (Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/693b8565 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/693b8565 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/693b8565 Branch: refs/heads/hive-14535 Commit: 693b8565950bd5a49b9145c59d05ca6fd3300af6 Parents: 2014ece Author: Sergey Shelukhin <[email protected]> Authored: Fri Feb 24 15:25:33 2017 -0800 Committer: Sergey Shelukhin <[email protected]> Committed: Fri Feb 24 15:25:33 2017 -0800 ---------------------------------------------------------------------- .../hadoop/hive/common/ValidWriteIds.java | 2 +- .../java/org/apache/hadoop/hive/ql/Driver.java | 15 +++++++++++++-- .../hadoop/hive/ql/exec/FetchOperator.java | 20 +++++++++++++++----- 3 files changed, 29 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/693b8565/common/src/java/org/apache/hadoop/hive/common/ValidWriteIds.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/common/ValidWriteIds.java b/common/src/java/org/apache/hadoop/hive/common/ValidWriteIds.java index 2ce4040..4cbeb89 100644 --- a/common/src/java/org/apache/hadoop/hive/common/ValidWriteIds.java +++ b/common/src/java/org/apache/hadoop/hive/common/ValidWriteIds.java @@ -120,7 +120,7 @@ public class ValidWriteIds { conf.set(key, source); } - public static void clearConf(HiveConf conf, String dbName, String tblName) { + public static void clearConf(Configuration conf, String dbName, String tblName) { if (LOG.isDebugEnabled()) { LOG.debug("Unsetting " + createConfKey(dbName, tblName)); } http://git-wip-us.apache.org/repos/asf/hive/blob/693b8565/ql/src/java/org/apache/hadoop/hive/ql/Driver.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java index 8b79d27..f01c3d5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -39,6 +39,7 @@ import java.util.concurrent.locks.ReentrantLock; import com.google.common.collect.Iterables; import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidWriteIds; @@ -1595,20 +1596,29 @@ public class Driver implements CommandProcessor { } } + private static void acquireWriteIds(QueryPlan plan, HiveConf conf) throws HiveException { // Output IDs are put directly into FileSinkDesc; here, we only need to take care of inputs. + Configuration fetchConf = null; + if (plan.getFetchTask() != null) { + fetchConf = plan.getFetchTask().getFetchConf(); + } for (ReadEntity input : plan.getInputs()) { + Utilities.LOG14535.debug("Looking at " + input); Table t = extractTable(input); if (t == null) continue; Utilities.LOG14535.info("Checking " + t.getTableName() + " for being a MM table: " + t.getParameters()); if (!MetaStoreUtils.isInsertOnlyTable(t.getParameters())) { ValidWriteIds.clearConf(conf, t.getDbName(), t.getTableName()); + if (fetchConf != null) { + ValidWriteIds.clearConf(fetchConf, t.getDbName(), t.getTableName()); + } continue; } ValidWriteIds ids = Hive.get().getValidWriteIdsForTable(t.getDbName(), t.getTableName()); ids.addToConf(conf, t.getDbName(), t.getTableName()); - if (plan.getFetchTask() != null) { - ids.addToConf(plan.getFetchTask().getFetchConf(), t.getDbName(), t.getTableName()); + if (fetchConf != null) { + ids.addToConf(fetchConf, t.getDbName(), t.getTableName()); } } } @@ -1866,6 +1876,7 @@ public class Driver implements CommandProcessor { for (Task<? extends Serializable> tsk : plan.getRootTasks()) { // This should never happen, if it does, it's a bug with the potential to produce // incorrect results. + LOG.error("TODO# running " + tsk); assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty(); driverCxt.addToRunnable(tsk); http://git-wip-us.apache.org/repos/asf/hive/blob/693b8565/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index 9acd14d..da78a99 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -269,11 +269,15 @@ public class FetchOperator implements Serializable { while (iterPath.hasNext()) { currPath = iterPath.next(); currDesc = iterPartDesc.next(); + Utilities.LOG14535.debug("Considering " + currPath); if (isNonNativeTable) { return true; } FileSystem fs = currPath.getFileSystem(job); if (fs.exists(currPath)) { + if (extractWriteIdsForCurrentTable() != null) { + return true; + } for (FileStatus fStat : listStatusUnderPath(fs, currPath)) { if (fStat.getLen() > 0) { return true; @@ -281,6 +285,7 @@ public class FetchOperator implements Serializable { } } } + Utilities.LOG14535.debug("Done with all the paths"); return false; } @@ -310,6 +315,7 @@ public class FetchOperator implements Serializable { if (splits == null) { return null; } + if (!isPartitioned || convertedOI == null) { currSerDe = tableSerDe; ObjectConverter = null; @@ -397,11 +403,7 @@ public class FetchOperator implements Serializable { if (inputFormat instanceof HiveInputFormat) { return StringUtils.escapeString(currPath.toString()); // No need to process here. } - if (writeIdMap == null) { - writeIdMap = new HashMap<String, ValidWriteIds>(); - } - // No need to check for MM table - if it is, the IDs should be in the job config. - ValidWriteIds ids = HiveInputFormat.extractWriteIds(writeIdMap, job, currDesc.getTableName()); + ValidWriteIds ids = extractWriteIdsForCurrentTable(); if (ids != null) { Utilities.LOG14535.info("Observing " + currDesc.getTableName() + ": " + ids); } @@ -417,6 +419,13 @@ public class FetchOperator implements Serializable { return str.toString(); } + private ValidWriteIds extractWriteIdsForCurrentTable() { + if (writeIdMap == null) { + writeIdMap = new HashMap<String, ValidWriteIds>(); + } + return HiveInputFormat.extractWriteIds(writeIdMap, job, currDesc.getTableName()); + } + private FetchInputFormatSplit[] splitSampling(SplitSample splitSample, FetchInputFormatSplit[] splits) { long totalSize = 0; @@ -725,6 +734,7 @@ public class FetchOperator implements Serializable { } public RecordReader<WritableComparable, Writable> getRecordReader(JobConf job) throws IOException { + LOG.error("TODO# calling origina getRr on " + inputFormat + "; " + getInputSplit()); return inputFormat.getRecordReader(getInputSplit(), job, Reporter.NULL); } }
