Repository: hive Updated Branches: refs/heads/master c65651930 -> a60fec2eb
HIVE-12712: HiveInputFormat may fail to set column names to read in some cases (Prasanth Jayachandran reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a60fec2e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a60fec2e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a60fec2e Branch: refs/heads/master Commit: a60fec2ebfad276b21f801f4c0b025cfc069f5dd Parents: c656519 Author: Prasanth Jayachandran <[email protected]> Authored: Mon Dec 21 18:02:30 2015 -0600 Committer: Prasanth Jayachandran <[email protected]> Committed: Mon Dec 21 18:02:30 2015 -0600 ---------------------------------------------------------------------- .../hadoop/hive/ql/io/HiveInputFormat.java | 49 ++++++++++++++++---- .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 15 ++++-- 2 files changed, 50 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/a60fec2e/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index 2607d9c..1f262d0 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -424,8 +424,7 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable> TableDesc table = part.getTableDesc(); TableScanOperator tableScan = null; - List<String> aliases = - mrwork.getPathToAliases().get(dir.toUri().toString()); + List<String> aliases = mrwork.getPathToAliases().get(dir.toString()); // Make filter pushdown information available to getSplits. 
if ((aliases != null) && (aliases.size() == 1)) { @@ -442,6 +441,10 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable> // push down filters pushFilters(newjob, tableScan); } + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("aliases: {} pathToAliases: {} dir: {}", aliases, mrwork.getPathToAliases(), dir); + } } if (!currentDirs.isEmpty() && @@ -453,7 +456,15 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable> } if (!currentDirs.isEmpty()) { - LOG.info("Generating splits"); + if (LOG.isInfoEnabled()) { + LOG.info("Generating splits as currentDirs is not empty. currentDirs: {}", currentDirs); + } + + // set columns to read in conf + if (pushDownProjection) { + pushProjection(newjob, readColumnsBuffer, readColumnNamesBuffer); + } + addSplitsForGroup(currentDirs, currentTableScan, newjob, getInputFormatFromCache(currentInputFormatClass, job), currentInputFormatClass, currentDirs.size()*(numSplits / dirs.length), @@ -466,16 +477,16 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable> currentTable = table; currentInputFormatClass = inputFormatClass; } + + // set columns to read in conf if (pushDownProjection) { - newjob.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); - newjob.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColumnsBuffer.toString()); - newjob.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, readColumnNamesBuffer.toString()); - LOG.info(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR + "=" + readColumnsBuffer.toString()); - LOG.info(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR + "=" + readColumnNamesBuffer.toString()); + pushProjection(newjob, readColumnsBuffer, readColumnNamesBuffer); } if (dirs.length != 0) { - LOG.info("Generating splits"); + if (LOG.isInfoEnabled()) { + LOG.info("Generating splits for dirs: {}", dirs); + } addSplitsForGroup(currentDirs, currentTableScan, newjob, getInputFormatFromCache(currentInputFormatClass, job), 
currentInputFormatClass, currentDirs.size()*(numSplits / dirs.length), @@ -483,11 +494,29 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable> } Utilities.clearWorkMapForConf(job); - LOG.info("number of splits " + result.size()); + if (LOG.isInfoEnabled()) { + LOG.info("number of splits " + result.size()); + } perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS); return result.toArray(new HiveInputSplit[result.size()]); } + private void pushProjection(final JobConf newjob, final StringBuilder readColumnsBuffer, + final StringBuilder readColumnNamesBuffer) { + String readColIds = readColumnsBuffer.toString(); + String readColNames = readColumnNamesBuffer.toString(); + boolean readAllColumns = readColIds.isEmpty() ? true : false; + newjob.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, readAllColumns); + newjob.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColIds); + newjob.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, readColNames); + + if (LOG.isInfoEnabled()) { + LOG.info("{} = {}", ColumnProjectionUtils.READ_ALL_COLUMNS, readAllColumns); + LOG.info("{} = {}", ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColIds); + LOG.info("{} = {}", ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, readColNames); + } + } + protected static PartitionDesc getPartitionDescFromPath( Map<String, PartitionDesc> pathToPartitionInfo, Path dir) throws IOException { http://git-wip-us.apache.org/repos/asf/hive/blob/a60fec2e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index b3b48d5..359cbf7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -414,7 +414,11 @@ public class OrcInputFormat implements 
InputFormat<NullWritable, OrcStruct>, private static String[] extractNeededColNames( List<OrcProto.Type> types, Configuration conf, boolean[] include, boolean isOriginal) { - return extractNeededColNames(types, getNeededColumnNamesString(conf), include, isOriginal); + String colNames = getNeededColumnNamesString(conf); + if (colNames == null) { + return null; + } + return extractNeededColNames(types, colNames, include, isOriginal); } private static String[] extractNeededColNames( @@ -1068,10 +1072,13 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, // we can't eliminate stripes if there are deltas because the // deltas may change the rows making them match the predicate. if ((deltas == null || deltas.isEmpty()) && context.sarg != null) { - SearchArgument sarg = ConvertAstToSearchArg.createFromConf(context.conf); String[] colNames = extractNeededColNames(types, context.conf, includedCols, isOriginal); - includeStripe = pickStripes(context.sarg, colNames, writerVersion, isOriginal, - stripeStats, stripes.size(), file.getPath()); + if (colNames == null) { + LOG.warn("Skipping split elimination for {} as column names is null", file.getPath()); + } else { + includeStripe = pickStripes(context.sarg, colNames, writerVersion, isOriginal, + stripeStats, stripes.size(), file.getPath()); + } } // if we didn't have predicate pushdown, read everything
