Repository: hive
Updated Branches:
  refs/heads/branch-1 a4deea329 -> f31061c8c


HIVE-12712: HiveInputFormat may fail to set column names to read in some cases
(Prasanth Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f31061c8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f31061c8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f31061c8

Branch: refs/heads/branch-1
Commit: f31061c8cc1d5743f13ea9d887860dc5f4c8c8ca
Parents: a4deea3
Author: Prasanth Jayachandran <[email protected]>
Authored: Mon Dec 21 18:12:46 2015 -0600
Committer: Prasanth Jayachandran <[email protected]>
Committed: Mon Dec 21 18:12:46 2015 -0600

----------------------------------------------------------------------
 .../hadoop/hive/ql/io/HiveInputFormat.java      | 50 ++++++++++++++++----
 1 file changed, 40 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f31061c8/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index 27499ad..2d6e752 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -376,8 +376,7 @@ public class HiveInputFormat<K extends WritableComparable, 
V extends Writable>
       TableDesc table = part.getTableDesc();
       TableScanOperator tableScan = null;
 
-      List<String> aliases =
-          mrwork.getPathToAliases().get(dir.toUri().toString());
+      List<String> aliases = mrwork.getPathToAliases().get(dir.toString());
 
       // Make filter pushdown information available to getSplits.
       if ((aliases != null) && (aliases.size() == 1)) {
@@ -394,6 +393,11 @@ public class HiveInputFormat<K extends WritableComparable, 
V extends Writable>
           // push down filters
           pushFilters(newjob, tableScan);
         }
+      } else {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("aliases: " + aliases + " pathToAliases: " + 
mrwork.getPathToAliases() +
+              " dir: " + dir);
+        }
       }
 
       if (!currentDirs.isEmpty() &&
@@ -405,7 +409,15 @@ public class HiveInputFormat<K extends WritableComparable, 
V extends Writable>
       }
 
       if (!currentDirs.isEmpty()) {
-        LOG.info("Generating splits");
+        if (LOG.isInfoEnabled()) {
+          LOG.info("Generating splits as currentDirs is not empty. 
currentDirs: " + currentDirs);
+        }
+
+        // set columns to read in conf
+        if (pushDownProjection) {
+          pushProjection(newjob, readColumnsBuffer, readColumnNamesBuffer);
+        }
+
         addSplitsForGroup(currentDirs, currentTableScan, newjob,
             getInputFormatFromCache(currentInputFormatClass, job),
             currentInputFormatClass, currentDirs.size()*(numSplits / 
dirs.length),
@@ -418,16 +430,16 @@ public class HiveInputFormat<K extends 
WritableComparable, V extends Writable>
       currentTable = table;
       currentInputFormatClass = inputFormatClass;
     }
+
+    // set columns to read in conf
     if (pushDownProjection) {
-      newjob.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
-      newjob.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, 
readColumnsBuffer.toString());
-      newjob.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, 
readColumnNamesBuffer.toString());
-      LOG.info(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR + "=" + 
readColumnsBuffer.toString());
-      LOG.info(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR + "=" + 
readColumnNamesBuffer.toString());
+      pushProjection(newjob, readColumnsBuffer, readColumnNamesBuffer);
     }
 
     if (dirs.length != 0) {
-      LOG.info("Generating splits");
+      if (LOG.isInfoEnabled()) {
+        LOG.info("Generating splits for dirs: " + dirs);
+      }
       addSplitsForGroup(currentDirs, currentTableScan, newjob,
           getInputFormatFromCache(currentInputFormatClass, job),
           currentInputFormatClass, currentDirs.size()*(numSplits / 
dirs.length),
@@ -435,11 +447,29 @@ public class HiveInputFormat<K extends 
WritableComparable, V extends Writable>
     }
 
     Utilities.clearWorkMapForConf(job);
-    LOG.info("number of splits " + result.size());
+    if (LOG.isInfoEnabled()) {
+      LOG.info("number of splits " + result.size());
+    }
     perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.GET_SPLITS);
     return result.toArray(new HiveInputSplit[result.size()]);
   }
 
+  private void pushProjection(final JobConf newjob, final StringBuilder 
readColumnsBuffer,
+      final StringBuilder readColumnNamesBuffer) {
+    String readColIds = readColumnsBuffer.toString();
+    String readColNames = readColumnNamesBuffer.toString();
+    boolean readAllColumns = readColIds.isEmpty() ? true : false;
+    newjob.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, readAllColumns);
+    newjob.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColIds);
+    newjob.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, readColNames);
+
+    if (LOG.isInfoEnabled()) {
+      LOG.info(ColumnProjectionUtils.READ_ALL_COLUMNS + " = " + 
readAllColumns);
+      LOG.info(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR + " = " + 
readColIds);
+      LOG.info(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR + " = " + 
readColNames);
+    }
+  }
+
   protected static PartitionDesc getPartitionDescFromPath(
       Map<String, PartitionDesc> pathToPartitionInfo, Path dir)
       throws IOException {

Reply via email to