Author: khorgath
Date: Thu Aug  7 23:57:14 2014
New Revision: 1616642

URL: http://svn.apache.org/r1616642
Log:
HIVE-7072 : HCatLoader only loads first region of hbase table (Sushanth Sowmyan, reviewed by Daniel Dai)

Modified:
    hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java
    hive/trunk/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java

Modified: hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java?rev=1616642&r1=1616641&r2=1616642&view=diff
==============================================================================
--- hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java (original)
+++ hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java Thu Aug  7 23:57:14 2014
@@ -20,6 +20,7 @@ package org.apache.hive.hcatalog.mapredu
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
@@ -37,6 +38,7 @@ import java.util.Map;
  * class that allows us to still be as generic as possible
  * in the main codeflow path, and call attention to the special
  * cases here.
+ *
  * Note : For all methods introduced here, please document why
  * the special case is necessary, providing a jira number if
  * possible.
@@ -50,6 +52,11 @@ public class SpecialCases {
    * instantiating a storage handler to write. We set any parameters
    * we want to be visible to the job in jobProperties, and this will
    * be available to the job via jobconf at run time.
+   *
+   * This is mostly intended to be used by StorageHandlers that wrap
+   * File-based OutputFormats such as FosterStorageHandler that wraps
+   * RCFile, ORC, etc.
+   *
    * @param jobProperties : map to write to
    * @param jobInfo : information about this output job to read from
    * @param ofclass : the output format in use
@@ -78,5 +85,26 @@ public class SpecialCases {
     }
   }
 
+  /**
+   * Method to do any storage-handler specific special casing while instantiating a
+   * HCatLoader
+   *
+   * @param conf : configuration to write to
+   * @param tableInfo : the table definition being used
+   */
+  public static void addSpecialCasesParametersForHCatLoader(
+      Configuration conf, HCatTableInfo tableInfo) {
+    if ((tableInfo == null) || (tableInfo.getStorerInfo() == null)){
+      return;
+    }
+    String shClass = tableInfo.getStorerInfo().getStorageHandlerClass();
+    if ((shClass != null) && shClass.equals("org.apache.hadoop.hive.hbase.HBaseStorageHandler")){
+      // NOTE: The reason we use a string name of the hive hbase handler here is
+      // because we do not want to introduce a compile-dependency on the hive-hbase-handler
+      // module from within hive-hcatalog.
+      // This parameter was added due to the requirement in HIVE-7072
+      conf.set("pig.noSplitCombination", "true");
+    }
+  }
 
 }

Modified: hive/trunk/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java?rev=1616642&r1=1616641&r2=1616642&view=diff
==============================================================================
--- hive/trunk/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java (original)
+++ hive/trunk/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java Thu Aug  7 23:57:14 2014
@@ -43,6 +43,7 @@ import org.apache.hive.hcatalog.data.Pai
 import org.apache.hive.hcatalog.data.schema.HCatSchema;
 import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
 import org.apache.hive.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hive.hcatalog.mapreduce.SpecialCases;
 import org.apache.pig.Expression;
 import org.apache.pig.Expression.BinaryExpression;
 import org.apache.pig.PigException;
@@ -125,6 +126,12 @@ public class HCatLoader extends HCatBase
       Job clone = new Job(job.getConfiguration());
       HCatInputFormat.setInput(job, dbName, tableName, getPartitionFilterString());
 
+      InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(
+          job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));
+
+      SpecialCases.addSpecialCasesParametersForHCatLoader(job.getConfiguration(),
+          inputJobInfo.getTableInfo());
+
       // We will store all the new /changed properties in the job in the
       // udf context, so the the HCatInputFormat.setInput method need not
       //be called many times.


Reply via email to