Author: khorgath Date: Thu Aug 7 23:57:14 2014 New Revision: 1616642 URL: http://svn.apache.org/r1616642 Log: HIVE-7072 : HCatLoader only loads first region of hbase table (Sushanth Sowmyan, reviewed by Daniel Dai)
Modified: hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java hive/trunk/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java Modified: hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java?rev=1616642&r1=1616641&r2=1616642&view=diff ============================================================================== --- hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java (original) +++ hive/trunk/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/SpecialCases.java Thu Aug 7 23:57:14 2014 @@ -20,6 +20,7 @@ package org.apache.hive.hcatalog.mapredu import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcFile; @@ -37,6 +38,7 @@ import java.util.Map; * class that allows us to still be as generic as possible * in the main codeflow path, and call attention to the special * cases here. + * * Note : For all methods introduced here, please document why * the special case is necessary, providing a jira number if * possible. @@ -50,6 +52,11 @@ public class SpecialCases { * instantiating a storage handler to write. We set any parameters * we want to be visible to the job in jobProperties, and this will * be available to the job via jobconf at run time. + * + * This is mostly intended to be used by StorageHandlers that wrap + * File-based OutputFormats such as FosterStorageHandler that wraps + * RCFile, ORC, etc. 
+ * * @param jobProperties : map to write to * @param jobInfo : information about this output job to read from * @param ofclass : the output format in use @@ -78,5 +85,26 @@ public class SpecialCases { } } + /** + * Method to do any storage-handler specific special casing while instantiating a + * HCatLoader + * + * @param conf : configuration to write to + * @param tableInfo : the table definition being used + */ + public static void addSpecialCasesParametersForHCatLoader( + Configuration conf, HCatTableInfo tableInfo) { + if ((tableInfo == null) || (tableInfo.getStorerInfo() == null)){ + return; + } + String shClass = tableInfo.getStorerInfo().getStorageHandlerClass(); + if ((shClass != null) && shClass.equals("org.apache.hadoop.hive.hbase.HBaseStorageHandler")){ + // NOTE: The reason we use a string name of the hive hbase handler here is + // because we do not want to introduce a compile-dependency on the hive-hbase-handler + // module from within hive-hcatalog. + // This parameter was added due to the requirement in HIVE-7072 + conf.set("pig.noSplitCombination", "true"); + } + } } Modified: hive/trunk/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java?rev=1616642&r1=1616641&r2=1616642&view=diff ============================================================================== --- hive/trunk/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java (original) +++ hive/trunk/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java Thu Aug 7 23:57:14 2014 @@ -43,6 +43,7 @@ import org.apache.hive.hcatalog.data.Pai import org.apache.hive.hcatalog.data.schema.HCatSchema; import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; import org.apache.hive.hcatalog.mapreduce.InputJobInfo; +import org.apache.hive.hcatalog.mapreduce.SpecialCases; 
import org.apache.pig.Expression; import org.apache.pig.Expression.BinaryExpression; import org.apache.pig.PigException; @@ -125,6 +126,12 @@ public class HCatLoader extends HCatBase Job clone = new Job(job.getConfiguration()); HCatInputFormat.setInput(job, dbName, tableName, getPartitionFilterString()); + InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize( + job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO)); + + SpecialCases.addSpecialCasesParametersForHCatLoader(job.getConfiguration(), + inputJobInfo.getTableInfo()); + // We will store all the new /changed properties in the job in the // udf context, so the HCatInputFormat.setInput method need not //be called many times.