Author: toffer
Date: Tue Jan 24 03:33:44 2012
New Revision: 1235121
URL: http://svn.apache.org/viewvc?rev=1235121&view=rev
Log:
HCAT-219. Adding HBase e2e tests (daijy via toffer)
Added:
incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HBaseReadWrite.java
Modified:
incubator/hcatalog/trunk/CHANGES.txt
incubator/hcatalog/trunk/src/test/e2e/hcatalog/build.xml
incubator/hcatalog/trunk/src/test/e2e/hcatalog/conf/default.conf
incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm
incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverPig.pm
incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm
incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf
incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf
incubator/hcatalog/trunk/storage-drivers/hbase/ivy/libraries.properties
Modified: incubator/hcatalog/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/CHANGES.txt?rev=1235121&r1=1235120&r2=1235121&view=diff
==============================================================================
--- incubator/hcatalog/trunk/CHANGES.txt (original)
+++ incubator/hcatalog/trunk/CHANGES.txt Tue Jan 24 03:33:44 2012
@@ -60,6 +60,8 @@ Trunk (unreleased changes)
HCAT-63. RPM package integration with Hadoop (khorgath via hashutosh)
IMPROVEMENTS
+ HCAT-219. Adding HBase e2e tests (daijy via toffer)
+
HCAT-226. HBase storage drivers should load hbase resources (hbase-site,
hbase-default) for the user (hashutosh via toffer)
HCAT-223. Include HWI (vikram.dixit via khorgath)
Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/build.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/build.xml?rev=1235121&r1=1235120&r2=1235121&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/build.xml (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/build.xml Tue Jan 24 03:33:44 2012
@@ -198,6 +198,7 @@
<env key="PH_CLUSTER_BIN" value="${harness.cluster.bin}"/>
<env key="PIG_HOME" value="${harness.pig.home}"/>
<env key="PIG_JAR" value="${harness.pig.jar}"/> <!-- Pig jar without
antlr -->
+ <env key="HBASE_CONF_DIR" value="${hbase.conf.dir}"/>
<arg line="${tests.to.run}"/>
<arg value="${test.location}/tests/pig.conf"/>
<arg value="${test.location}/tests/hive.conf"/>
Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/conf/default.conf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/conf/default.conf?rev=1235121&r1=1235120&r2=1235121&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/conf/default.conf (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/conf/default.conf Tue Jan 24 03:33:44 2012
@@ -85,6 +85,7 @@ $cfg = {
#HIVE
, 'hive_bin_location' => "$ENV{HIVE_ROOT}/build/dist/bin"
+ , 'hbaseconfigpath' => "$ENV{HBASE_CONF_DIR}"
, 'hivehome' => "$ENV{HIVE_HOME}"
};
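
The new conf entry closes the loop started in build.xml above: ant's hbase.conf.dir property is exported as HBASE_CONF_DIR, which this conf file picks up and hands to the drivers. A rough sketch of the chain (the harness plumbing that copies conf entries into $testCmd is elided):

    # conf/default.conf: expose the environment variable under a conf key
    , 'hbaseconfigpath' => "$ENV{HBASE_CONF_DIR}"

    # drivers: read it back per test command
    my $hbaseConf = $testCmd->{'hbaseconfigpath'};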
Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm?rev=1235121&r1=1235120&r2=1235121&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm Tue Jan 24 03:33:44 2012
@@ -343,6 +343,10 @@ sub runHadoop
$ENV{'HADOOP_CLASSPATH'} = $cp;
}
+ if (defined($testCmd->{'hbaseconfigpath'})) {
+        $ENV{'HADOOP_CLASSPATH'} = "$ENV{'HADOOP_CLASSPATH'}:$testCmd->{'hbaseconfigpath'}";
+ }
+
    if (defined($testCmd->{'metastore.principal'}) && ($testCmd->{'metastore.principal'} =~ m/\S+/)) {
        $ENV{'HADOOP_OPTS'} = "-Dhcat.metastore.principal=" . $testCmd->{'metastore.principal'};
        $ENV{'HADOOP_CLIENT_OPTS'} = "-Dhcat.metastore.principal=" . $testCmd->{'metastore.principal'};
@@ -624,6 +628,7 @@ sub getPigCmd($$$)
    $pcp .= ":" . $testCmd->{'additionaljars'} if (defined($testCmd->{'additionaljars'}));
# Only add testconfigpath to PIG_CLASSPATH if HADOOP_HOME isn't defined
    $pcp .= ":" . $testCmd->{'testconfigpath'} if ($testCmd->{'exectype'} ne "local"); #&& (! defined $ENV{'HADOOP_HOME'});
+    $pcp .= ":" . $testCmd->{'hbaseconfigpath'} if ($testCmd->{'exectype'} ne "local" && defined($testCmd->{'hbaseconfigpath'}) && $testCmd->{'hbaseconfigpath'} ne "");
# Set it in our current environment. It will get inherited by the IPC::Run
# command.
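
Both getPigCmd changes (here and in TestDriverPig.pm below) append the HBase config directory to PIG_CLASSPATH under the same guard. A minimal standalone sketch of the intended check (only the hash keys come from the drivers; everything else is illustrative):

    my $pcp = $ENV{'PIG_CLASSPATH'} || '';
    if ($testCmd->{'exectype'} ne 'local'
            && defined($testCmd->{'hbaseconfigpath'})
            && $testCmd->{'hbaseconfigpath'} ne '') {
        $pcp .= ':' . $testCmd->{'hbaseconfigpath'};
    }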
Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverPig.pm
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverPig.pm?rev=1235121&r1=1235120&r2=1235121&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverPig.pm (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverPig.pm Tue Jan 24 03:33:44 2012
@@ -375,6 +375,7 @@ sub getPigCmd($$$)
    $pcp .= ":" . $testCmd->{'additionaljars'} if (defined($testCmd->{'additionaljars'}));
# Only add testconfigpath to PIG_CLASSPATH if HADOOP_HOME isn't defined
    $pcp .= ":" . $testCmd->{'testconfigpath'} if ($testCmd->{'exectype'} ne "local"); #&& (! defined $ENV{'HADOOP_HOME'});
+    $pcp .= ":" . $testCmd->{'hbaseconfigpath'} if ($testCmd->{'exectype'} ne "local" && defined($testCmd->{'hbaseconfigpath'}) && $testCmd->{'hbaseconfigpath'} ne "");
# Set it in our current environment. It will get inherited by the IPC::Run
# command.
@@ -396,7 +397,7 @@ sub getPigCmd($$$)
$opts = $opts . " " . join(" ", @{$testCmd->{'java_params'}});
}
- $ENV{'PIG_OPTS'} = $opts;
+ $ENV{'PIG_OPTS'} = $ENV{'PIG_OPTS'} . " " . $opts;
print $log "Returning Pig command " . join(" ", @pigCmd) . "\n";
print $log "With PIG_CLASSPATH set to " . $ENV{'PIG_CLASSPATH'} . " and
PIG_OPTS set to " . $ENV{'PIG_OPTS'} . "\n";
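
The PIG_OPTS change above appends rather than overwrites, so options a caller exported before invoking the harness survive. Roughly (the pre-set value below is hypothetical):

    $ENV{'PIG_OPTS'} = '-Dsome.caller.flag=1';              # hypothetical pre-existing value
    my $opts = '-Dhcat.metastore.principal=hcat/_HOST';     # assembled by getPigCmd; illustrative
    $ENV{'PIG_OPTS'} = $ENV{'PIG_OPTS'} . ' ' . $opts;      # both are kept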
Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm?rev=1235121&r1=1235120&r2=1235121&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm Tue Jan 24 03:33:44 2012
@@ -562,7 +562,7 @@ sub findPigWithoutHadoopJar($$)
{
my ($cfg, $log) = @_;
- my $jar = `ls $cfg->{'pigpath'}/pig-*-withouthadoop.jar`;
+ my $jar = `ls $cfg->{'pigpath'}/pig-*withouthadoop.jar`;
chomp $jar;
return $jar;
}
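
The relaxed glob in findPigWithoutHadoopJar tolerates jar names with or without a version segment before "withouthadoop"; the file names below are examples, not actual build output:

    # pig-0.9.2-withouthadoop.jar : matched by pig-*-withouthadoop.jar and pig-*withouthadoop.jar
    # pig-withouthadoop.jar       : matched only by pig-*withouthadoop.jar
    my $jar = `ls $cfg->{'pigpath'}/pig-*withouthadoop.jar`;
    chomp $jar;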
Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf?rev=1235121&r1=1235120&r2=1235121&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf Tue Jan 24 03:33:44 2012
@@ -158,5 +158,21 @@ jar :FUNCPATH:/testudf.jar org.apache.hc
},
],
}, # end g
+ {
+ 'name' => 'Hadoop_HBase',
+ 'tests' => [
+ {
+ 'num' => 1
+                        ,'hcat_prep'=>q\drop table if exists hadoop_hbase_1;
+create table hadoop_hbase_1(key string, gpa string) STORED BY 'org.apache.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:gpa');\
+                        ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.HBaseReadWrite -libjars :HCAT_JAR: :THRIFTSERVER: :INPATH:/studenttab10k hadoop_hbase_1 :OUTPATH:
+\,
+                        ,'sql' => q\select name, sum(gpa) from studenttab10k group by name;\
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ ],
+ }, # end g
]
}
Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf?rev=1235121&r1=1235120&r2=1235121&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf Tue Jan 24 03:33:44 2012
@@ -174,6 +174,41 @@ store b into 'pig_write_3' using org.apa
}
],
}, # end g
+ {
+ 'name' => 'Pig_HBase',
+ 'tests' => [
+ {
+ 'num' => 1
+                        ,'pig' => q\set hcat.hbase.output.bulkMode 'false'
+e = load 'pig_hbase_1' using org.apache.hcatalog.pig.HCatLoader();
+store e into ':OUTPATH:';\,
+ ,'result_table' => ['?']
+                        ,'sql' => [ 'select name, avg(cast(age as decimal(10,5))), avg(gpa) from studenttab10k group by name;']
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ },
+ {
+ # multiquery
+ 'num' => 2
+                        ,'hcat_prep'=>q\drop table if exists pig_hbase_2_1;
+create table pig_hbase_2_1(key string, age string, gpa string) STORED BY 'org.apache.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:age,info:gpa');
+drop table if exists pig_hbase_2_2;
+create table pig_hbase_2_2(key string, age string, gpa string) STORED BY 'org.apache.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:age,info:gpa');
+\
+                        ,'pig' => q\set hcat.hbase.output.bulkMode 'false'
+a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:float);
+b = group a by name;
+c = foreach b generate group as name, AVG(a.age) as age, AVG(a.gpa) as gpa;
+d = foreach c generate name as key, (chararray)age, (chararray)gpa as gpa;
+store d into 'pig_hbase_2_1' using org.apache.hcatalog.pig.HCatStorer();
+store d into 'pig_hbase_2_2' using org.apache.hcatalog.pig.HCatStorer();\,
+                        ,'result_table' => ['pig_hbase_2_1','pig_hbase_2_2']
+                        ,'sql' => [ 'select name, avg(cast(age as decimal(10,5))), avg(gpa) from studenttab10k group by name;', 'select name, avg(cast(age as decimal(10,5))), avg(gpa) from studenttab10k group by name;']
+ ,'floatpostprocess' => 1
+ ,'delimiter' => ' '
+ }
+ ],
+ }, # end g
]
}
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HBaseReadWrite.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HBaseReadWrite.java?rev=1235121&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HBaseReadWrite.java (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HBaseReadWrite.java Tue Jan 24 03:33:44 2012
@@ -0,0 +1,192 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hcatalog.utils;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hcatalog.common.HCatConstants;
+import org.apache.hcatalog.data.DefaultHCatRecord;
+import org.apache.hcatalog.data.HCatRecord;
+import org.apache.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hcatalog.mapreduce.OutputJobInfo;
+
+/**
+ * This is a map reduce test for the HCatalog HBase storage handler. The first
+ * job reads the studenttab input, groups by the first column (name), sums the
+ * gpa column, and writes the result into an HBase-backed HCatalog table via
+ * HCatOutputFormat. The second job reads that table back through
+ * HCatInputFormat and dumps it as text, so the harness can compare the
+ * contents against the equivalent SQL result.
+ *
+ * Usage: hadoop jar testudf.jar org.apache.hcatalog.utils.HBaseReadWrite
+ * -libjars <hcat jar> <serveruri> <input dir> <table name> <output dir>
+ */
+public class HBaseReadWrite extends Configured implements Tool {
+
+ public static class HBaseWriteMap extends
+ Mapper<LongWritable, Text, Text, Text> {
+
+ String name;
+ String age;
+ String gpa;
+
+ @Override
+ protected void map(
+ LongWritable key,
+ Text value,
+            org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, Text, Text>.Context context)
+ throws IOException, InterruptedException {
+ String line = value.toString();
+ String[] tokens = line.split("\t");
+ name = tokens[0];
+
+ context.write(new Text(name), value);
+ }
+ }
+
+
+ public static class HBaseWriteReduce extends
+ Reducer<Text, Text, WritableComparable, HCatRecord> {
+
+ String name;
+ String age;
+ String gpa;
+
+ @Override
+ protected void reduce(Text key, Iterable<Text> values, Context context)
+ throws IOException, InterruptedException {
+ name = key.toString();
+ int count = 0;
+ double sum = 0;
+ for (Text value : values) {
+ String line = value.toString();
+ String[] tokens = line.split("\t");
+ name = tokens[0];
+ age = tokens[1];
+ gpa = tokens[2];
+
+ count++;
+                sum += Double.parseDouble(gpa);
+ }
+
+ HCatRecord record = new DefaultHCatRecord(2);
+ record.set(0, name);
+ record.set(1, Double.toString(sum));
+
+ context.write(null, record);
+ }
+ }
+
+ public static class HBaseReadMap extends
+ Mapper<WritableComparable, HCatRecord, Text, Text> {
+
+ String name;
+ String age;
+ String gpa;
+
+ @Override
+ protected void map(
+ WritableComparable key,
+ HCatRecord value,
+            org.apache.hadoop.mapreduce.Mapper<WritableComparable, HCatRecord, Text, Text>.Context context)
+ throws IOException, InterruptedException {
+ name = (String) value.get(0);
+ gpa = (String) value.get(1);
+ context.write(new Text(name), new Text(gpa));
+ }
+ }
+
+
+ public int run(String[] args) throws Exception {
+ Configuration conf = getConf();
+ args = new GenericOptionsParser(conf, args).getRemainingArgs();
+
+ String serverUri = args[0];
+ String inputDir = args[1];
+ String tableName = args[2];
+ String outputDir = args[3];
+ String dbName = null;
+
+ String principalID = System
+ .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+ if (principalID != null)
+ conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+ conf.set("hcat.hbase.output.bulkMode", "false");
+ Job job = new Job(conf, "HBaseWrite");
+ FileInputFormat.setInputPaths(job, inputDir);
+
+ job.setInputFormatClass(TextInputFormat.class);
+ job.setOutputFormatClass(HCatOutputFormat.class);
+ job.setJarByClass(HBaseReadWrite.class);
+ job.setMapperClass(HBaseWriteMap.class);
+ job.setMapOutputKeyClass(Text.class);
+ job.setMapOutputValueClass(Text.class);
+ job.setReducerClass(HBaseWriteReduce.class);
+ job.setOutputKeyClass(WritableComparable.class);
+ job.setOutputValueClass(DefaultHCatRecord.class);
+ HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName,
+ tableName, null, serverUri, principalID));
+
+ boolean succ = job.waitForCompletion(true);
+
+ if (!succ) return 1;
+
+ job = new Job(conf, "HBaseRead");
+ HCatInputFormat.setInput(job, InputJobInfo.create(dbName, tableName,
+ null, serverUri, principalID));
+
+ job.setInputFormatClass(HCatInputFormat.class);
+ job.setOutputFormatClass(TextOutputFormat.class);
+ job.setJarByClass(HBaseReadWrite.class);
+ job.setMapperClass(HBaseReadMap.class);
+ job.setOutputKeyClass(Text.class);
+ job.setOutputValueClass(Text.class);
+ job.setNumReduceTasks(0);
+ TextOutputFormat.setOutputPath(job, new Path(outputDir));
+
+ succ = job.waitForCompletion(true);
+
+ if (!succ) return 2;
+
+ return 0;
+ }
+
+ public static void main(String[] args) throws Exception {
+ int exitCode = ToolRunner.run(new HBaseReadWrite(), args);
+ System.exit(exitCode);
+ }
+}
Modified: incubator/hcatalog/trunk/storage-drivers/hbase/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/storage-drivers/hbase/ivy/libraries.properties?rev=1235121&r1=1235120&r2=1235121&view=diff
==============================================================================
--- incubator/hcatalog/trunk/storage-drivers/hbase/ivy/libraries.properties (original)
+++ incubator/hcatalog/trunk/storage-drivers/hbase/ivy/libraries.properties Tue Jan 24 03:33:44 2012
@@ -16,6 +16,6 @@
junit.version=3.8.1
ivy.version=2.2.0
rats-lib.version=0.5.1
-hbase.version=0.90.3
+hbase.version=0.90.5
zookeeper.version=3.4.0
thrift.version=0.7.0