Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm?rev=1211077&r1=1211076&r2=1211077&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm Tue Dec 6 20:05:37 2011
@@ -26,6 +26,289 @@
 
 package Util;
 
+use IPC::Run qw(run);
+use Log::Log4perl qw(:easy);
+
+# Sub: prepareHCat
+#
+# Write the test's 'hcat_prep' script to a .sql file under 'localpath' and run
+# it with the hcat command line.  Dies if the file cannot be written.
+sub prepareHCat
+{
+    my ($self, $testCmd, $log) = @_;
+    my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out";
+    my $hcatCmd = $self->replaceParameters( $testCmd->{'hcat_prep'}, $outfile, $testCmd, $log);
+    # NOTE(review): $hcatCmd is never used; the raw 'hcat_prep' text is what gets written below -- confirm.
+
+    my $sqlfile = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" .
+        $testCmd->{'num'} . ".0.sql";
+
+    open(my $fh, '>', $sqlfile) or
+        die "Unable to open file $sqlfile to write SQL script, $!\n";
+    print {$fh} $testCmd->{'hcat_prep'} . "\n";
+    close($fh) or die "Unable to close file $sqlfile, $!\n";
+
+    Util::runHCatCmdFromFile($testCmd, $log, $sqlfile);
+}
+
+##############################################################################
+# Sub: setupHiveProperties
+#
+# Assure that necessary values are set in config in order to set Hive
+# Java properties.
+#
+# Returns:
+# Nothing
+sub setupHiveProperties($$)
+{
+    my ($cfg, $log) = @_;
+
+    # Set up values for the metastore
+    if (defined($cfg->{'metastore_thrift'}) && $cfg->{'metastore_thrift'} == 1) {
+        if (! defined $cfg->{'metastore_host'} || $cfg->{'metastore_host'} eq "") {
+            print $log "When using thrift, you must set the key " .
+                " 'metastore_host' to the machine your metastore is on\n";
+            die "metastore_host is not set in existing.conf\n";
+        }
+
+        $cfg->{'metastore_connection'} =
+            "jdbc:$cfg->{'metastore_db'}://$cfg->{'metastore_host'}/hivemetastoredb?createDatabaseIfNotExist=true";
+
+        if (! defined $cfg->{'metastore_passwd'} || $cfg->{'metastore_passwd'} eq "") {
+            $cfg->{'metastore_passwd'} = 'hive';
+        }
+
+        if (! defined $cfg->{'metastore_port'} || $cfg->{'metastore_port'} eq "") {
+            $cfg->{'metastore_port'} = '9933';
+        }
+
+        $cfg->{'metastore_uri'} =
+            "thrift://$cfg->{'metastore_host'}:$cfg->{'metastore_port'}";
+    } else {
+        $cfg->{'metastore_connection'} =
+            "jdbc:derby:;databaseName=metastore_db;create=true";
+        $cfg->{'metastore_driver'} = "org.apache.derby.jdbc.EmbeddedDriver";
+    }
+}
+
+# Sub: getHadoopCmd
+# Returns the base hadoop command as a list: the executable named by 'hadoopbin' (or, failing
+# that, found via `which hadoop`) plus '--config <testconfigpath>' if set.  Dies if neither is executable.
+sub getHadoopCmd
+{
+    my ( $properties ) = @_;
+    my $subName = (caller(0))[3];
+    my @baseCmd;
+
+    die "$0.$subName: null properties" if (! $properties );
+
+    my $cmd = $properties->{'hadoopbin'};
+    if ( ! -x "$cmd" ) {
+      print STDERR "\n$0::$subName WARNING: Can't find hadoop command: $cmd\n";
+      $cmd = `which hadoop`;
+      chomp $cmd;
+      print STDERR "$0::$subName WARNING: Instead using command: $cmd\n";
+    }
+    if ( ! -x "$cmd" ) {
+      die "\n$0::$subName FATAL: Hadoop command does not exist: $cmd\n";
+    }
+    push (@baseCmd, $cmd);
+
+    push (@baseCmd, '--config', $properties->{'testconfigpath'}) if defined($properties->{'testconfigpath'});
+
+    return @baseCmd;
+}
+
+##############################################################################
+# Sub: runHiveCmdFromFile
+#
+# Run the provided file using the Hive command line.
+#
+# cfg - The configuration file for the test
+# log - reference to the log file, should be an open file pointer
+# sql - name of file containing SQL to run.  Optional, if present -f $sql
+#       will be appended to the command.
+# outfile - open file pointer (or variable reference) to write stdout to for
+#       this test.  Optional, will be written to $log if this value is not
+#       provided.
+# errfile - open file pointer (or variable reference) to write stderr to for
+#       this test.  Optional, will be written to $log if this value is not
+#       provided.
+# noFailOnFail - if true, do not fail when the Hive command returns non-zero
+#       value.
+# Returns:
+# True on success; on failure, the command's exit code if noFailOnFail is set (otherwise dies).
+sub runHiveCmdFromFile($$;$$$$)
+{
+    my ($cfg, $log, $sql, $outfile, $errfile, $noFailOnFail) = @_;
+
+    if (!defined($ENV{'HADOOP_HOME'})) {
+        die "Cannot run hive when HADOOP_HOME environment variable is not set.";
+    }
+
+    $outfile = $log if (!defined($outfile));
+    $errfile = $log if (!defined($errfile));
+
+    my @cmd = ("$cfg->{'hivehome'}/bin/hive");
+
+    # Add all of the modified properties we want to set
+    push(@cmd, "--hiveconf", "hive.metastore.uris=$cfg->{'thriftserver'}");
+    push(@cmd, "--hiveconf", "hive.metastore.local=false");
+
+    if( defined($cfg->{'metastore.principal'}) && ($cfg->{'metastore.principal'} =~ m/\S+/)
+        && ($cfg->{'metastore.principal'} ne '${metastore.principal}')){
+        push(@cmd, "--hiveconf", "hive.metastore.sasl.enabled=true", "--hiveconf", "hive.metastore.kerberos.principal=$cfg->{'metastore.principal'}");
+    } else {
+        push(@cmd, "--hiveconf", "hive.metastore.sasl.enabled=false");
+    }
+
+    if (defined($cfg->{'additionaljarspath'})) {
+        $ENV{'HIVE_AUX_JARS_PATH'} = $cfg->{'additionaljarspath'};
+    }
+
+    if (defined($cfg->{'hiveconf'})) {
+        foreach my $hc (@{$cfg->{'hiveconf'}}) {
+            push(@cmd, "--hiveconf", $hc);
+        }
+    }
+
+    if (defined($cfg->{'hivecmdargs'})) {
+        push(@cmd, @{$cfg->{'hivecmdargs'}});
+    }
+
+    if (defined($cfg->{'hiveops'})) {
+        $ENV{'HIVE_OPTS'} = join(" ", @{$cfg->{'hiveops'}});
+    }
+
+    $ENV{'HIVE_HOME'} = $cfg->{'hivehome'};
+
+    my $envStr;
+    for my $k (keys(%ENV)) {
+        $envStr .= $k . "=" . $ENV{$k} . " " if ($k =~ /HADOOP/ || $k =~ /HIVE/);
+    }
+    $envStr .= " ";
+
+    if (defined($sql)) {
+        push(@cmd, "-f", $sql);
+    }
+    print $log "Going to run hive command [" . join(" ", @cmd) .
+        "] with environment set to [$envStr]\n";
+    my $runrc = run(\@cmd, \undef, $outfile, $errfile);
+    my $rc = $? >> 8;
+
+    return $runrc if $runrc; # success
+
+    if (defined($noFailOnFail) && $noFailOnFail) {
+        return $rc;
+    } else {
+        die "Failed running hive command [" . join(" ", @cmd) . "]\n";
+    }
+}
+
+#############################################################################
+# Sub: runHCatCmdFromFile
+#
+# Run the provided file using the HCat command line.
+#
+# cfg - The configuration file for the test
+# log - reference to the log file, should be an open file pointer
+# sql - name of file containing SQL to run.  Optional, if present -f $sql
+#       will be appended to the command.
+# outfile - open file pointer (or variable reference) to write stdout to for
+#       this test.  Optional, will be written to $log if this value is not
+#       provided.
+# errfile - open file pointer (or variable reference) to write stderr to for
+#       this test.  Optional, will be written to $log if this value is not
+#       provided.
+# noFailOnFail - if true, do not fail when the HCat command returns non-zero
+#       value.
+# Returns:
+# True on success; on failure, the command's exit code if noFailOnFail is set (otherwise dies).
+sub runHCatCmdFromFile($$;$$$$)
+{
+    my ($cfg, $log, $sql, $outfile, $errfile, $noFailOnFail) = @_;
+
+    if (!defined($ENV{'HADOOP_HOME'})) {
+        die "Cannot run hcat when HADOOP_HOME environment variable is not set.";
+    }
+
+    $outfile = $log if (!defined($outfile));
+    $errfile = $log if (!defined($errfile));
+
+    # Clear HADOOP_CLASSPATH for the child process (set to empty, not deleted)
+    $ENV{'HADOOP_CLASSPATH'} = "";
+
+    my @cmd;
+    if (defined($sql)) {
+        @cmd = ("$cfg->{'hcathome'}/bin/hcat", "-f", $sql);
+    } else {
+        @cmd = ("$cfg->{'hcathome'}/bin/hcat");
+    }
+
+    my $envStr;
+    for my $k (keys(%ENV)) {
+        $envStr .= $k . "=" . $ENV{$k} . " " if ($k =~ /HADOOP/ || $k =~ /HIVE/);
+    }
+    $envStr .= " ";
+    print $log "Going to run hcat command [" . join(" ", @cmd) .
+        "] with environment set to [$envStr]\n";
+    my $runrc = run(\@cmd, \undef, $outfile, $errfile);
+    my $rc = $? >> 8;
+
+    return $runrc if $runrc; # success
+
+    if (defined($noFailOnFail) && $noFailOnFail) {
+        return $rc;
+    } else {
+        die "Failed running hcat command [" . join(" ", @cmd) . "]\n";
+    }
+}
+
+##############################################################################
+# Sub: runDbCmd
+#
+# Run the provided mysql command
+#
+# Returns:
+# Nothing
+sub runDbCmd($$$;$)
+{
+    my ($cfg, $log, $sqlfile, $outfile) = @_;
+
+    $outfile = $log if (!defined($outfile));
+
+    open(my $sqlfh, '<', $sqlfile) or die "Unable to open $sqlfile for reading, $!\n";
+
+    my @cmd = ('mysql', '-u', $cfg->{'dbuser'}, '-D', $cfg->{'dbdb'},
+        '-h', $cfg->{'dbhost'}, "--password=$cfg->{'dbpasswd'}",
+        "--skip-column-names");
+
+    print $log "Going to run [" . join(" ", @cmd) . "] passing in [$sqlfile]\n";
+
+    run(\@cmd, $sqlfh, $outfile, $log) or
+        die "Failed running " . join(" ", @cmd) . "\n";
+    close($sqlfh);
+}
+
+# Sub: runHadoopCmd
+#
+# Run the provided hadoop command
+#
+# Returns:
+# Nothing
+sub runHadoopCmd($$$)
+{
+    my ($cfg, $log, $c) = @_;
+
+    my @cmd = ("$ENV{'HADOOP_HOME'}/bin/hadoop");
+    push(@cmd, split(' ', $c));
+
+    print $log "Going to run [" . join(" ", @cmd) . "]\n";
+
+    run(\@cmd, \undef, $log, $log) or
+        die "Failed running " . join(" ", @cmd) . "\n";
+}
+
 ##############################################################################
 #  Sub: localTime
 #
@@ -111,35 +394,6 @@ sub execCmd() {
     return @result;
 }
 
-sub getHadoopCmd
-{
-    my ( $properties ) = @_;
-
-    my $subName = (caller(0))[3];
-    my @baseCmd;
-
-    die "$0.$subName: null properties" if (! $properties );
-
-    my $cmd;
-
-    $cmd = $properties->{'gridstack.root'} . "/hadoop/current/bin/hadoop";
-    if ( ! -x "$cmd" ) {
-      print STDERR "\n$0::$subName WARNING: Can't find hadoop command: $cmd\n";
-      $cmd = `which hadoop`;
-      chomp $cmd;
-      print STDERR "$0::$subName WARNING: Instead using command: $cmd\n";
-    }
-    if ( ! -x "$cmd" ) {
-      die "\n$0::$subName FATAL: Hadoop command does not exist: $cmd\n";
-    }
-    push (@baseCmd, $cmd);
-
-    push (@baseCmd, '--config', $properties->{'testconfigpath'}) if defined($properties->{'testconfigpath'});
-
-    return @baseCmd;
-}
-
-
 sub getHiveCmd
 {
     my ( $properties ) = @_;
@@ -187,6 +441,17 @@ sub getHCatCmd
     return @baseCmd;
 }
 
+# Debug aid: print the call stack above the caller to STDOUT, one frame per line (bounded by $max_depth).
+sub show_call_stack {
+    my $max_depth = 30;
+    my $i = 1;    # frame 0 is the call to show_call_stack itself; start one level up
+    print("--- Begin stack trace ---\n");
+    while ( (my @call_details = (caller($i++))) && ($i<$max_depth) ) {
+        print("$call_details[1] line $call_details[2] " .
+              "in function $call_details[3]\n");
+    }
+    print("--- End stack trace ---\n");
+}
 
 
 sub getPigCmd
@@ -198,6 +463,7 @@ sub getPigCmd
     my @baseCmd;
 
     die "$0.$subName: null properties" if (! $properties );
+show_call_stack();
 
     #UGLY HACK for pig sql support
     if ( $jarkey =~ /testsql/ ) {
@@ -217,6 +483,7 @@ sub getPigCmd
 
     # This allows for testing of the pig script as installed, and for testin of
     # the pig script's options, including error testing.
+print 'use-pig.pl?????';
 
     $cmd = $properties->{'gridstack.root'} . "/pig/" . $properties->{'pigTestBuildName'} . "/bin/pig";
     if ( ! -x "$cmd" ) {
@@ -241,6 +508,7 @@ sub getPigCmd
 
     } else {
         $cmd="java";
+print 'not use-pig.pl?????';
 
         # Set JAVA options
         # User can provide only one of
Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf Tue Dec 6 20:05:37 2011
@@ -0,0 +1,161 @@
+#!/home/y/bin/perl
+
+    #
+    # Do
+    # egrep '^#|name.*=>' hadoop.conf | egrep -v '^#!|egrep' | less
+    # to get an outline of this test conf file
+    #
+
+  # NOTE(review): this header advertises the Hive set directives below, but none appear in this file:
+  #   set hive.exec.dynamic.partition.mode=nonstrict;
+  #   set hive.exec.dynamic.partition=true;
+
+# NOTE(review): :FUNCPATH:, :HCAT_JAR:, :THRIFTSERVER:, :OUTPATH: look like driver-substituted placeholders -- confirm.
+$cfg = {
+        'driver' => 'Hadoop',
+        'groups' => [
+# This first group should be moved to deployer ?
+                {
+                        'name' => 'Hadoop_Checkin',
+                        'tests' => [
+                                {
+                                 'num' => 1
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.SimpleRead -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k :OUTPATH:
+\,
+                                ,'sql' => q\select name, gpa from studenttab10k;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => ' '
+                                },
+                                {
+                                 'num' => 2
+                                ,'hcat_prep'=>q\drop table if exists hadoop_checkin_2;
+create table hadoop_checkin_2 (name string, age int, gpa double) STORED AS TEXTFILE;\
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadWrite -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k hadoop_checkin_2
+\,
+                                ,'result_table' => 'hadoop_checkin_2'
+                                ,'sql' => q\select * from studenttab10k;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => ' '
+                                },
+                                {
+                                 'num' => 3
+                                ,'hcat_prep'=>q\drop table if exists hadoop_checkin_3;
+create table hadoop_checkin_3 (name string, cnt int) STORED AS TEXTFILE;\
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.GroupByAge -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k hadoop_checkin_3
+\,
+                                ,'result_table' => 'hadoop_checkin_3'
+                                ,'sql' => q\select age, count(*) from studenttab10k group by age;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => ' '
+                                },
+                        ],
+                }, # end group Hadoop_Checkin
+                {
+                        'name' => 'Hadoop_Read',
+                        'tests' => [
+                                {
+                                 'num' => 1
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadText -libjars :HCAT_JAR: :THRIFTSERVER: all100k :OUTPATH:
+\,
+                                ,'sql' => q\select * from all100k;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => ' '
+                                },
+                                {
+                                 'num' => 2
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadJson -libjars :HCAT_JAR: :THRIFTSERVER: all100kjson :OUTPATH:
+\,
+                                ,'sql' => q\select s, i, d from all100kjson;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => ' '
+                                },
+                                {
+                                 'num' => 3
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadRC -libjars :HCAT_JAR: :THRIFTSERVER: all100krc :OUTPATH:
+\,
+                                ,'sql' => q\select * from all100krc;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => ' '
+                                },
+                        ],
+                }, # end group Hadoop_Read
+                {
+                        'name' => 'Hadoop_Write',
+                        'tests' => [
+                                {
+                                 'num' => 1
+                                ,'hcat_prep'=>q\
+drop table if exists hadoop_write_1;
+create table hadoop_write_1(
+            t tinyint,
+            si smallint,
+            i int,
+            b bigint,
+            f float,
+            d double,
+            s string)
+        row format delimited
+        fields terminated by ':'
+        stored as textfile;\
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteText -libjars :HCAT_JAR: :THRIFTSERVER: all100k hadoop_write_1
+\,
+                                ,'result_table' => 'hadoop_write_1'
+                                ,'sql' => q\select * from all100k;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => ' '
+                                },
+                                {
+                                 'num' => 2
+                                ,'hcat_prep' => q\
+drop table if exists hadoop_write_2;
+create table hadoop_write_2(
+            s string,
+            i int,
+            d double,
+            m map<string, string>,
+            bb array<struct<a: int, b: string>>)
+            STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
+            INPUTDRIVER 'org.apache.hcatalog.pig.drivers.LoadFuncBasedInputDriver' OUTPUTDRIVER 'org.apache.hcatalog.pig.drivers.StoreFuncBasedOutputDriver'
+            TBLPROPERTIES ('hcat.pig.loader'='org.apache.pig.builtin.JsonLoader', 'hcat.pig.storer'='org.apache.pig.builtin.JsonStorage', 'hcat.pig.loader.args'=
+'s:chararray, i:int, d:double, m:map[chararray], bb:{t:(a:int, b:chararray)}', 'hcat.pig.args.delimiter'=' ');\
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteJson -libjars :HCAT_JAR: :THRIFTSERVER: all100kjson hadoop_write_2
+\,
+                                ,'result_table' => 'hadoop_write_2'
+                                ,'sql' => q\select s, i, d, '', '' from all100kjson;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => ' '
+                                },
+                                {
+                                 'num' => 3
+                                ,'hcat_prep' => q\
+drop table if exists hadoop_write_3;
+create table hadoop_write_3(
+            name string,
+            age int,
+            gpa double)
+stored as rcfile
+TBLPROPERTIES (
+    'hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver',
+    'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver'
+);
+\,
+                                ,'hadoop' => q\
+jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteRC -libjars :HCAT_JAR: :THRIFTSERVER: all100krc hadoop_write_3
+\,
+                                ,'result_table' => 'hadoop_write_3'
+                                ,'sql' => q\select * from all100krc;\
+                                ,'floatpostprocess' => 1
+                                ,'delimiter' => ' '
+                                },
+                        ],
+                }, # end group Hadoop_Write
+         ]
+}
