Author: cws
Date: Thu May 10 21:50:39 2012
New Revision: 1336913

URL: http://svn.apache.org/viewvc?rev=1336913&view=rev
Log:
HIVE-2979. Implement INCLUDE_HADOOP_MAJOR_VERSION test macro (Zhenxiao Luo via cws)
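Editorial note before the diff: the substance of this change is a small directive check added to QTestUtil.addFile(). The sketch below is a minimal, self-contained illustration of how the new macro is interpreted; the class and method names are hypothetical, and only the regular expression and the include/exclude semantics are taken from the patch itself.

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Illustrative sketch only -- not the committed code. The regex and the
// skip rules mirror the QTestUtil.java change in the diff below.
public class VersionDirectiveSketch {

  private static final Pattern DIRECTIVE =
      Pattern.compile("-- (EX|IN)CLUDE_HADOOP_MAJOR_VERSIONS\\((.*)\\)");

  // Returns true if a qfile containing the given directive line should be
  // skipped when running against the given Hadoop major version.
  public static boolean shouldSkip(String line, String hadoopVer) {
    Matcher matcher = DIRECTIVE.matcher(line);
    if (!matcher.find()) {
      return false; // no directive on this line
    }
    boolean exclude = "EX".equals(matcher.group(1));
    // The version list is comma-separated, e.g. "0.17, 0.19"
    Set<String> versions = new HashSet<String>();
    for (String s : matcher.group(2).split(",")) {
      versions.add(s.trim());
    }
    // EXCLUDE: skip when the current version is listed.
    // INCLUDE: skip when the current version is NOT listed.
    return exclude ? versions.contains(hadoopVer) : !versions.contains(hadoopVer);
  }

  public static void main(String[] args) {
    System.out.println(shouldSkip("-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20)", "0.23")); // true
    System.out.println(shouldSkip("-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20)", "0.23")); // false
  }
}

In a qfile the directive is written as a SQL comment near the top of the file, as in the "-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20)" line added to clientnegative/archive_corrupt.q in the diff below.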
Added: hive/trunk/ql/src/test/queries/clientnegative/archive_corrupt.q hive/trunk/ql/src/test/results/clientnegative/archive_corrupt.q.out Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java hive/trunk/ql/src/test/queries/clientpositive/archive_corrupt.q hive/trunk/ql/src/test/queries/clientpositive/combine2.q hive/trunk/ql/src/test/queries/clientpositive/sample_islocalmode_hook.q hive/trunk/ql/src/test/queries/clientpositive/split_sample.q hive/trunk/ql/src/test/results/clientpositive/archive_corrupt.q.out hive/trunk/ql/src/test/results/clientpositive/combine2.q.out hive/trunk/ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out hive/trunk/ql/src/test/results/clientpositive/split_sample.q.out Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java?rev=1336913&r1=1336912&r2=1336913&view=diff ============================================================================== --- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (original) +++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java Thu May 10 21:50:39 2012 @@ -33,6 +33,7 @@ import java.io.InputStreamReader; import java.io.PrintStream; import java.io.Serializable; import java.io.UnsupportedEncodingException; +import java.lang.UnsupportedOperationException; import java.util.ArrayList; import java.util.Arrays; import java.util.Deque; @@ -45,6 +46,7 @@ import java.util.TreeMap; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileStatus; @@ -276,50 +278,68 @@ public class QTestUtil { } public void addFile(File qf) throws Exception { - FileInputStream fis = new FileInputStream(qf); BufferedInputStream bis = new BufferedInputStream(fis); BufferedReader br = new BufferedReader(new InputStreamReader(bis, "UTF8")); StringBuilder qsb = new StringBuilder(); // Look for a hint to not run a test on some Hadoop versions - Pattern pattern = Pattern.compile("-- EXCLUDE_HADOOP_MAJOR_VERSIONS(.*)"); - - - // Read the entire query + Pattern pattern = Pattern.compile("-- (EX|IN)CLUDE_HADOOP_MAJOR_VERSIONS\\((.*)\\)"); + boolean excludeQuery = false; + boolean includeQuery = false; + Set<String> versionSet = new HashSet<String>(); String hadoopVer = ShimLoader.getMajorVersion(); String line; + + // Read the entire query while ((line = br.readLine()) != null) { - // While we are reading the lines, detect whether this query wants to be - // excluded from running because the Hadoop version is incorrect + // Each qfile may include at most one INCLUDE or EXCLUDE directive. + // + // If a qfile contains an INCLUDE directive, and hadoopVer does + // not appear in the list of versions to include, then the qfile + // is skipped. + // + // If a qfile contains an EXCLUDE directive, and hadoopVer is + // listed in the list of versions to EXCLUDE, then the qfile is + // skipped. + // + // Otherwise, the qfile is included. 
Matcher matcher = pattern.matcher(line); if (matcher.find()) { - String group = matcher.group(); - int start = group.indexOf('('); - int end = group.indexOf(')'); - assert end > start; - // versions might be something like '0.17, 0.19' - String versions = group.substring(start+1, end); + if (excludeQuery || includeQuery) { + String message = "QTestUtil: qfile " + qf.getName() + + " contains more than one reference to (EX|IN)CLUDE_HADOOP_MAJOR_VERSIONS"; + throw new UnsupportedOperationException(message); + } + + String prefix = matcher.group(1); + if ("EX".equals(prefix)) { + excludeQuery = true; + } else { + includeQuery = true; + } - Set<String> excludedVersionSet = new HashSet<String>(); + String versions = matcher.group(2); for (String s : versions.split("\\,")) { s = s.trim(); - excludedVersionSet.add(s); - } - if (excludedVersionSet.contains(hadoopVer)) { - excludeQuery = true; + versionSet.add(s); } } qsb.append(line + "\n"); } qMap.put(qf.getName(), qsb.toString()); - if(excludeQuery) { - System.out.println("Due to the Hadoop Version ("+ hadoopVer + "), " + - "adding query " + qf.getName() + " to the set of tests to skip"); + + if (excludeQuery && versionSet.contains(hadoopVer)) { + System.out.println("QTestUtil: " + qf.getName() + + " EXCLUDE list contains Hadoop Version " + hadoopVer + ". Skipping..."); + qSkipSet.add(qf.getName()); + } else if (includeQuery && !versionSet.contains(hadoopVer)) { + System.out.println("QTestUtil: " + qf.getName() + + " INCLUDE list does not contain Hadoop Version " + hadoopVer + ". Skipping..."); qSkipSet.add(qf.getName()); - } + } br.close(); } Added: hive/trunk/ql/src/test/queries/clientnegative/archive_corrupt.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientnegative/archive_corrupt.q?rev=1336913&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientnegative/archive_corrupt.q (added) +++ hive/trunk/ql/src/test/queries/clientnegative/archive_corrupt.q Thu May 10 21:50:39 2012 @@ -0,0 +1,18 @@ +USE default; + +set hive.archive.enabled = true; +set hive.enforce.bucketing = true; + +drop table tstsrcpart; + +create table tstsrcpart like srcpart; + +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- The version of GzipCodec that is provided in Hadoop 0.20 silently ignores +-- file format errors. However, versions of Hadoop that include +-- HADOOP-6835 (e.g. 0.23 and 1.x) cause a Wrong File Format exception +-- to be thrown during the LOAD step. This former behavior is tested +-- in clientpositive/archive_corrupt.q + +load data local inpath '../data/files/archive_corrupt.rc' overwrite into table tstsrcpart partition (ds='2008-04-08', hr='11'); + Modified: hive/trunk/ql/src/test/queries/clientpositive/archive_corrupt.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/archive_corrupt.q?rev=1336913&r1=1336912&r2=1336913&view=diff ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/archive_corrupt.q (original) +++ hive/trunk/ql/src/test/queries/clientpositive/archive_corrupt.q Thu May 10 21:50:39 2012 @@ -1,3 +1,5 @@ +USE default; + set hive.archive.enabled = true; set hive.enforce.bucketing = true; @@ -5,6 +7,13 @@ drop table tstsrcpart; create table tstsrcpart like srcpart; +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- The version of GzipCodec provided in Hadoop 0.20 silently ignores +-- file format errors. 
However, versions of Hadoop that include +-- HADOOP-6835 (e.g. 0.23 and 1.x) cause a Wrong File Format exception +-- to be thrown during the LOAD step. This behavior is now tested in +-- clientnegative/archive_corrupt.q + load data local inpath '../data/files/archive_corrupt.rc' overwrite into table tstsrcpart partition (ds='2008-04-08', hr='11'); insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') @@ -16,8 +25,6 @@ select key, value from srcpart where ds= insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') select key, value from srcpart where ds='2008-04-09' and hr='12'; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) - describe extended tstsrcpart partition (ds='2008-04-08', hr='11'); alter table tstsrcpart archive partition (ds='2008-04-08', hr='11'); Modified: hive/trunk/ql/src/test/queries/clientpositive/combine2.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/combine2.q?rev=1336913&r1=1336912&r2=1336913&view=diff ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/combine2.q (original) +++ hive/trunk/ql/src/test/queries/clientpositive/combine2.q Thu May 10 21:50:39 2012 @@ -1,3 +1,5 @@ +USE default; + set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; set mapred.min.split.size=256; set mapred.min.split.size.per.node=256; @@ -8,8 +10,18 @@ set hive.exec.dynamic.partition.mode=non set mapred.cache.shared.enabled=false; set hive.merge.smallfiles.avgsize=0; + + create table combine2(key string) partitioned by (value string); +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.22) +-- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results results of this test. +-- This issue was fixed in MAPREDUCE-2046 which is included in 0.22. + insert overwrite table combine2 partition(value) select * from ( select key, value from src where key < 10 Modified: hive/trunk/ql/src/test/queries/clientpositive/sample_islocalmode_hook.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/sample_islocalmode_hook.q?rev=1336913&r1=1336912&r2=1336913&view=diff ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/sample_islocalmode_hook.q (original) +++ hive/trunk/ql/src/test/queries/clientpositive/sample_islocalmode_hook.q Thu May 10 21:50:39 2012 @@ -1,6 +1,4 @@ -drop table if exists sih_i_part; -drop table if exists sih_src; -drop table if exists sih_src2; +USE default; set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; set mapred.max.split.size=300; @@ -22,7 +20,14 @@ set hive.exec.post.hooks = org.apache.ha set mapred.job.tracker=does.notexist.com:666; set hive.exec.mode.local.auto.input.files.max=1; --- sample split, running locally limited by num tasks +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.22) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. 
This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. +-- Sample split, running locally limited by num tasks select count(1) from sih_src tablesample(1 percent); set mapred.job.tracker=does.notexist.com:666; @@ -36,7 +41,3 @@ set mapred.job.tracker=does.notexist.com -- sample split, running locally limited by max bytes select count(1) from sih_src tablesample(1 percent); - -drop table sih_i_part; -drop table sih_src; -drop table sih_src2; Modified: hive/trunk/ql/src/test/queries/clientpositive/split_sample.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/split_sample.q?rev=1336913&r1=1336912&r2=1336913&view=diff ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/split_sample.q (original) +++ hive/trunk/ql/src/test/queries/clientpositive/split_sample.q Thu May 10 21:50:39 2012 @@ -1,10 +1,4 @@ -drop table ss_src1; -drop table ss_src2; -drop table ss_src3; -drop table ss_i_part; -drop table ss_t3; -drop table ss_t4; -drop table ss_t5; +USE default; set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; set mapred.max.split.size=300; @@ -19,6 +13,15 @@ insert overwrite table ss_i_part partiti insert overwrite table ss_i_part partition (p='2') select key, value from src; insert overwrite table ss_i_part partition (p='3') select key, value from src; create table ss_src2 as select key, value from ss_i_part; + +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.22) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. 
+ select count(1) from ss_src2 tablesample(1 percent); -- sample first split @@ -75,12 +78,3 @@ set mapred.min.split.size.per.node=30000 set mapred.min.split.size.per.rack=300000; select count(1) from ss_src2 tablesample(1 percent); select count(1) from ss_src2 tablesample(50 percent); - - -drop table ss_src1; -drop table ss_src2; -drop table ss_src3; -drop table ss_i_part; -drop table ss_t3; -drop table ss_t4; -drop table ss_t5; Added: hive/trunk/ql/src/test/results/clientnegative/archive_corrupt.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientnegative/archive_corrupt.q.out?rev=1336913&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientnegative/archive_corrupt.q.out (added) +++ hive/trunk/ql/src/test/results/clientnegative/archive_corrupt.q.out Thu May 10 21:50:39 2012 @@ -0,0 +1,25 @@ +PREHOOK: query: USE default +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: USE default +POSTHOOK: type: SWITCHDATABASE +PREHOOK: query: drop table tstsrcpart +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table tstsrcpart +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table tstsrcpart like srcpart +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table tstsrcpart like srcpart +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@tstsrcpart +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- The version of GzipCodec that is provided in Hadoop 0.20 silently ignores +-- file format errors. However, versions of Hadoop that include +-- HADOOP-6835 (e.g. 0.23 and 1.x) cause a Wrong File Format exception +-- to be thrown during the LOAD step. This former behavior is tested +-- in clientpositive/archive_corrupt.q + +load data local inpath '../data/files/archive_corrupt.rc' overwrite into table tstsrcpart partition (ds='2008-04-08', hr='11') +PREHOOK: type: LOAD +PREHOOK: Output: default@tstsrcpart +Failed with exception Wrong file format. Please check the file's format. +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask Modified: hive/trunk/ql/src/test/results/clientpositive/archive_corrupt.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/archive_corrupt.q.out?rev=1336913&r1=1336912&r2=1336913&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/archive_corrupt.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/archive_corrupt.q.out Thu May 10 21:50:39 2012 @@ -1,3 +1,7 @@ +PREHOOK: query: USE default +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: USE default +POSTHOOK: type: SWITCHDATABASE PREHOOK: query: drop table tstsrcpart PREHOOK: type: DROPTABLE POSTHOOK: query: drop table tstsrcpart @@ -7,10 +11,24 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table tstsrcpart like srcpart POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tstsrcpart -PREHOOK: query: load data local inpath '../data/files/archive_corrupt.rc' overwrite into table tstsrcpart partition (ds='2008-04-08', hr='11') +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- The version of GzipCodec provided in Hadoop 0.20 silently ignores +-- file format errors. However, versions of Hadoop that include +-- HADOOP-6835 (e.g. 0.23 and 1.x) cause a Wrong File Format exception +-- to be thrown during the LOAD step. 
This behavior is now tested in +-- clientnegative/archive_corrupt.q + +load data local inpath '../data/files/archive_corrupt.rc' overwrite into table tstsrcpart partition (ds='2008-04-08', hr='11') PREHOOK: type: LOAD PREHOOK: Output: default@tstsrcpart -POSTHOOK: query: load data local inpath '../data/files/archive_corrupt.rc' overwrite into table tstsrcpart partition (ds='2008-04-08', hr='11') +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- The version of GzipCodec provided in Hadoop 0.20 silently ignores +-- file format errors. However, versions of Hadoop that include +-- HADOOP-6835 (e.g. 0.23 and 1.x) cause a Wrong File Format exception +-- to be thrown during the LOAD step. This behavior is now tested in +-- clientnegative/archive_corrupt.q + +load data local inpath '../data/files/archive_corrupt.rc' overwrite into table tstsrcpart partition (ds='2008-04-08', hr='11') POSTHOOK: type: LOAD POSTHOOK: Output: default@tstsrcpart POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 @@ -56,13 +74,9 @@ POSTHOOK: Lineage: tstsrcpart PARTITION( POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) - -describe extended tstsrcpart partition (ds='2008-04-08', hr='11') +PREHOOK: query: describe extended tstsrcpart partition (ds='2008-04-08', hr='11') PREHOOK: type: DESCTABLE -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) - -describe extended tstsrcpart partition (ds='2008-04-08', hr='11') +POSTHOOK: query: describe extended tstsrcpart partition (ds='2008-04-08', hr='11') POSTHOOK: type: DESCTABLE POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] Modified: hive/trunk/ql/src/test/results/clientpositive/combine2.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/combine2.q.out?rev=1336913&r1=1336912&r2=1336913&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/combine2.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/combine2.q.out Thu May 10 21:50:39 2012 @@ -1,9 +1,21 @@ +PREHOOK: query: USE default +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: USE default +POSTHOOK: type: SWITCHDATABASE PREHOOK: query: create table combine2(key string) partitioned by (value string) PREHOOK: type: CREATETABLE POSTHOOK: query: create table combine2(key string) partitioned by (value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@combine2 -PREHOOK: query: insert overwrite table combine2 partition(value) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.22) +-- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 +-- in an attempt to force the generation of multiple splits and multiple output files. 
+-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results results of this test. +-- This issue was fixed in MAPREDUCE-2046 which is included in 0.22. + +insert overwrite table combine2 partition(value) select * from ( select key, value from src where key < 10 union all @@ -13,7 +25,15 @@ select * from ( PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@combine2 -POSTHOOK: query: insert overwrite table combine2 partition(value) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.22) +-- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results results of this test. +-- This issue was fixed in MAPREDUCE-2046 which is included in 0.22. + +insert overwrite table combine2 partition(value) select * from ( select key, value from src where key < 10 union all Modified: hive/trunk/ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out?rev=1336913&r1=1336912&r2=1336913&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out Thu May 10 21:50:39 2012 @@ -1,15 +1,7 @@ -PREHOOK: query: drop table if exists sih_i_part -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table if exists sih_i_part -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table if exists sih_src -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table if exists sih_src -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table if exists sih_src2 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table if exists sih_src2 -POSTHOOK: type: DROPTABLE +PREHOOK: query: USE default +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: USE default +POSTHOOK: type: SWITCHDATABASE PREHOOK: query: -- create file inputs create table sih_i_part (key int, value string) partitioned by (p string) PREHOOK: type: CREATETABLE @@ -83,7 +75,14 @@ POSTHOOK: Lineage: sih_i_part PARTITION( POSTHOOK: Lineage: sih_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: sih_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: sih_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- sample split, running locally limited by num tasks +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.22) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. 
+-- Sample split, running locally limited by num tasks select count(1) from sih_src tablesample(1 percent) PREHOOK: type: QUERY PREHOOK: Input: default@sih_src @@ -102,15 +101,3 @@ PREHOOK: type: QUERY PREHOOK: Input: default@sih_src #### A masked pattern was here #### 1500 -PREHOOK: query: drop table sih_i_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@sih_i_part -PREHOOK: Output: default@sih_i_part -PREHOOK: query: drop table sih_src -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@sih_src -PREHOOK: Output: default@sih_src -PREHOOK: query: drop table sih_src2 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@sih_src2 -PREHOOK: Output: default@sih_src2 Modified: hive/trunk/ql/src/test/results/clientpositive/split_sample.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/split_sample.q.out?rev=1336913&r1=1336912&r2=1336913&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/split_sample.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/split_sample.q.out Thu May 10 21:50:39 2012 @@ -1,31 +1,7 @@ -PREHOOK: query: drop table ss_src1 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table ss_src1 -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table ss_src2 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table ss_src2 -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table ss_src3 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table ss_src3 -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table ss_i_part -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table ss_i_part -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table ss_t3 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table ss_t3 -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table ss_t4 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table ss_t4 -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table ss_t5 -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table ss_t5 -POSTHOOK: type: DROPTABLE +PREHOOK: query: USE default +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: USE default +POSTHOOK: type: SWITCHDATABASE PREHOOK: query: -- create multiple file inputs (two enable multiple splits) create table ss_i_part (key int, value string) partitioned by (p string) PREHOOK: type: CREATETABLE @@ -86,11 +62,27 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select count(1) from ss_src2 tablesample(1 percent) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.22) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. 
+ +select count(1) from ss_src2 tablesample(1 percent) PREHOOK: type: QUERY PREHOOK: Input: default@ss_src2 #### A masked pattern was here #### -POSTHOOK: query: select count(1) from ss_src2 tablesample(1 percent) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.22) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. + +select count(1) from ss_src2 tablesample(1 percent) POSTHOOK: type: QUERY POSTHOOK: Input: default@ss_src2 #### A masked pattern was here #### @@ -4309,143 +4301,3 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 1000 -PREHOOK: query: drop table ss_src1 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@ss_src1 -PREHOOK: Output: default@ss_src1 -POSTHOOK: query: drop table ss_src1 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@ss_src1 -POSTHOOK: Output: default@ss_src1 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: drop table ss_src2 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@ss_src2 -PREHOOK: Output: default@ss_src2 -POSTHOOK: query: drop table ss_src2 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@ss_src2 -POSTHOOK: Output: default@ss_src2 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: drop table ss_src3 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@ss_src3 -PREHOOK: Output: default@ss_src3 -POSTHOOK: query: drop table ss_src3 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@ss_src3 -POSTHOOK: Output: default@ss_src3 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: drop table ss_i_part -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@ss_i_part -PREHOOK: Output: default@ss_i_part -POSTHOOK: query: drop table ss_i_part -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@ss_i_part -POSTHOOK: Output: default@ss_i_part -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: drop table ss_t3 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@ss_t3 -PREHOOK: Output: default@ss_t3 -POSTHOOK: query: drop table ss_t3 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@ss_t3 -POSTHOOK: Output: default@ss_t3 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: drop table ss_t4 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@ss_t4 -PREHOOK: Output: default@ss_t4 -POSTHOOK: query: drop table ss_t4 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@ss_t4 -POSTHOOK: Output: default@ss_t4 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, 
comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: drop table ss_t5 -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@ss_t5 -PREHOOK: Output: default@ss_t5 -POSTHOOK: query: drop table ss_t5 -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@ss_t5 -POSTHOOK: Output: default@ss_t5 -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]