HIVE-14913 : Add new unit tests (Vineet Garg via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan <[email protected]>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bbfc4d9d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bbfc4d9d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bbfc4d9d Branch: refs/heads/master Commit: bbfc4d9d25f6e28a490f410294022c8fa5f1b389 Parents: b303fca Author: Vineet Garg <[email protected]> Authored: Thu Oct 20 09:01:08 2016 -0700 Committer: Ashutosh Chauhan <[email protected]> Committed: Thu Oct 20 09:01:08 2016 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../queries/clientpositive/acid_table_stats.q | 1 + ql/src/test/queries/clientpositive/cte_1.q | 32 +- ql/src/test/queries/clientpositive/cte_2.q | 5 + .../clientpositive/current_date_timestamp.q | 102 + .../queries/clientpositive/join_acid_non_acid.q | 10 + .../test/queries/clientpositive/lvj_mapjoin.q | 9 + .../test/queries/clientpositive/orc_ppd_basic.q | 48 + .../schema_evol_orc_acid_part_update.q | 42 + .../queries/clientpositive/vectorization_0.q | 67 + .../clientpositive/acid_table_stats.q.out | 13 + ql/src/test/results/clientpositive/cte_1.q.out | 18948 +++++++++++ ql/src/test/results/clientpositive/cte_2.q.out | 28 + .../clientpositive/current_date_timestamp.q.out | 350 + .../results/clientpositive/llap/cte_1.q.out | 18948 +++++++++++ .../results/clientpositive/llap/cte_2.q.out | 28 + .../llap/join_acid_non_acid.q.out | 16 + .../clientpositive/llap/lvj_mapjoin.q.out | 47 + .../clientpositive/llap/orc_ppd_basic.q.out | 220 + .../llap/schema_evol_orc_acid_part_update.q.out | 1148 + .../clientpositive/llap/vectorization_0.q.out | 29603 ++++++++++++++++ .../clientpositive/spark/vectorization_0.q.out | 29599 ++++++++++++++++ .../clientpositive/vectorization_0.q.out | 29871 +++++++++++++++++ 23 files changed, 129135 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/bbfc4d9d/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 8868631..09833ff 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -468,6 +468,7 @@ minillaplocal.query.files=acid_globallimit.q,\ cbo_rp_windowing_2.q,\ cbo_subq_not_in.q,\ constprog_dpp.q,\ + current_date_timestamp.q,\ correlationoptimizer1.q,\ correlationoptimizer2.q,\ correlationoptimizer4.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/bbfc4d9d/ql/src/test/queries/clientpositive/acid_table_stats.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/acid_table_stats.q b/ql/src/test/queries/clientpositive/acid_table_stats.q index 23d0df4..9a3d97e 100644 --- a/ql/src/test/queries/clientpositive/acid_table_stats.q +++ b/ql/src/test/queries/clientpositive/acid_table_stats.q @@ -64,6 +64,7 @@ desc formatted acid partition(ds='2008-04-08'); set hive.compute.query.using.stats=false; select count(*) from acid where ds='2008-04-08'; +select count(key) from acid; set hive.compute.query.using.stats=true; explain select count(*) from acid where ds='2008-04-08'; http://git-wip-us.apache.org/repos/asf/hive/blob/bbfc4d9d/ql/src/test/queries/clientpositive/cte_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/cte_1.q b/ql/src/test/queries/clientpositive/cte_1.q index ca4132f..2956339 100644 --- a/ql/src/test/queries/clientpositive/cte_1.q +++ b/ql/src/test/queries/clientpositive/cte_1.q @@ -25,4 +25,34 @@ select * from (select key from q1) a; with q1 as ( select key from q2 where key = '5'), q2 as ( select key from src where key = '5') -select * from (select key from q1) a; \ No newline at end of file +select * from (select key from q1) a; + +with q1 as (select * from alltypesorc) + select s1.key, s1.value + from src s1 + where key > 3 + and s1.value in (select q1.cstring1 + from q1 + where cint > 900); + +with q1 as (select * from src) + select key, value, + max(value) over (partition by key) + from q1; + +with q1 as (select * from alltypesorc) + from q1 + select cint, cstring1, avg(csmallint) + group by cint, cstring1 with rollup; + +drop table if exists cte9_t1; +create table cte9_t1 as + with q1 as (select cint, cstring1 from alltypesorc where cint > 70) + select * from q1; + +drop table if exists cte10_t1; +create table cte10_t1 as + with q1 as (select cint, cstring1 from alltypesorc where cint > 70) + select * from q1; +with q1 as (select cint , cstring1 from alltypesorc where age < 50) + select * from cte10_t1; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/bbfc4d9d/ql/src/test/queries/clientpositive/cte_2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/cte_2.q b/ql/src/test/queries/clientpositive/cte_2.q index b49620a..d226116 100644 --- a/ql/src/test/queries/clientpositive/cte_2.q +++ b/ql/src/test/queries/clientpositive/cte_2.q @@ -53,4 +53,9 @@ select * from v1 drop view v1; +drop view if exists view_3; +create view view_3 as select cstring2, AVG(cint) from alltypesorc group by cstring2 limit 10; + +drop view if exists view_4; +create view view_4 as select s.cstring1, v.ctimestamp1 from alltypesorc s join alltypesorc v on (s.cstring2= v.cstring1); http://git-wip-us.apache.org/repos/asf/hive/blob/bbfc4d9d/ql/src/test/queries/clientpositive/current_date_timestamp.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/current_date_timestamp.q b/ql/src/test/queries/clientpositive/current_date_timestamp.q index 9bed885..a1157ce 100644 --- a/ql/src/test/queries/clientpositive/current_date_timestamp.q +++ b/ql/src/test/queries/clientpositive/current_date_timestamp.q @@ -1,4 +1,106 @@ select current_timestamp = current_timestamp(), current_date = current_date() from src limit 5; set hive.test.currenttimestamp =2012-01-01 01:02:03; + +--ensure that timestamp is same for all the rows while using current_timestamp() query should return single row +select count(*) from (select current_timestamp() from alltypesorc union select current_timestamp() from src limit 5 ) subq; + +select count(*) from (select current_timestamp() from alltypesorc + union + select current_timestamp() from src + limit 5 ) subqr; + +--current_timestamp() should appear as expression +explain extended select current_timestamp() from alltypesorc; + +--current_timestamp() + insert +create temporary table tmp_runtimeconstant( + ts1 timestamp, + ts2 timestamp, + dt date, + s string, + v varchar(50), + c char(50) + ); +insert into table tmp_runtimeconstant + select current_timestamp(), + cast(current_timestamp() as timestamp), + cast(current_timestamp() as date), + cast(current_timestamp() as string), + cast(current_timestamp() as varchar(50)), + cast(current_timestamp() as char(50)) + from alltypesorc limit 5; +select ts1 = ts2, + to_date(ts2) = dt, + s = v, + v = c +from tmp_runtimeconstant; + +--current_date() + insert +drop table if exists tmp_runtimeconstant; +create temporary table tmp_runtimeconstant(d date, t timestamp); +insert into table tmp_runtimeconstant + select current_date(), current_timestamp() from alltypesorc limit 5; +select to_date(t)=d from tmp_runtimeconstant; + +--current_timestamp() + current_date() + where +drop table if exists tmp_runtimeconstant; +create temporary table tmp_runtimeconstant(t timestamp, d date); +insert into table tmp_runtimeconstant + select current_timestamp(), current_date() from alltypesorc limit 5; +select count(*) from tmp_runtimeconstant + where current_timestamp() >= t + and current_date <> d; + + +--current_timestamp() as argument for unix_timestamp(), hour(), minute(), second() +select unix_timestamp(current_timestamp()), + hour(current_timestamp()), + minute(current_timestamp()), + second(current_timestamp()) + from alltypesorc limit 5; + +--current_timestamp() as argument for various date udfs +select to_date(current_timestamp()), + year(current_timestamp()), + month(current_timestamp()), + day(current_timestamp()), + weekofyear(current_timestamp()), + datediff(current_timestamp(),current_timestamp), + to_date(date_add(current_timestamp(), 31)), + to_date(date_sub(current_timestamp(), 31)), + last_day(current_timestamp()), + next_day(current_timestamp(),'FRIDAY') + from alltypesorc limit 5; + +--current_date() as argument for various date udfs +select to_date(current_date()), + year(current_date()), + month(current_date()), + day(current_date()), + weekofyear(current_date()), + datediff(current_date(),current_date), + to_date(date_add(current_date(), 31)), + to_date(date_sub(current_date(), 31)), + last_day(current_date()), + next_day(current_date(),'FRIDAY') + from alltypesorc limit 5; + +select current_timestamp() - current_timestamp(), + current_timestamp() - current_date(), + current_date() - current_timestamp(), + current_date() - current_date() + from alltypesorc limit 1; + +select ctimestamp1 - current_date(), + ctimestamp1- ctimestamp2, + current_date() - current_date(), + current_date() - ctimestamp2 +from alltypesorc +where ctimestamp1 is not null + and ctimestamp2 is not null + limit 5; select current_date, current_timestamp from src limit 5; + +set hive.support.quoted.identifiers=none; +select `[kv]+.+` from srcpart order by key; http://git-wip-us.apache.org/repos/asf/hive/blob/bbfc4d9d/ql/src/test/queries/clientpositive/join_acid_non_acid.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/join_acid_non_acid.q b/ql/src/test/queries/clientpositive/join_acid_non_acid.q index 866b466..53b4b9e 100644 --- a/ql/src/test/queries/clientpositive/join_acid_non_acid.q +++ b/ql/src/test/queries/clientpositive/join_acid_non_acid.q @@ -22,3 +22,13 @@ SET hive.doing.acid=false; SELECT t1.*, t2.* FROM orc_table t1 JOIN orc_update_table t2 ON t1.k1=t2.k1 ORDER BY t1.k1; + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.enforce.bucketing=true; +set hive.auto.convert.join=false; +set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; + +select t1.k1, t1.f1 from orc_table t1 + union all +select t2.k1, t2.f1 from orc_update_table t2; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/bbfc4d9d/ql/src/test/queries/clientpositive/lvj_mapjoin.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/lvj_mapjoin.q b/ql/src/test/queries/clientpositive/lvj_mapjoin.q index b726e2a..5fb9a23 100644 --- a/ql/src/test/queries/clientpositive/lvj_mapjoin.q +++ b/ql/src/test/queries/clientpositive/lvj_mapjoin.q @@ -36,3 +36,12 @@ select sub1.aid, sub1.avalue, sub2.bvalue from sub1,sub2 where sub1.aid=sub2.bid; +create temporary table tmp_lateral_view( + arst array<struct<age:int,name:string>> + ) stored as orc; +insert into table tmp_lateral_view + select array(named_struct('age',cint,'name',cstring1)) + from alltypesorc limit 10; +select arst.name, arst.age + from tmp_lateral_view + lateral view inline(arst) arst; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/bbfc4d9d/ql/src/test/queries/clientpositive/orc_ppd_basic.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_ppd_basic.q b/ql/src/test/queries/clientpositive/orc_ppd_basic.q index 43f2c85..c3ba24a 100644 --- a/ql/src/test/queries/clientpositive/orc_ppd_basic.q +++ b/ql/src/test/queries/clientpositive/orc_ppd_basic.q @@ -189,3 +189,51 @@ set hive.optimize.index.filter=false; select count(*) from orc_ppd where f=74.72; set hive.optimize.index.filter=true; select count(*) from orc_ppd where f=74.72; + + +create temporary table tmp_orcppd + stored as orc + as select ctinyint, csmallint, cint , cbigint, cfloat, cdouble, + cstring1, cstring2, ctimestamp1, ctimestamp2 + from alltypesorc limit 20; +insert into table tmp_orcppd + values(null, null, null, null, null, + null, null, null, null, null); + +drop table if exists tbl_orcppd_1_1; + +create table tbl_orcppd_1_1 as + select count(*) from tmp_orcppd + where ctimestamp1> current_timestamp() and + ctimestamp2 > current_timestamp() and + cstring1 like 'a*' and + cstring2 like 'a*'; + +drop table if exists tmp_orcppd; + +create temporary table tmp_orcppd + stored as orc + as select ctimestamp1, ctimestamp2 + from alltypesorc limit 10; +insert into table tmp_orcppd + values(null, null); + +drop table if exists tbl_orcppd_2_1; +create table tbl_orcppd_2_1 as + select count(*) from tmp_orcppd + where ctimestamp1 in (cast('2065-08-13 19:03:52' as timestamp), cast('2071-01-16 20:21:17' as timestamp), current_timestamp()); +set hive.optimize.index.filter=true; + +drop table if exists tmp_orcppd; +create temporary table tmp_orcppd + stored as orc + as select ts, da + from orc_ppd_staging ; +insert into table tmp_orcppd + values(null, null); + +drop table if exists tbl_orcppd_3_1; +create table tbl_orcppd_3_1 as + select count(*) from tmp_orcppd + group by ts, da + having ts in (select ctimestamp1 from alltypesorc limit 10); http://git-wip-us.apache.org/repos/asf/hive/blob/bbfc4d9d/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update.q b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update.q index d971649..e62d21c 100644 --- a/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update.q +++ b/ql/src/test/queries/clientpositive/schema_evol_orc_acid_part_update.q @@ -115,3 +115,45 @@ alter table partitioned_delete_2 partition(part=2) compact 'major'; select insert_num,part,a,b,c,d from partitioned_delete_2; DROP TABLE partitioned_delete_2; + +--following tests is moved from system tests +drop table if exists missing_ddl_2; +create table missing_ddl_2(name string, age int); +insert overwrite table missing_ddl_2 select value, key from srcbucket; +alter table missing_ddl_2 add columns (gps double); + +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.optimize.sort.dynamic.partition=true; + +DROP TABLE IF EXISTS all100kjson_textfile_orc; +CREATE TABLE all100kjson_textfile_orc ( + si smallint, + i int, + b bigint, + f float, + d double, + s string, + bo boolean, + ts timestamp) + PARTITIONED BY (t tinyint) + ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' + WITH SERDEPROPERTIES ('timestamp.formats'='yyyy-MM-dd\'T\'HH:mm:ss') + STORED AS TEXTFILE; + +INSERT INTO TABLE all100kjson_textfile_orc PARTITION (t) SELECT csmallint, cint, cbigint, cfloat, cdouble, cstring1, cboolean1, ctimestamp1, ctinyint FROM alltypesorc WHERE ctinyint > 0; + +ALTER TABLE all100kjson_textfile_orc + SET FILEFORMAT + INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' + OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' + SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'; + +INSERT INTO TABLE all100kjson_textfile_orc PARTITION (t) SELECT csmallint, cint, cbigint, cfloat, cdouble, cstring1, cboolean1, ctimestamp1, ctinyint FROM alltypesorc WHERE ctinyint < 1 and ctinyint > -50 ; + +-- HIVE-11977: Hive should handle an external avro table with zero length files present +DROP TABLE IF EXISTS emptyavro; +CREATE TABLE emptyavro (i int) + PARTITIONED BY (s string) + STORED AS AVRO; +load data local inpath '../../data/files/empty1.txt' into table emptyavro PARTITION (s='something'); +SELECT COUNT(*) from emptyavro; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/bbfc4d9d/ql/src/test/queries/clientpositive/vectorization_0.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorization_0.q b/ql/src/test/queries/clientpositive/vectorization_0.q index caa6a6a..f4cf8c4 100644 --- a/ql/src/test/queries/clientpositive/vectorization_0.q +++ b/ql/src/test/queries/clientpositive/vectorization_0.q @@ -205,3 +205,70 @@ WHERE (((cstring2 LIKE '%b%') AND ((cboolean2 = 1) AND (3569 = ctinyint)))); +EXPLAIN extended +select count(*) from alltypesorc + where (((cstring1 LIKE 'a%') or ((cstring1 like 'b%') or (cstring1 like 'c%'))) or + ((length(cstring1) < 50 ) and ((cstring1 like '%n') and (length(cstring1) > 0)))); + +select count(*) from alltypesorc + where (((cstring1 LIKE 'a%') or ((cstring1 like 'b%') or (cstring1 like 'c%'))) or + ((length(cstring1) < 50 ) and ((cstring1 like '%n') and (length(cstring1) > 0)))); + +set hive.vectorized.execution.enabled=true; +set hive.compute.query.using.stats=false; + +select min(ctinyint), max(ctinyint), sum(ctinyint), avg(ctinyint) from alltypesorc; +select min(csmallint), max(csmallint), sum(csmallint), avg(csmallint) from alltypesorc; +select min(cint), max(cint), sum(cint), avg(cint) from alltypesorc; +select min(cbigint), max(cbigint), sum(cbigint), avg(cbigint) from alltypesorc; +select min(cdouble), max(cdouble), sum(cdouble), avg(cdouble) from alltypesorc; +select distinct cstring1 from alltypesorc; +select distinct cstring1, ctinyint from alltypesorc; +select cstring1, max(cbigint) from alltypesorc + group by cstring1 + order by cstring1 desc; + +set hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.execution.reduce.groupby.enabled=true; +select cstring1, cint, ctinyint from alltypesorc + where cstring1 > 'religion'; +select cstring1, cint, ctinyint from alltypesorc where cstring1 <> 'religion'; + +select ctinyint, csmallint, cint, cbigint, cdouble, cdouble, cstring1 from alltypesorc + where ctinyint > 0 and csmallint > 0 and cint > 0 and cbigint > 0 and + cfloat > 0.0 and cdouble > 0.0 and cstring1 > 'm'; + +set hive.optimize.point.lookup=false; +--test to make sure multi and/or expressions are being vectorized +explain extended select * from alltypesorc where + (cint=49 and cfloat=3.5) or + (cint=47 and cfloat=2.09) or + (cint=45 and cfloat=3.02); + +set hive.optimize.point.lookup=true; +set hive.optimize.point.lookup.min=1; + +explain extended select * from alltypesorc where + (cint=49 and cfloat=3.5) or + (cint=47 and cfloat=2.09) or + (cint=45 and cfloat=3.02); + +explain extended select * from alltypesorc where + (cint=49 or cfloat=3.5) and + (cint=47 or cfloat=2.09) and + (cint=45 or cfloat=3.02); + +explain extended select count(*),cstring1 from alltypesorc where cstring1='biology' + or cstring1='history' + or cstring1='topology' group by cstring1 order by cstring1; + + +drop table if exists cast_string_to_int_1; +drop table if exists cast_string_to_int_2; + +create table cast_string_to_int_1 as select CAST(CAST(key as float) as string),value from srcbucket; +create table cast_string_to_int_2(i int,s string); +insert overwrite table cast_string_to_int_2 select * from cast_string_to_int_1; + +--moving ALL_1 system test here +select all key from src; http://git-wip-us.apache.org/repos/asf/hive/blob/bbfc4d9d/ql/src/test/results/clientpositive/acid_table_stats.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/acid_table_stats.q.out b/ql/src/test/results/clientpositive/acid_table_stats.q.out index d5c509c..58d391d 100644 --- a/ql/src/test/results/clientpositive/acid_table_stats.q.out +++ b/ql/src/test/results/clientpositive/acid_table_stats.q.out @@ -686,6 +686,19 @@ POSTHOOK: Input: default@acid POSTHOOK: Input: default@acid@ds=2008-04-08 #### A masked pattern was here #### 1000 +PREHOOK: query: select count(key) from acid +PREHOOK: type: QUERY +PREHOOK: Input: default@acid +PREHOOK: Input: default@acid@ds=2008-04-08 +PREHOOK: Input: default@acid@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(key) from acid +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid +POSTHOOK: Input: default@acid@ds=2008-04-08 +POSTHOOK: Input: default@acid@ds=2008-04-09 +#### A masked pattern was here #### +2000 PREHOOK: query: explain select count(*) from acid where ds='2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: explain select count(*) from acid where ds='2008-04-08'
