Modified: pig/branches/spark/test/e2e/pig/tests/multiquery.conf URL: http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/tests/multiquery.conf?rev=1654955&r1=1654954&r2=1654955&view=diff ============================================================================== --- pig/branches/spark/test/e2e/pig/tests/multiquery.conf (original) +++ pig/branches/spark/test/e2e/pig/tests/multiquery.conf Tue Jan 27 02:27:45 2015 @@ -20,29 +20,39 @@ #################################################################### # SUB: Multiquery # Please include a brief description here. +# MultiQuery_MapSplitee # - _TEST_ The first example; one that is defined in the bug with one split # - in the map phase # - _TEST_ Multiple side files, all in map phase. # - _TEST_ Two loads and two stores in map phase. # - _TEST_ One split added in reduce phase and map-only splitee. +# - _TEST_ Pig-976: Multi-query optimization throws ClassCastException +# - _TEST_ Pig-976: Multi-query optimization throws ClassCastException +# - _TEST_ Pig-976: Multi-query optimization throws ClassCastException +# MultiQuery_MapReduceSplitee # - _TEST_ One split added in reduce phase and one map-reduce splitee # - _TEST_ One split in reduce phase and two Map-Reduce splitees. # - _TEST_ Two loads and two stores in reduce phase -# - _TEST_ Explicit split with two side files. -# - _TEST_ Explicit split with order by and two side files. # - _TEST_ Implicit split with multiple side files. -# - _TEST_ Streaming with multiple stores. # - _TEST_ Script with intermediate stores. # - _TEST_ Implicit split with order by and multiple side files. # - _TEST_ Self join using fragment replicate join with multiple side files. -# - _TEST_ PigMix Test Case L12. # - _TEST_ One split in map phase and two Map-Reduce splitees with mixed combiners. # - _TEST_ One split in map phase and two Map-Reduce splitees without combiners. +# - _TEST_ Pig-983: multi-query optimization on multiple group bys following a join or cogroup +# MultiQuery_ExplicitSplit +# - _TEST_ Explicit split with two side files. +# - _TEST_ Explicit split with order by and two side files. # - _TEST_ Splittees with different map key types and nested splits. # - _TEST_ Splittees with different map key type. +# - _TEST_ PigMix Test Case L12. +# - _TEST_ PigMix Test Case L12 version 2 +# - _TEST_ PigMix Test Case L12 version 3 (modified to have different map key types in inner split) +# MultiQuery_Streaming +# - _TEST_ Streaming with multiple stores. # - _TEST_ Streaming in demux. # - _TEST_ Streaming in nested demux. -# - _TEST_ PigMix Test Case L12 version 2 + $cfg = { 'driver' => 'Pig', @@ -50,7 +60,7 @@ $cfg = { 'groups' => [ { - 'name' => 'MultiQuery', + 'name' => 'MultiQuery_MapSplitee', 'floatpostprocess' => 1, 'delimiter' => ' ', 'tests' => [ @@ -111,9 +121,56 @@ $cfg = { 'sql' => "select age, avg(gpa) from studenttab10k where gpa < 3.0 group by age; select age, avg(gpa) from studenttab10k where gpa < 3.0 group by age having avg(gpa) > 1.5;", }, + # Pig-976: Multi-query optimization throws ClassCastException { - # One split added in reduce phase and one map-reduce splitee 'num' => 5, + 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); + b = group a by name; + c = group a by age; + d = foreach b generate MAX(a.age); + e = foreach c generate group, SUM(a.gpa); + store d into ':OUTPATH:.1'; + store e into ':OUTPATH:.2'; #, + 'sql' => "select max(age) from studenttab10k group by name; + select age, sum(gpa) from studenttab10k group by age;", + }, + # Pig-976: Multi-query optimization throws ClassCastException + { + 'num' => 6, + 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); + b = group a all; + c = group a by age; + d = foreach b generate COUNT(a), MAX(a.age); + e = foreach c generate group, SUM(a.gpa); + store d into ':OUTPATH:.1'; + store e into ':OUTPATH:.2'; #, + 'sql' => "select count(*), max(age) from studenttab10k; + select age, sum(gpa) from studenttab10k group by age;", + }, + # Pig-976: Multi-query optimization throws ClassCastException + { + 'num' => 7, + 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); + b = group a by name; + c = group a by age; + d = foreach b generate MAX(a.age), group; + e = foreach c generate group, SUM(a.gpa); + store d into ':OUTPATH:.1'; + store e into ':OUTPATH:.2'; #, + 'sql' => "select max(age), name from studenttab10k group by name; + select age, sum(gpa) from studenttab10k group by age;", + }, + ] + }, + + { + 'name' => 'MultiQuery_MapReduceSplitee', + 'floatpostprocess' => 1, + 'delimiter' => ' ', + 'tests' => [ + { + # One split added in reduce phase and one map-reduce splitee + 'num' => 1, 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float); b = filter a by gpa < 3.0; c = group b by age; @@ -129,7 +186,7 @@ $cfg = { }, { # One split in reduce phase and two Map-Reduce splitees. - 'num' => 6, + 'num' => 2, 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float); b = filter a by gpa < 3.0; c = group b by age; @@ -149,7 +206,7 @@ $cfg = { }, { # Two loads and two stores in reduce phase - 'num' => 7, + 'num' => 3, 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); c = filter a by age < 20; @@ -170,44 +227,8 @@ $cfg = { where a.age < 20 and b.age < 20 and a.gpa < 3.0;", }, { - # Explicit split with two side files. - 'num'=> 8, - 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); - split a into a1 if name > 'm', a2 if name <= 'm'; - store a1 into ':OUTPATH:.1'; - store a2 into ':OUTPATH:.2'; - b = cogroup a1 by age, a2 by age; - c = foreach b generate flatten(a1), flatten(a2); - store c into ':OUTPATH:.3'; #, - 'sql' => "select name, age, gpa from studenttab10k where name > 'm'; - select name, age, gpa from studenttab10k where name <= 'm'; - select A.name, A.age, A.gpa, B.name, B.age, B.gpa - from (select * from studenttab10k where name > 'm') as A - join (select * from studenttab10k where name <= 'm') as B using (age);", - }, - { - # Explicit split with order by and two side files. - 'num'=> 9, - 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); - split a into a1 if age > 50, a2 if name < 'm'; - b2 = distinct a2; - b1 = order a1 by name; - store b2 into ':OUTPATH:.2'; - store b1 into ':OUTPATH:.1'; - c = cogroup b2 by name, b1 by name; - d = foreach c generate flatten(group), COUNT($1), COUNT($2); - store d into ':OUTPATH:.3'; #, - 'sql' => "select name, age, gpa from studenttab10k where age > 50 order by name; - select distinct name, age, gpa from studenttab10k where name < 'm'; - select name, count(A.name), count(B.name) - from (select distinct name from studenttab10k where name < 'm') as A - join (select name from studenttab10k where age > 50) as B using (name) group by name;", - 'verify_with_pig' => 1, - 'verify_pig_version' => 'old', - }, - { # Implicit split with multiple side files. - 'num'=> 10, + 'num'=> 4, 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); b = filter a by age > 50; c = filter a by gpa > 3.0; @@ -226,31 +247,8 @@ $cfg = { join (select * from studenttab10k where gpa > 3.0) as B using (name) where A.age < 75;", }, { - # Streaming with multiple stores - 'num' => 11, - 'pig' => q# define CMD1 `perl -ne 'print $_;'`; - define CMD2 `perl -ne 'print $_;'`; - A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); - B = stream A through CMD1 as (name, age, gpa); - store B into ':OUTPATH:.1'; - C = stream B through CMD2 as (name, age, gpa); - D = JOIN B by name, C by name; - store D into ':OUTPATH:.2'; #, - 'pig_win' => q# define CMD1 `perl -ne "print $_;"`; - define CMD2 `perl -ne "print $_;"`; - A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); - B = stream A through CMD1 as (name, age, gpa); - store B into ':OUTPATH:.1'; - C = stream B through CMD2 as (name, age, gpa); - D = JOIN B by name, C by name; - store D into ':OUTPATH:.2'; #, - 'sql' => "select name, age, gpa from studenttab10k; - select A.name, A.age, A.gpa, B.name, B.age, B.gpa - from studenttab10k as A join studenttab10k as B using(name);", - }, - { # With intermediate store - 'num' => 12, + 'num' => 5, 'pig' => q# A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); store A into ':OUTPATH:.1'; B = load ':OUTPATH:.1'; @@ -260,7 +258,7 @@ $cfg = { }, { # Implicit split with order by and multiple side files. - 'num'=>13, + 'num'=> 6, 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); b = filter a by age > 50; c = filter a by gpa > 3.0; @@ -283,7 +281,7 @@ $cfg = { }, # Self join using fragment replicate join with multiple side files { - 'num' => 14, + 'num' => 7, 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double); b = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double); c = filter a by age > 50; @@ -298,29 +296,9 @@ $cfg = { from studenttab10k as a join studenttab10k as b using(gpa) where a.age > 50 and b.gpa > 3.0;", }, - # PigMix Test Case L12 - { - 'num' => 15, - 'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); - b = foreach a generate name, age, contributions; - split b into c1 if age > 50, c2 if age <= 50; - split c1 into d1 if name < 'm', d2 if name >= 'm'; - e = group c2 by name; - e1 = foreach e generate group, SUM(c2.contributions); - store e1 into ':OUTPATH:.1'; - f = group d1 by name; - f1 = foreach f generate group, MAX(d1.contributions); - store f1 into ':OUTPATH:.2'; - g = group d2 by name; - g1 = foreach g generate group, COUNT(d2); - store g1 into ':OUTPATH:.3'; #, - 'sql' => "select name, sum(contributions) from votertab10k where age <= 50 group by name; - select name, max(contributions) from votertab10k where (age > 50 and name < 'm') group by name; - select name, count(*) from votertab10k where (age > 50 and name >= 'm') group by name;", - }, # One split in map phase and two Map-Reduce splitees with mixed combiner. { - 'num' => 16, + 'num' => 8, 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float); b = filter a by gpa < 3.0; c = filter a by gpa >= 3.0; @@ -335,7 +313,7 @@ $cfg = { }, # One split in map phase and two Map-Reduce splitees without combiner. { - 'num' => 17, + 'num' => 9, 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name: chararray, age: int, gpa: float); b = filter a by gpa < 3.0; c = filter a by gpa >= 3.0; @@ -348,9 +326,68 @@ $cfg = { 'sql' => "select age, max(gpa) + min(gpa) from studenttab10k where gpa < 3.0 group by age; select age, max(gpa) - min(gpa) from studenttab10k where gpa >= 3.0 group by age;", }, + # Pig-983: multi-query optimization on multiple group bys following a join or cogroup + { + 'num' => 10, + 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); + b = load ':INPATH:/singlefile/votertab10k' as (name:chararray, age:int, registration, contributions:double); + c = join a by name, b by name; + d = group c by a::age; + e = group c by b::age; + d1 = foreach d generate group, COUNT(c), MAX(c.a::gpa); + e1 = foreach e generate group, SUM(c.b::contributions); + store d1 into ':OUTPATH:.1'; + store e1 into ':OUTPATH:.2'; #, + 'sql' => "select a.age, count(*), max(a.gpa) from studenttab10k as a inner join votertab10k as b on (a.name = b.name) group by a.age; + select b.age, sum(b.contributions) from studenttab10k as a inner join votertab10k as b on (a.name = b.name) group by b.age;", + }, + ] + }, + + { + 'name' => 'MultiQuery_ExplicitSplit', + 'floatpostprocess' => 1, + 'delimiter' => ' ', + 'tests' => [ + { + # Explicit split with two side files. + 'num'=> 1, + 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); + split a into a1 if name > 'm', a2 if name <= 'm'; + store a1 into ':OUTPATH:.1'; + store a2 into ':OUTPATH:.2'; + b = cogroup a1 by age, a2 by age; + c = foreach b generate flatten(a1), flatten(a2); + store c into ':OUTPATH:.3'; #, + 'sql' => "select name, age, gpa from studenttab10k where name > 'm'; + select name, age, gpa from studenttab10k where name <= 'm'; + select A.name, A.age, A.gpa, B.name, B.age, B.gpa + from (select * from studenttab10k where name > 'm') as A + join (select * from studenttab10k where name <= 'm') as B using (age);", + }, + { + # Explicit split with order by and two side files. + 'num'=> 2, + 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); + split a into a1 if age > 50, a2 if name < 'm'; + b2 = distinct a2; + b1 = order a1 by name; + store b2 into ':OUTPATH:.2'; + store b1 into ':OUTPATH:.1'; + c = cogroup b2 by name, b1 by name; + d = foreach c generate flatten(group), COUNT($1), COUNT($2); + store d into ':OUTPATH:.3'; #, + 'sql' => "select name, age, gpa from studenttab10k where age > 50 order by name; + select distinct name, age, gpa from studenttab10k where name < 'm'; + select name, count(A.name), count(B.name) + from (select distinct name from studenttab10k where name < 'm') as A + join (select name from studenttab10k where age > 50) as B using (name) group by name;", + 'verify_with_pig' => 1, + 'verify_pig_version' => 'old', + }, # Splittees with different map key types and nested splits { - 'num' => 18, + 'num' => 3, 'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name: chararray, age:int, registration, contributions:double); b = foreach a generate name, age, contributions; split b into c1 if age > 10, c2 if age <= 60; @@ -370,7 +407,7 @@ $cfg = { }, # Splittees with different map key types { - 'num' => 19, + 'num' => 4, 'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name: chararray, age:int, registration, contributions:double); b = foreach a generate name, age, contributions; split b into c1 if age > 50, c2 if age <= 50; @@ -383,50 +420,29 @@ $cfg = { 'sql' => "select name, sum(contributions) from votertab10k where age <= 50 group by name; select age, max(contributions) from votertab10k where age > 50 group by age;", }, - # Streaming in demux - { - 'num' => 20, - 'execonly' => 'mapred,tez', - 'pig' => q# - define CMD `perl GroupBy.pl '\t' 0` ship(':SCRIPTHOMEPATH:/GroupBy.pl'); - A = load ':INPATH:/singlefile/studenttab10k'; - split A into A1 if $0 < 'm', A2 if $0 >= 'm'; - B = group A1 by $0; - C = foreach B generate flatten(A1); - D = stream C through CMD; - store D into ':OUTPATH:.1'; - E = group A2 by $0; - F = foreach E generate group, COUNT(A2); - store F into ':OUTPATH:.2';#, - 'sql' => "select name, count(*) from studenttab10k where name < 'm' group by name; - select name, count(*) from studenttab10k where name >= 'm' group by name;", - }, - # Streaming in nested demux + # PigMix Test Case L12 { - 'num' => 21, - 'execonly' => 'mapred,tez', - 'pig' => q# - define CMD `perl GroupBy.pl '\t' 0` ship(':SCRIPTHOMEPATH:/GroupBy.pl'); - A = load ':INPATH:/singlefile/studenttab10k'; - split A into A1 if $0 < 'm', A2 if $0 >= 'm'; - split A1 into A3 if $1 < 30, A4 if $1 >= 30; - B = group A3 by $0; - C = foreach B generate flatten(A3); - D = stream C through CMD; - store D into ':OUTPATH:.1'; - E = group A2 by $0; - F = foreach E generate group, COUNT(A2); - store F into ':OUTPATH:.2'; - G = group A4 by $0; - H = foreach G generate group, COUNT(A4); - store H into ':OUTPATH:.3';#, - 'sql' => "select name, count(*) from studenttab10k where name < 'm' and age < 30 group by name; - select name, count(*) from studenttab10k where name >= 'm' group by name; - select name, count(*) from studenttab10k where name < 'm' and age >= 30 group by name;", + 'num' => 5, + 'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); + b = foreach a generate name, age, contributions; + split b into c1 if age > 50, c2 if age <= 50; + split c1 into d1 if name < 'm', d2 if name >= 'm'; + e = group c2 by name; + e1 = foreach e generate group, SUM(c2.contributions); + store e1 into ':OUTPATH:.1'; + f = group d1 by name; + f1 = foreach f generate group, MAX(d1.contributions); + store f1 into ':OUTPATH:.2'; + g = group d2 by name; + g1 = foreach g generate group, COUNT(d2); + store g1 into ':OUTPATH:.3'; #, + 'sql' => "select name, sum(contributions) from votertab10k where age <= 50 group by name; + select name, max(contributions) from votertab10k where (age > 50 and name < 'm') group by name; + select name, count(*) from votertab10k where (age > 50 and name >= 'm') group by name;", }, # PigMix Test Case L12 version 2 { - 'num' => 22, + 'num' => 6, 'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); b = foreach a generate name, age, contributions; split b into c1 if age > 50, c2 if age <= 50; @@ -446,7 +462,7 @@ $cfg = { }, # PigMix Test Case L12 version 3 (modified to have different map key types in inner split) { - 'num' => 23, + 'num' => 7, 'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); b = foreach a generate name, age, contributions; split b into c1 if age > 50, c2 if age <= 50; @@ -464,63 +480,81 @@ $cfg = { select name, age, count(*) from votertab10k where (age > 50 and name >= 'm') group by name, age; select name, age, sum(contributions) from votertab10k where age <= 50 group by name, age;", }, - # Pig-976: Multi-query optimization throws ClassCastException - { - 'num' => 24, - 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); - b = group a by name; - c = group a by age; - d = foreach b generate MAX(a.age); - e = foreach c generate group, SUM(a.gpa); - store d into ':OUTPATH:.1'; - store e into ':OUTPATH:.2'; #, - 'sql' => "select max(age) from studenttab10k group by name; - select age, sum(gpa) from studenttab10k group by age;", - }, - # Pig-976: Multi-query optimization throws ClassCastException + ] + }, + + { + 'name' => 'MultiQuery_Streaming', + 'floatpostprocess' => 1, + 'delimiter' => ' ', + 'tests' => [ { - 'num' => 25, - 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); - b = group a all; - c = group a by age; - d = foreach b generate COUNT(a), MAX(a.age); - e = foreach c generate group, SUM(a.gpa); - store d into ':OUTPATH:.1'; - store e into ':OUTPATH:.2'; #, - 'sql' => "select count(*), max(age) from studenttab10k; - select age, sum(gpa) from studenttab10k group by age;", + # Streaming with multiple stores + 'num' => 1, + 'pig' => q# define CMD1 `perl -ne 'print $_;'`; + define CMD2 `perl -ne 'print $_;'`; + A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); + B = stream A through CMD1 as (name, age, gpa); + store B into ':OUTPATH:.1'; + C = stream B through CMD2 as (name, age, gpa); + D = JOIN B by name, C by name; + store D into ':OUTPATH:.2'; #, + 'pig_win' => q# define CMD1 `perl -ne "print $_;"`; + define CMD2 `perl -ne "print $_;"`; + A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); + B = stream A through CMD1 as (name, age, gpa); + store B into ':OUTPATH:.1'; + C = stream B through CMD2 as (name, age, gpa); + D = JOIN B by name, C by name; + store D into ':OUTPATH:.2'; #, + 'sql' => "select name, age, gpa from studenttab10k; + select A.name, A.age, A.gpa, B.name, B.age, B.gpa + from studenttab10k as A join studenttab10k as B using(name);", }, - # Pig-983: multi-query optimization on multiple group bys following a join or cogroup + # Streaming in demux { - 'num' => 26, - 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); - b = load ':INPATH:/singlefile/votertab10k' as (name:chararray, age:int, registration, contributions:double); - c = join a by name, b by name; - d = group c by a::age; - e = group c by b::age; - d1 = foreach d generate group, COUNT(c), MAX(c.a::gpa); - e1 = foreach e generate group, SUM(c.b::contributions); - store d1 into ':OUTPATH:.1'; - store e1 into ':OUTPATH:.2'; #, - 'sql' => "select a.age, count(*), max(a.gpa) from studenttab10k as a inner join votertab10k as b on (a.name = b.name) group by a.age; - select b.age, sum(b.contributions) from studenttab10k as a inner join votertab10k as b on (a.name = b.name) group by b.age;", + 'num' => 2, + 'execonly' => 'mapred,tez', + 'pig' => q# + define CMD `perl GroupBy.pl '\t' 0` ship(':SCRIPTHOMEPATH:/GroupBy.pl'); + A = load ':INPATH:/singlefile/studenttab10k'; + split A into A1 if $0 < 'm', A2 if $0 >= 'm'; + B = group A1 by $0; + C = foreach B generate flatten(A1); + D = stream C through CMD; + store D into ':OUTPATH:.1'; + E = group A2 by $0; + F = foreach E generate group, COUNT(A2); + store F into ':OUTPATH:.2';#, + 'sql' => "select name, count(*) from studenttab10k where name < 'm' group by name; + select name, count(*) from studenttab10k where name >= 'm' group by name;", }, - # Pig-976: Multi-query optimization throws ClassCastException + # Streaming in nested demux { - 'num' => 27, - 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); - b = group a by name; - c = group a by age; - d = foreach b generate MAX(a.age), group; - e = foreach c generate group, SUM(a.gpa); - store d into ':OUTPATH:.1'; - store e into ':OUTPATH:.2'; #, - 'sql' => "select max(age), name from studenttab10k group by name; - select age, sum(gpa) from studenttab10k group by age;", + 'num' => 3, + 'execonly' => 'mapred,tez', + 'pig' => q# + define CMD `perl GroupBy.pl '\t' 0` ship(':SCRIPTHOMEPATH:/GroupBy.pl'); + A = load ':INPATH:/singlefile/studenttab10k'; + split A into A1 if $0 < 'm', A2 if $0 >= 'm'; + split A1 into A3 if $1 < 30, A4 if $1 >= 30; + B = group A3 by $0; + C = foreach B generate flatten(A3); + D = stream C through CMD; + store D into ':OUTPATH:.1'; + E = group A2 by $0; + F = foreach E generate group, COUNT(A2); + store F into ':OUTPATH:.2'; + G = group A4 by $0; + H = foreach G generate group, COUNT(A4); + store H into ':OUTPATH:.3';#, + 'sql' => "select name, count(*) from studenttab10k where name < 'm' and age < 30 group by name; + select name, count(*) from studenttab10k where name >= 'm' group by name; + select name, count(*) from studenttab10k where name < 'm' and age >= 30 group by name;", }, - ] # end of tests + ] # end of tests }, + ] # end of groups } -; - +; \ No newline at end of file
Modified: pig/branches/spark/test/e2e/pig/tests/nightly.conf URL: http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/tests/nightly.conf?rev=1654955&r1=1654954&r2=1654955&view=diff ============================================================================== --- pig/branches/spark/test/e2e/pig/tests/nightly.conf (original) +++ pig/branches/spark/test/e2e/pig/tests/nightly.conf Tue Jan 27 02:27:45 2015 @@ -1785,7 +1785,7 @@ store b into ':OUTPATH:';\, }, { - 'name' => 'Types', + 'name' => 'Types_Constants', 'tests' => [ { # constants @@ -1794,43 +1794,87 @@ store b into ':OUTPATH:';\, b = foreach a generate age + 1 + 0.2f + 253645L, gpa+1; store b into ':OUTPATH:';\, }, - { - # NULL and cast - 'num' => 2, - 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); + { + # constants + 'num' => 2, + 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); +b = foreach a generate -(age + 1 + 0.2f + 253645L), -(gpa+1); +store b into ':OUTPATH:';\, + }, + { + # test precision for doubles is atleast 15 digits + 'num' => 3, + 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); +b = foreach a generate 0.123456789123456+0.123456789123456; +store b into ':OUTPATH:';\, + }, + ] + }, + + { + 'name' => 'Types_Cast', + 'tests' => [ + { + # NULL and cast + 'num' => 1, + 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); b = foreach a generate (int)((int)gpa/((int)gpa - 1)) as norm_gpa:int; c = foreach b generate (norm_gpa is null? 0 :norm_gpa); store c into ':OUTPATH:';\, - # 'expected_err_regex' => "Encountered Warning DIVIDE_BY_ZERO 2387 time.*", - # Driver does currently not support both 'sql' and 'expected_...' verification directives. - }, + # 'expected_err_regex' => "Encountered Warning DIVIDE_BY_ZERO 2387 time.*", + # Driver does currently not support both 'sql' and 'expected_...' verification directives. + }, + { + # Not NULL and cast + 'num' => 2, + 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); +b = foreach a generate (int)((int)gpa/((int)gpa - 1)) as norm_gpa:int; +c = foreach b generate (norm_gpa is not null? norm_gpa: 0); +store c into ':OUTPATH:';\, + }, + # boolean cast + { + 'num' => 3, + 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:boolean); +b = foreach a generate instate, true, false; +store b into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray); +b = foreach a generate instate, 'true', 'false'; +store b into ':OUTPATH:';\, + }, + ] + }, + + { + 'name' => 'Types_ArithmeticCast', + 'tests' => [ { # arithmetic operators and SIZE for int, double and size and concat operators for chararrays - 'num' => 3, + 'num' => 1, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); b = foreach a generate age, gpa, age % 25, age + 25, age - 25, age/2, age * 2, SIZE(age), gpa + 10.1, gpa - 1.1 , gpa / 1.2, gpa * 2.5, SIZE(gpa), SIZE(name), CONCAT(name, 'test'); store b into ':OUTPATH:';\, }, { # arithmetic operators and SIZE for long, float and size and concat operators for bytearrays - 'num' => 4, + 'num' => 2, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float); b = foreach a generate age, gpa, age % 2L, age + 2500000000L, age - 2500000000L, age/2L, age * 250000000L, SIZE(age), gpa + 10.1f, gpa - 1.1f , gpa / 1.2f, gpa * 2.6f, SIZE(gpa), SIZE(name), CONCAT(name, name); store b into ':OUTPATH:';\, }, { - # equlity and implicit cast - 'num' => 5, - 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa); + # equality and implicit cast + 'num' => 3, + 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa); b = filter a by age == '25' and gpa < 3; store b into ':OUTPATH:';\, - }, + }, { # will need to test against previous version of pig # because in pig currently count includes nulls - this affects # avg - 'num' => 6, + 'num' => 4, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); b = group a ALL; c = foreach b generate SUM(a.age), MIN(a.age), MAX(a.age), AVG(a.age), MIN(a.name), MAX(a.name), SUM(a.gpa), MIN(a.gpa), MAX(a.gpa), AVG(a.gpa); @@ -1840,7 +1884,7 @@ store c into ':OUTPATH:';\, }, { # sum, min, max, avg for long and float (declared) - 'num' => 7, + 'num' => 5, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float); b = group a ALL; c = foreach b generate SUM(a.age), MIN(a.age), MAX(a.age), AVG(a.age), SUM(a.gpa), MIN(a.gpa), MAX(a.gpa), AVG(a.gpa); @@ -1848,21 +1892,27 @@ store c into ':OUTPATH:';\, }, { # Explicit casts - arithmetic operators and SIZE for int, double and size and concat operators for chararrays - 'num' => 8, + 'num' => 6, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa); b = foreach a generate (int)age % 25, (int)age + 25, (int)age - 25, (int)age/2, (int)age * 2, SIZE((int)age), (double)gpa + 10.1, (double)gpa - 1.1 , (double)gpa / 1.2, (double)gpa * 2.5, SIZE((double)gpa), SIZE((chararray)name), CONCAT((chararray)name, 'test'); store b into ':OUTPATH:';\, }, { # Explicit casts - arithmetic operators and SIZE for long, float - 'num' => 9, + 'num' => 7, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa); b = foreach a generate (long)age, (long)age % 2L, (long)age + 2500000000L, (long)age - 2500000000L, (long)age/2L, (long)age * 250000000L, SIZE((long)age), (float)gpa + 10.1f, (float)gpa - 1.1f , (float)gpa / 1.2f, (float)gpa * 2.6f, SIZE((float)gpa); store b into ':OUTPATH:';\, }, + ] + }, + + { + 'name' => 'Types_Filter', + 'tests' => [ { # Filter is null for chararray and double and is not null for int - 'num' => 10, + 'num' => 1, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); b = filter a by name is null and age is not null and gpa is null; c = group b ALL; @@ -1871,7 +1921,7 @@ store d into ':OUTPATH:';\, }, { # Filter is not null for chararray and double and is null for int - 'num' => 11, + 'num' => 2, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); b = filter a by name is not null and age is null and gpa is not null; c = group b ALL; @@ -1880,7 +1930,7 @@ store d into ':OUTPATH:';\, }, { # Filter is null for bytearray and float and is not null for long - 'num' => 12, + 'num' => 3, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float); b = filter a by name is null and age is not null and gpa is null; c = group b ALL; @@ -1889,55 +1939,54 @@ store d into ':OUTPATH:';\, }, { # Filter is not null for bytearray and float and is null for long - 'num' => 13, + 'num' => 4, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float); b = filter a by name is not null and age is null and gpa is not null; c = group b ALL; d = foreach c generate COUNT(b); store d into ':OUTPATH:';\, }, + ] + }, + + { + 'name' => 'Types_Order', + 'tests' => [ { - # test that sorting is based on the type for chararray, int and double - 'num' => 14, - 'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); + # test that sorting is based on the type for chararray, int and double + 'num' => 1, + 'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); b = order a by name, age, gpa; store b into ':OUTPATH:';\, 'sortArgs' => ['-t', ' ', '-k', '1,1', '-k', '2n,3n'], - }, - { - # test that sorting descending is based on the type for chararray, int and double - 'num' => 15, - 'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); + }, + { + # test that sorting descending is based on the type for chararray, int and double + 'num' => 2, + 'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); b = order a by name desc, age desc, gpa desc; store b into ':OUTPATH:';\, 'sortArgs' => ['-t', ' ', '-k', '1r,1r', '-k', '2nr,3nr'], - }, - { - # test that sorting is based on the type for bytearray, long and float - 'num' => 16, - 'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float); + }, + { + # test that sorting is based on the type for bytearray, long and float + 'num' => 3, + 'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float); b = order a by name, age, gpa; store b into ':OUTPATH:';\, 'sortArgs' => ['-t', ' ', '-k', '1,1', '-k', '2n,3n'], - }, - { - # test that sorting descending is based on the type for chararray, age and float - 'num' => 17, - 'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float); + }, + { + # test that sorting descending is based on the type for chararray, age and float + 'num' => 4, + 'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float); b = order a by name desc, age desc, gpa desc; store b into ':OUTPATH:';\, 'sortArgs' => ['-t', ' ', '-k', '1r,1r', '-k', '2nr,3nr'], - }, - { - # test precision for doubles is atleast 15 digits - 'num' => 18, - 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); -b = foreach a generate 0.123456789123456+0.123456789123456; -store b into ':OUTPATH:';\, - }, + }, { # order by string - 'num' => 20, + 'num' => 5, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); b = order a by name; store b into ':OUTPATH:';\, @@ -1945,7 +1994,7 @@ store b into ':OUTPATH:';\, }, { # order by string desc - 'num' => 21, + 'num' => 6, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); b = order a by name desc; store b into ':OUTPATH:';\, @@ -1953,7 +2002,7 @@ store b into ':OUTPATH:';\, }, { # order by int - 'num' => 22, + 'num' => 7, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); b = order a by age; store b into ':OUTPATH:';\, @@ -1961,7 +2010,7 @@ store b into ':OUTPATH:';\, }, { # order by int desc - 'num' => 23, + 'num' => 8, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); b = order a by age desc; store b into ':OUTPATH:';\, @@ -1969,7 +2018,7 @@ store b into ':OUTPATH:';\, }, { # order by long - 'num' => 24, + 'num' => 9, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:long, gpa:double); b = order a by age; store b into ':OUTPATH:';\, @@ -1977,7 +2026,7 @@ store b into ':OUTPATH:';\, }, { # order by long desc - 'num' => 25, + 'num' => 10, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:long, gpa:double); b = order a by age desc; store b into ':OUTPATH:';\, @@ -1985,7 +2034,7 @@ store b into ':OUTPATH:';\, }, { # order by float - 'num' => 26, + 'num' => 11, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:float); b = order a by gpa; store b into ':OUTPATH:';\, @@ -1993,7 +2042,7 @@ store b into ':OUTPATH:';\, }, { # order by float desc - 'num' => 27, + 'num' => 12, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:float); b = order a by gpa desc; store b into ':OUTPATH:';\, @@ -2001,7 +2050,7 @@ store b into ':OUTPATH:';\, }, { # order by double - 'num' => 28, + 'num' => 13, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); b = order a by gpa; store b into ':OUTPATH:';\, @@ -2009,7 +2058,7 @@ store b into ':OUTPATH:';\, }, { # order by double desc - 'num' => 29, + 'num' => 14, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); b = order a by gpa desc; store b into ':OUTPATH:';\, @@ -2017,7 +2066,7 @@ store b into ':OUTPATH:';\, }, { # order by * - 'num' => 30, + 'num' => 15, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); b = order a by *; store b into ':OUTPATH:';\, @@ -2025,14 +2074,20 @@ store b into ':OUTPATH:';\, }, { # order by * desc - 'num' => 31, + 'num' => 16, 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); b = order a by * desc; store b into ':OUTPATH:';\, 'sortArgs' => ['-t', ' ', '-k', '1r,1r', '-k', '2nr,3nr'], }, + ] + }, + + { + 'name' => 'Types_CoGroup', + 'tests' => [ { - 'num' => 32, + 'num' => 1, 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double); b = load ':INPATH:/singlefile/votertab10k' as (name:chararray, age:int, registration:chararray, contributions:double); c = filter a by age < 20; @@ -2042,7 +2097,7 @@ f = foreach e generate flatten (c), flat store f into ':OUTPATH:';\, }, { - 'num' => 33, + 'num' => 2, 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double); b = load ':INPATH:/singlefile/votertab10k' as (name:chararray, age:int, registration:chararray, contributions:double); c = filter a by age < 20; @@ -2051,62 +2106,9 @@ e = cogroup c by age, d by age; f = foreach e generate flatten (c), flatten(d); store f into ':OUTPATH:';\, }, - { - 'num' => 34, - 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:long, gpa:double); -b = load ':INPATH:/singlefile/votertab10k' as (name:chararray, age:long, registration:chararray, contributions:double); -c = filter a by age < 20; -d = filter b by age < 20; -e = cogroup c by age, d by age; -f = foreach e generate flatten (c), flatten(d); -store f into ':OUTPATH:';\, - }, - { - 'num' => 35, - 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:float, gpa:double); -b = load ':INPATH:/singlefile/votertab10k' as (name:chararray, age:float, registration:chararray, contributions:double); -c = filter a by age < 20; -d = filter b by age < 20; -e = cogroup c by age, d by age; -f = foreach e generate flatten (c), flatten(d); -store f into ':OUTPATH:';\, - }, - { - 'num' => 36, - 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:double, gpa:double); -b = load ':INPATH:/singlefile/votertab10k' as (name:chararray, age:double, registration:chararray, contributions:double); -c = filter a by age < 20; -d = filter b by age < 20; -e = cogroup c by age, d by age; -f = foreach e generate flatten (c), flatten(d); -store f into ':OUTPATH:';\, - }, - { - # NULL and cast - 'num' => 37, - 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); -b = foreach a generate (int)((int)gpa/((int)gpa - 1)) as norm_gpa:int; -c = foreach b generate (norm_gpa is not null? norm_gpa: 0); -store c into ':OUTPATH:';\, - }, - { - # constants - 'num' => 38, - 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double); -b = foreach a generate -(age + 1 + 0.2f + 253645L), -(gpa+1); -store b into ':OUTPATH:';\, - }, - { - 'num' => 39, - 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:boolean); -b = foreach a generate instate, true, false; -store b into ':OUTPATH:';\, - 'verify_pig_script' => q\a = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:chararray); -b = foreach a generate instate, 'true', 'false'; -store b into ':OUTPATH:';\, - }, ] }, + { 'name' => 'Limit', 'tests' => [ @@ -3854,12 +3856,18 @@ store b into ':OUTPATH:';\, 'pig' => q\ register ':SCRIPTHOMEPATH:/ruby/morerubyudfs.rb' using jruby as myfuncs; a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:int, gpa:double); -b = foreach (group a all) generate FLATTEN(myfuncs.AppendIndex(a)); +b = foreach (group a all){ + a1= order a by name,age,gpa; + generate FLATTEN(myfuncs.AppendIndex(a1)); +} store b into ':OUTPATH:';\, 'verify_pig_script' => q\ register :FUNCPATH:/testudf.jar; a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:int, gpa:double); -b = foreach (group a all) generate FLATTEN(org.apache.pig.test.udf.evalfunc.AppendIndex(a)); +b = foreach (group a all){ + a1=order a by name,age,gpa; + generate FLATTEN(org.apache.pig.test.udf.evalfunc.AppendIndex(a1)); +} store b into ':OUTPATH:';\, }, ] @@ -3897,7 +3905,7 @@ store b into ':OUTPATH:';\, # test long and float square, plus two references to the same UDF with different schemas 'num' => 3, 'floatpostprocess' => 1, - 'delimiter' => ' ', + 'delimiter' => ' ', 'pig' => q\ register ':SCRIPTHOMEPATH:/cpython/scriptingudf.py' using streaming_python as myfuncs; a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:long, gpa:double); @@ -5403,6 +5411,111 @@ store a into ':OUTPATH:';\, 'name' => 'Rank', 'tests' => [ { + 'num' => 1, + 'execonly' => 'mapred,tez', + 'pig' => q\ + SET default_parallel 7; + A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); + B = rank A; + C = foreach B generate rank_A,a,b,c; + store C into ':OUTPATH:'; + \, + 'verify_pig_script' => q\ + A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); + C = foreach A generate rownumber,a,b,c; + store C into ':OUTPATH:'; + \, + }, { + 'num' =>2, + 'execonly' => 'mapred,tez', + 'pig' => q\ + SET default_parallel 9; + A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); + B = rank A by b DESC,a ASC; + C = foreach B generate rank_A,b,a; + store C into ':OUTPATH:'; + \, + 'verify_pig_script' => q\ + A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); + C = foreach A generate rankbdaa,b,a; + store C into ':OUTPATH:'; + \, + }, { + 'num' =>3, + 'execonly' => 'mapred,tez', + 'pig' => q\ + SET default_parallel 7; + A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); + B = rank A by c ASC,b DESC; + C = foreach B generate rank_A,c,b; + store C into ':OUTPATH:'; + \, + 'verify_pig_script' => q\ + A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); + C = foreach A generate rankcabd,c,b; + store C into ':OUTPATH:'; + \, + }, { + 'num' => 4, + 'execonly' => 'mapred,tez', + 'pig' => q\ + SET default_parallel 25; + A = LOAD ':INPATH:/singlefile/biggish' using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray); + B = rank A; + C = order B by rank_A; + D = foreach C generate rank_A,rownumber; + store D into ':OUTPATH:'; + \, + 'verify_pig_script' => q\ + A = LOAD ':INPATH:/singlefile/biggish' using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray); + D = foreach A generate idx,rownumber; + store D into ':OUTPATH:'; + \, + }, { + 'num' => 5, + 'execonly' => 'mapred,tez', + 'pig' => q\ + SET default_parallel 11; + SET pig.splitCombination false; + A = LOAD ':INPATH:/singlefile/biggish' using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray); + B = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); + C = join A by rownumber, B by rownumber; + D = order C by B::rankcabd,B::rankbdca,B::rankaaba; + E = rank D; + F = group E by rank_D; + G = foreach F generate group, COUNT(E); + H = order G by group; + store H into ':OUTPATH:'; + \, + 'verify_pig_script' => q\ + A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray); + B = foreach A generate rownumber,1; + C = order B by rownumber; + store C into ':OUTPATH:'; + \, + }, { + 'num' => 6, + 'execonly' => 'mapred,tez', + 'pig' => q\ + A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); + split A into M if rownumber > 15, N if rownumber < 25; + C = rank N; + D = foreach C generate $0, a, b, c; + store D into ':OUTPATH:'; + \, + 'verify_pig_script' => q\ + A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); + B = filter A by rownumber < 25; + D = foreach B generate rownumber, a, b, c; + store D into ':OUTPATH:'; + \, + } + ] + }, + { + 'name' => 'Rank_Dense', + 'tests' => [ + { 'num' => 1, 'execonly' => 'mapred,tez', 'pig' => q\ @@ -5453,51 +5566,6 @@ store a into ':OUTPATH:';\, 'pig' => q\ SET default_parallel 7; A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); - B = rank A; - C = foreach B generate rank_A,a,b,c; - store C into ':OUTPATH:'; - \, - 'verify_pig_script' => q\ - A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); - C = foreach A generate rownumber,a,b,c; - store C into ':OUTPATH:'; - \, - }, { - 'num' =>5, - 'execonly' => 'mapred,tez', - 'pig' => q\ - SET default_parallel 9; - A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); - B = rank A by b DESC,a ASC; - C = foreach B generate rank_A,b,a; - store C into ':OUTPATH:'; - \, - 'verify_pig_script' => q\ - A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); - C = foreach A generate rankbdaa,b,a; - store C into ':OUTPATH:'; - \, - }, { - 'num' =>6, - 'execonly' => 'mapred,tez', - 'pig' => q\ - SET default_parallel 7; - A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); - B = rank A by c ASC,b DESC; - C = foreach B generate rank_A,c,b; - store C into ':OUTPATH:'; - \, - 'verify_pig_script' => q\ - A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); - C = foreach A generate rankcabd,c,b; - store C into ':OUTPATH:'; - \, - }, { - 'num' => 7, - 'execonly' => 'mapred,tez', - 'pig' => q\ - SET default_parallel 7; - A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); B = foreach A generate a,b,c,tail; C = rank B by a ASC,b ASC DENSE; D = rank C by a ASC,c DESC DENSE; @@ -5511,7 +5579,7 @@ store a into ':OUTPATH:';\, store B into ':OUTPATH:'; \, }, { - 'num' => 8, + 'num' => 5, 'execonly' => 'mapred,tez', 'pig' => q\ SET default_parallel 9; @@ -5535,61 +5603,7 @@ store a into ':OUTPATH:';\, H = foreach G generate E::rankaaba, F::rankaacd, E::a, E::b, E::c; store H into ':OUTPATH:'; \, - }, { - 'num' => 9, - 'execonly' => 'mapred,tez', - 'pig' => q\ - SET default_parallel 25; - A = LOAD ':INPATH:/singlefile/biggish' using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray); - B = rank A; - C = order B by rank_A; - D = foreach C generate rank_A,rownumber; - store D into ':OUTPATH:'; - \, - 'verify_pig_script' => q\ - A = LOAD ':INPATH:/singlefile/biggish' using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray); - D = foreach A generate idx,rownumber; - store D into ':OUTPATH:'; - \, - }, { - 'num' => 10, - 'execonly' => 'mapred,tez', - 'pig' => q\ - SET default_parallel 11; - SET pig.splitCombination false; - A = LOAD ':INPATH:/singlefile/biggish' using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray); - B = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); - C = join A by rownumber, B by rownumber; - D = order C by B::rankcabd,B::rankbdca,B::rankaaba; - E = rank D; - F = group E by rank_D; - G = foreach F generate group, COUNT(E); - H = order G by group; - store H into ':OUTPATH:'; - \, - 'verify_pig_script' => q\ - A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray); - B = foreach A generate rownumber,1; - C = order B by rownumber; - store C into ':OUTPATH:'; - \, - }, { - 'num' => 11, - 'execonly' => 'mapred,tez', - 'pig' => q\ - A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); - split A into M if rownumber > 15, N if rownumber < 25; - C = rank N; - D = foreach C generate $0, a, b, c; - store D into ':OUTPATH:'; - \, - 'verify_pig_script' => q\ - A = LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as (rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray); - B = filter A by rownumber < 25; - D = foreach B generate rownumber, a, b, c; - store D into ':OUTPATH:'; - \, - } + } ] } ], Modified: pig/branches/spark/test/e2e/pig/tools/generate/generate_data.pl URL: http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/tools/generate/generate_data.pl?rev=1654955&r1=1654954&r2=1654955&view=diff ============================================================================== --- pig/branches/spark/test/e2e/pig/tools/generate/generate_data.pl (original) +++ pig/branches/spark/test/e2e/pig/tools/generate/generate_data.pl Tue Jan 27 02:27:45 2015 @@ -249,6 +249,13 @@ sub randomUnicodeNonAscii() my $testvar = "\N{U+03b1}\N{U+03b3}\N{U+03b1}\N{U+03c0}\N{U+03b7}"; +our @utf8Name = ("ä½","å®","è","ä½","å®","å½","å","çµ","ä¸","马"); + +sub randomUtf8Name() +{ + return sprintf("%s", $utf8Name[int(rand(10))]); +} + sub getBulkCopyCmd(){ my $sourceDir= shift; my $tableName = shift; @@ -511,6 +518,28 @@ sub getBulkCopyCmd(){ } printf HDFS "\n"; } + } elsif ($filetype eq "utf8Student") { + srand(3.14159 + $numRows); + print PSQL "create table $tableName (name varchar(100), age integer, gpa float(3));\n" unless defined $nosql; + print PSQL &getBulkCopyCmd( $targetDir, $tableName ) unless defined $nosql; + for (my $i = 0; $i < $numRows; $i++) { + my $name = randomUtf8Name(); + my $age = randomAge(); + my $gpa = randomGpa(); + printf HDFS "%s\t%d\t%.2f\n", $name, $age, $gpa; + } + } elsif ($filetype eq "utf8Voter") { + srand(3.14159 + $numRows); + print PSQL "create table $tableName (name varchar(100), age integer, registration varchar(20), contributions float);\n" unless defined $nosql; + print PSQL &getBulkCopyCmd( $targetDir, $tableName ) unless defined $nosql; + for (my $i = 0; $i < $numRows; $i++) { + my $name = randomUtf8Name(); + my $age = randomAge(); + my $registration = randomRegistration(); + my $contribution = randomContribution(); + printf HDFS "%s\t%d\t%s\t%.2f\n", $name, $age, + $registration, $contribution; + } } else { warn "Unknown filetype $filetype\n"; usage(); Modified: pig/branches/spark/test/excluded-tests-20 URL: http://svn.apache.org/viewvc/pig/branches/spark/test/excluded-tests-20?rev=1654955&r1=1654954&r2=1654955&view=diff ============================================================================== --- pig/branches/spark/test/excluded-tests-20 (original) +++ pig/branches/spark/test/excluded-tests-20 Tue Jan 27 02:27:45 2015 @@ -6,3 +6,5 @@ **/TestJobSubmissionTez.java **/TestGroupConstParallelTez.java **/TestLoaderStorerShipCacheFilesTez.java +**/TestPigStatsTez.java +**/TestPOPartialAggPlanTez.java Modified: pig/branches/spark/test/org/apache/pig/TestLoadStoreFuncLifeCycle.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/TestLoadStoreFuncLifeCycle.java?rev=1654955&r1=1654954&r2=1654955&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/TestLoadStoreFuncLifeCycle.java (original) +++ pig/branches/spark/test/org/apache/pig/TestLoadStoreFuncLifeCycle.java Tue Jan 27 02:27:45 2015 @@ -327,7 +327,7 @@ public class TestLoadStoreFuncLifeCycle @Test public void testLoadStoreFunc() throws Exception { - PigServer pigServer = new PigServer(ExecType.LOCAL); + PigServer pigServer = new PigServer(Util.getLocalTestMode()); Data data = Storage.resetData(pigServer.getPigContext()); data.set("foo", tuple("a"), @@ -346,13 +346,22 @@ public class TestLoadStoreFuncLifeCycle assertEquals("c", out.get(2).get(0)); assertTrue("loader instanciation count increasing: " + Loader.count, Loader.count <= 3); - // LocalJobRunner gets the outputcommitter to call setupJob in Hadoop - // 2.0.x which was not done in Hadoop 1.0.x. (MAPREDUCE-3563) As a - // result, the number of StoreFunc instances is greater by 1 in - // Hadoop-2.0.x. - assertTrue("storer instanciation count increasing: " + Storer.count, - Storer.count <= (org.apache.pig.impl.util.Utils.isHadoop2() ? 5 : 4)); + // In Tez, Pig instantiate StoreFunc one more time to collect byteswritten for output file. + // This step is wrong in MR local mode, since it rely on hdfs counter to get it, if the output + // file is local, byteswritten is 0 + if (Util.getLocalTestMode().toString().startsWith("TEZ")) { + assertTrue("storer instanciation count increasing: " + Storer.count, + Storer.count == 6); + return; + } else { + // LocalJobRunner gets the outputcommitter to call setupJob in Hadoop + // 2.0.x which was not done in Hadoop 1.0.x. (MAPREDUCE-3563) As a + // result, the number of StoreFunc instances is greater by 1 in + // Hadoop-2.0.x. + assertTrue("storer instanciation count increasing: " + Storer.count, + Storer.count <= (org.apache.pig.impl.util.Utils.isHadoop2() ? 5 : 4)); + } } /**
