Author: rohini
Date: Tue Jan 6 20:06:15 2015
New Revision: 1649921
URL: http://svn.apache.org/r1649921
Log:
PIG-4337: Split Types and MultiQuery e2e tests into multiple groups (rohini)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/test/e2e/pig/tests/bigdata.conf
pig/trunk/test/e2e/pig/tests/multiquery.conf
pig/trunk/test/e2e/pig/tests/nightly.conf
Modified: pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1649921&r1=1649920&r2=1649921&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Jan 6 20:06:15 2015
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-4337: Split Types and MultiQuery e2e tests into multiple groups (rohini)
+
PIG-4066: An optimization for ROLLUP operation in Pig (hxquangnhat via
cheolsoo)
PIG-4333: Split BigData tests into multiple groups (rohini)
Modified: pig/trunk/test/e2e/pig/tests/bigdata.conf
URL:
http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/bigdata.conf?rev=1649921&r1=1649920&r2=1649921&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/bigdata.conf (original)
+++ pig/trunk/test/e2e/pig/tests/bigdata.conf Tue Jan 6 20:06:15 2015
@@ -92,7 +92,7 @@ store c into ':OUTPATH:';\,
]
},
{
- 'name' => 'BigData_Stream',
+ 'name' => 'BigData_Streaming',
'tests' => [
{
'num' => 1,
Modified: pig/trunk/test/e2e/pig/tests/multiquery.conf
URL:
http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/multiquery.conf?rev=1649921&r1=1649920&r2=1649921&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/multiquery.conf (original)
+++ pig/trunk/test/e2e/pig/tests/multiquery.conf Tue Jan 6 20:06:15 2015
@@ -20,29 +20,39 @@
####################################################################
# SUB: Multiquery
# Please include a brief description here.
+# MultiQuery_MapSplitee
# - _TEST_ The first example; one that is defined in the bug with one split
# - in the map phase
# - _TEST_ Multiple side files, all in map phase.
# - _TEST_ Two loads and two stores in map phase.
# - _TEST_ One split added in reduce phase and map-only splitee.
+# - _TEST_ Pig-976: Multi-query optimization throws ClassCastException
+# - _TEST_ Pig-976: Multi-query optimization throws ClassCastException
+# - _TEST_ Pig-976: Multi-query optimization throws ClassCastException
+# MultiQuery_MapReduceSplitee
# - _TEST_ One split added in reduce phase and one map-reduce splitee
# - _TEST_ One split in reduce phase and two Map-Reduce splitees.
# - _TEST_ Two loads and two stores in reduce phase
-# - _TEST_ Explicit split with two side files.
-# - _TEST_ Explicit split with order by and two side files.
# - _TEST_ Implicit split with multiple side files.
-# - _TEST_ Streaming with multiple stores.
# - _TEST_ Script with intermediate stores.
# - _TEST_ Implicit split with order by and multiple side files.
# - _TEST_ Self join using fragment replicate join with multiple side files.
-# - _TEST_ PigMix Test Case L12.
# - _TEST_ One split in map phase and two Map-Reduce splitees with mixed
combiners.
# - _TEST_ One split in map phase and two Map-Reduce splitees without
combiners.
+# - _TEST_ Pig-983: multi-query optimization on multiple group bys following a
join or cogroup
+# MultiQuery_ExplicitSplit
+# - _TEST_ Explicit split with two side files.
+# - _TEST_ Explicit split with order by and two side files.
# - _TEST_ Splittees with different map key types and nested splits.
# - _TEST_ Splittees with different map key type.
+# - _TEST_ PigMix Test Case L12.
+# - _TEST_ PigMix Test Case L12 version 2
+# - _TEST_ PigMix Test Case L12 version 3 (modified to have different map key
types in inner split)
+# MultiQuery_Streaming
+# - _TEST_ Streaming with multiple stores.
# - _TEST_ Streaming in demux.
# - _TEST_ Streaming in nested demux.
-# - _TEST_ PigMix Test Case L12 version 2
+
$cfg = {
'driver' => 'Pig',
@@ -50,7 +60,7 @@ $cfg = {
'groups' => [
{
- 'name' => 'MultiQuery',
+ 'name' => 'MultiQuery_MapSplitee',
'floatpostprocess' => 1,
'delimiter' => ' ',
'tests' => [
@@ -111,9 +121,56 @@ $cfg = {
'sql' => "select age, avg(gpa) from studenttab10k where gpa < 3.0
group by age;
select age, avg(gpa) from studenttab10k where gpa < 3.0
group by age having avg(gpa) > 1.5;",
},
+ # Pig-976: Multi-query optimization throws ClassCastException
{
- # One split added in reduce phase and one map-reduce splitee
'num' => 5,
+ 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as
(name:chararray, age:int, gpa:float);
+ b = group a by name;
+ c = group a by age;
+ d = foreach b generate MAX(a.age);
+ e = foreach c generate group, SUM(a.gpa);
+ store d into ':OUTPATH:.1';
+ store e into ':OUTPATH:.2'; #,
+ 'sql' => "select max(age) from studenttab10k group by name;
+ select age, sum(gpa) from studenttab10k group by age;",
+ },
+ # Pig-976: Multi-query optimization throws ClassCastException
+ {
+ 'num' => 6,
+ 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as
(name:chararray, age:int, gpa:float);
+ b = group a all;
+ c = group a by age;
+ d = foreach b generate COUNT(a), MAX(a.age);
+ e = foreach c generate group, SUM(a.gpa);
+ store d into ':OUTPATH:.1';
+ store e into ':OUTPATH:.2'; #,
+ 'sql' => "select count(*), max(age) from studenttab10k;
+ select age, sum(gpa) from studenttab10k group by age;",
+ },
+ # Pig-976: Multi-query optimization throws ClassCastException
+ {
+ 'num' => 7,
+ 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as
(name:chararray, age:int, gpa:float);
+ b = group a by name;
+ c = group a by age;
+ d = foreach b generate MAX(a.age), group;
+ e = foreach c generate group, SUM(a.gpa);
+ store d into ':OUTPATH:.1';
+ store e into ':OUTPATH:.2'; #,
+ 'sql' => "select max(age), name from studenttab10k group by name;
+ select age, sum(gpa) from studenttab10k group by age;",
+ },
+ ]
+ },
+
+ {
+ 'name' => 'MultiQuery_MapReduceSplitee',
+ 'floatpostprocess' => 1,
+ 'delimiter' => ' ',
+ 'tests' => [
+ {
+ # One split added in reduce phase and one map-reduce splitee
+ 'num' => 1,
'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:
chararray, age: int, gpa: float);
b = filter a by gpa < 3.0;
c = group b by age;
@@ -129,7 +186,7 @@ $cfg = {
},
{
# One split in reduce phase and two Map-Reduce splitees.
- 'num' => 6,
+ 'num' => 2,
'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:
chararray, age: int, gpa: float);
b = filter a by gpa < 3.0;
c = group b by age;
@@ -149,7 +206,7 @@ $cfg = {
},
{
# Two loads and two stores in reduce phase
- 'num' => 7,
+ 'num' => 3,
'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name,
age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name,
age, registration, contributions);
c = filter a by age < 20;
@@ -170,44 +227,8 @@ $cfg = {
where a.age < 20 and b.age < 20 and a.gpa < 3.0;",
},
{
- # Explicit split with two side files.
- 'num'=> 8,
- 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name,
age, gpa);
- split a into a1 if name > 'm', a2 if name <= 'm';
- store a1 into ':OUTPATH:.1';
- store a2 into ':OUTPATH:.2';
- b = cogroup a1 by age, a2 by age;
- c = foreach b generate flatten(a1), flatten(a2);
- store c into ':OUTPATH:.3'; #,
- 'sql' => "select name, age, gpa from studenttab10k where name >
'm';
- select name, age, gpa from studenttab10k where name <=
'm';
- select A.name, A.age, A.gpa, B.name, B.age, B.gpa
- from (select * from studenttab10k where name > 'm') as A
- join (select * from studenttab10k where name <= 'm') as
B using (age);",
- },
- {
- # Explicit split with order by and two side files.
- 'num'=> 9,
- 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as
(name:chararray, age:int, gpa:float);
- split a into a1 if age > 50, a2 if name < 'm';
- b2 = distinct a2;
- b1 = order a1 by name;
- store b2 into ':OUTPATH:.2';
- store b1 into ':OUTPATH:.1';
- c = cogroup b2 by name, b1 by name;
- d = foreach c generate flatten(group), COUNT($1),
COUNT($2);
- store d into ':OUTPATH:.3'; #,
- 'sql' => "select name, age, gpa from studenttab10k where age > 50
order by name;
- select distinct name, age, gpa from studenttab10k where
name < 'm';
- select name, count(A.name), count(B.name)
- from (select distinct name from studenttab10k where name
< 'm') as A
- join (select name from studenttab10k where age > 50) as
B using (name) group by name;",
- 'verify_with_pig' => 1,
- 'verify_pig_version' => 'old',
- },
- {
# Implicit split with multiple side files.
- 'num'=> 10,
+ 'num'=> 4,
'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as
(name:chararray, age:int, gpa:float);
b = filter a by age > 50;
c = filter a by gpa > 3.0;
@@ -226,31 +247,8 @@ $cfg = {
join (select * from studenttab10k where gpa > 3.0) as B
using (name) where A.age < 75;",
},
{
- # Streaming with multiple stores
- 'num' => 11,
- 'pig' => q# define CMD1 `perl -ne 'print $_;'`;
- define CMD2 `perl -ne 'print $_;'`;
- A = load ':INPATH:/singlefile/studenttab10k' as (name,
age, gpa);
- B = stream A through CMD1 as (name, age, gpa);
- store B into ':OUTPATH:.1';
- C = stream B through CMD2 as (name, age, gpa);
- D = JOIN B by name, C by name;
- store D into ':OUTPATH:.2'; #,
- 'pig_win' => q# define CMD1 `perl -ne "print $_;"`;
- define CMD2 `perl -ne "print $_;"`;
- A = load ':INPATH:/singlefile/studenttab10k' as (name,
age, gpa);
- B = stream A through CMD1 as (name, age, gpa);
- store B into ':OUTPATH:.1';
- C = stream B through CMD2 as (name, age, gpa);
- D = JOIN B by name, C by name;
- store D into ':OUTPATH:.2'; #,
- 'sql' => "select name, age, gpa from studenttab10k;
- select A.name, A.age, A.gpa, B.name, B.age, B.gpa
- from studenttab10k as A join studenttab10k as B
using(name);",
- },
- {
# With intermediate store
- 'num' => 12,
+ 'num' => 5,
'pig' => q# A = load ':INPATH:/singlefile/studenttab10k' as (name,
age, gpa);
store A into ':OUTPATH:.1';
B = load ':OUTPATH:.1';
@@ -260,7 +258,7 @@ $cfg = {
},
{
# Implicit split with order by and multiple side files.
- 'num'=>13,
+ 'num'=> 6,
'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as
(name:chararray, age:int, gpa:float);
b = filter a by age > 50;
c = filter a by gpa > 3.0;
@@ -283,7 +281,7 @@ $cfg = {
},
# Self join using fragment replicate join with multiple side files
{
- 'num' => 14,
+ 'num' => 7,
'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as
(name:chararray, age:int, gpa:double);
b = load ':INPATH:/singlefile/studenttab10k' as
(name:chararray, age:int, gpa:double);
c = filter a by age > 50;
@@ -298,29 +296,9 @@ $cfg = {
from studenttab10k as a join studenttab10k as b
using(gpa)
where a.age > 50 and b.gpa > 3.0;",
},
- # PigMix Test Case L12
- {
- 'num' => 15,
- 'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name,
age, registration, contributions);
- b = foreach a generate name, age, contributions;
- split b into c1 if age > 50, c2 if age <= 50;
- split c1 into d1 if name < 'm', d2 if name >= 'm';
- e = group c2 by name;
- e1 = foreach e generate group, SUM(c2.contributions);
- store e1 into ':OUTPATH:.1';
- f = group d1 by name;
- f1 = foreach f generate group, MAX(d1.contributions);
- store f1 into ':OUTPATH:.2';
- g = group d2 by name;
- g1 = foreach g generate group, COUNT(d2);
- store g1 into ':OUTPATH:.3'; #,
- 'sql' => "select name, sum(contributions) from votertab10k where
age <= 50 group by name;
- select name, max(contributions) from votertab10k where
(age > 50 and name < 'm') group by name;
- select name, count(*) from votertab10k where (age > 50
and name >= 'm') group by name;",
- },
# One split in map phase and two Map-Reduce splitees with mixed
combiner.
{
- 'num' => 16,
+ 'num' => 8,
'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:
chararray, age: int, gpa: float);
b = filter a by gpa < 3.0;
c = filter a by gpa >= 3.0;
@@ -335,7 +313,7 @@ $cfg = {
},
# One split in map phase and two Map-Reduce splitees without
combiner.
{
- 'num' => 17,
+ 'num' => 9,
'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name:
chararray, age: int, gpa: float);
b = filter a by gpa < 3.0;
c = filter a by gpa >= 3.0;
@@ -348,9 +326,68 @@ $cfg = {
'sql' => "select age, max(gpa) + min(gpa) from studenttab10k where
gpa < 3.0 group by age;
select age, max(gpa) - min(gpa) from studenttab10k where
gpa >= 3.0 group by age;",
},
+ # Pig-983: multi-query optimization on multiple group bys
following a join or cogroup
+ {
+ 'num' => 10,
+ 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as
(name:chararray, age:int, gpa:float);
+ b = load ':INPATH:/singlefile/votertab10k' as
(name:chararray, age:int, registration, contributions:double);
+ c = join a by name, b by name;
+ d = group c by a::age;
+ e = group c by b::age;
+ d1 = foreach d generate group, COUNT(c), MAX(c.a::gpa);
+ e1 = foreach e generate group, SUM(c.b::contributions);
+ store d1 into ':OUTPATH:.1';
+ store e1 into ':OUTPATH:.2'; #,
+ 'sql' => "select a.age, count(*), max(a.gpa) from studenttab10k as
a inner join votertab10k as b on (a.name = b.name) group by a.age;
+ select b.age, sum(b.contributions) from studenttab10k as
a inner join votertab10k as b on (a.name = b.name) group by b.age;",
+ },
+ ]
+ },
+
+ {
+ 'name' => 'MultiQuery_ExplicitSplit',
+ 'floatpostprocess' => 1,
+ 'delimiter' => ' ',
+ 'tests' => [
+ {
+ # Explicit split with two side files.
+ 'num'=> 1,
+ 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as (name,
age, gpa);
+ split a into a1 if name > 'm', a2 if name <= 'm';
+ store a1 into ':OUTPATH:.1';
+ store a2 into ':OUTPATH:.2';
+ b = cogroup a1 by age, a2 by age;
+ c = foreach b generate flatten(a1), flatten(a2);
+ store c into ':OUTPATH:.3'; #,
+ 'sql' => "select name, age, gpa from studenttab10k where name >
'm';
+ select name, age, gpa from studenttab10k where name <=
'm';
+ select A.name, A.age, A.gpa, B.name, B.age, B.gpa
+ from (select * from studenttab10k where name > 'm') as A
+ join (select * from studenttab10k where name <= 'm') as
B using (age);",
+ },
+ {
+ # Explicit split with order by and two side files.
+ 'num'=> 2,
+ 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as
(name:chararray, age:int, gpa:float);
+ split a into a1 if age > 50, a2 if name < 'm';
+ b2 = distinct a2;
+ b1 = order a1 by name;
+ store b2 into ':OUTPATH:.2';
+ store b1 into ':OUTPATH:.1';
+ c = cogroup b2 by name, b1 by name;
+ d = foreach c generate flatten(group), COUNT($1),
COUNT($2);
+ store d into ':OUTPATH:.3'; #,
+ 'sql' => "select name, age, gpa from studenttab10k where age > 50
order by name;
+ select distinct name, age, gpa from studenttab10k where
name < 'm';
+ select name, count(A.name), count(B.name)
+ from (select distinct name from studenttab10k where name
< 'm') as A
+ join (select name from studenttab10k where age > 50) as
B using (name) group by name;",
+ 'verify_with_pig' => 1,
+ 'verify_pig_version' => 'old',
+ },
# Splittees with different map key types and nested splits
{
- 'num' => 18,
+ 'num' => 3,
'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name:
chararray, age:int, registration, contributions:double);
b = foreach a generate name, age, contributions;
split b into c1 if age > 10, c2 if age <= 60;
@@ -370,7 +407,7 @@ $cfg = {
},
# Splittees with different map key types
{
- 'num' => 19,
+ 'num' => 4,
'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name:
chararray, age:int, registration, contributions:double);
b = foreach a generate name, age, contributions;
split b into c1 if age > 50, c2 if age <= 50;
@@ -383,50 +420,29 @@ $cfg = {
'sql' => "select name, sum(contributions) from votertab10k where
age <= 50 group by name;
select age, max(contributions) from votertab10k where
age > 50 group by age;",
},
- # Streaming in demux
- {
- 'num' => 20,
- 'execonly' => 'mapred,tez',
- 'pig' => q#
- define CMD `perl GroupBy.pl '\t' 0`
ship(':SCRIPTHOMEPATH:/GroupBy.pl');
- A = load ':INPATH:/singlefile/studenttab10k';
- split A into A1 if $0 < 'm', A2 if $0 >= 'm';
- B = group A1 by $0;
- C = foreach B generate flatten(A1);
- D = stream C through CMD;
- store D into ':OUTPATH:.1';
- E = group A2 by $0;
- F = foreach E generate group, COUNT(A2);
- store F into ':OUTPATH:.2';#,
- 'sql' => "select name, count(*) from studenttab10k where name <
'm' group by name;
- select name, count(*) from studenttab10k where name >=
'm' group by name;",
- },
- # Streaming in nested demux
+ # PigMix Test Case L12
{
- 'num' => 21,
- 'execonly' => 'mapred,tez',
- 'pig' => q#
- define CMD `perl GroupBy.pl '\t' 0`
ship(':SCRIPTHOMEPATH:/GroupBy.pl');
- A = load ':INPATH:/singlefile/studenttab10k';
- split A into A1 if $0 < 'm', A2 if $0 >= 'm';
- split A1 into A3 if $1 < 30, A4 if $1 >= 30;
- B = group A3 by $0;
- C = foreach B generate flatten(A3);
- D = stream C through CMD;
- store D into ':OUTPATH:.1';
- E = group A2 by $0;
- F = foreach E generate group, COUNT(A2);
- store F into ':OUTPATH:.2';
- G = group A4 by $0;
- H = foreach G generate group, COUNT(A4);
- store H into ':OUTPATH:.3';#,
- 'sql' => "select name, count(*) from studenttab10k where name <
'm' and age < 30 group by name;
- select name, count(*) from studenttab10k where name >=
'm' group by name;
- select name, count(*) from studenttab10k where name <
'm' and age >= 30 group by name;",
+ 'num' => 5,
+ 'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name,
age, registration, contributions);
+ b = foreach a generate name, age, contributions;
+ split b into c1 if age > 50, c2 if age <= 50;
+ split c1 into d1 if name < 'm', d2 if name >= 'm';
+ e = group c2 by name;
+ e1 = foreach e generate group, SUM(c2.contributions);
+ store e1 into ':OUTPATH:.1';
+ f = group d1 by name;
+ f1 = foreach f generate group, MAX(d1.contributions);
+ store f1 into ':OUTPATH:.2';
+ g = group d2 by name;
+ g1 = foreach g generate group, COUNT(d2);
+ store g1 into ':OUTPATH:.3'; #,
+ 'sql' => "select name, sum(contributions) from votertab10k where
age <= 50 group by name;
+ select name, max(contributions) from votertab10k where
(age > 50 and name < 'm') group by name;
+ select name, count(*) from votertab10k where (age > 50
and name >= 'm') group by name;",
},
# PigMix Test Case L12 version 2
{
- 'num' => 22,
+ 'num' => 6,
'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name,
age, registration, contributions);
b = foreach a generate name, age, contributions;
split b into c1 if age > 50, c2 if age <= 50;
@@ -446,7 +462,7 @@ $cfg = {
},
# PigMix Test Case L12 version 3 (modified to have different map
key types in inner split)
{
- 'num' => 23,
+ 'num' => 7,
'pig' => q# a = load ':INPATH:/singlefile/votertab10k' as (name,
age, registration, contributions);
b = foreach a generate name, age, contributions;
split b into c1 if age > 50, c2 if age <= 50;
@@ -464,63 +480,81 @@ $cfg = {
select name, age, count(*) from votertab10k where (age >
50 and name >= 'm') group by name, age;
select name, age, sum(contributions) from votertab10k
where age <= 50 group by name, age;",
},
- # Pig-976: Multi-query optimization throws ClassCastException
- {
- 'num' => 24,
- 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as
(name:chararray, age:int, gpa:float);
- b = group a by name;
- c = group a by age;
- d = foreach b generate MAX(a.age);
- e = foreach c generate group, SUM(a.gpa);
- store d into ':OUTPATH:.1';
- store e into ':OUTPATH:.2'; #,
- 'sql' => "select max(age) from studenttab10k group by name;
- select age, sum(gpa) from studenttab10k group by age;",
- },
- # Pig-976: Multi-query optimization throws ClassCastException
+ ]
+ },
+
+ {
+ 'name' => 'MultiQuery_Streaming',
+ 'floatpostprocess' => 1,
+ 'delimiter' => ' ',
+ 'tests' => [
{
- 'num' => 25,
- 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as
(name:chararray, age:int, gpa:float);
- b = group a all;
- c = group a by age;
- d = foreach b generate COUNT(a), MAX(a.age);
- e = foreach c generate group, SUM(a.gpa);
- store d into ':OUTPATH:.1';
- store e into ':OUTPATH:.2'; #,
- 'sql' => "select count(*), max(age) from studenttab10k;
- select age, sum(gpa) from studenttab10k group by age;",
+ # Streaming with multiple stores
+ 'num' => 1,
+ 'pig' => q# define CMD1 `perl -ne 'print $_;'`;
+ define CMD2 `perl -ne 'print $_;'`;
+ A = load ':INPATH:/singlefile/studenttab10k' as (name,
age, gpa);
+ B = stream A through CMD1 as (name, age, gpa);
+ store B into ':OUTPATH:.1';
+ C = stream B through CMD2 as (name, age, gpa);
+ D = JOIN B by name, C by name;
+ store D into ':OUTPATH:.2'; #,
+ 'pig_win' => q# define CMD1 `perl -ne "print $_;"`;
+ define CMD2 `perl -ne "print $_;"`;
+ A = load ':INPATH:/singlefile/studenttab10k' as (name,
age, gpa);
+ B = stream A through CMD1 as (name, age, gpa);
+ store B into ':OUTPATH:.1';
+ C = stream B through CMD2 as (name, age, gpa);
+ D = JOIN B by name, C by name;
+ store D into ':OUTPATH:.2'; #,
+ 'sql' => "select name, age, gpa from studenttab10k;
+ select A.name, A.age, A.gpa, B.name, B.age, B.gpa
+ from studenttab10k as A join studenttab10k as B
using(name);",
},
- # Pig-983: multi-query optimization on multiple group bys
following a join or cogroup
+ # Streaming in demux
{
- 'num' => 26,
- 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as
(name:chararray, age:int, gpa:float);
- b = load ':INPATH:/singlefile/votertab10k' as
(name:chararray, age:int, registration, contributions:double);
- c = join a by name, b by name;
- d = group c by a::age;
- e = group c by b::age;
- d1 = foreach d generate group, COUNT(c), MAX(c.a::gpa);
- e1 = foreach e generate group, SUM(c.b::contributions);
- store d1 into ':OUTPATH:.1';
- store e1 into ':OUTPATH:.2'; #,
- 'sql' => "select a.age, count(*), max(a.gpa) from studenttab10k as
a inner join votertab10k as b on (a.name = b.name) group by a.age;
- select b.age, sum(b.contributions) from studenttab10k as
a inner join votertab10k as b on (a.name = b.name) group by b.age;",
+ 'num' => 2,
+ 'execonly' => 'mapred,tez',
+ 'pig' => q#
+ define CMD `perl GroupBy.pl '\t' 0`
ship(':SCRIPTHOMEPATH:/GroupBy.pl');
+ A = load ':INPATH:/singlefile/studenttab10k';
+ split A into A1 if $0 < 'm', A2 if $0 >= 'm';
+ B = group A1 by $0;
+ C = foreach B generate flatten(A1);
+ D = stream C through CMD;
+ store D into ':OUTPATH:.1';
+ E = group A2 by $0;
+ F = foreach E generate group, COUNT(A2);
+ store F into ':OUTPATH:.2';#,
+ 'sql' => "select name, count(*) from studenttab10k where name <
'm' group by name;
+ select name, count(*) from studenttab10k where name >=
'm' group by name;",
},
- # Pig-976: Multi-query optimization throws ClassCastException
+ # Streaming in nested demux
{
- 'num' => 27,
- 'pig' => q# a = load ':INPATH:/singlefile/studenttab10k' as
(name:chararray, age:int, gpa:float);
- b = group a by name;
- c = group a by age;
- d = foreach b generate MAX(a.age), group;
- e = foreach c generate group, SUM(a.gpa);
- store d into ':OUTPATH:.1';
- store e into ':OUTPATH:.2'; #,
- 'sql' => "select max(age), name from studenttab10k group by name;
- select age, sum(gpa) from studenttab10k group by age;",
+ 'num' => 3,
+ 'execonly' => 'mapred,tez',
+ 'pig' => q#
+ define CMD `perl GroupBy.pl '\t' 0`
ship(':SCRIPTHOMEPATH:/GroupBy.pl');
+ A = load ':INPATH:/singlefile/studenttab10k';
+ split A into A1 if $0 < 'm', A2 if $0 >= 'm';
+ split A1 into A3 if $1 < 30, A4 if $1 >= 30;
+ B = group A3 by $0;
+ C = foreach B generate flatten(A3);
+ D = stream C through CMD;
+ store D into ':OUTPATH:.1';
+ E = group A2 by $0;
+ F = foreach E generate group, COUNT(A2);
+ store F into ':OUTPATH:.2';
+ G = group A4 by $0;
+ H = foreach G generate group, COUNT(A4);
+ store H into ':OUTPATH:.3';#,
+ 'sql' => "select name, count(*) from studenttab10k where name <
'm' and age < 30 group by name;
+ select name, count(*) from studenttab10k where name >=
'm' group by name;
+ select name, count(*) from studenttab10k where name <
'm' and age >= 30 group by name;",
},
- ] # end of tests
+ ] # end of tests
},
+
] # end of groups
}
-;
-
+;
\ No newline at end of file
Modified: pig/trunk/test/e2e/pig/tests/nightly.conf
URL:
http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/nightly.conf?rev=1649921&r1=1649920&r2=1649921&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/nightly.conf (original)
+++ pig/trunk/test/e2e/pig/tests/nightly.conf Tue Jan 6 20:06:15 2015
@@ -1785,7 +1785,7 @@ store b into ':OUTPATH:';\,
},
{
- 'name' => 'Types',
+ 'name' => 'Types_Constants',
'tests' => [
{
# constants
@@ -1794,43 +1794,87 @@ store b into ':OUTPATH:';\,
b = foreach a generate age + 1 + 0.2f + 253645L, gpa+1;
store b into ':OUTPATH:';\,
},
- {
- # NULL and cast
- 'num' => 2,
- 'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
+ {
+ # constants
+ 'num' => 2,
+ 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as
(name:chararray, age:int, gpa:double);
+b = foreach a generate -(age + 1 + 0.2f + 253645L), -(gpa+1);
+store b into ':OUTPATH:';\,
+ },
+ {
+ # test precision for doubles is at least 15 digits
+ 'num' => 3,
+ 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as
(name:chararray, age:int, gpa:double);
+b = foreach a generate 0.123456789123456+0.123456789123456;
+store b into ':OUTPATH:';\,
+ },
+ ]
+ },
+
+ {
+ 'name' => 'Types_Cast',
+ 'tests' => [
+ {
+ # NULL and cast
+ 'num' => 1,
+ 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as
(name:chararray, age:int, gpa:double);
b = foreach a generate (int)((int)gpa/((int)gpa - 1)) as norm_gpa:int;
c = foreach b generate (norm_gpa is null? 0 :norm_gpa);
store c into ':OUTPATH:';\,
- # 'expected_err_regex' => "Encountered Warning
DIVIDE_BY_ZERO 2387 time.*",
- # Driver does currently not support both 'sql'
and 'expected_...' verification directives.
- },
+ # 'expected_err_regex' => "Encountered Warning
DIVIDE_BY_ZERO 2387 time.*",
+ # Driver does currently not support both 'sql' and
'expected_...' verification directives.
+ },
+ {
+ # Not NULL and cast
+ 'num' => 2,
+ 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as
(name:chararray, age:int, gpa:double);
+b = foreach a generate (int)((int)gpa/((int)gpa - 1)) as norm_gpa:int;
+c = foreach b generate (norm_gpa is not null? norm_gpa: 0);
+store c into ':OUTPATH:';\,
+ },
+ # boolean cast
+ {
+ 'num' => 3,
+ 'pig' => q\a = load ':INPATH:/singlefile/allscalar10k' using
PigStorage() as (name:chararray, age:int, gpa:double, instate:boolean);
+b = foreach a generate instate, true, false;
+store b into ':OUTPATH:';\,
+ 'verify_pig_script' => q\a = load
':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray,
age:int, gpa:double, instate:chararray);
+b = foreach a generate instate, 'true', 'false';
+store b into ':OUTPATH:';\,
+ },
+ ]
+ },
+
+ {
+ 'name' => 'Types_ArithmeticCast',
+ 'tests' => [
{
# arithmetic operators and SIZE for int,
double and size and concat operators for chararrays
- 'num' => 3,
+ 'num' => 1,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
b = foreach a generate age, gpa, age % 25, age + 25, age - 25, age/2, age * 2,
SIZE(age), gpa + 10.1, gpa - 1.1 , gpa / 1.2, gpa * 2.5, SIZE(gpa), SIZE(name),
CONCAT(name, 'test');
store b into ':OUTPATH:';\,
},
{
# arithmetic operators and SIZE for long,
float and size and concat operators for bytearrays
- 'num' => 4,
+ 'num' => 2,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float);
b = foreach a generate age, gpa, age % 2L, age + 2500000000L, age -
2500000000L, age/2L, age * 250000000L, SIZE(age), gpa + 10.1f, gpa - 1.1f , gpa
/ 1.2f, gpa * 2.6f, SIZE(gpa), SIZE(name), CONCAT(name, name);
store b into ':OUTPATH:';\,
},
{
- # equlity and implicit cast
- 'num' => 5,
- 'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+ # equality and implicit cast
+ 'num' => 3,
+ 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as
(name, age, gpa);
b = filter a by age == '25' and gpa < 3;
store b into ':OUTPATH:';\,
- },
+ },
{
# will need to test against previous version of pig
# because in pig currently count includes nulls - this affects
# avg
- 'num' => 6,
+ 'num' => 4,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
b = group a ALL;
c = foreach b generate SUM(a.age), MIN(a.age), MAX(a.age), AVG(a.age),
MIN(a.name), MAX(a.name), SUM(a.gpa), MIN(a.gpa), MAX(a.gpa), AVG(a.gpa);
@@ -1840,7 +1884,7 @@ store c into ':OUTPATH:';\,
},
{
# sum, min, max, avg for long and float
(declared)
- 'num' => 7,
+ 'num' => 5,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float);
b = group a ALL;
c = foreach b generate SUM(a.age), MIN(a.age), MAX(a.age), AVG(a.age),
SUM(a.gpa), MIN(a.gpa), MAX(a.gpa), AVG(a.gpa);
@@ -1848,21 +1892,27 @@ store c into ':OUTPATH:';\,
},
{
# Explicit casts - arithmetic operators and
SIZE for int, double and size and concat operators for chararrays
- 'num' => 8,
+ 'num' => 6,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
b = foreach a generate (int)age % 25, (int)age + 25, (int)age - 25,
(int)age/2, (int)age * 2, SIZE((int)age), (double)gpa + 10.1, (double)gpa - 1.1
, (double)gpa / 1.2, (double)gpa * 2.5, SIZE((double)gpa),
SIZE((chararray)name), CONCAT((chararray)name, 'test');
store b into ':OUTPATH:';\,
},
{
# Explicit casts - arithmetic operators and
SIZE for long, float
- 'num' => 9,
+ 'num' => 7,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
b = foreach a generate (long)age, (long)age % 2L, (long)age + 2500000000L,
(long)age - 2500000000L, (long)age/2L, (long)age * 250000000L, SIZE((long)age),
(float)gpa + 10.1f, (float)gpa - 1.1f , (float)gpa / 1.2f, (float)gpa * 2.6f,
SIZE((float)gpa);
store b into ':OUTPATH:';\,
},
+ ]
+ },
+
+ {
+ 'name' => 'Types_Filter',
+ 'tests' => [
{
# Filter is null for chararray and double and is not null for
int
- 'num' => 10,
+ 'num' => 1,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
b = filter a by name is null and age is not null and gpa is null;
c = group b ALL;
@@ -1871,7 +1921,7 @@ store d into ':OUTPATH:';\,
},
{
# Filter is not null for chararray and double and is null for
int
- 'num' => 11,
+ 'num' => 2,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
b = filter a by name is not null and age is null and gpa is not null;
c = group b ALL;
@@ -1880,7 +1930,7 @@ store d into ':OUTPATH:';\,
},
{
# Filter is null for bytearray and float and is not null for
long
- 'num' => 12,
+ 'num' => 3,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float);
b = filter a by name is null and age is not null and gpa is null;
c = group b ALL;
@@ -1889,55 +1939,54 @@ store d into ':OUTPATH:';\,
},
{
# Filter is not null for bytearray and float and is null for
long
- 'num' => 13,
+ 'num' => 4,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float);
b = filter a by name is not null and age is null and gpa is not null;
c = group b ALL;
d = foreach c generate COUNT(b);
store d into ':OUTPATH:';\,
},
+ ]
+ },
+
+ {
+ 'name' => 'Types_Order',
+ 'tests' => [
{
- # test that sorting is based on the type for
chararray, int and double
- 'num' => 14,
- 'pig' =>q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
+ # test that sorting is based on the type for chararray, int
and double
+ 'num' => 1,
+ 'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k' as
(name:chararray, age:int, gpa:double);
b = order a by name, age, gpa;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1,1', '-k', '2n,3n'],
- },
- {
- # test that sorting descending is based on the
type for chararray, int and double
- 'num' => 15,
- 'pig' =>q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
+ },
+ {
+ # test that sorting descending is based on the type for
chararray, int and double
+ 'num' => 2,
+ 'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k' as
(name:chararray, age:int, gpa:double);
b = order a by name desc, age desc, gpa desc;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1r,1r', '-k', '2nr,3nr'],
- },
- {
- # test that sorting is based on the type for
bytearray, long and float
- 'num' => 16,
- 'pig' =>q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float);
+ },
+ {
+ # test that sorting is based on the type for bytearray, long
and float
+ 'num' => 3,
+ 'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k' as
(name, age:long, gpa:float);
b = order a by name, age, gpa;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1,1', '-k', '2n,3n'],
- },
- {
- # test that sorting descending is based on the
type for chararray, age and float
- 'num' => 17,
- 'pig' =>q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name, age:long, gpa:float);
+ },
+ {
+ # test that sorting descending is based on the type for
bytearray, long and float
+ 'num' => 4,
+ 'pig' =>q\a = load ':INPATH:/singlefile/studentnulltab10k' as
(name, age:long, gpa:float);
b = order a by name desc, age desc, gpa desc;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1r,1r', '-k', '2nr,3nr'],
- },
- {
- # test precision for doubles is atleast 15
digits
- 'num' => 18,
- 'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
-b = foreach a generate 0.123456789123456+0.123456789123456;
-store b into ':OUTPATH:';\,
- },
+ },
{
# order by string
- 'num' => 20,
+ 'num' => 5,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
b = order a by name;
store b into ':OUTPATH:';\,
@@ -1945,7 +1994,7 @@ store b into ':OUTPATH:';\,
},
{
# order by string desc
- 'num' => 21,
+ 'num' => 6,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
b = order a by name desc;
store b into ':OUTPATH:';\,
@@ -1953,7 +2002,7 @@ store b into ':OUTPATH:';\,
},
{
# order by int
- 'num' => 22,
+ 'num' => 7,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
b = order a by age;
store b into ':OUTPATH:';\,
@@ -1961,7 +2010,7 @@ store b into ':OUTPATH:';\,
},
{
# order by int desc
- 'num' => 23,
+ 'num' => 8,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
b = order a by age desc;
store b into ':OUTPATH:';\,
@@ -1969,7 +2018,7 @@ store b into ':OUTPATH:';\,
},
{
# order by long
- 'num' => 24,
+ 'num' => 9,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:long,
gpa:double);
b = order a by age;
store b into ':OUTPATH:';\,
@@ -1977,7 +2026,7 @@ store b into ':OUTPATH:';\,
},
{
# order by long desc
- 'num' => 25,
+ 'num' => 10,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:long,
gpa:double);
b = order a by age desc;
store b into ':OUTPATH:';\,
@@ -1985,7 +2034,7 @@ store b into ':OUTPATH:';\,
},
{
# order by float
- 'num' => 26,
+ 'num' => 11,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:float);
b = order a by gpa;
store b into ':OUTPATH:';\,
@@ -1993,7 +2042,7 @@ store b into ':OUTPATH:';\,
},
{
# order by float desc
- 'num' => 27,
+ 'num' => 12,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:float);
b = order a by gpa desc;
store b into ':OUTPATH:';\,
@@ -2001,7 +2050,7 @@ store b into ':OUTPATH:';\,
},
{
# order by double
- 'num' => 28,
+ 'num' => 13,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
b = order a by gpa;
store b into ':OUTPATH:';\,
@@ -2009,7 +2058,7 @@ store b into ':OUTPATH:';\,
},
{
# order by double desc
- 'num' => 29,
+ 'num' => 14,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
b = order a by gpa desc;
store b into ':OUTPATH:';\,
@@ -2017,7 +2066,7 @@ store b into ':OUTPATH:';\,
},
{
# order by *
- 'num' => 30,
+ 'num' => 15,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
b = order a by *;
store b into ':OUTPATH:';\,
@@ -2025,14 +2074,20 @@ store b into ':OUTPATH:';\,
},
{
# order by * desc
- 'num' => 31,
+ 'num' => 16,
'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
b = order a by * desc;
store b into ':OUTPATH:';\,
'sortArgs' => ['-t', ' ', '-k', '1r,1r', '-k', '2nr,3nr'],
},
+ ]
+ },
+
+ {
+ 'name' => 'Types_CoGroup',
+ 'tests' => [
{
- 'num' => 32,
+ 'num' => 1,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k'
using PigStorage() as (name:chararray, age:int, gpa:double);
b = load ':INPATH:/singlefile/votertab10k' as (name:chararray, age:int,
registration:chararray, contributions:double);
c = filter a by age < 20;
@@ -2042,7 +2097,7 @@ f = foreach e generate flatten (c), flat
store f into ':OUTPATH:';\,
},
{
- 'num' => 33,
+ 'num' => 2,
'pig' => q\a = load ':INPATH:/singlefile/studenttab10k'
using PigStorage() as (name:chararray, age:int, gpa:double);
b = load ':INPATH:/singlefile/votertab10k' as (name:chararray, age:int,
registration:chararray, contributions:double);
c = filter a by age < 20;
@@ -2051,62 +2106,9 @@ e = cogroup c by age, d by age;
f = foreach e generate flatten (c), flatten(d);
store f into ':OUTPATH:';\,
},
- {
- 'num' => 34,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k'
using PigStorage() as (name:chararray, age:long, gpa:double);
-b = load ':INPATH:/singlefile/votertab10k' as (name:chararray, age:long,
registration:chararray, contributions:double);
-c = filter a by age < 20;
-d = filter b by age < 20;
-e = cogroup c by age, d by age;
-f = foreach e generate flatten (c), flatten(d);
-store f into ':OUTPATH:';\,
- },
- {
- 'num' => 35,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k'
using PigStorage() as (name:chararray, age:float, gpa:double);
-b = load ':INPATH:/singlefile/votertab10k' as (name:chararray, age:float,
registration:chararray, contributions:double);
-c = filter a by age < 20;
-d = filter b by age < 20;
-e = cogroup c by age, d by age;
-f = foreach e generate flatten (c), flatten(d);
-store f into ':OUTPATH:';\,
- },
- {
- 'num' => 36,
- 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k'
using PigStorage() as (name:chararray, age:double, gpa:double);
-b = load ':INPATH:/singlefile/votertab10k' as (name:chararray, age:double,
registration:chararray, contributions:double);
-c = filter a by age < 20;
-d = filter b by age < 20;
-e = cogroup c by age, d by age;
-f = foreach e generate flatten (c), flatten(d);
-store f into ':OUTPATH:';\,
- },
- {
- # NULL and cast
- 'num' => 37,
- 'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
-b = foreach a generate (int)((int)gpa/((int)gpa - 1)) as norm_gpa:int;
-c = foreach b generate (norm_gpa is not null? norm_gpa: 0);
-store c into ':OUTPATH:';\,
- },
- {
- # constants
- 'num' => 38,
- 'pig' => q\a = load
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int,
gpa:double);
-b = foreach a generate -(age + 1 + 0.2f + 253645L), -(gpa+1);
-store b into ':OUTPATH:';\,
- },
- {
- 'num' => 39,
- 'pig' => q\a = load
':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray,
age:int, gpa:double, instate:boolean);
-b = foreach a generate instate, true, false;
-store b into ':OUTPATH:';\,
- 'verify_pig_script' => q\a = load
':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray,
age:int, gpa:double, instate:chararray);
-b = foreach a generate instate, 'true', 'false';
-store b into ':OUTPATH:';\,
- },
]
},
+
{
'name' => 'Limit',
'tests' => [
@@ -3903,7 +3905,7 @@ store b into ':OUTPATH:';\,
# test long and float square, plus two references to the
same UDF with different schemas
'num' => 3,
'floatpostprocess' => 1,
- 'delimiter' => ' ',
+ 'delimiter' => ' ',
'pig' => q\
register ':SCRIPTHOMEPATH:/cpython/scriptingudf.py' using streaming_python as
myfuncs;
a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name,
age:long, gpa:double);
@@ -5409,6 +5411,111 @@ store a into ':OUTPATH:';\,
'name' => 'Rank',
'tests' => [
{
+ 'num' => 1,
+ 'execonly' => 'mapred,tez',
+ 'pig' => q\
+ SET default_parallel 7;
+ A = LOAD ':INPATH:/singlefile/prerank'
using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ B = rank A;
+ C = foreach B generate rank_A,a,b,c;
+ store C into ':OUTPATH:';
+ \,
+ 'verify_pig_script' => q\
+ A = LOAD ':INPATH:/singlefile/prerank'
using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ C = foreach A generate rownumber,a,b,c;
+ store C into ':OUTPATH:';
+ \,
+ }, {
+ 'num' =>2,
+ 'execonly' => 'mapred,tez',
+ 'pig' => q\
+ SET default_parallel 9;
+ A = LOAD ':INPATH:/singlefile/prerank'
using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ B = rank A by b DESC,a ASC;
+ C = foreach B generate rank_A,b,a;
+ store C into ':OUTPATH:';
+ \,
+ 'verify_pig_script' => q\
+ A = LOAD ':INPATH:/singlefile/prerank'
using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ C = foreach A generate rankbdaa,b,a;
+ store C into ':OUTPATH:';
+ \,
+ }, {
+ 'num' =>3,
+ 'execonly' => 'mapred,tez',
+ 'pig' => q\
+ SET default_parallel 7;
+ A = LOAD ':INPATH:/singlefile/prerank'
using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ B = rank A by c ASC,b DESC;
+ C = foreach B generate rank_A,c,b;
+ store C into ':OUTPATH:';
+ \,
+ 'verify_pig_script' => q\
+ A = LOAD ':INPATH:/singlefile/prerank'
using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ C = foreach A generate rankcabd,c,b;
+ store C into ':OUTPATH:';
+ \,
+ }, {
+ 'num' => 4,
+ 'execonly' => 'mapred,tez',
+ 'pig' => q\
+ SET default_parallel 25;
+ A = LOAD ':INPATH:/singlefile/biggish'
using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
+ B = rank A;
+ C = order B by rank_A;
+ D = foreach C generate rank_A,rownumber;
+ store D into ':OUTPATH:';
+ \,
+ 'verify_pig_script' => q\
+ A = LOAD ':INPATH:/singlefile/biggish'
using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
+ D = foreach A generate idx,rownumber;
+ store D into ':OUTPATH:';
+ \,
+ }, {
+ 'num' => 5,
+ 'execonly' => 'mapred,tez',
+ 'pig' => q\
+ SET default_parallel 11;
+ SET pig.splitCombination false;
+ A = LOAD ':INPATH:/singlefile/biggish'
using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
+ B = LOAD ':INPATH:/singlefile/prerank'
using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ C = join A by rownumber, B by rownumber;
+ D = order C by
B::rankcabd,B::rankbdca,B::rankaaba;
+ E = rank D;
+ F = group E by rank_D;
+ G = foreach F generate group, COUNT(E);
+ H = order G by group;
+ store H into ':OUTPATH:';
+ \,
+ 'verify_pig_script' => q\
+ A = LOAD ':INPATH:/singlefile/prerank'
using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
+ B = foreach A generate rownumber,1;
+ C = order B by rownumber;
+ store C into ':OUTPATH:';
+ \,
+ }, {
+ 'num' => 6,
+ 'execonly' => 'mapred,tez',
+ 'pig' => q\
+ A = LOAD ':INPATH:/singlefile/prerank'
using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ split A into M if rownumber > 15, N if
rownumber < 25;
+ C = rank N;
+ D = foreach C generate $0, a, b, c;
+ store D into ':OUTPATH:';
+ \,
+ 'verify_pig_script' => q\
+ A = LOAD ':INPATH:/singlefile/prerank'
using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+ B = filter A by rownumber < 25;
+ D = foreach B generate rownumber, a, b, c;
+ store D into ':OUTPATH:';
+ \,
+ }
+ ]
+ },
+ {
+ 'name' => 'Rank_Dense',
+ 'tests' => [
+ {
'num' => 1,
'execonly' => 'mapred,tez',
'pig' => q\
@@ -5459,51 +5566,6 @@ store a into ':OUTPATH:';\,
'pig' => q\
SET
default_parallel 7;
A =
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
- B =
rank A;
- C =
foreach B generate rank_A,a,b,c;
- store C
into ':OUTPATH:';
- \,
- 'verify_pig_script' => q\
- A =
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
- C =
foreach A generate rownumber,a,b,c;
- store C
into ':OUTPATH:';
- \,
- }, {
- 'num' =>5,
- 'execonly' => 'mapred,tez',
- 'pig' => q\
- SET
default_parallel 9;
- A =
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
- B =
rank A by b DESC,a ASC;
- C =
foreach B generate rank_A,b,a;
- store C
into ':OUTPATH:';
- \,
- 'verify_pig_script' => q\
- A =
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
- C =
foreach A generate rankbdaa,b,a;
- store C
into ':OUTPATH:';
- \,
- }, {
- 'num' =>6,
- 'execonly' => 'mapred,tez',
- 'pig' => q\
- SET
default_parallel 7;
- A =
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
- B =
rank A by c ASC,b DESC;
- C =
foreach B generate rank_A,c,b;
- store C
into ':OUTPATH:';
- \,
- 'verify_pig_script' => q\
- A =
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
- C =
foreach A generate rankcabd,c,b;
- store C
into ':OUTPATH:';
- \,
- }, {
- 'num' => 7,
- 'execonly' => 'mapred,tez',
- 'pig' => q\
- SET
default_parallel 7;
- A =
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
B =
foreach A generate a,b,c,tail;
C =
rank B by a ASC,b ASC DENSE;
D =
rank C by a ASC,c DESC DENSE;
@@ -5517,7 +5579,7 @@ store a into ':OUTPATH:';\,
store B
into ':OUTPATH:';
\,
}, {
- 'num' => 8,
+ 'num' => 5,
'execonly' => 'mapred,tez',
'pig' => q\
SET
default_parallel 9;
@@ -5541,61 +5603,7 @@ store a into ':OUTPATH:';\,
H =
foreach G generate E::rankaaba, F::rankaacd, E::a, E::b, E::c;
store H
into ':OUTPATH:';
\,
- }, {
- 'num' => 9,
- 'execonly' => 'mapred,tez',
- 'pig' => q\
- SET
default_parallel 25;
- A =
LOAD ':INPATH:/singlefile/biggish' using PigStorage(',') as
(rownumber:long,idx:long,tail:bytearray);
- B =
rank A;
- C =
order B by rank_A;
- D =
foreach C generate rank_A,rownumber;
- store D
into ':OUTPATH:';
- \,
- 'verify_pig_script' => q\
- A =
LOAD ':INPATH:/singlefile/biggish' using PigStorage(',') as
(rownumber:long,idx:long,tail:bytearray);
- D =
foreach A generate idx,rownumber;
- store D
into ':OUTPATH:';
- \,
- }, {
- 'num' => 10,
- 'execonly' => 'mapred,tez',
- 'pig' => q\
- SET
default_parallel 11;
- SET
pig.splitCombination false;
- A =
LOAD ':INPATH:/singlefile/biggish' using PigStorage(',') as
(rownumber:long,idx:long,tail:bytearray);
- B =
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
- C =
join A by rownumber, B by rownumber;
- D =
order C by B::rankcabd,B::rankbdca,B::rankaaba;
- E =
rank D;
- F =
group E by rank_D;
- G =
foreach F generate group, COUNT(E);
- H =
order G by group;
- store H
into ':OUTPATH:';
- \,
- 'verify_pig_script' => q\
- A =
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as
(rownumber:long,idx:long,tail:bytearray);
- B =
foreach A generate rownumber,1;
- C =
order B by rownumber;
- store C
into ':OUTPATH:';
- \,
- }, {
- 'num' => 11,
- 'execonly' => 'mapred,tez',
- 'pig' => q\
- A =
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
- split A
into M if rownumber > 15, N if rownumber < 25;
- C =
rank N;
- D =
foreach C generate $0, a, b, c;
- store D
into ':OUTPATH:';
- \,
- 'verify_pig_script' => q\
- A =
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
- B =
filter A by rownumber < 25;
- D =
foreach B generate rownumber, a, b, c;
- store D
into ':OUTPATH:';
- \,
- }
+ }
]
}
],