Author: daijy
Date: Wed Nov  2 21:52:15 2016
New Revision: 1767791

URL: http://svn.apache.org/viewvc?rev=1767791&view=rev
Log:
PIG-5036: Remove biggish from e2e input dataset

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm
    pig/trunk/test/e2e/pig/tests/nightly.conf
    pig/trunk/test/e2e/pig/tools/generate/generate_data.pl

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1767791&r1=1767790&r2=1767791&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Nov  2 21:52:15 2016
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
  
 IMPROVEMENTS
 
+PIG-5036: Remove biggish from e2e input dataset (daijy)
+
 PIG-5053: Can't change HDFS user home in e2e tests using Ant (nkollar via daijy)
 
 PIG-5037: Add api getDisplayString to PigStats (zjffdu)

Modified: pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm?rev=1767791&r1=1767790&r2=1767791&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm (original)
+++ pig/trunk/test/e2e/pig/deployers/ExistingClusterDeployer.pm Wed Nov  2 21:52:15 2016
@@ -231,11 +231,6 @@ sub generateData
             'rows' => 5000,
             'hdfs' => "types/numbers.txt",
         }, {
-            'name' => "biggish",
-            'filetype' => "biggish",
-            'rows' => 1000000,
-            'hdfs' => "singlefile/biggish",
-        }, {
             'name' => "prerank",
             'filetype' => "ranking",
             'rows' => 30,

Modified: pig/trunk/test/e2e/pig/tests/nightly.conf
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/nightly.conf?rev=1767791&r1=1767790&r2=1767791&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/nightly.conf (original)
+++ pig/trunk/test/e2e/pig/tests/nightly.conf Wed Nov  2 21:52:15 2016
@@ -5657,13 +5657,15 @@ store a into ':OUTPATH:';\,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
                                     SET default_parallel 7;
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                    SET pig.splitCombination false;
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     B = rank A;
                                     C = foreach B generate rank_A,a,b,c;
                                     store C into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     C = foreach A generate rownumber,a,b,c;
                                     store C into ':OUTPATH:';
                                 \,
@@ -5672,13 +5674,15 @@ store a into ':OUTPATH:';\,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
                                     SET default_parallel 9;
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                    SET pig.splitCombination false;
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     B = rank A by b DESC,a ASC;
                                     C = foreach B generate rank_A,b,a;
                                     store C into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     C = foreach A generate rankbdaa,b,a;
                                     store C into ':OUTPATH:';
                                 \,
@@ -5687,13 +5691,15 @@ store a into ':OUTPATH:';\,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
                                     SET default_parallel 7;
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                    SET pig.splitCombination false;
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     B = rank A by c ASC,b DESC;
                                     C = foreach B generate rank_A,c,b;
                                     store C into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     C = foreach A generate rankcabd,c,b;
                                     store C into ':OUTPATH:';
                                 \,
@@ -5701,26 +5707,29 @@ store a into ':OUTPATH:';\,
                         'num' => 4,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
-                                    SET default_parallel 25;
-                                    A = LOAD ':INPATH:/singlefile/biggish' 
using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
+                                    SET default_parallel 5;
+                                    SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                    SET pig.splitCombination false;
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     B = rank A;
                                     C = order B by rank_A;
-                                    D = foreach C generate rank_A,rownumber;
+                                    D = foreach C generate rank_A,a,b,c;
                                     store D into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/biggish' 
using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
-                                    D = foreach A generate idx,rownumber;
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
+                                    D = foreach A generate rownumber,a,b,c;
                                     store D into ':OUTPATH:';
                                 \,
                     }, {
                         'num' => 5,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
-                                    SET default_parallel 11;
+                                    SET default_parallel 5;
+                                    SET 
mapreduce.input.fileinputformat.split.maxsize '300';
                                     SET pig.splitCombination false;
-                                    A = LOAD ':INPATH:/singlefile/biggish' 
using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
-                                    B = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
+                                    B = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     C = join A by rownumber, B by rownumber;
                                     D = order C by 
B::rankcabd,B::rankbdca,B::rankaaba;
                                     E = rank D;
@@ -5730,7 +5739,7 @@ store a into ':OUTPATH:';\,
                                     store H into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as (rownumber:long,idx:long);
                                     B = foreach A generate rownumber,1;
                                     C = order B by rownumber;
                                     store C into ':OUTPATH:';
@@ -5739,14 +5748,16 @@ store a into ':OUTPATH:';\,
                         'num' => 6,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                    SET pig.splitCombination false;
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     split A into M if rownumber > 15, N if 
rownumber < 25;
                                     C = rank N;
                                     D = foreach C generate $0, a, b, c;
                                     store D into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     B = filter A by rownumber < 25;
                                     D = foreach B generate rownumber, a, b, c;
                                     store D into ':OUTPATH:';
@@ -5761,14 +5772,16 @@ store a into ':OUTPATH:';\,
                                                'num' => 1,
                                                'execonly' => 'mapred,tez',
                                                'pig' => q\
+                                                                        SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                                                        SET 
pig.splitCombination false;
                                                                        SET 
default_parallel 9;
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        B = 
rank A by a ASC,b ASC DENSE;
                                                                        C = 
foreach B generate rank_A,a,b;
                                                                        store C 
into ':OUTPATH:';
                                                                \,
                                                'verify_pig_script' => q\
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        C = 
foreach A generate rankaaba,a,b;
                                                                        store C 
into ':OUTPATH:';
                                                                \,
@@ -5776,14 +5789,16 @@ store a into ':OUTPATH:';\,
                                                'num' => 2,
                                                'execonly' => 'mapred,tez',
                                                'pig' => q\
+                                                                        SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                                                        SET 
pig.splitCombination false;
                                                                        SET 
default_parallel 9;
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        B = 
rank A by a ASC,c DESC DENSE;
                                                                        C = 
foreach B generate rank_A,a,c;
                                                                        store C 
into ':OUTPATH:';
                                                                \,
                                                'verify_pig_script' => q\
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        C = 
foreach A generate rankaacd,a,c;
                                                                        store C 
into ':OUTPATH:';
                                                                \,
@@ -5791,14 +5806,16 @@ store a into ':OUTPATH:';\,
                                                'num' => 3,
                                                'execonly' => 'mapred,tez',
                                                'pig' => q\
+                                                                        SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                                                        SET 
pig.splitCombination false;
                                                                        SET 
default_parallel 7;
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        B = 
rank A by b DESC,c ASC DENSE;
                                                                        C = 
foreach B generate rank_A,b,c;
                                                                        store C 
into ':OUTPATH:';
                                                                \,
                                                'verify_pig_script' => q\
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        C = 
foreach A generate rankbdca,b,c;
                                                                        store C 
into ':OUTPATH:';
                                                                \,
@@ -5806,9 +5823,11 @@ store a into ':OUTPATH:';\,
                                                'num' => 4,
                                                'execonly' => 'mapred,tez',
                                                'pig' => q\
+                                                                        SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                                                        SET 
pig.splitCombination false;
                                                                        SET 
default_parallel 7;
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
-                                                                       B = 
foreach A generate a,b,c,tail;
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
+                                                                       B = 
foreach A generate a,b,c;
                                                                        C = 
rank B by a ASC,b ASC DENSE;
                                                                        D = 
rank C by a ASC,c DESC DENSE;
                                                                        E = 
rank D by b DESC,c ASC DENSE;
@@ -5816,7 +5835,7 @@ store a into ':OUTPATH:';\,
                                                                        store F 
into ':OUTPATH:';
                                                                \,
                                                'verify_pig_script' => q\
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        B = 
foreach A generate rankbdca,rankaacd,rankaaba,a,b,c;
                                                                        store B 
into ':OUTPATH:';
                                                                \,
@@ -5825,8 +5844,9 @@ store a into ':OUTPATH:';\,
                                                'execonly' => 'mapred,tez',
                                                'pig' => q\
                                                                        SET 
default_parallel 9;
+                                                                        SET 
mapreduce.input.fileinputformat.split.maxsize '300';
                                                                        SET 
pig.splitCombination false;
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        B = 
foreach A generate a,b,c;
                                                                        C = 
rank B by a ASC,b ASC DENSE;
                                                                        D = 
rank B by a ASC,c DESC DENSE;
@@ -5836,7 +5856,7 @@ store a into ':OUTPATH:';\,
                                                                        store H 
into ':OUTPATH:';
                                                                \,
                                                'verify_pig_script' => q\
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        C = 
foreach A generate rankaaba,a,b,c;
                                                                        E = 
order C by a ASC,b ASC;
                                                                        D = 
foreach A generate rankaacd,a,b,c;

Modified: pig/trunk/test/e2e/pig/tools/generate/generate_data.pl
URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tools/generate/generate_data.pl?rev=1767791&r1=1767790&r2=1767791&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tools/generate/generate_data.pl (original)
+++ pig/trunk/test/e2e/pig/tools/generate/generate_data.pl Wed Nov  2 21:52:15 2016
@@ -41,7 +41,6 @@ our @lastName = ("allen", "brown", "cars
 #      rankaacd: RANK BY a ASC , c DESC
 #      rankaaba: RANK BY a ASC , b ASC
 #      a,b,c:    values
-#      tail:     long value in order to create multiple mappers
 ############################################################################
 our @rankedTuples = (
        
"1,21,5,7,1,1,0,8,8","2,26,2,3,2,5,1,9,10","3,30,24,21,2,3,1,3,10","4,6,10,8,3,4,1,7,2",
@@ -501,22 +500,10 @@ sub getBulkCopyCmd(){
             my $randf = rand(10);
             printf HDFS "%d:%d:%d:%d:%d:%dL:%.2ff:%.2f\n", $tid, $i, $rand5, 
$rand100, $rand1000, $rand1000, $randf, $randf;
         }
-    }  elsif ($filetype eq "ranking") {
+    } elsif ($filetype eq "ranking") {
         for (my $i = 0; $i < $numRows; $i++) {
             my $tuple = $rankedTuples[int($i)];
-            printf HDFS "$tuple,";
-            for my $j ( 0 .. 1000000) {
-                               printf HDFS "%d",$j;
-                       }
-                       printf HDFS "\n";
-        }
-    } elsif ($filetype eq "biggish") {
-        for (my $i = 1; $i < $numRows; $i++) {
-            printf HDFS "$i,$i,";
-            for my $j ( 0 .. 1000) {
-                               printf HDFS "%d",$j;
-            }
-            printf HDFS "\n";
+            printf HDFS "$tuple\n";
         }
     } elsif ($filetype eq "utf8Student") {
         srand(3.14159 + $numRows);


Reply via email to