Modified: pig/branches/spark/test/e2e/pig/tests/nightly.conf URL: http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/tests/nightly.conf?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/e2e/pig/tests/nightly.conf (original) +++ pig/branches/spark/test/e2e/pig/tests/nightly.conf Thu Nov 27 12:49:54 2014 @@ -1414,6 +1414,7 @@ b = load ':INPATH:/singlefile/studentcol c = union a, b; d = order c by name PARALLEL 2; store d into ':OUTPATH:';\, + 'sortArgs' => ['-t', ' ', '-k', '1,1'], }, { 'num' => 5, @@ -1504,6 +1505,38 @@ e = load ':INPATH:/singlefile/votertab10 f = join d by group, e by name using 'replicated'; store f into ':OUTPATH:';\, }, + { ## Secondary Key + 'num' => 14, + 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age, gpa); +b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age, gpa); +c = group a by name; +d = foreach c { + sorted = order a by name,age; + lmt = limit sorted 1; + generate lmt as c1; +}; +e = foreach d generate flatten(c1) as (name:chararray, age, gpa); +f = group b by name; +g = foreach f { + sorted = order b by name,age; + lmt = limit sorted 1; + generate lmt as f1; +}; +h = foreach g generate flatten(f1) as (name:chararray, age, gpa); +i = union e, h; +j = order i by name parallel 1; +store j into ':OUTPATH:';\, + 'sortArgs' => ['-t', ' ', '-k', '1,1'], + }, + { + 'num' => 15, + 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa:float); +b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, age, gpa:float); +c = filter a by gpa >= 4; +d = cross a, c; +e = union b, d; +store e into ':OUTPATH:';\, + }, ] }, { @@ -1534,19 +1567,19 @@ store b into ':OUTPATH:';\, }, { 'num' => 3, - 'pig' => q\a = load ':INPATH:/singlefile/studenttab*' as (name, age, gpa); + 'pig' => q\a = load ':INPATH:/singlefile/voter*' as (name, 
age, registration, contributions); b = filter a by name == 'nick miller'; store b into ':OUTPATH:';\, }, { 'num' => 4, - 'pig' => q\a = load ':INPATH:/singlefile/studenttab???' as (name, age, gpa); + 'pig' => q\a = load ':INPATH:/singlefile/student???10k' as (name, age, registration, contributions); b = filter a by name == 'nick miller'; store b into ':OUTPATH:';\, }, { 'num' => 5, - 'pig' => q\a = load ':INPATH:/singlefile/studenttab[1-9]0[km]' as (name, age, gpa); + 'pig' => q\a = load ':INPATH:/singlefile/studentta[a-z][1-9]0[!m],:INPATH:/singlefile/voter{,null}tab10k' as (name, age); b = filter a by name == 'nick miller'; store b into ':OUTPATH:';\, }, @@ -3782,13 +3815,13 @@ register ':SCRIPTHOMEPATH:/ruby/scriptin a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:int, gpa:double); b = group a by name; c = foreach b generate group, myfuncs.Sum(a.age), myfuncs.Sum(a.gpa); -d = foreach c generate $0, $1, (double)((int)$2*100)/100; +d = foreach c generate $0, $1, (double)(ROUND($2*100))/100; store d into ':OUTPATH:';\, 'verify_pig_script' => q\ a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age:int, gpa:double); b = group a by name; c = foreach b generate group, SUM(a.age), SUM(a.gpa); -d = foreach c generate $0, $1, (double)((int)$2*100)/100; +d = foreach c generate $0, $1, (double)(ROUND($2*100))/100; store d into ':OUTPATH:';\, }, { @@ -4015,6 +4048,12 @@ a = load ':INPATH:/singlefile/studenttab b = native ':MAPREDJARS:/hadoop-examples.jar' Store a into 'table_testNativeMRJobSimple_input' Load 'table_testNativeMRJobSimple_output' `wordcount table_testNativeMRJobSimple_input table_testNativeMRJobSimple_output`; store b into ':OUTPATH:';\, 'notmq' => 1, + 'verify_pig_script' => q\ +rmf table_testNativeMRJobSimple_input +rmf table_testNativeMRJobSimple_output +a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa); +b = mapreduce ':MAPREDJARS:/hadoop-examples.jar' Store a into 
'table_testNativeMRJobSimple_input' Load 'table_testNativeMRJobSimple_output' `wordcount table_testNativeMRJobSimple_input table_testNativeMRJobSimple_output`; +store b into ':OUTPATH:';\, }, { # test complex @@ -4030,6 +4069,15 @@ e = order d by name; store e into ':OUTPATH:';\, 'sortArgs' => ['-t', ' '], 'notmq' => 1, + 'verify_pig_script' => q\ +rmf table_testNativeMRJobSimple_input +rmf table_testNativeMRJobSimple_output +a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa); +b = foreach a generate name; +c = distinct b; +d = mapreduce ':MAPREDJARS:/hadoop-examples.jar' Store c into 'table_testNativeMRJobSimple_input' Load 'table_testNativeMRJobSimple_output' as (name:chararray, count: int) `wordcount table_testNativeMRJobSimple_input table_testNativeMRJobSimple_output`; +e = order d by name; +store e into ':OUTPATH:';\, }, { # test streaming @@ -4044,7 +4092,7 @@ store b into ':OUTPATH:';\, rmf table_testNativeMRJobSimple_input rmf table_testNativeMRJobSimple_output a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa); -b = mapreduce ':MAPREDJARS:/hadoop-0.23.0-streaming.jar' Store a into 'table_testNativeMRJobSimple_input' Load 'table_testNativeMRJobSimple_output' as (name:chararray, count: int) `-input table_testNativeMRJobSimple_input -output table_testNativeMRJobSimple_output -mapper /bin/cat -reducer /usr/bin/wc`; +b = mapreduce ':MAPREDJARS:/hadoop-0.23.0-streaming.jar' Store a into 'table_testNativeMRJobSimple_input' Load 'table_testNativeMRJobSimple_output' as (name:chararray, count: int) `-input table_testNativeMRJobSimple_input -output table_testNativeMRJobSimple_output -mapper cat -reducer wc`; store b into ':OUTPATH:';\, 'notmq' => 1, }, @@ -4719,7 +4767,7 @@ store C into ':OUTPATH:';\, # Simplest merge-sparse-join. 
{ 'num' => 1, - 'pig' => q\register :PIGGYBANKPATH:/piggybank.jar + 'pig' => q\register :PIGGYBANKJAR: a = load ':INPATH:/singlefile/studenttab10k'; b = load ':INPATH:/singlefile/votertab10k'; c = order a by $0; @@ -4740,7 +4788,7 @@ store C into ':OUTPATH:';\, # Merge-sparse-join with left-side filter { 'num' => 2, - 'pig' => q\register :PIGGYBANKPATH:/piggybank.jar + 'pig' => q\register :PIGGYBANKJAR: a = load ':INPATH:/singlefile/studenttab10k'; b = load ':INPATH:/singlefile/votertab10k'; c = order a by $0; @@ -4763,7 +4811,7 @@ store C into ':OUTPATH:';\, # Merge-sparse-join with right-side filter { 'num' => 3, - 'pig' => q\register :PIGGYBANKPATH:/piggybank.jar + 'pig' => q\register :PIGGYBANKJAR: a = load ':INPATH:/singlefile/studenttab10k'; b = load ':INPATH:/singlefile/votertab10k'; c = order a by $0; @@ -4787,7 +4835,7 @@ store C into ':OUTPATH:';\, # Merge-sparse-join with key as expression { 'num' => 4, - 'pig' => q\register :PIGGYBANKPATH:/piggybank.jar + 'pig' => q\register :PIGGYBANKJAR: a = load ':INPATH:/singlefile/studenttab10k'; b = load ':INPATH:/singlefile/votertab10k'; c = order a by $0,$1; @@ -4808,7 +4856,7 @@ store C into ':OUTPATH:';\, # Merge-sparse-join with nulls in keys and data. { 'num' => 5, - 'pig' => q\register :PIGGYBANKPATH:/piggybank.jar + 'pig' => q\register :PIGGYBANKJAR: a = load ':INPATH:/singlefile/studentnulltab10k'; b = load ':INPATH:/singlefile/voternulltab10k'; c = order a by $0; @@ -4829,7 +4877,7 @@ store C into ':OUTPATH:';\, # Merge-sparse-join with join on numeric key { 'num' => 6, - 'pig' => q\register :PIGGYBANKPATH:/piggybank.jar + 'pig' => q\register :PIGGYBANKJAR: a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); b = load ':INPATH:/singlefile/votertab10k'as (name:chararray, age:int, reg:chararray, contrib:float); c = order a by age;
Modified: pig/branches/spark/test/e2e/pig/tests/orc.conf URL: http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/tests/orc.conf?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/e2e/pig/tests/orc.conf (original) +++ pig/branches/spark/test/e2e/pig/tests/orc.conf Thu Nov 27 12:49:54 2014 @@ -14,16 +14,10 @@ $cfg = { 'num' => 1, 'notmq' => 1, 'pig' => q\ -register :HIVELIBDIR:/hive-common-:HIVEVERSION:.jar; -register :HIVELIBDIR:/hive-serde-:HIVEVERSION:.jar; -register :HIVELIBDIR:/hive-exec-:HIVEVERSION:-core.jar; -register :HIVELIBDIR:/hive-shims-common-:HIVEVERSION:.jar; -register :HIVELIBDIR:/hive-shims-common-secure-:HIVEVERSION:.jar; -register :HIVELIBDIR:/hive-shims-:HIVESHIMSVERSION:-:HIVEVERSION:.jar; a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); store a into ':OUTPATH:.intermediate' using OrcStorage(); +exec b = load ':OUTPATH:.intermediate' using OrcStorage(); -describe b; c = filter b by age < 30; store c into ':OUTPATH:';\, 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); @@ -35,14 +29,9 @@ store b into ':OUTPATH:';\, 'num' => 2, 'notmq' => 1, 'pig' => q\ -register :HIVELIBDIR:/hive-common-:HIVEVERSION:.jar; -register :HIVELIBDIR:/hive-serde-:HIVEVERSION:.jar; -register :HIVELIBDIR:/hive-exec-:HIVEVERSION:-core.jar; -register :HIVELIBDIR:/hive-shims-common-:HIVEVERSION:.jar; -register :HIVELIBDIR:/hive-shims-common-secure-:HIVEVERSION:.jar; -register :HIVELIBDIR:/hive-shims-:HIVESHIMSVERSION:-:HIVEVERSION:.jar; a = load ':INPATH:/singlefile/studentcomplextab10k' as (nameagegpamap:map[], nameagegpatuple:tuple(tname:chararray, tage:int, tgpa:float), nameagegpabag:bag{t:tuple(bname:chararray, bage:int, bgpa:float)}); store a into ':OUTPATH:.intermediate' using OrcStorage(); +exec b = load ':OUTPATH:.intermediate' using OrcStorage(); store b into 
':OUTPATH:';\, 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentcomplextab10k' as (nameagegpamap:map[], nameagegpatuple:tuple(tname:chararray, tage:int, tgpa:float), nameagegpabag:bag{t:tuple(bname:chararray, bage:int, bgpa:float)}); @@ -53,20 +42,15 @@ store a into ':OUTPATH:';\, { 'num' => 3, 'notmq' => 1, - 'pig' => q\register :HIVELIBDIR:/hive-common-:HIVEVERSION:.jar; -register :HIVELIBDIR:/hive-serde-:HIVEVERSION:.jar; -register :HIVELIBDIR:/hive-exec-:HIVEVERSION:-core.jar; -register :HIVELIBDIR:/hive-shims-common-:HIVEVERSION:.jar; -register :HIVELIBDIR:/hive-shims-common-secure-:HIVEVERSION:.jar; -register :HIVELIBDIR:/hive-shims-:HIVESHIMSVERSION:-:HIVEVERSION:.jar; + 'pig' => q\ a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:float); store a into ':OUTPATH:.simple.intermediate' using OrcStorage(); +exec b = load ':INPATH:/singlefile/studentcomplextab10k' as (nameagegpamap:map[], nameagegpatuple:tuple(tname:chararray, tage:int, tgpa:float), nameagegpabag:bag{t:tuple(bname:chararray, bage:int, bgpa:float)}, nameagegpamap_name:chararray, nameagegpamap_age:int, nameagegpamap_gpa:float); store b into ':OUTPATH:.complex.intermediate' using OrcStorage(); +exec c = load ':OUTPATH:.simple.intermediate' using OrcStorage(); -describe c; d = load ':OUTPATH:.complex.intermediate' using OrcStorage(); -describe d; e = foreach c generate name, age, gpa; f = foreach d generate nameagegpamap#'name' as name, nameagegpamap#'age' as age, nameagegpamap#'gpa' as gpa, nameagegpatuple.tage as tage, FLATTEN(nameagegpabag) as (bname, bage, bgpa); g = join e by name, f by name; @@ -89,21 +73,121 @@ store h into ':OUTPATH:';\, 'num' => 4, 'notmq' => 1, 'pig' => q\ -register :HIVELIBDIR:/hive-common-:HIVEVERSION:.jar; -register :HIVELIBDIR:/hive-serde-:HIVEVERSION:.jar; -register :HIVELIBDIR:/hive-exec-:HIVEVERSION:-core.jar; -register :HIVELIBDIR:/hive-shims-common-:HIVEVERSION:.jar; -register 
:HIVELIBDIR:/hive-shims-common-secure-:HIVEVERSION:.jar; -register :HIVELIBDIR:/hive-shims-:HIVESHIMSVERSION:-:HIVEVERSION:.jar; a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); store a into ':OUTPATH:.orc_params.intermediate' using OrcStorage('-c ZLIB -s 67108864 -r 100000 -b 1048576 -p true -v 0.12'); +exec b = load ':OUTPATH:.orc_params.intermediate' using OrcStorage(); store b into ':OUTPATH:';\, 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); store a into ':OUTPATH:';\, + }, +# Tests 5 : Test loading null map key + { + 'num' => 5, + 'notmq' => 1, + 'pig' => q\ +a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age); +b = foreach a generate TOMAP(name, age) as m; +store b into ':OUTPATH:.intermediate' using OrcStorage(); +exec +c = load ':OUTPATH:.intermediate' using OrcStorage(); +store c into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age); +b = foreach a generate (name is null ? 
[] : TOMAP(name, age)); +store b into ':OUTPATH:';\, } ] - } + }, + { + 'name' => 'Orc_Pushdown', + 'tests' => [ +# Test 1: Load (primitive) from PigStorage and store into OrcStorage +# Also tests multiple load stores in same script + { + 'num' => 1, + 'notmq' => 1, + 'pig' => q\ +a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); +b = order a by name parallel 4; +store b into ':OUTPATH:.intermediate' using OrcStorage(); +exec +b = load ':OUTPATH:.intermediate' using OrcStorage(); +c = filter b by name < 'david falkner'; +store c into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); +b = filter a by name < 'david falkner'; +store b into ':OUTPATH:';\, + }, + { + 'num' => 2, + 'notmq' => 1, + 'execonly' => 'mapred,tez', # studenttab20m not available in local mode + 'pig' => q\ +a = load ':INPATH:/singlefile/studenttab20m' as (name:chararray, age:int, gpa:float); +b = order a by age desc parallel 4; +store b into ':OUTPATH:.intermediate' using OrcStorage('-s 10000000'); +exec +b = load ':OUTPATH:.intermediate' using OrcStorage(); +c = filter b by age <= 22; +store c into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab20m' as (name:chararray, age:int, gpa:float); +b = filter a by age <= 22; +store b into ':OUTPATH:';\, + }, + { + 'num' => 3, + 'notmq' => 1, + 'pig' => q\ +a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); +b = order a by gpa parallel 4; +store b into ':OUTPATH:.intermediate' using OrcStorage(); +exec +b = load ':OUTPATH:.intermediate' using OrcStorage(); +c = filter b by gpa >= 3.2 and gpa < 3.5 and age > 30 + 2; +store c into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float); +b = filter a by gpa >= 3.2 and gpa < 3.5 and age > 30 + 2; +store b into ':OUTPATH:';\, + }, + { + 'num' => 4, + 
'notmq' => 1, + 'pig' => q\ +a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:bigdecimal); +b = order a by gpa parallel 4; +store b into ':OUTPATH:.intermediate' using OrcStorage(); +exec +b = load ':OUTPATH:.intermediate' using OrcStorage(); +c = filter b by gpa >= 3.5; +store c into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:bigdecimal); +b = filter a by gpa >= 3.5; +store b into ':OUTPATH:';\, + 'floatpostprocess' => 1, + 'delimiter' => ' ', + }, + { + 'num' => 5, + 'notmq' => 1, + 'pig' => q\ +a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double); +b = foreach a generate name, age, gpa, (age>35 ? ToDate('20100101', 'yyyyMMdd', 'UTC') : ToDate('20100105', 'yyyyMMdd', 'UTC')) as d; +c = order b by d parallel 4; +store c into ':OUTPATH:.intermediate' using OrcStorage(); +exec +b = load ':OUTPATH:.intermediate' using OrcStorage(); +c = filter b by d >= ToDate('20100103', 'yyyyMMdd', 'UTC'); +d = foreach c generate name, age, gpa; +store d into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:bigdecimal); +b = filter a by age<=35; +store b into ':OUTPATH:';\, + 'floatpostprocess' => 1, + 'delimiter' => ' ', + }, + ] + }, + ] }; Modified: pig/branches/spark/test/e2e/pig/tests/streaming.conf URL: http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/tests/streaming.conf?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/e2e/pig/tests/streaming.conf (original) +++ pig/branches/spark/test/e2e/pig/tests/streaming.conf Thu Nov 27 12:49:54 2014 @@ -407,6 +407,15 @@ store B into ':OUTPATH:.intermediate'; C = stream B through CMD2 as (name, age, gpa); D = JOIN B by name, C by name; store D into ':OUTPATH:';#, + 'pig_win' => q# +define CMD1 `perl -ne "print $_;"`; 
+define CMD2 `perl -ne "print $_;"`; +A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); +B = stream A through CMD1 as (name, age, gpa); +store B into ':OUTPATH:.intermediate'; +C = stream B through CMD2 as (name, age, gpa); +D = JOIN B by name, C by name; +store D into ':OUTPATH:';#, 'notmq' => 1, 'sql' => "select A.name, A.age, A.gpa, B.name, B.age, B.gpa from studenttab10k as A join studenttab10k as B using(name);", }, @@ -422,6 +431,14 @@ B = stream A through CMD1; C = stream B through CMD1; D = stream C through CMD2; store D into ':OUTPATH:';#, + 'pig_win' => q# +define CMD1 `perl -ne "print $_;print STDERR "stderr $_";"`; +define CMD2 `Split.pl 3` input(stdin using PigStreaming(',')) ship(':SCRIPTHOMEPATH:/Split.pl'); +A = load ':INPATH:/singlefile//studenttab10k'; +B = stream A through CMD1; +C = stream B through CMD1; +D = stream C through CMD2; +store D into ':OUTPATH:';#, 'sql' => "select name, age, gpa from studenttab10k;", }, { @@ -438,6 +455,16 @@ C = stream B through CMD1; D = stream C through CMD2; E = JOIN B by $0, D by $0; store E into ':OUTPATH:';#, + 'pig_win' => q# +define CMD1 `perl -ne "print $_;"`; +define CMD2 `Split.pl 3` input(stdin using PigStreaming(',')) ship(':SCRIPTHOMEPATH:/Split.pl'); +A = load ':INPATH:/singlefile/studenttab10k'; +B = stream A through CMD1; +store B into ':OUTPATH:.intermediate'; +C = stream B through CMD1; +D = stream C through CMD2; +E = JOIN B by $0, D by $0; +store E into ':OUTPATH:';#, 'notmq' => 1, 'sql' => "select A.name, A.age, A.gpa, B.name, B.age, B.gpa from studenttab10k as A join studenttab10k as B using(name);", }, Modified: pig/branches/spark/test/e2e/pig/udfs/java/build.xml URL: http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/udfs/java/build.xml?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/e2e/pig/udfs/java/build.xml (original) +++ 
pig/branches/spark/test/e2e/pig/udfs/java/build.xml Thu Nov 27 12:49:54 2014 @@ -21,17 +21,26 @@ <property name="pig.jarfile" value="${lib.dir}/pig.jar" /> --> <property name="udf.src.dir" value="${basedir}/org/" /> - <path id="udf-classpath"> <!-- <fileset dir="lib"> <include name="**/*.jar"/> </fileset>--> - <fileset dir="${pig.dir}/build/ivy/lib/Pig"> + <fileset dir="${pig.base.dir}/build/ivy/lib/Pig" erroronmissingdir="false"> <include name="*.jar"/> </fileset> <fileset dir="${pig.dir}"> <include name="pig*-core-*.jar"/> </fileset> + <fileset dir="${hadoop.common.dir}" erroronmissingdir="false"> + <include name="hadoop-common*.jar"/> + </fileset> + <fileset dir="${hadoop.common.lib.dir}" erroronmissingdir="false"> + <include name="commons-codec*.jar"/> + <include name="commons-logging*.jar"/> + </fileset> + <fileset dir="${hadoop.mapreduce.dir}" erroronmissingdir="false"> + <include name="hadoop-mapreduce-client-core*.jar"/> + </fileset> </path> <target name="init"> Modified: pig/branches/spark/test/excluded-tests-20 URL: http://svn.apache.org/viewvc/pig/branches/spark/test/excluded-tests-20?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/excluded-tests-20 (original) +++ pig/branches/spark/test/excluded-tests-20 Thu Nov 27 12:49:54 2014 @@ -5,3 +5,4 @@ **/TestTezAutoParallelism.java **/TestJobSubmissionTez.java **/TestGroupConstParallelTez.java +**/TestLoaderStorerShipCacheFilesTez.java Modified: pig/branches/spark/test/org/apache/pig/TestLoadStoreFuncLifeCycle.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/TestLoadStoreFuncLifeCycle.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/TestLoadStoreFuncLifeCycle.java (original) +++ pig/branches/spark/test/org/apache/pig/TestLoadStoreFuncLifeCycle.java Thu Nov 27 
12:49:54 2014 @@ -351,7 +351,7 @@ public class TestLoadStoreFuncLifeCycle // result, the number of StoreFunc instances is greater by 1 in // Hadoop-2.0.x. assertTrue("storer instanciation count increasing: " + Storer.count, - Storer.count <= (Util.isHadoop2_0() ? 5 : 4)); + Storer.count <= (org.apache.pig.impl.util.Utils.isHadoop2() ? 5 : 4)); } Modified: pig/branches/spark/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloPigCluster.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloPigCluster.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloPigCluster.java (original) +++ pig/branches/spark/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloPigCluster.java Thu Nov 27 12:49:54 2014 @@ -93,6 +93,7 @@ public class TestAccumuloPigCluster { @AfterClass public static void stopClusters() throws Exception { accumuloCluster.stop(); + Thread.sleep(5000); FileUtils.deleteDirectory(tmpdir); } Modified: pig/branches/spark/test/org/apache/pig/builtin/TestAvroStorage.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/builtin/TestAvroStorage.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/builtin/TestAvroStorage.java (original) +++ pig/branches/spark/test/org/apache/pig/builtin/TestAvroStorage.java Thu Nov 27 12:49:54 2014 @@ -94,6 +94,7 @@ public class TestAvroStorage { "recordsAsOutputByPigWithDates", "records", "recordsOfArrays", + "recordsOfStringArrays", "recordsOfArraysOfRecords", "recordsSubSchema", "recordsSubSchemaNullable", @@ -323,6 +324,18 @@ public class TestAvroStorage { return outbasedir + st[index].getMethodName(); } + @Test public void testLoadRecordsOfStringArrays() throws Exception { + 
final String input = basedir + "data/avro/uncompressed/recordsOfStringArrays.avro"; + final String check = input; + testAvroStorage(true, basedir + "code/pig/dump.pig", + ImmutableMap.of( + "INFILE", input, + "AVROSTORAGE_OUT_2", "-f " + basedir + "schema/recordsOfStringArrays.avsc", + "OUTFILE", createOutputName()) + ); + } + + @Test public void testLoadRecords() throws Exception { final String input = basedir + "data/avro/uncompressed/records.avro"; final String check = basedir + "data/avro/uncompressed/recordsAsOutputByPig.avro"; Modified: pig/branches/spark/test/org/apache/pig/builtin/TestOrcStorage.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/builtin/TestOrcStorage.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/builtin/TestOrcStorage.java (original) +++ pig/branches/spark/test/org/apache/pig/builtin/TestOrcStorage.java Thu Nov 27 12:49:54 2014 @@ -57,6 +57,7 @@ import org.apache.pig.backend.executione import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil; import org.apache.pig.data.BinSedesTuple; import org.apache.pig.data.DataByteArray; +import org.apache.pig.data.DataType; import org.apache.pig.data.DefaultDataBag; import org.apache.pig.data.Tuple; import org.apache.pig.impl.logicalLayer.schema.Schema; @@ -153,6 +154,38 @@ public class TestOrcStorage { } @Test + // See PIG-4195 + public void testCharVarchar() throws Exception { + pigServer.registerQuery("A = load '" + basedir + "charvarchar.orc'" + " using OrcStorage();" ); + Schema schema = pigServer.dumpSchema("A"); + assertEquals(schema.size(), 4); + assertEquals(schema.getField(0).type, DataType.CHARARRAY); + assertEquals(schema.getField(1).type, DataType.CHARARRAY); + Iterator<Tuple> iter = pigServer.openIterator("A"); + int count=0; + Tuple t=null; + while (iter.hasNext()) { + t = iter.next(); + assertEquals(t.size(), 4); + 
assertTrue(t.get(0) instanceof String); + assertTrue(t.get(1) instanceof String); + assertEquals(((String)t.get(1)).length(), 20); + count++; + } + assertEquals(count, 10000); + } + + @Test + // See PIG-4218 + public void testNullMapKey() throws Exception { + pigServer.registerQuery("A = load '" + basedir + "nullmapkey.orc'" + " using OrcStorage();" ); + Iterator<Tuple> iter = pigServer.openIterator("A"); + assertEquals(iter.next().toString(), "([hello#world])"); + assertEquals(iter.next().toString(), "([])"); + assertFalse(iter.hasNext()); + } + + @Test public void testSimpleStore() throws Exception { pigServer.registerQuery("A = load '" + INPUT1 + "' as (a0:int, a1:chararray);"); pigServer.store("A", OUTPUT1, "OrcStorage"); @@ -197,6 +230,7 @@ public class TestOrcStorage { assertEquals(t.get(1), "hello"); assertFalse(iter.hasNext()); + rows.close(); } @Test @@ -315,6 +349,8 @@ public class TestOrcStorage { assertEquals(expectedRows, actualRows); assertEquals(expectedTotalRows, actualRows); + readerExpected.close(); + readerActual.close(); } @SuppressWarnings("rawtypes") Modified: pig/branches/spark/test/org/apache/pig/builtin/TestOrcStoragePushdown.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/builtin/TestOrcStoragePushdown.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/builtin/TestOrcStoragePushdown.java (original) +++ pig/branches/spark/test/org/apache/pig/builtin/TestOrcStoragePushdown.java Thu Nov 27 12:49:54 2014 @@ -22,14 +22,18 @@ import static org.junit.Assert.assertTru import java.io.BufferedWriter; import java.io.File; +import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; +import java.io.OutputStreamWriter; +import java.math.BigDecimal; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.List; import 
org.apache.commons.lang.RandomStringUtils; +import org.apache.commons.lang.math.RandomUtils; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.log4j.FileAppender; import org.apache.log4j.Level; @@ -51,6 +55,8 @@ import org.apache.pig.newplan.logical.ru import org.apache.pig.test.MiniGenericCluster; import org.apache.pig.test.Util; import org.apache.pig.tools.pigstats.JobStats; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; import org.junit.After; import org.junit.AfterClass; import org.junit.Assert; @@ -68,8 +74,8 @@ public class TestOrcStoragePushdown { private OrcStorage orcStorage; private static final String basedir = "test/org/apache/pig/builtin/orc/"; - private static final String inpbasedir = System.getProperty("user.dir") + "/build/test/TestOrcStorage_in/"; - private static final String outbasedir = System.getProperty("user.dir") + "/build/test/TestOrcStorage_out/"; + private static final String inpbasedir = "build/test/TestOrcStorage_in/"; + private static final String outbasedir = "build/test/TestOrcStorage_out/"; private static String INPUT = inpbasedir + "TestOrcStorage_1"; private static String OUTPUT1 = outbasedir + "TestOrcStorage_1"; private static String OUTPUT2 = outbasedir + "TestOrcStorage_2"; @@ -82,6 +88,7 @@ public class TestOrcStoragePushdown { public static void oneTimeSetup() throws Exception{ cluster = MiniGenericCluster.buildCluster(); Util.copyFromLocalToCluster(cluster, basedir + "orc-file-11-format.orc", basedir + "orc-file-11-format.orc"); + Util.copyFromLocalToCluster(cluster, basedir + "charvarchar.orc", basedir + "charvarchar.orc"); createInputData(); if(Util.WINDOWS){ @@ -120,31 +127,47 @@ public class TestOrcStoragePushdown { new File(inpbasedir).mkdirs(); new File(outbasedir).mkdirs(); String inputTxtFile = inpbasedir + File.separator + "input.txt"; - BufferedWriter bw = new BufferedWriter(new FileWriter(inputTxtFile)); + BufferedWriter bw = new BufferedWriter(new 
OutputStreamWriter(new FileOutputStream(inputTxtFile), "UTF-8")); long[] lVal = new long[] {100L, 200L, 300L}; float[] fVal = new float[] {50.0f, 100.0f, 200.0f, 300.0f}; double[] dVal = new double[] {1000.11, 2000.22, 3000.33}; StringBuilder sb = new StringBuilder(); for (int i=1; i <= 10000; i++) { - sb.append((i > 900 && i < 1100) ? true : false).append("\t"); //boolean + sb.append((i > 6500 && i <= 9000) ? true : false).append("\t"); //boolean sb.append((i > 1000 && i < 3000) ? 1 : 5).append("\t"); //byte sb.append((i > 2500 && i <= 4500) ? 100 : 200).append("\t"); //short sb.append(i).append("\t"); //int sb.append(lVal[i%3]).append("\t"); //long sb.append(fVal[i%4]).append("\t"); //float sb.append((i > 2500 && i < 3500) ? dVal[i%3] : dVal[i%1]).append("\t"); //double - sb.append((i%2 == 1 ? "" : RandomStringUtils.random(100))).append("\t"); //bytearray - sb.append((i%2 == 0 ? "" : RandomStringUtils.random(100))).append("\n"); //string - //sb.append("").append("\t"); //datetime - //sb.append("").append("\n"); //bigdecimal + sb.append((i%2 == 1 ? "" : RandomStringUtils.random(100).replaceAll("\t", " ") + .replaceAll("\n", " ").replaceAll("\r", " "))).append("\t"); //bytearray + sb.append((i%2 == 0 ? "" : RandomStringUtils.random(100).replaceAll("\t", " ") + .replaceAll("\n", " ").replaceAll("\r", " "))).append("\t"); //string + int year; + if (i > 5000 && i <= 8000) { //datetime + year = RandomUtils.nextInt(4)+2010; + } else { + year = RandomUtils.nextInt(10)+2000; + } + sb.append(new DateTime(year, RandomUtils.nextInt(12)+1, + RandomUtils.nextInt(28)+1, RandomUtils.nextInt(24), RandomUtils.nextInt(60), + DateTimeZone.UTC).toString()).append("\t"); // datetime + String bigString; + if (i>7500) { + bigString = RandomStringUtils.randomNumeric(9) + "." + RandomStringUtils.randomNumeric(5); + } else { + bigString = "1" + RandomStringUtils.randomNumeric(9) + "." 
+ RandomStringUtils.randomNumeric(5); + } + sb.append(new BigDecimal(bigString)).append("\n"); //bigdecimal bw.write(sb.toString()); sb.setLength(0); } bw.close(); // Store only 1000 rows in each row block (MIN_ROW_INDEX_STRIDE is 1000. So can't use less than that) - pigServer.registerQuery("A = load '" + inputTxtFile + "' as (f1:boolean, f2:int, f3:int, f4:int, f5:long, f6:float, f7:double, f8:bytearray, f9:chararray);");//, f10:datetime, f11:bigdecimal);"); - pigServer.registerQuery("store A into '" + INPUT +"' using OrcStorage('-r 1000');"); + pigServer.registerQuery("A = load '" + Util.generateURI(inputTxtFile, pigServer.getPigContext()) + "' as (f1:boolean, f2:int, f3:int, f4:int, f5:long, f6:float, f7:double, f8:bytearray, f9:chararray, f10:datetime, f11:bigdecimal);"); + pigServer.registerQuery("store A into '" + Util.generateURI(INPUT, pigServer.getPigContext()) +"' using OrcStorage('-r 1000 -s 100000');"); Util.copyFromLocalToCluster(cluster, INPUT, INPUT); } @@ -290,35 +313,34 @@ public class TestOrcStoragePushdown { "expr = leaf-0", sarg.toString()); } - //@Test + @Test public void testPredicatePushdownBoolean() throws Exception { - testPredicatePushdownLocal("f1 == true", 10); + testPredicatePushdown(INPUT, "f1 == true", 2500, 1200000); } @Test public void testPredicatePushdownByteShort() throws Exception { - //TODO: BytesWithoutPushdown was 2373190 and bytesWithPushdown was 1929669 - // Expected to see more difference only when 3 out of 10 blocks are read. Other tests too. - // Investigate why. 
- testPredicatePushdown("f2 != 5 or f3 == 100", 3500, 400000); + testPredicatePushdown(INPUT, "f2 != 5 or f3 == 100", 3500, 1200000); } @Test public void testPredicatePushdownIntLongString() throws Exception { - testPredicatePushdown("f4 >= 980 and f4 < 1010 and (f5 == 100 or f9 is not null)", 20, 800000); + testPredicatePushdown(INPUT, "f4 >= 980 and f4 < 1010 and (f5 == 100 or f9 is not null)", 20, 1200000); } @Test public void testPredicatePushdownFloatDouble() throws Exception { - testPredicatePushdown("f6 == 100.0 and f7 > 2000.00000001", 167, 800000); + testPredicatePushdown(INPUT, "f6 == 100.0 and f7 > 2000.00000001", 167, 1600000); } - //@Test + @Test public void testPredicatePushdownBigDecimal() throws Exception { + testPredicatePushdown(INPUT, "f11 < (bigdecimal)'1000000000';", 2500, 1600000); } - //@Test + @Test public void testPredicatePushdownTimestamp() throws Exception { + testPredicatePushdown(INPUT, "f10 >= ToDate('20100101', 'yyyyMMdd', 'UTC')", 3000, 400000); } private Expression getExpressionForTest(String query, List<String> predicateCols) throws Exception { @@ -350,7 +372,7 @@ public class TestOrcStoragePushdown { Util.checkQueryOutputs(pigServer_disabledRule.openIterator("C"), pigServer.openIterator("E"), expectedRows); } - private void testPredicatePushdown(String filterStmt, int expectedRows, int expectedBytesReadDiff) throws IOException { + private void testPredicatePushdown(String inputFile, String filterStmt, int expectedRows, int expectedBytesReadDiff) throws IOException { Util.resetStateForExecModeSwitch(); // Minicluster is required to get hdfs bytes read counter value @@ -362,7 +384,7 @@ public class TestOrcStoragePushdown { disabledOptimizerRules.add("PredicatePushdownOptimizer"); pigServer_disabledRule.getPigContext().getProperties().setProperty(PigImplConstants.PIG_OPTIMIZER_RULES_KEY, ObjectSerializer.serialize(disabledOptimizerRules)); - pigServer_disabledRule.registerQuery("B = load '" + INPUT + "' using OrcStorage();"); + 
pigServer_disabledRule.registerQuery("B = load '" + inputFile + "' using OrcStorage();"); pigServer_disabledRule.registerQuery("C = filter B by " + filterStmt + ";"); ExecJob job = pigServer_disabledRule.store("C", OUTPUT3); //Util.copyFromClusterToLocal(cluster, OUTPUT3 + "/part-m-00000", OUTPUT3); @@ -371,7 +393,7 @@ public class TestOrcStoragePushdown { long bytesWithoutPushdown = stats.getHdfsBytesRead(); // Test with PredicatePushdownOptimizer enabled. Only 2 blocks should be read - pigServer.registerQuery("D = load '" + INPUT + "' using OrcStorage();"); + pigServer.registerQuery("D = load '" + inputFile + "' using OrcStorage();"); pigServer.registerQuery("E = filter D by " + filterStmt + ";"); job = pigServer.store("E", OUTPUT4); //Util.copyFromClusterToLocal(cluster, OUTPUT4 + "/part-m-00000", OUTPUT4); @@ -390,6 +412,13 @@ public class TestOrcStoragePushdown { } + @Test + public void testPredicatePushdownChar() throws Exception { + testPredicatePushdown(basedir + "charvarchar.orc", "$0 == 'ulysses thompson'", 18, 18000); + } - + @Test + public void testPredicatePushdownVarchar() throws Exception { + testPredicatePushdown(basedir + "charvarchar.orc", "$1 == 'alice allen '", 19, 18000); + } } Modified: pig/branches/spark/test/org/apache/pig/data/TestSchemaTuple.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/data/TestSchemaTuple.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/data/TestSchemaTuple.java (original) +++ pig/branches/spark/test/org/apache/pig/data/TestSchemaTuple.java Thu Nov 27 12:49:54 2014 @@ -78,7 +78,7 @@ public class TestSchemaTuple { @Before public void perTestInitialize() { props = new Properties(); - props.setProperty(PigConfiguration.SHOULD_USE_SCHEMA_TUPLE, "true"); + props.setProperty(PigConfiguration.PIG_SCHEMA_TUPLE_ENABLED, "true"); conf = ConfigurationUtil.toConfiguration(props); 
Modified: pig/branches/spark/test/org/apache/pig/impl/builtin/TestStreamingUDF.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/impl/builtin/TestStreamingUDF.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/impl/builtin/TestStreamingUDF.java (original) +++ pig/branches/spark/test/org/apache/pig/impl/builtin/TestStreamingUDF.java Thu Nov 27 12:49:54 2014 @@ -35,7 +35,9 @@ import org.apache.pig.data.TupleFactory; import org.apache.pig.test.MiniGenericCluster; import org.apache.pig.test.Util; import org.joda.time.DateTime; +import org.junit.AfterClass; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; public class TestStreamingUDF { @@ -43,7 +45,17 @@ public class TestStreamingUDF { private static PigServer pigServerMapReduce = null; private TupleFactory tf = TupleFactory.getInstance(); - private static MiniGenericCluster cluster = MiniGenericCluster.buildCluster(); + private static MiniGenericCluster cluster; + + @BeforeClass + public static void oneTimeSetup() { + cluster = MiniGenericCluster.buildCluster(); + } + + @AfterClass + public static void oneTimeTearDown() throws Exception { + cluster.shutDown(); + } @Before public void setUp() throws Exception { Modified: pig/branches/spark/test/org/apache/pig/parser/TestQueryParserUtils.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/parser/TestQueryParserUtils.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/parser/TestQueryParserUtils.java (original) +++ pig/branches/spark/test/org/apache/pig/parser/TestQueryParserUtils.java Thu Nov 27 12:49:54 2014 @@ -72,7 +72,7 @@ public class TestQueryParserUtils { QueryParserUtils.setHdfsServers("hello://nn1/tmp", pc); assertEquals(null, 
props.getProperty(MRConfiguration.JOB_HDFS_SERVERS)); - if(Util.isHadoop23() || Util.isHadoop2_0()) { + if(org.apache.pig.impl.util.Utils.isHadoop23() || org.apache.pig.impl.util.Utils.isHadoop2()) { // webhdfs props.remove(MRConfiguration.JOB_HDFS_SERVERS); QueryParserUtils.setHdfsServers("webhdfs://nn1/tmp", pc); Modified: pig/branches/spark/test/org/apache/pig/test/TestAccumulator.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestAccumulator.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestAccumulator.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestAccumulator.java Thu Nov 27 12:49:54 2014 @@ -77,7 +77,7 @@ public class TestAccumulator { public void setUp() throws Exception { Util.resetStateForExecModeSwitch(); // Drop stale configuration from previous test run - properties.remove(PigConfiguration.OPT_ACCUMULATOR); + properties.remove(PigConfiguration.PIG_OPT_ACCUMULATOR); pigServer = new PigServer(cluster.getExecType(), properties); } @@ -632,7 +632,7 @@ public class TestAccumulator { @Test public void testAccumulatorOff() throws IOException{ pigServer.getPigContext().getProperties().setProperty( - PigConfiguration.OPT_ACCUMULATOR, "false"); + PigConfiguration.PIG_OPT_ACCUMULATOR, "false"); pigServer.registerQuery("A = load '" + INPUT_FILE2 + "' as (id:int, fruit);"); pigServer.registerQuery("B = group A by id;"); @@ -641,7 +641,7 @@ public class TestAccumulator { checkAccumulatorOff("C"); pigServer.getPigContext().getProperties().setProperty( - PigConfiguration.OPT_ACCUMULATOR, "true"); + PigConfiguration.PIG_OPT_ACCUMULATOR, "true"); } Modified: pig/branches/spark/test/org/apache/pig/test/TestAlgebraicEval.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestAlgebraicEval.java?rev=1642132&r1=1642131&r2=1642132&view=diff 
============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestAlgebraicEval.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestAlgebraicEval.java Thu Nov 27 12:49:54 2014 @@ -63,7 +63,7 @@ public class TestAlgebraicEval { public void testGroupCountWithMultipleFields() throws Throwable { File tmpFile = File.createTempFile("test", "txt"); for (int k = 0; k < nullFlags.length; k++) { - System.err.println("Running testGroupCountWithMultipleFields with nullFlags set to " + nullFlags[k]); + System.out.println("Running testGroupCountWithMultipleFields with nullFlags set to " + nullFlags[k]); // flag to indicate if both the keys forming // the group key are null int groupKeyWithNulls = 0; Modified: pig/branches/spark/test/org/apache/pig/test/TestAlgebraicEvalLocal.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestAlgebraicEvalLocal.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestAlgebraicEvalLocal.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestAlgebraicEvalLocal.java Thu Nov 27 12:49:54 2014 @@ -92,7 +92,7 @@ public class TestAlgebraicEvalLocal { ps.close(); } pig.registerQuery(" a = group (load '" - + Util.generateURI(Util.encodeEscape(tmpFile.toString()), pig.getPigContext()) + + Util.generateURI(tmpFile.toString(), pig.getPigContext()) + "') by ($0,$1);"); pig.registerQuery("b = foreach a generate flatten(group), SUM($1.$2);"); Iterator<Tuple> it = pig.openIterator("b"); @@ -141,7 +141,7 @@ public class TestAlgebraicEvalLocal { PrintStream ps = new PrintStream(new FileOutputStream(tmpFile)); generateInput(ps, nullFlags[i]); String query = "myid = foreach (group (load '" - + Util.generateURI(Util.encodeEscape(tmpFile.toString()), pig.getPigContext()) + + Util.generateURI(tmpFile.toString(), 
pig.getPigContext()) + "') all) generate COUNT($1);"; System.out.println(query); pig.registerQuery(query); @@ -163,7 +163,7 @@ public class TestAlgebraicEvalLocal { PrintStream ps = new PrintStream(new FileOutputStream(tmpFile)); generateInput(ps, nullFlags[i]); String query = "myid = foreach (group (load '" - + Util.generateURI(Util.encodeEscape(tmpFile.toString()), pig.getPigContext()) + + Util.generateURI(tmpFile.toString(), pig.getPigContext()) + "') all) generate group, COUNT($1) ;"; System.out.println(query); pig.registerQuery(query); @@ -184,7 +184,7 @@ public class TestAlgebraicEvalLocal { PrintStream ps = new PrintStream(new FileOutputStream(tmpFile)); generateInput(ps, nullFlags[i]); String query = "myid = foreach (group (load '" - + Util.generateURI(Util.encodeEscape(tmpFile.toString()), pig.getPigContext()) + + Util.generateURI(tmpFile.toString(), pig.getPigContext()) + "') all) generate COUNT($1), group ;"; System.out.println(query); pig.registerQuery(query); @@ -227,7 +227,7 @@ public class TestAlgebraicEvalLocal { } ps.close(); String query = "myid = foreach (group (load '" - + Util.generateURI(Util.encodeEscape(tmpFile.toString()), pig.getPigContext()) + + Util.generateURI(tmpFile.toString(), pig.getPigContext()) + "' using " + PigStorage.class.getName() + "(':')) by $0) generate group, COUNT($1.$1) ;"; System.out.println(query); @@ -278,7 +278,7 @@ public class TestAlgebraicEvalLocal { } ps.close(); String query = "myid = foreach (group (load '" - + Util.generateURI(Util.encodeEscape(tmpFile.toString()), pig.getPigContext()) + + Util.generateURI(tmpFile.toString(), pig.getPigContext()) + "' using " + PigStorage.class.getName() + "(':')) by $0) generate group, COUNT($1.$1), COUNT($1.$0) ;"; @@ -325,4 +325,4 @@ public class TestAlgebraicEvalLocal { ps.close(); return numNulls; } -} \ No newline at end of file +} Modified: pig/branches/spark/test/org/apache/pig/test/TestAutoLocalMode.java URL: 
http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestAutoLocalMode.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestAutoLocalMode.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestAutoLocalMode.java Thu Nov 27 12:49:54 2014 @@ -95,7 +95,7 @@ public class TestAutoLocalMode { @Before public void setUp() throws Exception{ pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); - pigServer.getPigContext().getExecutionEngine().setProperty(PigConfiguration.OPT_FETCH, "false"); + pigServer.getPigContext().getExecutionEngine().setProperty(PigConfiguration.PIG_OPT_FETCH, "false"); pigServer.getPigContext().getExecutionEngine().setProperty(PigConfiguration.PIG_AUTO_LOCAL_ENABLED, String.valueOf("true")); pigServer.getPigContext().getExecutionEngine().setProperty(PigConfiguration.PIG_AUTO_LOCAL_INPUT_MAXBYTES, "200"); @@ -120,7 +120,7 @@ public class TestAutoLocalMode { @Test public void testSmallJob() throws IOException { pigServer.registerQuery("A = LOAD '" - + Util.generateURI(Util.encodeEscape(smallFileName), pigServer + + Util.generateURI(smallFileName, pigServer .getPigContext()) + "' AS (num:int);"); pigServer.registerQuery("B = filter A by 1 == 0;"); pigServer.openIterator("B"); @@ -131,7 +131,7 @@ public class TestAutoLocalMode { @Test public void testBigJob() throws IOException { pigServer.registerQuery("A = LOAD '" - + Util.generateURI(Util.encodeEscape(bigFileName), pigServer + + Util.generateURI(bigFileName, pigServer .getPigContext()) + "' AS (num:int);"); pigServer.registerQuery("B = filter A by 1 == 0;"); pigServer.openIterator("B"); @@ -142,10 +142,10 @@ public class TestAutoLocalMode { @Test public void testReplicatedJoin() throws IOException { pigServer.registerQuery("A1 = LOAD '" - + Util.generateURI(Util.encodeEscape(smallFileName), pigServer + + 
Util.generateURI(smallFileName, pigServer .getPigContext()) + "' AS (num:int);"); pigServer.registerQuery("A2 = LOAD '" - + Util.generateURI(Util.encodeEscape(miniFileName), pigServer + + Util.generateURI(miniFileName, pigServer .getPigContext()) + "' AS (num:int);"); pigServer.registerQuery("A = join A1 by num, A2 by num using 'replicated';"); pigServer.registerQuery("B = filter A by 1 == 0;"); @@ -157,7 +157,7 @@ public class TestAutoLocalMode { @Test public void testOrderBy() throws IOException { pigServer.registerQuery("A1 = LOAD '" - + Util.generateURI(Util.encodeEscape(bigFileName), pigServer + + Util.generateURI(bigFileName, pigServer .getPigContext()) + "' AS (num:int);"); pigServer.registerQuery("A = filter A1 by num == 1;"); pigServer.registerQuery("B = order A by num;"); Modified: pig/branches/spark/test/org/apache/pig/test/TestBZip.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestBZip.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestBZip.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestBZip.java Thu Nov 27 12:49:54 2014 @@ -91,7 +91,7 @@ public class TestBZip { cos.close(); pig.registerQuery("AA = load '" - + Util.generateURI(Util.encodeEscape(in.getAbsolutePath()), pig.getPigContext()) + + Util.generateURI(in.getAbsolutePath(), pig.getPigContext()) + "';"); pig.registerQuery("A = foreach (group (filter AA by $0 > 0) all) generate flatten($1);"); pig.registerQuery("store A into '" + Util.encodeEscape(clusterOutput) + "';"); @@ -318,7 +318,7 @@ public class TestBZip { System.out.println(in.getAbsolutePath()); pig.registerQuery("AA = load '" - + Util.generateURI(Util.encodeEscape(in.getAbsolutePath()), pig.getPigContext()) + + Util.generateURI(in.getAbsolutePath(), pig.getPigContext()) + "';"); pig.registerQuery("A=foreach (group (filter AA by $0 < '0') all) generate 
flatten($1);"); pig.registerQuery("store A into '" + Util.encodeEscape(clusterOutputFilePath) + "';"); Modified: pig/branches/spark/test/org/apache/pig/test/TestBatchAliases.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestBatchAliases.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestBatchAliases.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestBatchAliases.java Thu Nov 27 12:49:54 2014 @@ -38,7 +38,7 @@ public class TestBatchAliases { @Before public void setUp() throws Exception { - System.setProperty(PigConfiguration.OPT_MULTIQUERY, ""+true); + System.setProperty(PigConfiguration.PIG_OPT_MULTIQUERY, ""+true); myPig = new PigServer(ExecType.LOCAL, new Properties()); deleteOutputFiles(); } Modified: pig/branches/spark/test/org/apache/pig/test/TestBlackAndWhitelistValidator.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestBlackAndWhitelistValidator.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestBlackAndWhitelistValidator.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestBlackAndWhitelistValidator.java Thu Nov 27 12:49:54 2014 @@ -44,6 +44,7 @@ import org.apache.pig.newplan.logical.re import org.apache.pig.newplan.logical.rules.LogicalRelationalNodeValidator; import org.apache.pig.parser.QueryParser; import org.apache.pig.tools.grunt.GruntParser; +import org.apache.pig.tools.parameters.PreprocessorContext; import org.apache.pig.validator.BlackAndWhitelistFilter; import org.apache.pig.validator.BlackAndWhitelistValidator; import org.junit.Before; @@ -95,6 +96,192 @@ public class TestBlackAndWhitelistValida } /** + * A few commands such as DECLARE, DEFAULT go via + * {@link PreprocessorContext}. 
This step basically parses commands and + * substitutes parameters. The parameters can be evaluated using shell + * commands, which need to be validated if specified in the white or blacklist. + * This test handles that scenario. + * + * @throws Exception + */ + @Test + public void testPreprocessorCommands() throws Exception { + try { + ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "dEclAre"); + PigServer pigServer = new PigServer(ctx); + Data data = resetData(pigServer); + + data.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), + tuple("c", 3, "d")); + + StringBuilder script = new StringBuilder(); + script.append("set io.sort.mb 1000;") + .append("%declare X `echo`; ") + .append("%default input 'foo';") + .append("A = LOAD '$input' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);") + .append("B = order A by f1,f2,f3 DESC;") + .append("STORE B INTO 'bar' USING mock.Storage();"); + + pigServer.registerScript(IOUtils.toInputStream(script)); + fail(); + } catch (Exception e) { + // We check RuntimeException here and not FrontendException as Pig wraps the error from Preprocessor + // within RuntimeException + Util.assertExceptionAndMessage(RuntimeException.class, e, + "DECLARE command is not permitted. 
"); + } + } + + @Test + public void testPreprocessorCommands2() throws Exception { + try { + ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "dEfaUlt"); + PigServer pigServer = new PigServer(ctx); + Data data = resetData(pigServer); + + data.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), + tuple("c", 3, "d")); + + StringBuilder script = new StringBuilder(); + script.append("set io.sort.mb 1000;") + .append("%Default input 'foo';") + .append("A = LOAD '$input' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);") + .append("B = order A by f1,f2,f3 DESC;") + .append("STORE B INTO 'bar' USING mock.Storage();"); + + pigServer.registerScript(IOUtils.toInputStream(script)); + fail(); + } catch (Exception e) { + // We check RuntimeException here and not FrontendException as Pig wraps the error from Preprocessor + // within RuntimeException + Util.assertExceptionAndMessage(RuntimeException.class, e, + "DEFAULT command is not permitted. "); + } + } + + @Test + public void testPreprocessorCommand3() throws Exception { + try { + ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "Define"); + PigServer pigServer = new PigServer(ctx); + Data data = resetData(pigServer); + + data.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), + tuple("c", 3, "d")); + + StringBuilder script = new StringBuilder(); + script.append("set io.sort.mb 1000;") + .append("DEFINE UrlDecode InvokeForString('java.net.URLDecoder.decode', 'String String'); ") + .append("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);") + .append("B = order A by f1,f2,f3 DESC;") + .append("STORE B INTO 'bar' USING mock.Storage();"); + + pigServer.registerScript(IOUtils.toInputStream(script)); + fail(); + } catch (Exception e) { + Util.assertExceptionAndMessage(FrontendException.class, e, + "Error during parsing. DEFINE command is not permitted. 
"); + } + } + + @Test + public void testExplain() throws Exception { + try { + ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "explain"); + PigServer pigServer = new PigServer(ctx); + Data data = resetData(pigServer); + + data.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), + tuple("c", 3, "d")); + + StringBuilder script = new StringBuilder(); + script.append("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);") + .append("B = order A by f1,f2,f3 DESC;") + .append("EXPLAIN B;") + .append("STORE B INTO 'bar' USING mock.Storage();"); + + pigServer.registerScript(IOUtils.toInputStream(script)); + fail(); + } catch (Exception e) { + Util.assertExceptionAndMessage(FrontendException.class, e, + "EXPLAIN command is not permitted. "); + } + } + + @Test + public void testExec() throws Exception { + try { + ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "exec"); + PigServer pigServer = new PigServer(ctx); + Data data = resetData(pigServer); + + data.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), + tuple("c", 3, "d")); + + StringBuilder script = new StringBuilder(); + script.append("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);") + .append("B = order A by f1,f2,f3 DESC;") + .append("exec evil.pig;") + .append("STORE B INTO 'bar' USING mock.Storage();"); + + pigServer.registerScript(IOUtils.toInputStream(script)); + fail(); + } catch (Exception e) { + Util.assertExceptionAndMessage(FrontendException.class, e, + "EXEC command is not permitted. 
"); + } + } + + @Test + public void testRun() throws Exception { + try { + ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "run"); + PigServer pigServer = new PigServer(ctx); + Data data = resetData(pigServer); + + data.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), + tuple("c", 3, "d")); + + StringBuilder script = new StringBuilder(); + script.append("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);") + .append("B = order A by f1,f2,f3 DESC;") + .append("run evil.pig;") + .append("STORE B INTO 'bar' USING mock.Storage();"); + + pigServer.registerScript(IOUtils.toInputStream(script)); + fail(); + } catch (Exception e) { + Util.assertExceptionAndMessage(FrontendException.class, e, + "RUN command is not permitted. "); + } + } + + @Test + public void testImport() throws Exception { + try { + ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "import"); + PigServer pigServer = new PigServer(ctx); + Data data = resetData(pigServer); + + data.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), + tuple("c", 3, "d")); + + StringBuilder script = new StringBuilder(); + script.append("import 'piggybank.jar';") + .append("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);") + .append("B = order A by f1,f2,f3 DESC;") + .append("run evil.pig;") + .append("STORE B INTO 'bar' USING mock.Storage();"); + + pigServer.registerScript(IOUtils.toInputStream(script)); + fail(); + } catch (Exception e) { + Util.assertExceptionAndMessage(FrontendException.class, e, + "Error during parsing. IMPORT command is not permitted. "); + } + } + + /** * Tests {@link BlackAndWhitelistValidator}. 
The logical plan generated * contains a filter, and the test must throw a {@link FrontendException} as * we set "filter" in the blacklist @@ -282,4 +469,4 @@ public class TestBlackAndWhitelistValida private File generateTmpFile(String filename) throws Exception { return Util.createTempFileDelOnExit(filename, ".txt"); } -} +} \ No newline at end of file Modified: pig/branches/spark/test/org/apache/pig/test/TestBuiltin.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestBuiltin.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestBuiltin.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestBuiltin.java Thu Nov 27 12:49:54 2014 @@ -340,7 +340,6 @@ public class TestBuiltin { inputMap.put("String", Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), stringInput)); inputMap.put("DateTime", Util.loadNestTuple(TupleFactory.getInstance().newTuple(1), datetimeInput)); - DateTimeZone.setDefault(DateTimeZone.forOffsetMillis(DateTimeZone.UTC.getOffset(null))); } @BeforeClass @@ -397,31 +396,31 @@ public class TestBuiltin { Tuple t1 = TupleFactory.getInstance().newTuple(1); t1.set(0, 1231290421000L); DateTime dt1 = func1.exec(t1); - assertEquals(dt1, new DateTime("2009-01-07T01:07:01.000Z")); + assertEquals(dt1.compareTo(new DateTime("2009-01-07T01:07:01.000Z")), 0); ToDateISO func2 = new ToDateISO(); Tuple t2 = TupleFactory.getInstance().newTuple(1); t2.set(0, "2009-01-07T01:07:01.000Z"); DateTime dt2 = func2.exec(t2); - assertEquals(dt2, new DateTime("2009-01-07T01:07:01.000Z")); + assertEquals(dt2.compareTo(new DateTime("2009-01-07T01:07:01.000Z")), 0); Tuple t3 = TupleFactory.getInstance().newTuple(1); t3.set(0, "2009-01-07T01:07:01.000+08:00"); DateTime dt3 = func2.exec(t3); - assertEquals(dt3, new DateTime("2009-01-07T01:07:01.000+08:00", DateTimeZone.forID("+08:00"))); + 
assertEquals(dt3.compareTo(new DateTime("2009-01-07T01:07:01.000+08:00", DateTimeZone.forID("+08:00"))), 0); ToDate2ARGS func3 = new ToDate2ARGS(); Tuple t4 = TupleFactory.getInstance().newTuple(2); t4.set(0, "2009.01.07 AD at 01:07:01"); t4.set(1, "yyyy.MM.dd G 'at' HH:mm:ss"); DateTime dt4 = func3.exec(t4); - assertEquals(dt4, new DateTime("2009-01-07T01:07:01.000Z")); + assertEquals(dt4.compareTo(new DateTime("2009-01-07T01:07:01.000")), 0); Tuple t5 = TupleFactory.getInstance().newTuple(2); t5.set(0, "2009.01.07 AD at 01:07:01 +0800"); t5.set(1, "yyyy.MM.dd G 'at' HH:mm:ss Z"); DateTime dt5 = func3.exec(t5); - assertEquals(dt5, new DateTime("2009-01-07T01:07:01.000+08:00")); + assertEquals(dt5.compareTo(new DateTime("2009-01-07T01:07:01.000+08:00")), 0); ToDate3ARGS func4 = new ToDate3ARGS(); Tuple t6 = TupleFactory.getInstance().newTuple(3); @@ -429,14 +428,14 @@ public class TestBuiltin { t6.set(1, "yyyy.MM.dd G 'at' HH:mm:ss"); t6.set(2, "+00:00"); DateTime dt6 = func4.exec(t6); - assertEquals(dt6, new DateTime("2009-01-07T01:07:01.000Z", DateTimeZone.forID("+00:00"))); + assertEquals(dt6.compareTo(new DateTime("2009-01-07T01:07:01.000", DateTimeZone.forID("+00:00"))), 0); Tuple t7 = TupleFactory.getInstance().newTuple(3); t7.set(0, "2009.01.07 AD at 01:07:01 +0800"); t7.set(1, "yyyy.MM.dd G 'at' HH:mm:ss Z"); t7.set(2, "Asia/Singapore"); DateTime dt7 = func4.exec(t7); - assertEquals(dt7, new DateTime("2009-01-07T01:07:01.000+08:00", DateTimeZone.forID("+08:00"))); + assertEquals(dt7.compareTo(new DateTime("2009-01-07T01:07:01.000+08:00", DateTimeZone.forID("+08:00"))), 0); ToUnixTime func5 = new ToUnixTime(); Tuple t8 = TupleFactory.getInstance().newTuple(1); @@ -447,17 +446,17 @@ public class TestBuiltin { ToString func6 = new ToString(); Tuple t9 = TupleFactory.getInstance().newTuple(1); - t9.set(0, new DateTime("2009-01-07T01:07:01.000Z")); + t9.set(0, ToDate.extractDateTime("2009-01-07T01:07:01.000Z")); String dtStr1 = func6.exec(t9); 
assertEquals(dtStr1, "2009-01-07T01:07:01.000Z"); Tuple t10 = TupleFactory.getInstance().newTuple(1); - t10.set(0, new DateTime("2009-01-07T09:07:01.000+08:00")); + t10.set(0, new DateTime("2009-01-07T09:07:01.000+08:00", DateTimeZone.UTC)); String dtStr2 = func6.exec(t10); assertEquals(dtStr2, "2009-01-07T01:07:01.000Z"); Tuple t11 = TupleFactory.getInstance().newTuple(2); - t11.set(0, new DateTime("2009-01-07T01:07:01.000Z")); + t11.set(0, ToDate.extractDateTime("2009-01-07T01:07:01.000Z")); t11.set(1, "yyyy.MM.dd G 'at' HH:mm:ss"); String dtStr3 = func6.exec(t11); assertEquals(dtStr3, "2009.01.07 AD at 01:07:01"); @@ -2731,14 +2730,16 @@ public class TestBuiltin { pigServer.registerQuery("=> load '" + Util.encodeEscape(inputFile.getAbsolutePath()) + "' as (name: chararray);"); pigServer.registerQuery("B = foreach @ generate SUBSTRING(name, 0, 3), " + "INDEXOF(name, 'a'), INDEXOF(name, 'a', 3), LAST_INDEX_OF(name, 'a'), REPLACE(name, 'a', 'b'), " + - "STRSPLIT(name), STRSPLIT(name, ' '), STRSPLIT(name, ' ', 0), TRIM(name);"); + "STRSPLIT(name), STRSPLIT(name, ' '), STRSPLIT(name, ' ', 0), STRSPLITTOBAG(name), STRSPLITTOBAG(name,' ')" + + ", STRSPLITTOBAG(name,' ',0), TRIM(name);"); Iterator<Tuple> it = pigServer.openIterator("B"); assertTrue(it.hasNext()); Tuple t = it.next(); Tuple expected = Util.buildTuple("amy", "smith"); + DataBag expectedBag = Util.createBag(new Tuple[]{Util.buildTuple("amy"), Util.buildTuple("smith")}); assertTrue(!it.hasNext()); - assertEquals(9, t.size()); + assertEquals(12, t.size()); assertEquals("amy", t.get(0)); assertEquals(0, t.get(1)); assertEquals(-1, t.get(2)); @@ -2747,7 +2748,10 @@ public class TestBuiltin { assertEquals(expected, t.get(5)); assertEquals(expected, t.get(6)); assertEquals(expected, t.get(7)); - assertEquals("amy smith", t.get(8)); + assertEquals(expectedBag, t.get(8)); + assertEquals(expectedBag, t.get(9)); + assertEquals(expectedBag, t.get(10)); + assertEquals("amy smith", t.get(11)); // test untyped data 
pigServer.registerQuery("=> load '" + Util.encodeEscape(inputFile.getAbsolutePath()) + "' as (name);"); @@ -2849,6 +2853,8 @@ public class TestBuiltin { public void testURIwithCurlyBrace() throws Exception { String inputFileName = "input.txt"; String inputFileName1 ="part-1"; + Util.deleteFile(pigServer.getPigContext(), inputFileName); + Util.deleteFile(pigServer.getPigContext(), inputFileName1); String[] inputData = new String[] { "1", "a", @@ -3050,9 +3056,9 @@ public class TestBuiltin { @Test public void testGetDateTimeField() throws Exception { Tuple t1 = TupleFactory.getInstance().newTuple(1); - t1.set(0, new DateTime("2010-04-15T08:11:33.020Z")); + t1.set(0, ToDate.extractDateTime("2010-04-15T08:11:33.020Z")); Tuple t2 = TupleFactory.getInstance().newTuple(1); - t2.set(0, new DateTime("2010-04-15T08:11:33.020+08:00")); + t2.set(0, ToDate.extractDateTime("2010-04-15T08:11:33.020+08:00")); GetYear func1 = new GetYear(); Integer year = func1.exec(t1); @@ -3076,7 +3082,7 @@ public class TestBuiltin { Integer hour = func4.exec(t1); assertEquals(hour.intValue(), 8); hour = func4.exec(t2); - assertEquals(hour.intValue(), 0); + assertEquals(hour.intValue(), 8); GetMinute func5 = new GetMinute(); Integer minute = func5.exec(t1); @@ -3122,4 +3128,28 @@ public class TestBuiltin { } + @Test + public void testUniqueID() throws Exception { + Util.resetStateForExecModeSwitch(); + String inputFileName = "testUniqueID.txt"; + Util.createInputFile(cluster, inputFileName, new String[] + {"1\n2\n3\n4\n5\n1\n2\n3\n4\n5\n"}); + PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties()); + pigServer.getPigContext().getProperties().setProperty("mapred.max.split.size", "10"); + pigServer.getPigContext().getProperties().setProperty("pig.noSplitCombination", "true"); + pigServer.registerQuery("A = load '" + inputFileName + "' as (name);"); + pigServer.registerQuery("B = foreach A generate name, UniqueID();"); + Iterator<Tuple> iter = 
pigServer.openIterator("B"); + iter.next().get(1).equals("0-0"); + iter.next().get(1).equals("0-1"); + iter.next().get(1).equals("0-2"); + iter.next().get(1).equals("0-3"); + iter.next().get(1).equals("0-4"); + iter.next().get(1).equals("1-0"); + iter.next().get(1).equals("1-1"); + iter.next().get(1).equals("1-1"); + iter.next().get(1).equals("1-2"); + iter.next().get(1).equals("1-3"); + iter.next().get(1).equals("1-4"); + } } Modified: pig/branches/spark/test/org/apache/pig/test/TestCollectedGroup.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestCollectedGroup.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestCollectedGroup.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestCollectedGroup.java Thu Nov 27 12:49:54 2014 @@ -25,6 +25,7 @@ import java.util.Iterator; import java.util.List; import org.apache.pig.CollectableLoadFunc; +import org.apache.pig.ExecType; import org.apache.pig.PigServer; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompilerException; @@ -292,6 +293,43 @@ public class TestCollectedGroup { } } + @Test + public void testMapsideGroupWithMergeJoin() throws IOException{ + pigServer = new PigServer(cluster.getExecType(), cluster.getProperties()); + pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' using "+DummyCollectableLoader.class.getName() +"() as (id, name, grade);"); + pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "' using "+DummyCollectableLoader.class.getName() +"() as (id, name, grade);"); + try { + DataBag dbfrj = BagFactory.getInstance().newDefaultBag(); + DataBag dbshj = BagFactory.getInstance().newDefaultBag(); + { + pigServer.registerQuery("C = join A by id, B by id using 'merge';"); + pigServer.registerQuery("D = group C by A::id using 'collected';"); + 
pigServer.registerQuery("E = foreach D generate group, COUNT(C);"); + Iterator<Tuple> iter = pigServer.openIterator("E"); + + while (iter.hasNext()) { + dbfrj.add(iter.next()); + } + } + { + pigServer.registerQuery("F = join A by id, B by id;"); + pigServer.registerQuery("G = group F by A::id;"); + pigServer.registerQuery("H = foreach G generate group, COUNT(F);"); + Iterator<Tuple> iter = pigServer.openIterator("H"); + + while (iter.hasNext()) { + dbshj.add(iter.next()); + } + } + Assert.assertTrue(dbfrj.size()>0 && dbshj.size()>0); + Assert.assertEquals(true, TestHelper.compareBags(dbfrj, dbshj)); + + } catch (Exception e) { + e.printStackTrace(); + Assert.fail(e.getMessage()); + } + } + public static class DummyCollectableLoader extends PigStorage implements CollectableLoadFunc{ @Override Modified: pig/branches/spark/test/org/apache/pig/test/TestConversions.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestConversions.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestConversions.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestConversions.java Thu Nov 27 12:49:54 2014 @@ -41,8 +41,6 @@ import org.apache.pig.parser.ParserExcep import org.apache.pig.test.utils.GenRandomData; import org.apache.pig.test.utils.TestHelper; import org.joda.time.DateTime; -import org.joda.time.DateTimeZone; -import org.junit.Before; import org.junit.Test; /** @@ -56,11 +54,6 @@ public class TestConversions { Random r = new Random(42L); final int MAX = 10; - @Before - public void setUp() { - DateTimeZone.setDefault(DateTimeZone.forOffsetMillis(DateTimeZone.UTC.getOffset(null))); - } - @Test public void testBytesToBoolean() throws IOException { // valid booleans @@ -194,7 +187,7 @@ public class TestConversions { ResourceFieldSchema fs = GenRandomData.getSmallBagTextTupleFieldSchema(); Tuple convertedTuple 
= ps.getLoadCaster().bytesToTuple(t.toString().getBytes(), fs); - assertTrue(TestHelper.tupleEquals(t, convertedTuple)); + assertEquals(t, convertedTuple); } } @@ -207,7 +200,7 @@ public class TestConversions { for (int i = 0; i < MAX; i++) { DataBag b = GenRandomData.genRandFullTupTextDataBag(r,5,100); DataBag convertedBag = ps.getLoadCaster().bytesToBag(b.toString().getBytes(), fs); - assertTrue(TestHelper.bagEquals(b, convertedBag)); + assertTrue(b.equals(convertedBag)); } } Modified: pig/branches/spark/test/org/apache/pig/test/TestDataBagAccess.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestDataBagAccess.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestDataBagAccess.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestDataBagAccess.java Thu Nov 27 12:49:54 2014 @@ -89,7 +89,7 @@ public class TestDataBagAccess { File input = Util.createInputFile("tmp", "", new String[] {"sampledata\tnot_used"}); pigServer.registerQuery("a = load '" - + Util.generateURI(Util.encodeEscape(input.toString()), pigServer.getPigContext()) + "';"); + + Util.generateURI(input.toString(), pigServer.getPigContext()) + "';"); pigServer.registerQuery("b = foreach a generate {(16, 4.0e-2, 'hello', -101)} as mybag:{t:(i: int, d: double, c: chararray, e : int)};"); pigServer.registerQuery("c = foreach b generate mybag.i, mybag.d, mybag.c, mybag.e;"); Iterator<Tuple> it = pigServer.openIterator("c"); @@ -112,7 +112,7 @@ public class TestDataBagAccess { pigServer.setValidateEachStatement(true); try { pigServer.registerQuery("a = load '" - + Util.generateURI(Util.encodeEscape(input.toString()), pigServer.getPigContext()) + "';"); + + Util.generateURI(input.toString(), pigServer.getPigContext()) + "';"); pigServer.registerQuery("b = foreach a generate {(16, 4.0e-2, 'hello')} as mybag:{t:(i: int, d: double, c: 
chararray)};"); pigServer.registerQuery("c = foreach b generate mybag.t;"); pigServer.explain("c", System.out); @@ -129,7 +129,7 @@ public class TestDataBagAccess { File input = Util.createInputFile("tmp", "", new String[] {"sampledata\tnot_used"}); pigServer.registerQuery("A = load '" - + Util.generateURI(Util.encodeEscape(input.toString()), pigServer.getPigContext()) + "';"); + + Util.generateURI(input.toString(), pigServer.getPigContext()) + "';"); pigServer.registerQuery("B = foreach A generate {(('p1-t1-e1', 'p1-t1-e2'),('p1-t2-e1', 'p1-t2-e2'))," + "(('p2-t1-e1', 'p2-t1-e2'), ('p2-t2-e1', 'p2-t2-e2'))};"); pigServer.registerQuery("C = foreach B generate $0 as pairbag : { pair: ( t1: (e1, e2), t2: (e1, e2) ) };"); @@ -153,7 +153,7 @@ public class TestDataBagAccess { File input = Util.createInputFile("tmp", "", new String[] {"somestring\t10\t{(a,10),(b,20)}"}); pigServer.registerQuery("a = load '" - + Util.generateURI(Util.encodeEscape(input.toString()), pigServer.getPigContext()) + + Util.generateURI(input.toString(), pigServer.getPigContext()) + "' " + "as (str:chararray, intval:int, bg:bag{t:tuple(s:chararray, i:int)});"); pigServer.registerQuery("b = foreach a generate str, intval, flatten(bg);"); pigServer.registerQuery("c = foreach b generate str, intval, s, i;"); @@ -191,7 +191,7 @@ public class TestDataBagAccess { File input = Util.createInputFile("tmp", "", new String[] {"a\tid1", "a\tid2", "a\tid3", "b\tid4", "b\tid5", "b\tid6"}); pigServer.registerQuery("a = load '" - + Util.generateURI(Util.encodeEscape(input.toString()), pigServer.getPigContext()) + + Util.generateURI(input.toString(), pigServer.getPigContext()) + "' " + "as (s:chararray, id:chararray);"); pigServer.registerQuery("b = group a by s;"); Class[] loadStoreClasses = new Class[] { BinStorage.class, PigStorage.class }; Modified: pig/branches/spark/test/org/apache/pig/test/TestDefaultDateTimeZone.java URL: 
http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestDefaultDateTimeZone.java?rev=1642132&r1=1642131&r2=1642132&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestDefaultDateTimeZone.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestDefaultDateTimeZone.java Thu Nov 27 12:49:54 2014 @@ -29,7 +29,6 @@ import java.util.Properties; import junit.framework.TestCase; -import org.apache.pig.ExecType; import org.apache.pig.PigServer; import org.apache.pig.builtin.ToDate; import org.apache.pig.data.Tuple; @@ -42,9 +41,13 @@ import org.junit.Test; public class TestDefaultDateTimeZone extends TestCase { private File tmpFile; + private DateTimeZone currentDTZ; + @Override @Before public void setUp() throws Exception { + currentDTZ = DateTimeZone.getDefault(); + tmpFile = File.createTempFile("test", "txt"); PrintStream ps = new PrintStream(new FileOutputStream(tmpFile)); ps.println("1970-01-01T00:00:00.000"); @@ -53,13 +56,18 @@ public class TestDefaultDateTimeZone ext ps.println("1970-01-03T00:00:00.000Z"); ps.println("1970-01-05T00:00:00.000"); ps.println("1970-01-05T00:00:00.000Z"); + // for testing DST + ps.println("2014-02-01T00:00:00.000"); // EST + ps.println("2014-06-01T00:00:00.000"); // EDT ps.close(); } + @Override @After public void tearDown() throws Exception { tmpFile.delete(); + DateTimeZone.setDefault(currentDTZ); } @Test @@ -68,7 +76,7 @@ public class TestDefaultDateTimeZone ext .forOffsetMillis(DateTimeZone.forID("+08:00").getOffset(null))); Properties config = new Properties(); config.setProperty("pig.datetime.default.tz", "+08:00"); - PigServer pig = new PigServer(ExecType.LOCAL, config); + PigServer pig = new PigServer(Util.getLocalTestMode(), config); pig.registerQuery("a = load '" + Util.encodeEscape(Util.generateURI(tmpFile.toString(), pig.getPigContext())) + "' as (test:datetime);"); @@ -82,6 +90,25 @@ public class 
TestDefaultDateTimeZone ext assertEquals(expectedItr.hasNext(), actualItr.hasNext()); } + @Test + public void testDST() throws Exception { + String defaultDTZ = "America/New_York"; // a timezone that uses DST + Properties config = new Properties(); + config.setProperty("pig.datetime.default.tz", defaultDTZ); + PigServer pig = new PigServer(Util.getLocalTestMode(), config); + pig.registerQuery("a = load '" + + Util.encodeEscape(Util.generateURI(tmpFile.toString(), pig.getPigContext())) + + "' as (test:datetime);"); + pig.registerQuery("b = filter a by test > ToDate('2014-01-01T00:00:00.000');"); + pig.registerQuery("c = foreach b generate ToString(test, 'Z') as tz;"); + Iterator<Tuple> actualItr = pig.openIterator("c"); + + Tuple est = actualItr.next(); + assertEquals(Util.buildTuple("-0500"), est); + Tuple edt = actualItr.next(); + assertEquals(Util.buildTuple("-0400"), edt); + } + private static Iterator<Tuple> generateExpectedResults(DateTimeZone dtz) throws Exception { List<Tuple> expectedResults = new ArrayList<Tuple>(); @@ -99,8 +126,7 @@ public class TestDefaultDateTimeZone ext public void testTimeZone() throws IOException { // Usually set through "pig.datetime.default.tz" String defaultDTZ = "+03:00"; - DateTimeZone.setDefault(DateTimeZone.forOffsetMillis(DateTimeZone.forID(defaultDTZ) - .getOffset(null))); + DateTimeZone.setDefault(DateTimeZone.forID(defaultDTZ)); String[] inputs = { "1970-01-01T00:00:00.000-08:00", "1970-01-01T00:00",
