apachel...

zly Fri, 24 Feb 2017 00:20:13 -0800

Modified: pig/branches/spark/src/pig-default.properties
URL: 
http://svn.apache.org/viewvc/pig/branches/spark/src/pig-default.properties?rev=1784237&r1=1784236&r2=1784237&view=diff
==============================================================================
--- pig/branches/spark/src/pig-default.properties (original)
+++ pig/branches/spark/src/pig-default.properties Fri Feb 24 08:19:42 2017
@@ -61,4 +61,8 @@ pig.stats.output.size.reader.unsupported
 
 
pig.tez.opt.union.unsupported.storefuncs=org.apache.hcatalog.pig.HCatStorer,org.apache.hive.hcatalog.pig.HCatStorer,org.apache.pig.piggybank.storage.DBStorage,org.apache.pig.piggybank.storage.MultiStorage
 
-pig.sort.readonce.loadfuncs=org.apache.pig.backend.hadoop.hbase.HBaseStorage,org.apache.pig.backend.hadoop.accumulo.AccumuloStorage
\ No newline at end of file
+pig.sort.readonce.loadfuncs=org.apache.pig.backend.hadoop.hbase.HBaseStorage,org.apache.pig.backend.hadoop.accumulo.AccumuloStorage
+
+pig.ats.enabled=true
+
+pig.tez.configure.am.memory=true


Added: pig/branches/spark/start-build-env.sh
URL: 
http://svn.apache.org/viewvc/pig/branches/spark/start-build-env.sh?rev=1784237&view=auto
==============================================================================
--- pig/branches/spark/start-build-env.sh (added)
+++ pig/branches/spark/start-build-env.sh Fri Feb 24 08:19:42 2017
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e               # exit on error
+
+cd "$(dirname "$0")" # connect to root
+
+docker build -t pig-build dev-support/docker
+
+if [ "$(uname -s)" == "Linux" ]; then
+  USER_NAME=${SUDO_USER:=${USER}}
+  USER_ID=$(id -u "${USER_NAME}")
+  GROUP_ID=$(id -g "${USER_NAME}")
+else # boot2docker uid and gid
+  USER_NAME=${USER}
+  USER_ID=1000
+  GROUP_ID=50
+fi
+
+docker build -t "pig-build-${USER_NAME}" - <<UserSpecificDocker
+FROM pig-build
+RUN bash configure-for-user.sh ${USER_NAME} ${USER_ID} ${GROUP_ID} "$(fgrep vboxsf /etc/group)"
+UserSpecificDocker
+
+# By mapping the .m2 directory you can do an mvn install from
+# within the container and use the result on your normal
+# system. This also is a significant speedup in subsequent
+# builds because the dependencies are downloaded only once.
+# Same with the .ivy2 directory
+
+DOCKER="docker run --rm=true -t -i"
+DOCKER=${DOCKER}" -u ${USER_NAME}"
+
+# Work in the current directory
+DOCKER=${DOCKER}" -v ${PWD}:/home/${USER_NAME}/pig"
+DOCKER=${DOCKER}" -w /home/${USER_NAME}/pig"
+
+# Mount persistent caching of 'large' downloads
+DOCKER=${DOCKER}" -v ${HOME}/.m2:/home/${USER_NAME}/.m2"
+DOCKER=${DOCKER}" -v ${HOME}/.ivy2:/home/${USER_NAME}/.ivy2"
+
+# What do we run?
+DOCKER=${DOCKER}" --name pig-build-${USER_NAME}-$$"
+DOCKER=${DOCKER}" pig-build-${USER_NAME}"
+DOCKER=${DOCKER}" bash"
+
+# Now actually start it
+${DOCKER}
+

Modified: pig/branches/spark/test/e2e/pig/build.xml
URL: 
http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/build.xml?rev=1784237&r1=1784236&r2=1784237&view=diff
==============================================================================
--- pig/branches/spark/test/e2e/pig/build.xml (original)
+++ pig/branches/spark/test/e2e/pig/build.xml Fri Feb 24 08:19:42 2017
@@ -27,9 +27,8 @@
   <property name="hive.lib.dir"
        value="${pig.base.dir}/build/ivy/lib/Pig"/>
 
-  <condition property="hive.hadoop.shims.version" value="0.23" else="0.20S">
-    <equals arg1="${hadoopversion}" arg2="23" />
-  </condition>
+  <property name="hadoopversion" value="2" />
+  <property name="hive.hadoop.shims.version" value="0.23" />
 
   <property name="mvnrepo" value="http://repo2.maven.org/maven2"/>
 
@@ -61,6 +60,7 @@
   <property name="harness.PH_LOCAL" value="."/>
   <property name="harness.PH_OUT" value="."/>
   <property name="harness.PERL5LIB" value="./libexec"/>
+  <property name="harness.user.home" value="/user/pig" />
 
   <property name="test.location" value="${basedir}/testdist"/>
   <property name="benchmark.location" value="${test.location}/benchmarks"/>
@@ -137,6 +137,7 @@
       <path path="${test.location}/tests/multiquery.conf"/>
       <path path="${test.location}/tests/negative.conf"/>
       <path path="${test.location}/tests/nightly.conf"/>
+      <path path="${test.location}/tests/join.conf"/>
       <path path="${test.location}/tests/streaming.conf"/>
       <path path="${test.location}/tests/streaming_local.conf"/>
       <path path="${test.location}/tests/turing_jython.conf"/>
@@ -309,6 +310,7 @@
       <env key="PH_HIVE_LIB_DIR" value="${hive.lib.dir}"/>
       <env key="PH_HIVE_VERSION" value="${hive.version}"/>
       <env key="PH_HIVE_SHIMS_VERSION" value="${hive.hadoop.shims.version}"/>
+      <env key="PH_HDFS_BASE" value="${harness.user.home}" />
       <env key="HARNESS_CONF" value="${harness.conf.file}"/>
       <env key="HADOOP_HOME" value="${harness.hadoop.home}"/>
       <env key="HADOOP_PREFIX" value="${HADOOP_PREFIX}"/>
@@ -369,6 +371,7 @@
       <env key="PH_CLUSTER_BIN" value="${harness.cluster.bin}"/>
       <env key="HARNESS_CONF" value="${harness.conf.file}"/>
       <env key="HADOOP_HOME" value="${harness.hadoop.home}"/>
+      <env key="PH_HDFS_BASE" value="${harness.user.home}" />
 
       <arg value="./test_harness.pl"/>
       <arg value="-deploycfg"/>

Modified: pig/branches/spark/test/e2e/pig/conf/spark.conf
URL: 
http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/conf/spark.conf?rev=1784237&r1=1784236&r2=1784237&view=diff
==============================================================================
--- pig/branches/spark/test/e2e/pig/conf/spark.conf (original)
+++ pig/branches/spark/test/e2e/pig/conf/spark.conf Fri Feb 24 08:19:42 2017
@@ -30,8 +30,8 @@ my $hdfsBase = $ENV{PH_HDFS_BASE} || "/u
 
 $cfg = {
     #HDFS
-      'inpathbase'     => "$ENV{PH_ROOT}/data"
-    , 'outpathbase'    => "$ENV{PH_ROOT}/testout"
+      'inpathbase'     => "$hdfsBase/test/data"
+    , 'outpathbase'    => "$hdfsBase/out"
 
    #LOCAL
     , 'localinpathbase'   => "$ENV{PH_LOCAL}/in"
@@ -55,7 +55,7 @@ $cfg = {
     , 'hcatbin'          => "$ENV{HCAT_BIN}"
     , 'usePython'        => "$ENV{PIG_USE_PYTHON}"
     , 'exectype'         => 'spark'
-    , 'benchmark_exectype'         => 'local'
+    , 'benchmark_exectype'         => 'mapred'
 
     #HADOOP
     , 'mapredjars'       => "$ENV{PH_ROOT}/lib"

Modified: pig/branches/spark/test/e2e/pig/deployers/ExistingClusterDeployer.pm
URL: 
http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/deployers/ExistingClusterDeployer.pm?rev=1784237&r1=1784236&r2=1784237&view=diff
==============================================================================
--- pig/branches/spark/test/e2e/pig/deployers/ExistingClusterDeployer.pm (original)
+++ pig/branches/spark/test/e2e/pig/deployers/ExistingClusterDeployer.pm Fri Feb 24 08:19:42 2017
@@ -231,11 +231,6 @@ sub generateData
             'rows' => 5000,
             'hdfs' => "types/numbers.txt",
         }, {
-            'name' => "biggish",
-            'filetype' => "biggish",
-            'rows' => 1000000,
-            'hdfs' => "singlefile/biggish",
-        }, {
             'name' => "prerank",
             'filetype' => "ranking",
             'rows' => 30,

Modified: pig/branches/spark/test/e2e/pig/deployers/LocalDeployer.pm
URL: 
http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/deployers/LocalDeployer.pm?rev=1784237&r1=1784236&r2=1784237&view=diff
==============================================================================
--- pig/branches/spark/test/e2e/pig/deployers/LocalDeployer.pm (original)
+++ pig/branches/spark/test/e2e/pig/deployers/LocalDeployer.pm Fri Feb 24 08:19:42 2017
@@ -209,11 +209,21 @@ sub generateData
             'filetype' => "ranking",
             'rows' => 30,
             'outfile' => "singlefile/prerank",
+        }, {
+            'name' => "utf8Voter",
+            'filetype' => "utf8Voter",
+            'rows' => 30,
+            'outfile' => "utf8Data/选民/utf8Voter",
+        }, {
+            'name' => "utf8Student",
+            'filetype' => "utf8Student",
+            'rows' => 300,
+            'outfile' => "utf8Data/学生/utf8Student",
         }
     );
 
        # Create the target directories
-    for my $dir ("singlefile", "dir", "types", "glob/star/somegood",
+    for my $dir ("singlefile", "utf8Data/选民", "utf8Data/学生", "dir", "types", "glob/star/somegood",
             "glob/star/moregood", "glob/star/bad") {
         my @cmd = ("mkdir", "-p", "$cfg->{'inpathbase'}/$dir");
            $self->runCmd($log, \@cmd);

Modified: pig/branches/spark/test/e2e/pig/drivers/TestDriverPig.pm
URL: 
http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/drivers/TestDriverPig.pm?rev=1784237&r1=1784236&r2=1784237&view=diff
==============================================================================
--- pig/branches/spark/test/e2e/pig/drivers/TestDriverPig.pm (original)
+++ pig/branches/spark/test/e2e/pig/drivers/TestDriverPig.pm Fri Feb 24 08:19:42 2017
@@ -211,13 +211,6 @@ sub runTest
            $testCmd->{'pig'} = $testCmd->{'pig_win'};
        }
 
-       if ( $testCmd->{'hadoopversion'} == '23' && $testCmd->{'pig23'}) {
-           $oldpig = $testCmd->{'pig'};
-           $testCmd->{'pig'} = $testCmd->{'pig23'};
-       }
-       if ( $testCmd->{'hadoopversion'} == '23' && 
$testCmd->{'expected_err_regex23'}) {
-           $testCmd->{'expected_err_regex'} = 
$testCmd->{'expected_err_regex23'};
-       }
        my $res = $self->runPigCmdLine( $testCmd, $log, 1, $resources );
        if ($oldpig) {
            $testCmd->{'pig'} = $oldpig;
@@ -231,10 +224,6 @@ sub runTest
            $testCmd->{'pig'} = $testCmd->{'pig_win'};
        }
 
-       if ( $testCmd->{'hadoopversion'} == '23' && $testCmd->{'pig23'}) {
-           $oldpig = $testCmd->{'pig'};
-           $testCmd->{'pig'} = $testCmd->{'pig23'};
-       }
        my $res = $self->runPig( $testCmd, $log, 1, $resources );
        if ($oldpig) {
            $testCmd->{'pig'} = $oldpig;
@@ -686,9 +675,6 @@ sub generateBenchmark
         if ((Util::isWindows()||Util::isCygwin()) && $testCmd->{'pig_win'}) {
            $modifiedTestCmd{'pig'} = $testCmd->{'pig_win'};
        }
-          if ( $testCmd->{'hadoopversion'} == '23' && $testCmd->{'pig23'}) {
-           $modifiedTestCmd{'pig'} = $testCmd->{'pig23'};
-       }
                # Change so we're looking at the old version of Pig
                 if (defined $testCmd->{'oldpigpath'} && 
$testCmd->{'oldpigpath'} ne "") {
                    $modifiedTestCmd{'pigpath'} = $testCmd->{'oldpigpath'};
@@ -1058,10 +1044,6 @@ sub wrongExecutionMode($$)
         }
     }
 
-    if (defined $testCmd->{'ignore23'} && $testCmd->{'hadoopversion'}=='23') {
-        $wrong = 1;
-    }
-
     if ($wrong) {
         print $log "Skipping test $testCmd->{'group'}" . "_" .
             $testCmd->{'num'} . " since it is not suppsed to be run in hadoop 
23\n";

Modified: pig/branches/spark/test/e2e/pig/streaming/PigStreaming.pl
URL: 
http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/streaming/PigStreaming.pl?rev=1784237&r1=1784236&r2=1784237&view=diff
==============================================================================
--- pig/branches/spark/test/e2e/pig/streaming/PigStreaming.pl (original)
+++ pig/branches/spark/test/e2e/pig/streaming/PigStreaming.pl Fri Feb 24 08:19:42 2017
@@ -73,7 +73,7 @@ while (<$input_handle>)
 {
        chomp;  
        $data = $_;
-       if (defined(%hash) && (exists $hash{$data}))
+       if (exists $hash{$data})
        {
                print $output_handle "$hash{$data}\n";          
        }

Modified: pig/branches/spark/test/e2e/pig/tests/grunt.conf
URL: 
http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/tests/grunt.conf?rev=1784237&r1=1784236&r2=1784237&view=diff
==============================================================================
--- pig/branches/spark/test/e2e/pig/tests/grunt.conf (original)
+++ pig/branches/spark/test/e2e/pig/tests/grunt.conf Fri Feb 24 08:19:42 2017
@@ -46,7 +46,12 @@ $cfg = {
                         'execonly' => 'mapred,tez', # don't have a clue what 
their cwd will be for local mode
                         'expected_out_regex' => "/user",
                         'rc' => 0
-
+                      },{
+                        'num' => 3,
+                        'pig' => "ls .",
+                        'execonly' => 'mapred,tez',
+                        'expected_out_regex' => "/user",
+                        'rc' => 0
                       },{
                         'num' => 4,
                         'pig' => "ls :INPATH:",
@@ -77,21 +82,22 @@ $cfg = {
                             'rc' => 0
                         },{
                             'num' => 10,
-                            'pig' => "cp :INPATH:/singlefile/studenttab10k .
-                                      ls .",
+                            'pig' => "mkdir :OUTPATH:
+                                      cp :INPATH:/singlefile/studenttab10k 
:OUTPATH:
+                                      ls :OUTPATH:",
                             'expected_out_regex' => ".*studenttab10k",
                             'rc' => 0
                         },{
                             'num' => 11,
-                            'pig' => "cp :INPATH:/singlefile/studenttab10k 
./fred
-                                      ls .",
+                            'pig' => "cp :INPATH:/singlefile/studenttab10k 
:OUTPATH:/fred
+                                      ls :OUTPATH:",
                             'expected_out_regex' => ".*fred",
                             'rc' => 0
                         },{
                             'num' => 12,
-                            'pig' => "cp :INPATH:/singlefile/studenttab10k 
./jim
-                                      mv ./jim ./bob
-                                      ls .",
+                            'pig' => "cp :INPATH:/singlefile/studenttab10k 
:OUTPATH:/jim
+                                      mv :OUTPATH:/jim :OUTPATH:/bob
+                                      ls :OUTPATH:",
                             'expected_out_regex' => ".*bob",
                             'rc' => 0
                         },{
@@ -103,18 +109,19 @@ $cfg = {
                         },{
                             'num' => 14,
                             'pig' => "copyToLocal 
:INPATH:/singlefile/votertab10k :TMP:
-                                      copyFromLocal :TMP:/votertab10k ./joe
-                                      cat ./joe",
+                                      copyFromLocal :TMP:/votertab10k 
:OUTPATH:/joe
+                                      cat :OUTPATH:/joe",
                             'expected_out_regex' => ":Grunt_14_output:",
                             'rc' => 0
                         },{
                             'num' => 15,
-                            'pig' => "rm fred bob joe",
-                            'not_expected_out_regex' => "joe",
+                            'pig' => "cp :INPATH:/singlefile/studenttab10k 
:OUTPATH:/fred
+                                      rm :OUTPATH:/fred",
+                            'not_expected_out_regex' => "fred",
                             'rc' => 0
                         },{
                             'num' => 16,
-                            'pig' => "rmf jill",
+                            'pig' => "rmf :OUTPATH:/jill",
                             'rc' => 0
                         }
                 ]

Modified: pig/branches/spark/test/e2e/pig/tests/hcat.conf
URL: 
http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/tests/hcat.conf?rev=1784237&r1=1784236&r2=1784237&view=diff
==============================================================================
--- pig/branches/spark/test/e2e/pig/tests/hcat.conf (original)
+++ pig/branches/spark/test/e2e/pig/tests/hcat.conf Fri Feb 24 08:19:42 2017
@@ -44,7 +44,7 @@ stored as textfile;\,
                        'num' => 2,
                        'java_params' => ['-Dhcat.bin=:HCATBIN:'],
                        'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' 
as (name, age, gpa);
-SQL drop table if exists pig_hcat_ddl_1;
+SQL drop table if exists pig_hcat_ddl_1 purge;
 sql create table pig_hcat_ddl_1(name string,
 age int,
 gpa double)
@@ -55,6 +55,35 @@ store a into ':OUTPATH:';\,
                        },
                ]
                },
+                {
+                'name' => 'Jython_HCatDDL',
+                'tests' => [
+                    {
+                        # sql command
+                                'num' => 1
+                                ,'java_params' => ['-Dhcat.bin=:HCATBIN:']
+                                ,'pig' => q\#!/usr/bin/python
+from org.apache.pig.scripting import Pig
+
+#create pig script
+
+Pig.sql("""sql drop table if exists pig_script_hcat_ddl_1;""")
+ret = Pig.sql("""sql create table pig_script_hcat_ddl_1(name string,
+age int,
+gpa double)
+stored as textfile;
+""")
+
+if ret==0:
+    print "SQL command PASSED"
+
+else:
+    raise "SQL command FAILED"
+\
+                       ,'rc' => 0
+                    },
+               ]
+               },
        ]
 }
 ;

Added: pig/branches/spark/test/e2e/pig/tests/join.conf
URL: 
http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/tests/join.conf?rev=1784237&view=auto
==============================================================================
--- pig/branches/spark/test/e2e/pig/tests/join.conf (added)
+++ pig/branches/spark/test/e2e/pig/tests/join.conf Fri Feb 24 08:19:42 2017
@@ -0,0 +1,310 @@
+#!/usr/bin/env perl
+############################################################################   
        
+#  Licensed to the Apache Software Foundation (ASF) under one or more          
        
+#  contributor license agreements.  See the NOTICE file distributed with       
        
+#  this work for additional information regarding copyright ownership.         
        
+#  The ASF licenses this file to You under the Apache License, Version 2.0     
        
+#  (the "License"); you may not use this file except in compliance with        
        
+#  the License.  You may obtain a copy of the License at                       
        
+#                                                                              
        
+#      http://www.apache.org/licenses/LICENSE-2.0                              
        
+#                                                                              
        
+#  Unless required by applicable law or agreed to in writing, software         
        
+#  distributed under the License is distributed on an "AS IS" BASIS,           
        
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.    
        
+#  See the License for the specific language governing permissions and         
        
+#  limitations under the License.                                              
        
+                                                                               
        
+###############################################################################
+
+$cfg = {
+    'driver' => 'Pig',
+
+    'groups' => [
+        {
+        'name' => 'BloomJoin_Map',
+        'execonly' => 'tez',
+        'tests' => [
+            {
+            # Tuple join key
+            'num' => 1,
+            'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as 
(name, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, 
contributions);
+--c = filter a by age < 20;
+--d = filter b by age < 20;
+e = join a by (name, age), b by (name, age) using 'bloom';
+store e into ':OUTPATH:';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, 
contributions);
+--c = filter a by age < 20;
+--d = filter b by age < 20;
+e = join a by (name, age), b by (name, age);
+store e into ':OUTPATH:';\,
+            },
+            {
+            # bytearray join key
+            'num' => 2,
+            'pig' => q\
+SET mapreduce.input.fileinputformat.split.maxsize '50000';
+SET pig.splitCombination false;
+a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, 
contributions);
+c = filter a by age < 20;
+d = filter b by age < 20;
+e = join c by name, d by name using 'bloom';
+store e into ':OUTPATH:';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, 
contributions);
+c = filter a by age < 20;
+d = filter b by age < 20;
+e = join c by name, d by name;
+store e into ':OUTPATH:';\,
+            },
+            {
+            # Left outer join and chararray join key
+            'num' => 3,
+            'pig' => q\
+SET mapreduce.input.fileinputformat.split.maxsize '50000';
+SET pig.splitCombination false;
+a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age, 
registration, contributions);
+c = join a by name left, b by name using 'bloom';
+d = foreach c generate a::name, a::age, gpa, registration, contributions;
+store d into ':OUTPATH:';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age, 
registration, contributions);
+c = join a by name left, b by name;
+d = foreach c generate a::name, a::age, gpa, registration, contributions;
+store d into ':OUTPATH:';\,
+            },
+            {
+            # Right outer join
+            'num' => 4,
+            'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as 
(name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:int, 
registration, contributions);
+c = join a by (name,age) right, b by (name,age) using 'bloom';
+store c into ':OUTPATH:';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:int, 
registration, contributions);
+c = join a by (name,age) right, b by (name,age);
+store c into ':OUTPATH:';\,
+            },
+            {
+            # Left input from a union
+            'num' => 5,
+            'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as 
(name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as 
(name:chararray, age:int, gpa);
+c = union a, b;
+d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, 
contributions);
+d = filter d by age > 60;
+e = join c by name, d by name using 'bloom' PARALLEL 3;
+store e into ':OUTPATH:';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as 
(name:chararray, age:int, gpa);
+c = union a, b;
+d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, 
contributions);
+d = filter d by age > 60;
+e = join c by name, d by name;
+store e into ':OUTPATH:';\,
+            },
+            {
+            # Right input from a union and integer join key
+            'num' => 6,
+            'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as 
(name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as 
(name:chararray, age:int, gpa);
+c = union a, b;
+c = filter c by age > 75;
+d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, 
contributions);
+e = join d by age, c by age using 'bloom' PARALLEL 3;
+store e into ':OUTPATH:';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as 
(name:chararray, age:int, gpa);
+c = union a, b;
+c = filter c by age > 75;
+d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, 
contributions);
+e = join d by age, c by age;
+store e into ':OUTPATH:';\,
+            },
+            {
+            # Left input from a split
+            'num' => 7,
+            'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as 
(name, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, 
registration, contributions);
+b = filter b by age > 75;
+c = filter a by age > 50;
+d = join a by age, b by age using 'bloom';
+store c into ':OUTPATH:.1';
+store d into ':OUTPATH:.2';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, 
registration, contributions);
+b = filter b by age > 75;
+c = filter a by age > 50;
+d = join a by age, b by age;
+store c into ':OUTPATH:.1';
+store d into ':OUTPATH:.2';\,
+            },
+            {
+            # Right input from a split
+            'num' => 8,
+            'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as 
(name, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, 
registration, contributions);
+c = filter a by age > 75;
+d = filter a by name == 'nick miller';
+e = join b by age, c by age using 'bloom';
+store d into ':OUTPATH:.1';
+store e into ':OUTPATH:.2';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, 
registration, contributions);
+c = filter a by age > 75;
+d = filter a by name == 'nick miller';
+e = join b by age, c by age;
+store d into ':OUTPATH:.1';
+store e into ':OUTPATH:.2';\,
+            },
+        ] # end of tests
+        },
+        {
+        'name' => 'BloomJoin_Reduce',
+        'execonly' => 'tez',
+        'java_params' => ['-Dpig.bloomjoin.strategy=reduce'],
+        'tests' => [
+            {
+            # Tuple join key
+            'num' => 1,
+            'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as 
(name, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, 
contributions);
+--c = filter a by age < 20;
+--d = filter b by age < 20;
+e = join a by (name, age), b by (name, age) using 'bloom';
+store e into ':OUTPATH:';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, 
contributions);
+--c = filter a by age < 20;
+--d = filter b by age < 20;
+e = join a by (name, age), b by (name, age);
+store e into ':OUTPATH:';\,
+            },
+            {
+            # bytearray join key
+            'num' => 2,
+            'pig' => q\
+SET mapreduce.input.fileinputformat.split.maxsize '50000';
+SET pig.splitCombination false;
+a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, 
contributions);
+c = filter a by age < 20;
+d = filter b by age < 20;
+e = join c by name, d by name using 'bloom';
+store e into ':OUTPATH:';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, 
contributions);
+c = filter a by age < 20;
+d = filter b by age < 20;
+e = join c by name, d by name;
+store e into ':OUTPATH:';\,
+            },
+            {
+            # Left outer join and chararray join key
+            'num' => 3,
+            'pig' => q\
+SET mapreduce.input.fileinputformat.split.maxsize '50000';
+SET pig.splitCombination false;
+a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age, 
registration, contributions);
+c = join a by name left, b by name using 'bloom';
+d = foreach c generate a::name, a::age, gpa, registration, contributions;
+store d into ':OUTPATH:';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age, 
registration, contributions);
+c = join a by name left, b by name;
+d = foreach c generate a::name, a::age, gpa, registration, contributions;
+store d into ':OUTPATH:';\,
+            },
+            {
+            # Right outer join
+            'num' => 4,
+            'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as 
(name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:int, 
registration, contributions);
+c = join a by (name,age) right, b by (name,age) using 'bloom';
+store c into ':OUTPATH:';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:int, 
registration, contributions);
+c = join a by (name,age) right, b by (name,age);
+store c into ':OUTPATH:';\,
+            },
+            {
+            # Left input from a union
+            'num' => 5,
+            'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as 
(name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as 
(name:chararray, age:int, gpa);
+c = union a, b;
+d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, 
contributions);
+d = filter d by age > 60;
+e = join c by name, d by name using 'bloom' PARALLEL 3;
+store e into ':OUTPATH:';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as 
(name:chararray, age:int, gpa);
+c = union a, b;
+d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, 
contributions);
+d = filter d by age > 60;
+e = join c by name, d by name;
+store e into ':OUTPATH:';\,
+            },
+            {
+            # Right input from a union and integer join key
+            'num' => 6,
+            'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as 
(name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as 
(name:chararray, age:int, gpa);
+c = union a, b;
+c = filter c by age > 75;
+d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, 
contributions);
+e = join d by age, c by age using 'bloom' PARALLEL 3;
+store e into ':OUTPATH:';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as 
(name:chararray, age:int, gpa);
+c = union a, b;
+c = filter c by age > 75;
+d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, 
contributions);
+e = join d by age, c by age;
+store e into ':OUTPATH:';\,
+            },
+            {
+            # Left input from a split
+            'num' => 7,
+            'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as 
(name, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, 
registration, contributions);
+b = filter b by age > 75;
+c = filter a by age > 50;
+d = join a by age, b by age using 'bloom';
+store c into ':OUTPATH:.1';
+store d into ':OUTPATH:.2';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, 
registration, contributions);
+b = filter b by age > 75;
+c = filter a by age > 50;
+d = join a by age, b by age;
+store c into ':OUTPATH:.1';
+store d into ':OUTPATH:.2';\,
+            },
+            {
+            # Right input from a split
+            'num' => 8,
+            'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as 
(name, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, 
registration, contributions);
+c = filter a by age > 75;
+d = filter a by name == 'nick miller';
+e = join b by age, c by age using 'bloom';
+store d into ':OUTPATH:.1';
+store e into ':OUTPATH:.2';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa);
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, 
registration, contributions);
+c = filter a by age > 75;
+d = filter a by name == 'nick miller';
+e = join b by age, c by age;
+store d into ':OUTPATH:.1';
+store e into ':OUTPATH:.2';\,
+            },
+        ] # end of tests
+        }
+    ] # end of groups
+};
\ No newline at end of file

Modified: pig/branches/spark/test/e2e/pig/tests/multiquery.conf
URL: 
http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/tests/multiquery.conf?rev=1784237&r1=1784236&r2=1784237&view=diff
==============================================================================
--- pig/branches/spark/test/e2e/pig/tests/multiquery.conf (original)
+++ pig/branches/spark/test/e2e/pig/tests/multiquery.conf Fri Feb 24 08:19:42 2017
@@ -728,6 +728,52 @@ b = union a1, a2;
 c = rank b by name ASC, age DESC DENSE;  
 store c into ':OUTPATH:';\,
             },
+            {
+            # Union + Split + Two replicate join
+            'num' => 12,
+            'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as 
(name, age, gpa);
+a1 = filter a by gpa is null or gpa <= 3.9;
+a2 = filter a by gpa < 2;
+b = union a1, a2;
+c = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, 
contributions);
+c1 = filter c by age < 30;
+c2 = filter c by age > 50;
+d = join b by name, c1 by name using 'replicated';
+e = join d by b::name, c2 by name using 'replicated';
+store e into ':OUTPATH:';\,
+            },
+            {
+            # Multiple Union + Multiple Split + Single store
+            'num' => 13,
+            'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as 
(name, age:int, gpa);
+b = load ':INPATH:/singlefile/studenttab10k' as (name, age:int, gpa);
+u1 = union onschema a, b;
+SPLIT u1 INTO r IF age < 30, s OTHERWISE;
+c = load ':INPATH:/singlefile/voternulltab10k' as (votername, voterage, 
registration, contributions);
+d = JOIN r BY name LEFT, c BY votername;
+u2 = UNION ONSCHEMA d, s;
+e = FILTER u2 BY name == 'nick miller';
+f = FILTER u2 BY age > 70 ;
+u3 = UNION ONSCHEMA e, f;
+store u3 into ':OUTPATH:';\,
+            },
+            {
+            # PIG-5082. Similar to MultiQuery_Union_13 but for non-store vertex group
+            'num' => 14,
+            'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as 
(name, age:int, gpa);
+b = load ':INPATH:/singlefile/studenttab10k' as (name, age:int, gpa);
+u1 = union onschema a, b;
+SPLIT u1 INTO r IF age < 30, s OTHERWISE;
+c = load ':INPATH:/singlefile/voternulltab10k' as (votername, voterage, 
registration, contributions);
+d = JOIN r BY name LEFT, c BY votername;
+u2 = UNION ONSCHEMA d, s;
+e = FILTER u2 BY name == 'nick miller';
+f = FILTER u2 BY age > 70 ;
+u3 = UNION ONSCHEMA e, f;
+SPLIT u3 INTO t if age > 75, u OTHERWISE;
+v = JOIN t BY name LEFT, c BY votername;
+store v into ':OUTPATH:';\,
+            }
             ] # end of tests
         },
         
@@ -860,7 +906,38 @@ m = UNION e, i, j, n;
 
 n = JOIN a BY name, m BY name;
 store n into ':OUTPATH:';\,
-            }
+            },
+            {
+            # Self join bloom left outer
+            'num' => 12,
+            'execonly' => 'tez',
+            'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as 
(name, age, gpa);
+b = filter a by gpa >= 3.9;
+c = filter a by gpa > 3;
+d = join b by name left outer, c by name using 'bloom';
+store d into ':OUTPATH:';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = filter a by gpa >= 3.9;
+c = filter a by gpa > 3;
+d = join b by name left outer, c by name;
+store d into ':OUTPATH:';\,
+            },
+            {
+            # Self join bloom left outer with strategy as reduce
+            'num' => 13,
+            'execonly' => 'tez',
+            'java_params' => ['-Dpig.bloomjoin.strategy=reduce'],
+            'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as 
(name, age, gpa);
+b = filter a by gpa >= 3.9;
+c = filter a by gpa > 3;
+d = join b by name left outer, c by name using 'bloom';
+store d into ':OUTPATH:';\,
+            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa);
+b = filter a by gpa >= 3.9;
+c = filter a by gpa > 3;
+d = join b by name left outer, c by name;
+store d into ':OUTPATH:';\,
+            },
             ] # end of tests
         },
 

Modified: pig/branches/spark/test/e2e/pig/tests/negative.conf
URL: http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/tests/negative.conf?rev=1784237&r1=1784236&r2=1784237&view=diff
==============================================================================
--- pig/branches/spark/test/e2e/pig/tests/negative.conf (original)
+++ pig/branches/spark/test/e2e/pig/tests/negative.conf Fri Feb 24 08:19:42 2017
@@ -473,7 +473,7 @@ define CMD `perl PigStreaming.pl` input(
 A = load ':INPATH:/singlefile/studenttab10k';
 B = stream A through CMD;
 store B into ':OUTPATH:';\,
-                        'expected_err_regex' => "Error reading output from 
Streaming binary",
+                        'expected_err_regex' => "Error reading output from 
Streaming binary|Error while reading from POStream and passing it to the 
streaming process",
                         },
                        {
                        # Invalid serializer - throws exception
@@ -568,24 +568,7 @@ store D into ':OUTPATH:';\,
                         'expected_err_regex' => "Could not resolve 
StringStoreBad using imports",
                        },
                ]
-               },
-               {
-               'name' => 'LineageErrors',
-               'tests' => [
-                       {
-                       # UDF returns a bytearray that is cast to an integer
-                'num' => 1,
-                'pig' => q\register :FUNCPATH:/testudf.jar;
-a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
-b = filter a by name lt 'b';
-c = foreach b generate 
org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, age);
-d = foreach c generate $0#'alice young';
-split d into e if $0 < 42, f if $0 >= 42;
-store e into ':OUTPATH:';\,
-                'expected_err_regex' => "Received a bytearray from the UDF or 
Union from two different Loaders. Cannot determine how to convert the bytearray 
to int",
-            },
-        ]
-        }
+               }
     ]
 }
 ;

Modified: pig/branches/spark/test/e2e/pig/tests/nightly.conf
URL: http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/tests/nightly.conf?rev=1784237&r1=1784236&r2=1784237&view=diff
==============================================================================
--- pig/branches/spark/test/e2e/pig/tests/nightly.conf (original)
+++ pig/branches/spark/test/e2e/pig/tests/nightly.conf Fri Feb 24 08:19:42 2017
@@ -567,7 +567,6 @@ store c into ':OUTPATH:';\,
                        {
                        'num' => 9,
                         'floatpostprocess' => 1,
-                        'ignore23' => 'I cannot get it right due to float 
precision, temporarily disable',
                        'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' 
as (name, age, gpa);
 b = group a by name;
 c = foreach b generate group, AVG(a.gpa);
@@ -1518,8 +1517,8 @@ store i into ':OUTPATH:';\,
             {
             # Union + operators
             'num' => 12,
-            'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as 
(name, age:int, gpa:double);
-b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name, 
age:int, gpa:double);
+            'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as 
(name:chararray, age:int, gpa:double);
+b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as 
(name:chararray, age:int, gpa:double);
 c = union a, b;
 -- Exercise all expression operators --
 d = foreach c generate (name is not NULL? UPPER(name) : 'FNU LNU') as name, 
(age < 30 ? -1 : age) as age, (gpa is NULL ? 0.0 : ((gpa > 0.5 AND gpa < 1.0) ? 
1 : gpa)) as gpa;
@@ -2186,7 +2185,7 @@ store d into ':OUTPATH:';\,
 b = order a by $0, $1, $2;
 c = limit b 100;
 store c into ':OUTPATH:';\,
-               'sortArgs' => ['-t', '  ', '-k', '1,3'],
+               'sortArgs' => ['-t', '  ', '-k', '1,2'],
                        },
                        {
                                # Make sure that limit higher than number of 
rows doesn't mess stuff up
@@ -2206,6 +2205,7 @@ store c into ':OUTPATH:';\,
                        },
                        {
                                'num' => 5,
+                               'execonly' => 'mapred,local', #tez may pick either input as part of the optimization so cannot be tested easily
                                'pig' =>q\a = load 
':INPATH:/singlefile/studenttab10k';
 b = load ':INPATH:/singlefile/votertab10k';
 a1 = foreach a generate $0, $1;
@@ -2285,7 +2285,21 @@ store d into ':OUTPATH:';\,
                                'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:double);
 b = limit a 2000;
 store b into ':OUTPATH:';\,
-                        }
+                        },
+                       {
+                               'num' => 12,
+                               'execonly' => 'tez', #Limit_5 was not able to test on tez. 
+                               'pig' =>q\a = load 
':INPATH:/singlefile/studenttab10k';
+b = load ':INPATH:/singlefile/studenttab10k';
+a1 = foreach a generate $0, $1;
+b1 = foreach b generate $0, $1;
+c = union a1, b1;
+d = limit c 100;
+store d into ':OUTPATH:';\,
+                               'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int);
+b = limit a 100;
+store b into ':OUTPATH:';\,
+                       }
                ]
                },
                {
@@ -2736,6 +2750,41 @@ store c into ':OUTPATH:';\,
                        },
                ],
         },
+        {
+        'name' => 'StoreLoad',
+        'tests' => [
+            {
+            'num' => 1,
+            'floatpostprocess' => 1,
+            'delimiter' => '    ',
+            'pig' => q\
+a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as 
(name:chararray, age:int, gpa: double);
+b = filter a by age < 25;
+c = filter a by age > 70;
+store b into ':OUTPATH:.intermediate1' using PigStorage(',');
+store c into ':OUTPATH:.intermediate2' using PigStorage(',');
+d = load ':OUTPATH:.intermediate1' using PigStorage(',') as (name:chararray, 
age:int, gpa: double);
+e = load ':OUTPATH:.intermediate2' using PigStorage(',') as (name:chararray, 
age:int, gpa: double);
+f = join d by name, e by name;
+store f into ':OUTPATH:';\,
+            'notmq' => 1,
+            },
+            {
+            # Self join
+            'num' => 2,
+            'floatpostprocess' => 1,
+            'delimiter' => '    ',
+            'pig' => q\
+a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as 
(name:chararray, age:int, gpa: double);
+b = filter a by name == 'nick miller';
+store b into ':OUTPATH:.intermediate' using PigStorage(',');
+c = load ':OUTPATH:.intermediate' using PigStorage(',') as (name:chararray, 
age:int, gpa: double);
+d = join a by name, c by name;
+store d into ':OUTPATH:';\,
+            'notmq' => 1,
+            },
+        ],
+        },
 
        {
                'name' => 'MergeJoin',
@@ -3171,6 +3220,25 @@ e = join a by name full outer, b by name
 store e into ':OUTPATH:';\,
 
                         },
+                        # skew join with tuple key
+                        {
+                            'num' => 15,
+                            'java_params' => 
['-Dpig.skewedjoin.reduce.maxtuple=100'],
+                            'pig' => q\a = load 
':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
+b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, 
contributions);
+c = group a by (name, age);
+d = group b by (name, age);
+e = join c by $0, d by $0 using 'skewed' parallel 5;
+f = foreach e generate c::group, flatten(c::a), d::group, flatten(d::b);
+store f into ':OUTPATH:';\,
+                            'verify_pig_script' => q\a = load 
':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
+b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, 
contributions);
+c = group a by (name, age);
+d = group b by (name, age);
+e = join c by $0, d by $0;
+f = foreach e generate c::group, flatten(c::a), d::group, flatten(d::b);
+store f into ':OUTPATH:';\
+                        }
                 ]
 
             },
@@ -4243,40 +4311,32 @@ store e into ':OUTPATH:';\,
                     # test common
                     'num' => 1,
                     'pig' => q\
-rmf table_testNativeMRJobSimple_input
-rmf table_testNativeMRJobSimple_output
 a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, 
gpa);
-b = native ':MAPREDJARS:/hadoop-examples.jar' Store a into 
'table_testNativeMRJobSimple_input' Load 'table_testNativeMRJobSimple_output' 
`wordcount table_testNativeMRJobSimple_input 
table_testNativeMRJobSimple_output`;
+b = native ':MAPREDJARS:/hadoop-examples.jar' Store a into 
':OUTPATH:.intermediate.1' Load ':OUTPATH:.intermediate.2' `wordcount 
:OUTPATH:.intermediate.1 :OUTPATH:.intermediate.2`;
 store b into ':OUTPATH:';\,
                     'notmq' => 1,
                     'verify_pig_script' => q\
-rmf table_testNativeMRJobSimple_input
-rmf table_testNativeMRJobSimple_output
 a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, 
gpa);
-b = mapreduce ':MAPREDJARS:/hadoop-examples.jar' Store a into 
'table_testNativeMRJobSimple_input' Load 'table_testNativeMRJobSimple_output' 
`wordcount table_testNativeMRJobSimple_input 
table_testNativeMRJobSimple_output`;
+b = mapreduce ':MAPREDJARS:/hadoop-examples.jar' Store a into 
':OUTPATH:.intermediate.1' Load ':OUTPATH:.intermediate.2' `wordcount 
:OUTPATH:.intermediate.1 :OUTPATH:.intermediate.2`;
 store b into ':OUTPATH:';\,
                     },
                     {
                     # test complex
                     'num' => 2,
                     'pig' => q\
-rmf table_testNativeMRJobSimple_input
-rmf table_testNativeMRJobSimple_output
 a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, 
gpa);
 b = foreach a generate name;
 c = distinct b;
-d = native ':MAPREDJARS:/hadoop-examples.jar' Store c into 
'table_testNativeMRJobSimple_input' Load 'table_testNativeMRJobSimple_output' 
as (name:chararray, count: int) `wordcount table_testNativeMRJobSimple_input 
table_testNativeMRJobSimple_output`;
+d = native ':MAPREDJARS:/hadoop-examples.jar' Store c into 
':OUTPATH:.intermediate.1' Load ':OUTPATH:.intermediate.2' as (name:chararray, 
count: int) `wordcount :OUTPATH:.intermediate.1 :OUTPATH:.intermediate.2`;
 e = order d by name;
 store e into ':OUTPATH:';\,
                     'sortArgs' => ['-t', '     '],
                     'notmq' => 1,
                     'verify_pig_script' => q\
-rmf table_testNativeMRJobSimple_input
-rmf table_testNativeMRJobSimple_output
 a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, 
gpa);
 b = foreach a generate name;
 c = distinct b;
-d = mapreduce ':MAPREDJARS:/hadoop-examples.jar' Store c into 
'table_testNativeMRJobSimple_input' Load 'table_testNativeMRJobSimple_output' 
as (name:chararray, count: int) `wordcount table_testNativeMRJobSimple_input 
table_testNativeMRJobSimple_output`;
+d = mapreduce ':MAPREDJARS:/hadoop-examples.jar' Store c into 
':OUTPATH:.intermediate.1' Load ':OUTPATH:.intermediate.2' as (name:chararray, 
count: int) `wordcount :OUTPATH:.intermediate.1 :OUTPATH:.intermediate.2`;
 e = order d by name;
 store e into ':OUTPATH:';\,
                     },
@@ -4284,16 +4344,8 @@ store e into ':OUTPATH:';\,
                     # test streaming
                     'num' => 3,
                     'pig' => q\
-rmf table_testNativeMRJobSimple_input
-rmf table_testNativeMRJobSimple_output
 a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, 
gpa);
-b = mapreduce ':MAPREDJARS:/hadoop-streaming.jar' Store a into 
'table_testNativeMRJobSimple_input' Load 'table_testNativeMRJobSimple_output' 
as (name:chararray, count: int) `-input table_testNativeMRJobSimple_input 
-output table_testNativeMRJobSimple_output -mapper cat -reducer wc`;
-store b into ':OUTPATH:';\,
-                    'pig23' => q\
-rmf table_testNativeMRJobSimple_input
-rmf table_testNativeMRJobSimple_output
-a = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, 
gpa);
-b = mapreduce ':MAPREDJARS:/hadoop-0.23.0-streaming.jar' Store a into 
'table_testNativeMRJobSimple_input' Load 'table_testNativeMRJobSimple_output' 
as (name:chararray, count: int) `-input table_testNativeMRJobSimple_input 
-output table_testNativeMRJobSimple_output -mapper cat -reducer wc`;
+b = mapreduce ':MAPREDJARS:/hadoop-streaming.jar' Store a into 
':OUTPATH:.intermediate.1' Load ':OUTPATH:.intermediate.2' as (name:chararray, 
count: int) `-input :OUTPATH:.intermediate.1 -output :OUTPATH:.intermediate.2 
-mapper cat -reducer wc`;
 store b into ':OUTPATH:';\,
                     'notmq' => 1,
                     },
@@ -4884,21 +4936,6 @@ a = load ':INPATH:/singlefile/allscalar1
 b = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as 
(name:chararray, age:int, gpa:double, instate:chararray);
 C = union a, b;
 store C into ':OUTPATH:';\, 
-                },
-                {
-                    # Test Union using merge with incompatible types.  float->bytearray and chararray->bytearray
-                    'num' => 8,
-                    'delimiter' => '   ',
-                    'pig' => q\
-A = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as 
(name:chararray, age:int);
-B = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as 
(name:chararray, age:chararray);
-C = union onschema A, B;
-store C into ':OUTPATH:';\,
-                    'verify_pig_script' => q\
-A = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as 
(name:chararray, age:bytearray);
-B = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as 
(name:chararray, age:bytearray);
-C = union A, B;
-store C into ':OUTPATH:';\,
                 }
               ]
 
@@ -4927,7 +4964,6 @@ store C into ':OUTPATH:';\,
                 'tests' => [
                     {
                         'num' => 1,
-                        'ignore23' => 'guava version of Pig is higher than 
hadoop 23',
                         'pig' => q?register :FUNCPATH:/testudf.jar;
                                 define gm 
org.apache.pig.test.udf.evalfunc.GoodMonitored();
                                 a = load ':INPATH:/singlefile/studenttab10k' 
as (name, age, gpa);
@@ -5297,6 +5333,26 @@ store C into ':OUTPATH:';\,
                                 C = UNION A,B;
                                 D = filter C by name == 'alice allen';
                                 store D into ':OUTPATH:';",
+                    },{
+                        'num' => 5,
+                        'pig' => "set pig.optimizer.rules.disabled 
PushUpFilter;
+                                define bb BuildBloom('Hash.JENKINS_HASH', 
'fixed', '128', '3');
+                                A = LOAD ':INPATH:/singlefile/studenttab10k' 
AS (name:chararray, age:int, gpa:double);
+                                B = filter A by name == 'alice allen';
+                                C = group B all;
+                                D = foreach C generate bb(B.name) as 
bloomfilter;
+                                E = LOAD ':INPATH:/singlefile/studenttab10k' 
AS (name:chararray, age:int, gpa:double);
+                                F = LOAD ':INPATH:/singlefile/studenttab10k' 
AS (name:chararray, age:int, gpa:double);
+                                G = union E, F;
+                                -- PushUpFilter is disabled to avoid filter 
being pushed before union
+                                H = filter G by Bloom(D.bloomfilter, name);
+                                store H into ':OUTPATH:';",
+                        'verify_pig_script' => "
+                                A = LOAD ':INPATH:/singlefile/studenttab10k' 
AS (name, age:int ,gpa:double);
+                                B = LOAD ':INPATH:/singlefile/studenttab10k' 
AS (name, age:int ,gpa:double);
+                                C = UNION A,B;
+                                D = filter C by name == 'alice allen';
+                                store D into ':OUTPATH:';",
                     }
                 ],
             },{
@@ -5637,13 +5693,15 @@ store a into ':OUTPATH:';\,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
                                     SET default_parallel 7;
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                    SET pig.splitCombination false;
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     B = rank A;
                                     C = foreach B generate rank_A,a,b,c;
                                     store C into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     C = foreach A generate rownumber,a,b,c;
                                     store C into ':OUTPATH:';
                                 \,
@@ -5652,13 +5710,15 @@ store a into ':OUTPATH:';\,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
                                     SET default_parallel 9;
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                    SET pig.splitCombination false;
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     B = rank A by b DESC,a ASC;
                                     C = foreach B generate rank_A,b,a;
                                     store C into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     C = foreach A generate rankbdaa,b,a;
                                     store C into ':OUTPATH:';
                                 \,
@@ -5667,13 +5727,15 @@ store a into ':OUTPATH:';\,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
                                     SET default_parallel 7;
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                    SET pig.splitCombination false;
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     B = rank A by c ASC,b DESC;
                                     C = foreach B generate rank_A,c,b;
                                     store C into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     C = foreach A generate rankcabd,c,b;
                                     store C into ':OUTPATH:';
                                 \,
@@ -5681,26 +5743,29 @@ store a into ':OUTPATH:';\,
                         'num' => 4,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
-                                    SET default_parallel 25;
-                                    A = LOAD ':INPATH:/singlefile/biggish' 
using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
+                                    SET default_parallel 5;
+                                    SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                    SET pig.splitCombination false;
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     B = rank A;
                                     C = order B by rank_A;
-                                    D = foreach C generate rank_A,rownumber;
+                                    D = foreach C generate rank_A,a,b,c;
                                     store D into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/biggish' 
using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
-                                    D = foreach A generate idx,rownumber;
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
+                                    D = foreach A generate rownumber,a,b,c;
                                     store D into ':OUTPATH:';
                                 \,
                     }, {
                         'num' => 5,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
-                                    SET default_parallel 11;
+                                    SET default_parallel 5;
+                                    SET 
mapreduce.input.fileinputformat.split.maxsize '300';
                                     SET pig.splitCombination false;
-                                    A = LOAD ':INPATH:/singlefile/biggish' 
using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
-                                    B = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
+                                    B = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     C = join A by rownumber, B by rownumber;
                                     D = order C by 
B::rankcabd,B::rankbdca,B::rankaaba;
                                     E = rank D;
@@ -5710,7 +5775,7 @@ store a into ':OUTPATH:';\,
                                     store H into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as (rownumber:long,idx:long,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as (rownumber:long,idx:long);
                                     B = foreach A generate rownumber,1;
                                     C = order B by rownumber;
                                     store C into ':OUTPATH:';
@@ -5719,14 +5784,16 @@ store a into ':OUTPATH:';\,
                         'num' => 6,
                         'execonly' => 'mapred,tez',
                         'pig' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                    SET pig.splitCombination false;
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     split A into M if rownumber > 15, N if 
rownumber < 25;
                                     C = rank N;
                                     D = foreach C generate $0, a, b, c;
                                     store D into ':OUTPATH:';
                                 \,
                         'verify_pig_script' => q\
-                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                    A = LOAD ':INPATH:/singlefile/prerank' 
using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                     B = filter A by rownumber < 25;
                                     D = foreach B generate rownumber, a, b, c;
                                     store D into ':OUTPATH:';
@@ -5741,14 +5808,16 @@ store a into ':OUTPATH:';\,
                                                'num' => 1,
                                                'execonly' => 'mapred,tez',
                                                'pig' => q\
+                                                                        SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                                                        SET 
pig.splitCombination false;
                                                                        SET 
default_parallel 9;
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        B = 
rank A by a ASC,b ASC DENSE;
                                                                        C = 
foreach B generate rank_A,a,b;
                                                                        store C 
into ':OUTPATH:';
                                                                \,
                                                'verify_pig_script' => q\
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        C = 
foreach A generate rankaaba,a,b;
                                                                        store C 
into ':OUTPATH:';
                                                                \,
@@ -5756,14 +5825,16 @@ store a into ':OUTPATH:';\,
                                                'num' => 2,
                                                'execonly' => 'mapred,tez',
                                                'pig' => q\
+                                                                        SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                                                        SET 
pig.splitCombination false;
                                                                        SET 
default_parallel 9;
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        B = 
rank A by a ASC,c DESC DENSE;
                                                                        C = 
foreach B generate rank_A,a,c;
                                                                        store C 
into ':OUTPATH:';
                                                                \,
                                                'verify_pig_script' => q\
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        C = 
foreach A generate rankaacd,a,c;
                                                                        store C 
into ':OUTPATH:';
                                                                \,
@@ -5771,14 +5842,16 @@ store a into ':OUTPATH:';\,
                                                'num' => 3,
                                                'execonly' => 'mapred,tez',
                                                'pig' => q\
+                                                                        SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                                                        SET 
pig.splitCombination false;
                                                                        SET 
default_parallel 7;
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        B = 
rank A by b DESC,c ASC DENSE;
                                                                        C = 
foreach B generate rank_A,b,c;
                                                                        store C 
into ':OUTPATH:';
                                                                \,
                                                'verify_pig_script' => q\
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        C = 
foreach A generate rankbdca,b,c;
                                                                        store C 
into ':OUTPATH:';
                                                                \,
@@ -5786,9 +5859,11 @@ store a into ':OUTPATH:';\,
                                                'num' => 4,
                                                'execonly' => 'mapred,tez',
                                                'pig' => q\
+                                                                        SET 
mapreduce.input.fileinputformat.split.maxsize '300';
+                                                                        SET 
pig.splitCombination false;
                                                                        SET 
default_parallel 7;
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
-                                                                       B = 
foreach A generate a,b,c,tail;
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
+                                                                       B = 
foreach A generate a,b,c;
                                                                        C = 
rank B by a ASC,b ASC DENSE;
                                                                        D = 
rank C by a ASC,c DESC DENSE;
                                                                        E = 
rank D by b DESC,c ASC DENSE;
@@ -5796,7 +5871,7 @@ store a into ':OUTPATH:';\,
                                                                        store F 
into ':OUTPATH:';
                                                                \,
                                                'verify_pig_script' => q\
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        B = 
foreach A generate rankbdca,rankaacd,rankaaba,a,b,c;
                                                                        store B 
into ':OUTPATH:';
                                                                \,
@@ -5805,8 +5880,9 @@ store a into ':OUTPATH:';\,
                                                'execonly' => 'mapred,tez',
                                                'pig' => q\
                                                                        SET 
default_parallel 9;
+                                                                        SET 
mapreduce.input.fileinputformat.split.maxsize '300';
                                                                        SET 
pig.splitCombination false;
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        B = 
foreach A generate a,b,c;
                                                                        C = 
rank B by a ASC,b ASC DENSE;
                                                                        D = 
rank B by a ASC,c DESC DENSE;
@@ -5816,7 +5892,7 @@ store a into ':OUTPATH:';\,
                                                                        store H 
into ':OUTPATH:';
                                                                \,
                                                'verify_pig_script' => q\
-                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int,tail:bytearray);
+                                                                       A = 
LOAD ':INPATH:/singlefile/prerank' using PigStorage(',') as 
(rownumber:long,rankcabd:long,rankbdaa:long,rankbdca:long,rankaacd:long,rankaaba:long,a:int,b:int,c:int);
                                                                        C = 
foreach A generate rankaaba,a,b,c;
                                                                        E = 
order C by a ASC,b ASC;
                                                                        D = 
foreach A generate rankaacd,a,b,c;

Modified: pig/branches/spark/test/e2e/pig/tests/orc.conf
URL: 
http://svn.apache.org/viewvc/pig/branches/spark/test/e2e/pig/tests/orc.conf?rev=1784237&r1=1784236&r2=1784237&view=diff
==============================================================================
--- pig/branches/spark/test/e2e/pig/tests/orc.conf (original)
+++ pig/branches/spark/test/e2e/pig/tests/orc.conf Fri Feb 24 08:19:42 2017
@@ -1,3 +1,21 @@
+#!/usr/bin/env perl
+############################################################################   
        
+#  Licensed to the Apache Software Foundation (ASF) under one or more          
        
+#  contributor license agreements.  See the NOTICE file distributed with       
        
+#  this work for additional information regarding copyright ownership.         
        
+#  The ASF licenses this file to You under the Apache License, Version 2.0     
        
+#  (the "License"); you may not use this file except in compliance with        
        
+#  the License.  You may obtain a copy of the License at                       
        
+#                                                                              
        
+#      http://www.apache.org/licenses/LICENSE-2.0                              
        
+#                                                                              
        
+#  Unless required by applicable law or agreed to in writing, software         
        
+#  distributed under the License is distributed on an "AS IS" BASIS,           
        
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.    
        
+#  See the License for the specific language governing permissions and         
        
+#  limitations under the License.                                              
        
+                                                                               
        
+###############################################################################
 $cfg = {
         'driver' => 'Pig',
         'nummachines' => 5,

svn commit: r1784237 [13/22] - in /pig/branches/spark: ./ bin/ conf/ contrib/piggybank/java/ contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/ contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachel...

Reply via email to