Modified: pig/trunk/src/org/apache/pig/newplan/logical/relational/LogToPhyTranslationVisitor.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/relational/LogToPhyTranslationVisitor.java?rev=1780431&r1=1780430&r2=1780431&view=diff ============================================================================== --- pig/trunk/src/org/apache/pig/newplan/logical/relational/LogToPhyTranslationVisitor.java (original) +++ pig/trunk/src/org/apache/pig/newplan/logical/relational/LogToPhyTranslationVisitor.java Thu Jan 26 17:40:35 2017 @@ -1414,7 +1414,7 @@ public class LogToPhyTranslationVisitor return; } - else if (loj.getJoinType() == LOJoin.JOINTYPE.HASH){ + else if (loj.getJoinType() == LOJoin.JOINTYPE.HASH || loj.getJoinType() == LOJoin.JOINTYPE.BLOOM){ POPackage poPackage = compileToLR_GR_PackTrio(loj, loj.getCustomPartitioner(), innerFlags, loj.getExpressionPlans()); POForEach fe = compileFE4Flattening(innerFlags, scope, parallel, alias, location, inputs); currentPlan.add(fe); @@ -1425,7 +1425,20 @@ public class LogToPhyTranslationVisitor e.getErrorCode(),e.getErrorSource(),e); } logToPhyMap.put(loj, fe); - poPackage.getPkgr().setPackageType(PackageType.JOIN); + if (loj.getJoinType() == LOJoin.JOINTYPE.BLOOM) { + if (innerFlags.length == 2) { + if (innerFlags[0] == false && innerFlags[1] == false) { + throw new LogicalToPhysicalTranslatorException( + "Error at " + loj.getLocation() + " with alias "+ loj.getAlias() + + ". Bloom join cannot be used with a FULL OUTER join.", + 1109, + PigException.INPUT); + } + } + poPackage.getPkgr().setPackageType(PackageType.BLOOMJOIN); + } else { + poPackage.getPkgr().setPackageType(PackageType.JOIN); + } } translateSoftLinks(loj); }
Modified: pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java?rev=1780431&r1=1780430&r2=1780431&view=diff ============================================================================== --- pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java (original) +++ pig/trunk/src/org/apache/pig/parser/LogicalPlanBuilder.java Thu Jan 26 17:40:35 2017 @@ -1788,6 +1788,8 @@ public class LogicalPlanBuilder { return JOINTYPE.REPLICATED; } else if( modifier.equalsIgnoreCase( "hash" ) || modifier.equalsIgnoreCase( "default" ) ) { return LOJoin.JOINTYPE.HASH; + } else if( modifier.equalsIgnoreCase( "bloom" ) ) { + return LOJoin.JOINTYPE.BLOOM; } else if( modifier.equalsIgnoreCase( "skewed" ) ) { return JOINTYPE.SKEWED; } else if (modifier.equalsIgnoreCase("merge")) { @@ -1796,7 +1798,7 @@ public class LogicalPlanBuilder { return JOINTYPE.MERGESPARSE; } else { throw new ParserValidationException( intStream, loc, - "Only REPL, REPLICATED, HASH, SKEWED, MERGE, and MERGE-SPARSE are vaild JOIN modifiers." ); + "Only REPL, REPLICATED, HASH, BLOOM, SKEWED, MERGE, and MERGE-SPARSE are vaild JOIN modifiers." 
); } } Modified: pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java?rev=1780431&r1=1780430&r2=1780431&view=diff ============================================================================== --- pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java (original) +++ pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java Thu Jan 26 17:40:35 2017 @@ -133,6 +133,8 @@ public abstract class ScriptState { MERGE_SPARSE_JOIN, REPLICATED_JOIN, SKEWED_JOIN, + BUILD_BLOOM, + FILTER_BLOOM, HASH_JOIN, COLLECTED_GROUP, MERGE_COGROUP, @@ -312,7 +314,7 @@ public abstract class ScriptState { maxScriptSize = Integer.valueOf(prop); } } - + this.truncatedScript = (script.length() > maxScriptSize) ? script.substring(0, maxScriptSize) : script; @@ -485,6 +487,10 @@ public abstract class ScriptState { public void visit(LOJoin op) { if (op.getJoinType() == JOINTYPE.HASH) { feature.set(PIG_FEATURE.HASH_JOIN.ordinal()); + } else if (op.getJoinType() == JOINTYPE.BLOOM) { + feature.set(PIG_FEATURE.HASH_JOIN.ordinal()); + feature.set(PIG_FEATURE.BUILD_BLOOM.ordinal()); + feature.set(PIG_FEATURE.FILTER_BLOOM.ordinal()); } else if (op.getJoinType() == JOINTYPE.MERGE) { feature.set(PIG_FEATURE.MERGE_JOIN.ordinal()); } else if (op.getJoinType() == JOINTYPE.MERGESPARSE) { @@ -506,6 +512,7 @@ public abstract class ScriptState { feature.set(PIG_FEATURE.RANK.ordinal()); } + @Override public void visit(LOSort op) { feature.set(PIG_FEATURE.ORDER_BY.ordinal()); } Modified: pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java?rev=1780431&r1=1780430&r2=1780431&view=diff ============================================================================== --- pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java (original) +++ 
pig/trunk/src/org/apache/pig/tools/pigstats/tez/TezScriptState.java Thu Jan 26 17:40:35 2017 @@ -275,6 +275,12 @@ public class TezScriptState extends Scri if (tezOp.isRegularJoin()) { feature.set(PIG_FEATURE.HASH_JOIN.ordinal()); } + if (tezOp.isBuildBloom()) { + feature.set(PIG_FEATURE.BUILD_BLOOM.ordinal()); + } + if (tezOp.isFilterBloom()) { + feature.set(PIG_FEATURE.FILTER_BLOOM.ordinal()); + } if (tezOp.isUnion()) { feature.set(PIG_FEATURE.UNION.ordinal()); } Modified: pig/trunk/test/e2e/pig/build.xml URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/build.xml?rev=1780431&r1=1780430&r2=1780431&view=diff ============================================================================== --- pig/trunk/test/e2e/pig/build.xml (original) +++ pig/trunk/test/e2e/pig/build.xml Thu Jan 26 17:40:35 2017 @@ -137,6 +137,7 @@ <path path="${test.location}/tests/multiquery.conf"/> <path path="${test.location}/tests/negative.conf"/> <path path="${test.location}/tests/nightly.conf"/> + <path path="${test.location}/tests/join.conf"/> <path path="${test.location}/tests/streaming.conf"/> <path path="${test.location}/tests/streaming_local.conf"/> <path path="${test.location}/tests/turing_jython.conf"/> Added: pig/trunk/test/e2e/pig/tests/join.conf URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/join.conf?rev=1780431&view=auto ============================================================================== --- pig/trunk/test/e2e/pig/tests/join.conf (added) +++ pig/trunk/test/e2e/pig/tests/join.conf Thu Jan 26 17:40:35 2017 @@ -0,0 +1,310 @@ +#!/usr/bin/env perl +############################################################################ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +############################################################################### + +$cfg = { + 'driver' => 'Pig', + + 'groups' => [ + { + 'name' => 'BloomJoin_Map', + 'execonly' => 'tez', + 'tests' => [ + { + # Tuple join key + 'num' => 1, + 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, contributions); +--c = filter a by age < 20; +--d = filter b by age < 20; +e = join a by (name, age), b by (name, age) using 'bloom'; +store e into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, contributions); +--c = filter a by age < 20; +--d = filter b by age < 20; +e = join a by (name, age), b by (name, age); +store e into ':OUTPATH:';\, + }, + { + # bytearray join key + 'num' => 2, + 'pig' => q\ +SET mapreduce.input.fileinputformat.split.maxsize '50000'; +SET pig.splitCombination false; +a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, contributions); +c = filter a by age < 20; +d = filter b by age < 20; +e = join c by name, d by name using 'bloom'; +store e into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, 
age, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, contributions); +c = filter a by age < 20; +d = filter b by age < 20; +e = join c by name, d by name; +store e into ':OUTPATH:';\, + }, + { + # Left outer join and chararray join key + 'num' => 3, + 'pig' => q\ +SET mapreduce.input.fileinputformat.split.maxsize '50000'; +SET pig.splitCombination false; +a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age, registration, contributions); +c = join a by name left, b by name using 'bloom'; +d = foreach c generate a::name, a::age, gpa, registration, contributions; +store d into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age, registration, contributions); +c = join a by name left, b by name; +d = foreach c generate a::name, a::age, gpa, registration, contributions; +store d into ':OUTPATH:';\, + }, + { + # Right outer join + 'num' => 4, + 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:int, registration, contributions); +c = join a by (name,age) right, b by (name,age) using 'bloom'; +store c into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:int, registration, contributions); +c = join a by (name,age) right, b by (name,age); +store c into ':OUTPATH:';\, + }, + { + # Left input from a union + 'num' => 5, + 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa); +b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa); +c = union a, b; +d = load 
':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); +d = filter d by age > 60; +e = join c by name, d by name using 'bloom' PARALLEL 3; +store e into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa); +b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa); +c = union a, b; +d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); +d = filter d by age > 60; +e = join c by name, d by name; +store e into ':OUTPATH:';\, + }, + { + # Right input from a union and integer join key + 'num' => 6, + 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa); +b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa); +c = union a, b; +c = filter c by age > 75; +d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); +e = join d by age, c by age using 'bloom' PARALLEL 3; +store e into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa); +b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa); +c = union a, b; +c = filter c by age > 75; +d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); +e = join d by age, c by age; +store e into ':OUTPATH:';\, + }, + { + # Left input from a split + 'num' => 7, + 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, registration, contributions); +b = filter b by age > 75; +c = filter a by age > 50; +d = join a by age, b by age using 'bloom'; +store c into ':OUTPATH:.1'; +store d into ':OUTPATH:.2';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa); +b = load 
':INPATH:/singlefile/voternulltab10k' as (name, age:int, registration, contributions); +b = filter b by age > 75; +c = filter a by age > 50; +d = join a by age, b by age; +store c into ':OUTPATH:.1'; +store d into ':OUTPATH:.2';\, + }, + { + # Right input from a split + 'num' => 8, + 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, registration, contributions); +c = filter a by age > 75; +d = filter a by name == 'nick miller'; +e = join b by age, c by age using 'bloom'; +store d into ':OUTPATH:.1'; +store e into ':OUTPATH:.2';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, registration, contributions); +c = filter a by age > 75; +d = filter a by name == 'nick miller'; +e = join b by age, c by age; +store d into ':OUTPATH:.1'; +store e into ':OUTPATH:.2';\, + }, + ] # end of tests + }, + { + 'name' => 'BloomJoin_Reduce', + 'execonly' => 'tez', + 'java_params' => ['-Dpig.bloomjoin.strategy=reduce'], + 'tests' => [ + { + # Tuple join key + 'num' => 1, + 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, contributions); +--c = filter a by age < 20; +--d = filter b by age < 20; +e = join a by (name, age), b by (name, age) using 'bloom'; +store e into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, contributions); +--c = filter a by age < 20; +--d = filter b by age < 20; +e = join a by (name, age), b by (name, age); +store e into ':OUTPATH:';\, + }, + { + # bytearray join key + 'num' => 2, + 'pig' => q\ +SET mapreduce.input.fileinputformat.split.maxsize '50000'; +SET pig.splitCombination false; +a = load 
':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, contributions); +c = filter a by age < 20; +d = filter b by age < 20; +e = join c by name, d by name using 'bloom'; +store e into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name, age, registration, contributions); +c = filter a by age < 20; +d = filter b by age < 20; +e = join c by name, d by name; +store e into ':OUTPATH:';\, + }, + { + # Left outer join and chararray join key + 'num' => 3, + 'pig' => q\ +SET mapreduce.input.fileinputformat.split.maxsize '50000'; +SET pig.splitCombination false; +a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age, registration, contributions); +c = join a by name left, b by name using 'bloom'; +d = foreach c generate a::name, a::age, gpa, registration, contributions; +store d into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age, registration, contributions); +c = join a by name left, b by name; +d = foreach c generate a::name, a::age, gpa, registration, contributions; +store d into ':OUTPATH:';\, + }, + { + # Right outer join + 'num' => 4, + 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:int, registration, contributions); +c = join a by (name,age) right, b by (name,age) using 'bloom'; +store c into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name:chararray, age:int, registration, 
contributions); +c = join a by (name,age) right, b by (name,age); +store c into ':OUTPATH:';\, + }, + { + # Left input from a union + 'num' => 5, + 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa); +b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa); +c = union a, b; +d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); +d = filter d by age > 60; +e = join c by name, d by name using 'bloom' PARALLEL 3; +store e into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa); +b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa); +c = union a, b; +d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); +d = filter d by age > 60; +e = join c by name, d by name; +store e into ':OUTPATH:';\, + }, + { + # Right input from a union and integer join key + 'num' => 6, + 'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa); +b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa); +c = union a, b; +c = filter c by age > 75; +d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); +e = join d by age, c by age using 'bloom' PARALLEL 3; +store e into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa); +b = load ':INPATH:/singlefile/studentcolon10k' using PigStorage(':') as (name:chararray, age:int, gpa); +c = union a, b; +c = filter c by age > 75; +d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); +e = join d by age, c by age; +store e into ':OUTPATH:';\, + }, + { + # Left input from a split + 'num' => 7, + 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa); 
+b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, registration, contributions); +b = filter b by age > 75; +c = filter a by age > 50; +d = join a by age, b by age using 'bloom'; +store c into ':OUTPATH:.1'; +store d into ':OUTPATH:.2';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, registration, contributions); +b = filter b by age > 75; +c = filter a by age > 50; +d = join a by age, b by age; +store c into ':OUTPATH:.1'; +store d into ':OUTPATH:.2';\, + }, + { + # Right input from a split + 'num' => 8, + 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, registration, contributions); +c = filter a by age > 75; +d = filter a by name == 'nick miller'; +e = join b by age, c by age using 'bloom'; +store d into ':OUTPATH:.1'; +store e into ':OUTPATH:.2';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age:int, gpa); +b = load ':INPATH:/singlefile/voternulltab10k' as (name, age:int, registration, contributions); +c = filter a by age > 75; +d = filter a by name == 'nick miller'; +e = join b by age, c by age; +store d into ':OUTPATH:.1'; +store e into ':OUTPATH:.2';\, + }, + ] # end of tests + } + ] # end of groups +}; \ No newline at end of file Modified: pig/trunk/test/e2e/pig/tests/multiquery.conf URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/multiquery.conf?rev=1780431&r1=1780430&r2=1780431&view=diff ============================================================================== --- pig/trunk/test/e2e/pig/tests/multiquery.conf (original) +++ pig/trunk/test/e2e/pig/tests/multiquery.conf Thu Jan 26 17:40:35 2017 @@ -906,7 +906,38 @@ m = UNION e, i, j, n; n = JOIN a BY name, m BY name; store n into ':OUTPATH:';\, - } + }, + { + # Self join bloom left outer + 'num' => 12, + 
'execonly' => 'tez', + 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa); +b = filter a by gpa >= 3.9; +c = filter a by gpa > 3; +d = join b by name left outer, c by name using 'bloom'; +store d into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa); +b = filter a by gpa >= 3.9; +c = filter a by gpa > 3; +d = join b by name left outer, c by name; +store d into ':OUTPATH:';\, + }, + { + # Self join bloom left outer with strategy as reduce + 'num' => 13, + 'execonly' => 'tez', + 'java_params' => ['-Dpig.bloomjoin.strategy=reduce'], + 'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa); +b = filter a by gpa >= 3.9; +c = filter a by gpa > 3; +d = join b by name left outer, c by name using 'bloom'; +store d into ':OUTPATH:';\, + 'verify_pig_script' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age, gpa); +b = filter a by gpa >= 3.9; +c = filter a by gpa > 3; +d = join b by name left outer, c by name; +store d into ':OUTPATH:';\, + }, ] # end of tests }, Modified: pig/trunk/test/e2e/pig/tests/orc.conf URL: http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/orc.conf?rev=1780431&r1=1780430&r2=1780431&view=diff ============================================================================== --- pig/trunk/test/e2e/pig/tests/orc.conf (original) +++ pig/trunk/test/e2e/pig/tests/orc.conf Thu Jan 26 17:40:35 2017 @@ -1,3 +1,21 @@ +#!/usr/bin/env perl +############################################################################ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +############################################################################### $cfg = { 'driver' => 'Pig', 'nummachines' => 5, Modified: pig/trunk/test/org/apache/pig/test/TestEmptyInputDir.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestEmptyInputDir.java?rev=1780431&r1=1780430&r2=1780431&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/test/TestEmptyInputDir.java (original) +++ pig/trunk/test/org/apache/pig/test/TestEmptyInputDir.java Thu Jan 26 17:40:35 2017 @@ -246,6 +246,66 @@ public class TestEmptyInputDir { } } + @Test + public void testBloomJoin() throws Exception { + PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE)); + w.println("A = load '" + INPUT_FILE + "' as (x:int);"); + w.println("B = load '" + EMPTY_DIR + "' as (x:int);"); + w.println("C = join B by $0, A by $0 using 'bloom';"); + w.println("D = join A by $0, B by $0 using 'bloom';"); + w.println("store C into '" + OUTPUT_FILE + "';"); + w.println("store D into 'output1';"); + w.close(); + + try { + String[] args = { "-x", cluster.getExecType().name(), PIG_FILE, }; + PigStats stats = PigRunner.run(args, null); + + assertTrue(stats.isSuccessful()); + assertEquals(0, stats.getNumberRecords(OUTPUT_FILE)); + assertEquals(0, stats.getNumberRecords("output1")); + assertEmptyOutputFile(); + } finally { + new File(PIG_FILE).delete(); + Util.deleteFile(cluster, OUTPUT_FILE); + Util.deleteFile(cluster, "output1"); + } + } + + @Test + public void testBloomJoinOuter() throws Exception { + 
PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE)); + w.println("A = load '" + INPUT_FILE + "' as (x:int);"); + w.println("B = load '" + EMPTY_DIR + "' as (x:int);"); + w.println("C = join B by $0 left outer, A by $0 using 'bloom';"); + w.println("D = join A by $0 left outer, B by $0 using 'bloom';"); + w.println("E = join B by $0 right outer, A by $0 using 'bloom';"); + w.println("F = join A by $0 right outer, B by $0 using 'bloom';"); + w.println("store C into '" + OUTPUT_FILE + "';"); + w.println("store D into 'output1';"); + w.println("store E into 'output2';"); + w.println("store F into 'output3';"); + w.close(); + + try { + String[] args = { "-x", cluster.getExecType().name(), PIG_FILE, }; + PigStats stats = PigRunner.run(args, null); + + assertTrue(stats.isSuccessful()); + assertEquals(0, stats.getNumberRecords(OUTPUT_FILE)); + assertEquals(2, stats.getNumberRecords("output1")); + assertEquals(2, stats.getNumberRecords("output2")); + assertEquals(0, stats.getNumberRecords("output3")); + assertEmptyOutputFile(); + } finally { + new File(PIG_FILE).delete(); + Util.deleteFile(cluster, OUTPUT_FILE); + Util.deleteFile(cluster, "output1"); + Util.deleteFile(cluster, "output2"); + Util.deleteFile(cluster, "output3"); + } + } + private void assertEmptyOutputFile() throws IllegalArgumentException, IOException { FileSystem fs = cluster.getFileSystem(); FileStatus status = fs.getFileStatus(new Path(OUTPUT_FILE)); Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1-KeyToReducer.gld URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1-KeyToReducer.gld?rev=1780431&view=auto ============================================================================== --- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1-KeyToReducer.gld (added) +++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1-KeyToReducer.gld Thu Jan 26 17:40:35 2017 @@ -0,0 +1,91 @@ 
+#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-48 -> Tez vertex scope-49,Tez vertex scope-50, +Tez vertex scope-50 -> Tez vertex scope-46,Tez vertex scope-47, +Tez vertex scope-46 -> Tez vertex scope-49, +Tez vertex scope-47 -> Tez vertex scope-49, +Tez vertex scope-49 + +Tez vertex scope-48 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{bytearray}(false) - scope-30 -> [ scope-49, scope-50] +| | +| Project[bytearray][0] - scope-31 +| +|---c: New For Each(false,false)[bag] - scope-20 + | | + | Project[bytearray][0] - scope-15 + | | + | Cast[int] - scope-18 + | | + | |---Project[bytearray][1] - scope-17 + | + |---c: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-14 +Tez vertex scope-50 +# Combine plan on edge <scope-48> +Local Rearrange[tuple]{int}(false) - scope-55 -> scope-50 +| | +| Project[int][0] - scope-54 +| +|---Package(BloomPackager)[tuple]{int} - scope-53 +# Plan on vertex +POValueOutputTez - scope-52 -> [scope-46, scope-47] +| +|---Package(BloomPackager)[tuple]{int} - scope-51 +Tez vertex scope-46 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{bytearray}(false) - scope-26 <- scope-50 -> scope-49 +| | +| Project[bytearray][0] - scope-27 +| +|---b: New For Each(false,false)[bag] - scope-6 + | | + | Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-4 + | | + | |---Project[bytearray][1] - scope-3 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-47 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{bytearray}(false) - scope-28 <- scope-50 -> scope-49 +| | +| Project[bytearray][0] - scope-29 +| +|---a: New For Each(false,false)[bag] - scope-13 + | | + | Project[bytearray][0] - scope-8 + | | + | Cast[int] - scope-11 + | | + | 
|---Project[bytearray][1] - scope-10 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-7 +Tez vertex scope-49 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-45 +| +|---e: New For Each(false,false,false,false)[bag] - scope-44 + | | + | Project[bytearray][2] - scope-36 + | | + | Project[int][3] - scope-38 + | | + | Project[int][1] - scope-40 + | | + | Project[int][5] - scope-42 + | + |---d: New For Each(true,true,true)[tuple] - scope-35 + | | + | Project[bag][1] - scope-32 + | | + | Project[bag][2] - scope-33 + | | + | Project[bag][3] - scope-34 + | + |---d: Package(Packager)[tuple]{bytearray} - scope-25 Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1.gld URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1.gld?rev=1780431&view=auto ============================================================================== --- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1.gld (added) +++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1.gld Thu Jan 26 17:40:35 2017 @@ -0,0 +1,91 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-48 -> Tez vertex scope-49,Tez vertex scope-50, +Tez vertex scope-50 -> Tez vertex scope-46,Tez vertex scope-47, +Tez vertex scope-46 -> Tez vertex scope-49, +Tez vertex scope-47 -> Tez vertex scope-49, +Tez vertex scope-49 + +Tez vertex scope-48 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{bytearray}(false) - scope-30 -> [ scope-49, scope-50] +| | +| Project[bytearray][0] - scope-31 +| +|---c: New For Each(false,false)[bag] - scope-20 + | | + | Project[bytearray][0] - scope-15 + | | + | Cast[int] - 
scope-18 + | | + | |---Project[bytearray][1] - scope-17 + | + |---c: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-14 +Tez vertex scope-50 +# Combine plan on edge <scope-48> +Local Rearrange[tuple]{int}(false) - scope-55 -> scope-50 +| | +| Project[int][0] - scope-54 +| +|---Package(BloomPackager)[tuple]{int} - scope-53 +# Plan on vertex +POValueOutputTez - scope-52 -> [scope-46, scope-47] +| +|---Package(BloomPackager)[tuple]{int} - scope-51 +Tez vertex scope-46 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{bytearray}(false) - scope-26 <- scope-50 -> scope-49 +| | +| Project[bytearray][0] - scope-27 +| +|---b: New For Each(false,false)[bag] - scope-6 + | | + | Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-4 + | | + | |---Project[bytearray][1] - scope-3 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-47 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{bytearray}(false) - scope-28 <- scope-50 -> scope-49 +| | +| Project[bytearray][0] - scope-29 +| +|---a: New For Each(false,false)[bag] - scope-13 + | | + | Project[bytearray][0] - scope-8 + | | + | Cast[int] - scope-11 + | | + | |---Project[bytearray][1] - scope-10 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-7 +Tez vertex scope-49 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-45 +| +|---e: New For Each(false,false,false,false)[bag] - scope-44 + | | + | Project[bytearray][2] - scope-36 + | | + | Project[int][3] - scope-38 + | | + | Project[int][1] - scope-40 + | | + | Project[int][5] - scope-42 + | + |---d: New For Each(true,true,true)[tuple] - scope-35 + | | + | Project[bag][1] - scope-32 + | | + | Project[bag][2] - scope-33 + | | + | Project[bag][3] - scope-34 + | + |---d: Package(Packager)[tuple]{bytearray} - scope-25 Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2-KeyToReducer.gld URL: 
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2-KeyToReducer.gld?rev=1780431&view=auto ============================================================================== --- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2-KeyToReducer.gld (added) +++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2-KeyToReducer.gld Thu Jan 26 17:40:35 2017 @@ -0,0 +1,83 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-39 -> Tez vertex scope-41,Tez vertex scope-42, +Tez vertex scope-42 -> Tez vertex scope-40, +Tez vertex scope-40 -> Tez vertex scope-41, +Tez vertex scope-41 + +Tez vertex scope-39 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{chararray}(false) - scope-20 -> [ scope-41, scope-42] +| | +| Project[chararray][0] - scope-21 +| +|---a: New For Each(false,false)[bag] - scope-7 + | | + | Cast[chararray] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-42 +# Combine plan on edge <scope-39> +Local Rearrange[tuple]{int}(false) - scope-47 -> scope-42 +| | +| Project[int][0] - scope-46 +| +|---Package(BloomPackager)[tuple]{int} - scope-45 +# Plan on vertex +POValueOutputTez - scope-44 -> [scope-40] +| +|---Package(BloomPackager)[tuple]{int} - scope-43 +Tez vertex scope-40 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{chararray}(false) - scope-22 <- scope-42 -> scope-41 +| | +| Project[chararray][0] - scope-23 +| +|---b: New For Each(false,false)[bag] - scope-15 + | | + | Cast[chararray] - scope-10 + | | + | |---Project[bytearray][0] - scope-9 + | | 
+ | Cast[int] - scope-13 + | | + | |---Project[bytearray][1] - scope-12 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-8 +Tez vertex scope-41 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-38 +| +|---e: New For Each(false,false,false)[bag] - scope-37 + | | + | Project[chararray][0] - scope-31 + | | + | Project[int][1] - scope-33 + | | + | Project[int][3] - scope-35 + | + |---d: New For Each(true,true)[tuple] - scope-30 + | | + | Project[bag][1] - scope-24 + | | + | POBinCond[bag] - scope-29 + | | + | |---Project[bag][2] - scope-25 + | | + | |---POUserFunc(org.apache.pig.builtin.IsEmpty)[boolean] - scope-27 + | | | + | | |---Project[bag][2] - scope-26 + | | + | |---Constant({(,)}) - scope-28 + | + |---d: Package(Packager)[tuple]{chararray} - scope-19 Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2.gld URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2.gld?rev=1780431&view=auto ============================================================================== --- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2.gld (added) +++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2.gld Thu Jan 26 17:40:35 2017 @@ -0,0 +1,83 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-39 -> Tez vertex scope-41,Tez vertex scope-42, +Tez vertex scope-42 -> Tez vertex scope-40, +Tez vertex scope-40 -> Tez vertex scope-41, +Tez vertex scope-41 + +Tez vertex scope-39 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{chararray}(false) - scope-20 -> [ scope-41, scope-42] +| | +| Project[chararray][0] - scope-21 +| +|---a: New For 
Each(false,false)[bag] - scope-7 + | | + | Cast[chararray] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-42 +# Combine plan on edge <scope-39> +Local Rearrange[tuple]{int}(false) - scope-47 -> scope-42 +| | +| Project[int][0] - scope-46 +| +|---Package(BloomPackager)[tuple]{int} - scope-45 +# Plan on vertex +POValueOutputTez - scope-44 -> [scope-40] +| +|---Package(BloomPackager)[tuple]{int} - scope-43 +Tez vertex scope-40 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{chararray}(false) - scope-22 <- scope-42 -> scope-41 +| | +| Project[chararray][0] - scope-23 +| +|---b: New For Each(false,false)[bag] - scope-15 + | | + | Cast[chararray] - scope-10 + | | + | |---Project[bytearray][0] - scope-9 + | | + | Cast[int] - scope-13 + | | + | |---Project[bytearray][1] - scope-12 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-8 +Tez vertex scope-41 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-38 +| +|---e: New For Each(false,false,false)[bag] - scope-37 + | | + | Project[chararray][0] - scope-31 + | | + | Project[int][1] - scope-33 + | | + | Project[int][3] - scope-35 + | + |---d: New For Each(true,true)[tuple] - scope-30 + | | + | Project[bag][1] - scope-24 + | | + | POBinCond[bag] - scope-29 + | | + | |---Project[bag][2] - scope-25 + | | + | |---POUserFunc(org.apache.pig.builtin.IsEmpty)[boolean] - scope-27 + | | | + | | |---Project[bag][2] - scope-26 + | | + | |---Constant({(,)}) - scope-28 + | + |---d: Package(Packager)[tuple]{chararray} - scope-19 Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3-KeyToReducer.gld URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3-KeyToReducer.gld?rev=1780431&view=auto 
============================================================================== --- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3-KeyToReducer.gld (added) +++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3-KeyToReducer.gld Thu Jan 26 17:40:35 2017 @@ -0,0 +1,105 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-45 -> Tez vertex group scope-58,Tez vertex group scope-59, +Tez vertex scope-46 -> Tez vertex group scope-58,Tez vertex group scope-59, +Tez vertex group scope-59 -> Tez vertex scope-52, +Tez vertex scope-52 -> Tez vertex scope-44, +Tez vertex scope-44 -> Tez vertex scope-51, +Tez vertex group scope-58 -> Tez vertex scope-51, +Tez vertex scope-51 + +Tez vertex scope-45 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{int}(false) - scope-60 -> [ scope-51, scope-52] +| | +| Project[int][0] - scope-61 +| +|---b: New For Each(false,false)[bag] - scope-15 + | | + | Cast[int] - scope-10 + | | + | |---Project[bytearray][0] - scope-9 + | | + | Cast[int] - scope-13 + | | + | |---Project[bytearray][1] - scope-12 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-8 +Tez vertex scope-46 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{int}(false) - scope-62 -> [ scope-51, scope-52] +| | +| Project[int][0] - scope-63 +| +|---c: New For Each(false,false)[bag] - scope-23 + | | + | Cast[int] - scope-18 + | | + | |---Project[bytearray][0] - scope-17 + | | + | Cast[int] - scope-21 + | | + | |---Project[bytearray][1] - scope-20 + | + |---c: Load(file:///tmp/input3:org.apache.pig.builtin.PigStorage) - scope-16 +Tez vertex group scope-59 <- [scope-45, scope-46] -> scope-52 +# No plan on vertex group +Tez vertex scope-52 +# Combine plan on edge 
<scope-45> +Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52 +| | +| Project[int][0] - scope-56 +| +|---Package(BloomPackager)[tuple]{int} - scope-55 +# Combine plan on edge <scope-46> +Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52 +| | +| Project[int][0] - scope-56 +| +|---Package(BloomPackager)[tuple]{int} - scope-55 +# Plan on vertex +POValueOutputTez - scope-54 -> [scope-44] +| +|---Package(BloomPackager)[tuple]{int} - scope-53 +Tez vertex scope-44 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{int}(false) - scope-29 <- scope-52 -> scope-51 +| | +| Project[int][0] - scope-30 +| +|---a: New For Each(false,false)[bag] - scope-7 + | | + | Cast[int] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex group scope-58 <- [scope-45, scope-46] -> scope-51 +# No plan on vertex group +Tez vertex scope-51 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-43 +| +|---e: New For Each(false,false,false)[bag] - scope-42 + | | + | Project[int][0] - scope-36 + | | + | Project[int][1] - scope-38 + | | + | Project[int][3] - scope-40 + | + |---d: New For Each(true,true)[tuple] - scope-35 + | | + | Project[bag][1] - scope-33 + | | + | Project[bag][2] - scope-34 + | + |---d: Package(Packager)[tuple]{int} - scope-28 Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3.gld URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3.gld?rev=1780431&view=auto ============================================================================== --- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3.gld (added) +++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3.gld Thu Jan 26 17:40:35 2017 @@ -0,0 +1,105 @@ 
+#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-45 -> Tez vertex group scope-58,Tez vertex group scope-59, +Tez vertex scope-46 -> Tez vertex group scope-58,Tez vertex group scope-59, +Tez vertex group scope-59 -> Tez vertex scope-52, +Tez vertex scope-52 -> Tez vertex scope-44, +Tez vertex scope-44 -> Tez vertex scope-51, +Tez vertex group scope-58 -> Tez vertex scope-51, +Tez vertex scope-51 + +Tez vertex scope-45 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{int}(false) - scope-60 -> [ scope-51, scope-52] +| | +| Project[int][0] - scope-61 +| +|---b: New For Each(false,false)[bag] - scope-15 + | | + | Cast[int] - scope-10 + | | + | |---Project[bytearray][0] - scope-9 + | | + | Cast[int] - scope-13 + | | + | |---Project[bytearray][1] - scope-12 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-8 +Tez vertex scope-46 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{int}(false) - scope-62 -> [ scope-51, scope-52] +| | +| Project[int][0] - scope-63 +| +|---c: New For Each(false,false)[bag] - scope-23 + | | + | Cast[int] - scope-18 + | | + | |---Project[bytearray][0] - scope-17 + | | + | Cast[int] - scope-21 + | | + | |---Project[bytearray][1] - scope-20 + | + |---c: Load(file:///tmp/input3:org.apache.pig.builtin.PigStorage) - scope-16 +Tez vertex group scope-59 <- [scope-45, scope-46] -> scope-52 +# No plan on vertex group +Tez vertex scope-52 +# Combine plan on edge <scope-45> +Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52 +| | +| Project[int][0] - scope-56 +| +|---Package(BloomPackager)[tuple]{int} - scope-55 +# Combine plan on edge <scope-46> +Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52 +| | +| Project[int][0] - scope-56 +| 
+|---Package(BloomPackager)[tuple]{int} - scope-55 +# Plan on vertex +POValueOutputTez - scope-54 -> [scope-44] +| +|---Package(BloomPackager)[tuple]{int} - scope-53 +Tez vertex scope-44 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{int}(false) - scope-29 <- scope-52 -> scope-51 +| | +| Project[int][0] - scope-30 +| +|---a: New For Each(false,false)[bag] - scope-7 + | | + | Cast[int] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex group scope-58 <- [scope-45, scope-46] -> scope-51 +# No plan on vertex group +Tez vertex scope-51 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-43 +| +|---e: New For Each(false,false,false)[bag] - scope-42 + | | + | Project[int][0] - scope-36 + | | + | Project[int][1] - scope-38 + | | + | Project[int][3] - scope-40 + | + |---d: New For Each(true,true)[tuple] - scope-35 + | | + | Project[bag][1] - scope-33 + | | + | Project[bag][2] - scope-34 + | + |---d: Package(Packager)[tuple]{int} - scope-28 Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4-KeyToReducer.gld URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4-KeyToReducer.gld?rev=1780431&view=auto ============================================================================== --- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4-KeyToReducer.gld (added) +++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4-KeyToReducer.gld Thu Jan 26 17:40:35 2017 @@ -0,0 +1,97 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 
+#-------------------------------------------------- +Tez vertex scope-44 -> Tez vertex scope-46, +Tez vertex scope-45 -> Tez vertex scope-46, +Tez vertex scope-50 -> Tez vertex scope-51,Tez vertex scope-52, +Tez vertex scope-52 -> Tez vertex scope-46, +Tez vertex scope-46 -> Tez vertex scope-51, +Tez vertex scope-51 + +Tez vertex scope-44 +# Plan on vertex +POValueOutputTez - scope-48 -> [scope-46] +| +|---b: New For Each(false,false)[bag] - scope-7 + | | + | Cast[int] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-45 +# Plan on vertex +POValueOutputTez - scope-49 -> [scope-46] +| +|---c: New For Each(false,false)[bag] - scope-15 + | | + | Cast[int] - scope-10 + | | + | |---Project[bytearray][0] - scope-9 + | | + | Cast[int] - scope-13 + | | + | |---Project[bytearray][1] - scope-12 + | + |---c: Load(file:///tmp/input3:org.apache.pig.builtin.PigStorage) - scope-8 +Tez vertex scope-50 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{int}(false) - scope-31 -> [ scope-51, scope-52] +| | +| Project[int][0] - scope-32 +| +|---a: New For Each(false,false)[bag] - scope-24 + | | + | Cast[int] - scope-19 + | | + | |---Project[bytearray][0] - scope-18 + | | + | Cast[int] - scope-22 + | | + | |---Project[bytearray][1] - scope-21 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-17 +Tez vertex scope-52 +# Combine plan on edge <scope-50> +Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52 +| | +| Project[int][0] - scope-56 +| +|---Package(BloomPackager)[tuple]{int} - scope-55 +# Plan on vertex +POValueOutputTez - scope-54 -> [scope-46] +| +|---Package(BloomPackager)[tuple]{int} - scope-53 +Tez vertex scope-46 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{int}(false) - scope-29 <- scope-52 -> scope-51 +| | +| Project[int][0] - scope-30 +| 
+|---POShuffledValueInputTez - scope-47 <- [scope-44, scope-45] +Tez vertex scope-51 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-43 +| +|---e: New For Each(false,false,false)[bag] - scope-42 + | | + | Project[int][2] - scope-36 + | | + | Project[int][3] - scope-38 + | | + | Project[int][1] - scope-40 + | + |---d: New For Each(true,true)[tuple] - scope-35 + | | + | Project[bag][1] - scope-33 + | | + | Project[bag][2] - scope-34 + | + |---d: Package(Packager)[tuple]{int} - scope-28 Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4.gld URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4.gld?rev=1780431&view=auto ============================================================================== --- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4.gld (added) +++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4.gld Thu Jan 26 17:40:35 2017 @@ -0,0 +1,97 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-44 -> Tez vertex scope-46, +Tez vertex scope-45 -> Tez vertex scope-46, +Tez vertex scope-50 -> Tez vertex scope-51,Tez vertex scope-52, +Tez vertex scope-52 -> Tez vertex scope-46, +Tez vertex scope-46 -> Tez vertex scope-51, +Tez vertex scope-51 + +Tez vertex scope-44 +# Plan on vertex +POValueOutputTez - scope-48 -> [scope-46] +| +|---b: New For Each(false,false)[bag] - scope-7 + | | + | Cast[int] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-45 +# Plan on vertex 
+POValueOutputTez - scope-49 -> [scope-46] +| +|---c: New For Each(false,false)[bag] - scope-15 + | | + | Cast[int] - scope-10 + | | + | |---Project[bytearray][0] - scope-9 + | | + | Cast[int] - scope-13 + | | + | |---Project[bytearray][1] - scope-12 + | + |---c: Load(file:///tmp/input3:org.apache.pig.builtin.PigStorage) - scope-8 +Tez vertex scope-50 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{int}(false) - scope-31 -> [ scope-51, scope-52] +| | +| Project[int][0] - scope-32 +| +|---a: New For Each(false,false)[bag] - scope-24 + | | + | Cast[int] - scope-19 + | | + | |---Project[bytearray][0] - scope-18 + | | + | Cast[int] - scope-22 + | | + | |---Project[bytearray][1] - scope-21 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-17 +Tez vertex scope-52 +# Combine plan on edge <scope-50> +Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52 +| | +| Project[int][0] - scope-56 +| +|---Package(BloomPackager)[tuple]{int} - scope-55 +# Plan on vertex +POValueOutputTez - scope-54 -> [scope-46] +| +|---Package(BloomPackager)[tuple]{int} - scope-53 +Tez vertex scope-46 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{int}(false) - scope-29 <- scope-52 -> scope-51 +| | +| Project[int][0] - scope-30 +| +|---POShuffledValueInputTez - scope-47 <- [scope-44, scope-45] +Tez vertex scope-51 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-43 +| +|---e: New For Each(false,false,false)[bag] - scope-42 + | | + | Project[int][2] - scope-36 + | | + | Project[int][3] - scope-38 + | | + | Project[int][1] - scope-40 + | + |---d: New For Each(true,true)[tuple] - scope-35 + | | + | Project[bag][1] - scope-33 + | | + | Project[bag][2] - scope-34 + | + |---d: Package(Packager)[tuple]{int} - scope-28 Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5-KeyToReducer.gld URL: 
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5-KeyToReducer.gld?rev=1780431&view=auto ============================================================================== --- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5-KeyToReducer.gld (added) +++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5-KeyToReducer.gld Thu Jan 26 17:40:35 2017 @@ -0,0 +1,107 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-60 -> Tez vertex scope-61,Tez vertex scope-62, +Tez vertex scope-62 -> Tez vertex scope-54,Tez vertex scope-58, +Tez vertex scope-54 -> Tez vertex scope-58,Tez vertex scope-61, +Tez vertex scope-58 -> Tez vertex scope-61, +Tez vertex scope-61 + +Tez vertex scope-60 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{int}(false) - scope-38 -> [ scope-61, scope-62] +| | +| Project[int][0] - scope-39 +| +|---b: New For Each(false,false)[bag] - scope-28 + | | + | Cast[int] - scope-23 + | | + | |---Project[bytearray][0] - scope-22 + | | + | Cast[int] - scope-26 + | | + | |---Project[bytearray][1] - scope-25 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-21 +Tez vertex scope-62 +# Combine plan on edge <scope-60> +Local Rearrange[tuple]{int}(false) - scope-67 -> scope-62 +| | +| Project[int][0] - scope-66 +| +|---Package(BloomPackager)[tuple]{int} - scope-65 +# Plan on vertex +POValueOutputTez - scope-64 -> [scope-54, scope-58] +| +|---Package(BloomPackager)[tuple]{int} - scope-63 +Tez vertex scope-54 +# Plan on vertex +a: Split - scope-68 +| | +| d: BloomFilter Rearrange[tuple]{int}(false) - scope-34 <- scope-62 -> scope-61 +| | | +| | Project[int][0] - scope-35 +| | +| |---a1: Filter[bag] - scope-11 
+| | | +| | Equal To[boolean] - scope-14 +| | | +| | |---Project[int][0] - scope-12 +| | | +| | |---Constant(3) - scope-13 +| | +| POValueOutputTez - scope-55 -> [scope-58] +| +|---a: New For Each(false,false)[bag] - scope-7 + | | + | Cast[int] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-58 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{int}(false) - scope-36 <- scope-62 -> scope-61 +| | +| Project[int][0] - scope-37 +| +|---a2: Filter[bag] - scope-17 + | | + | Equal To[boolean] - scope-20 + | | + | |---Project[int][0] - scope-18 + | | + | |---Constant(4) - scope-19 + | + |---POValueInputTez - scope-59 <- scope-54 +Tez vertex scope-61 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-53 +| +|---e: New For Each(false,false,false,false)[bag] - scope-52 + | | + | Project[int][0] - scope-44 + | | + | Project[int][1] - scope-46 + | | + | Project[int][3] - scope-48 + | | + | Project[int][5] - scope-50 + | + |---d: New For Each(true,true,true)[tuple] - scope-43 + | | + | Project[bag][1] - scope-40 + | | + | Project[bag][2] - scope-41 + | | + | Project[bag][3] - scope-42 + | + |---d: Package(Packager)[tuple]{int} - scope-33 Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5.gld URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5.gld?rev=1780431&view=auto ============================================================================== --- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5.gld (added) +++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5.gld Thu Jan 26 17:40:35 2017 @@ -0,0 +1,107 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session 
+#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-60 -> Tez vertex scope-61,Tez vertex scope-62, +Tez vertex scope-62 -> Tez vertex scope-54,Tez vertex scope-58, +Tez vertex scope-54 -> Tez vertex scope-58,Tez vertex scope-61, +Tez vertex scope-58 -> Tez vertex scope-61, +Tez vertex scope-61 + +Tez vertex scope-60 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{int}(false) - scope-38 -> [ scope-61, scope-62] +| | +| Project[int][0] - scope-39 +| +|---b: New For Each(false,false)[bag] - scope-28 + | | + | Cast[int] - scope-23 + | | + | |---Project[bytearray][0] - scope-22 + | | + | Cast[int] - scope-26 + | | + | |---Project[bytearray][1] - scope-25 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-21 +Tez vertex scope-62 +# Combine plan on edge <scope-60> +Local Rearrange[tuple]{int}(false) - scope-67 -> scope-62 +| | +| Project[int][0] - scope-66 +| +|---Package(BloomPackager)[tuple]{int} - scope-65 +# Plan on vertex +POValueOutputTez - scope-64 -> [scope-54, scope-58] +| +|---Package(BloomPackager)[tuple]{int} - scope-63 +Tez vertex scope-54 +# Plan on vertex +a: Split - scope-68 +| | +| d: BloomFilter Rearrange[tuple]{int}(false) - scope-34 <- scope-62 -> scope-61 +| | | +| | Project[int][0] - scope-35 +| | +| |---a1: Filter[bag] - scope-11 +| | | +| | Equal To[boolean] - scope-14 +| | | +| | |---Project[int][0] - scope-12 +| | | +| | |---Constant(3) - scope-13 +| | +| POValueOutputTez - scope-55 -> [scope-58] +| +|---a: New For Each(false,false)[bag] - scope-7 + | | + | Cast[int] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-58 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{int}(false) - 
scope-36 <- scope-62 -> scope-61 +| | +| Project[int][0] - scope-37 +| +|---a2: Filter[bag] - scope-17 + | | + | Equal To[boolean] - scope-20 + | | + | |---Project[int][0] - scope-18 + | | + | |---Constant(4) - scope-19 + | + |---POValueInputTez - scope-59 <- scope-54 +Tez vertex scope-61 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-53 +| +|---e: New For Each(false,false,false,false)[bag] - scope-52 + | | + | Project[int][0] - scope-44 + | | + | Project[int][1] - scope-46 + | | + | Project[int][3] - scope-48 + | | + | Project[int][5] - scope-50 + | + |---d: New For Each(true,true,true)[tuple] - scope-43 + | | + | Project[bag][1] - scope-40 + | | + | Project[bag][2] - scope-41 + | | + | Project[bag][3] - scope-42 + | + |---d: Package(Packager)[tuple]{int} - scope-33 Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6-KeyToReducer.gld URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6-KeyToReducer.gld?rev=1780431&view=auto ============================================================================== --- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6-KeyToReducer.gld (added) +++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6-KeyToReducer.gld Thu Jan 26 17:40:35 2017 @@ -0,0 +1,95 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-49 -> Tez vertex scope-56,Tez vertex scope-57, +Tez vertex scope-57 -> Tez vertex scope-53, +Tez vertex scope-53 -> Tez vertex scope-56, +Tez vertex scope-56 + +Tez vertex scope-49 +# Plan on vertex +a: Split - scope-63 +| | +| a2: Store(file:///tmp/pigoutput/a2:org.apache.pig.builtin.PigStorage) - scope-15 +| | +| 
|---a2: Filter[bag] - scope-11 +| | | +| | Equal To[boolean] - scope-14 +| | | +| | |---Project[int][0] - scope-12 +| | | +| | |---Constant(4) - scope-13 +| | +| d: BuildBloom Rearrange[tuple]{int}(false) - scope-36 -> [ scope-56, scope-57] +| | | +| | Project[int][0] - scope-37 +| | +| |---a1: Filter[bag] - scope-26 +| | | +| | Equal To[boolean] - scope-29 +| | | +| | |---Project[int][0] - scope-27 +| | | +| | |---Constant(3) - scope-28 +| +|---a: New For Each(false,false)[bag] - scope-7 + | | + | Cast[int] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-57 +# Combine plan on edge <scope-49> +Local Rearrange[tuple]{int}(false) - scope-62 -> scope-57 +| | +| Project[int][0] - scope-61 +| +|---Package(BloomPackager)[tuple]{int} - scope-60 +# Plan on vertex +POValueOutputTez - scope-59 -> [scope-53] +| +|---Package(BloomPackager)[tuple]{int} - scope-58 +Tez vertex scope-53 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{int}(false) - scope-34 <- scope-57 -> scope-56 +| | +| Project[int][0] - scope-35 +| +|---b: New For Each(false,false)[bag] - scope-23 + | | + | Cast[int] - scope-18 + | | + | |---Project[bytearray][0] - scope-17 + | | + | Cast[int] - scope-21 + | | + | |---Project[bytearray][1] - scope-20 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-16 +Tez vertex scope-56 +# Plan on vertex +e: Store(file:///tmp/pigoutput/e:org.apache.pig.builtin.PigStorage) - scope-48 +| +|---e: New For Each(false,false,false)[bag] - scope-47 + | | + | Project[int][2] - scope-41 + | | + | Project[int][3] - scope-43 + | | + | Project[int][1] - scope-45 + | + |---d: New For Each(true,true)[tuple] - scope-40 + | | + | Project[bag][1] - scope-38 + | | + | Project[bag][2] - scope-39 + | + |---d: Package(Packager)[tuple]{int} - scope-33 Added: 
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6.gld URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6.gld?rev=1780431&view=auto ============================================================================== --- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6.gld (added) +++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6.gld Thu Jan 26 17:40:35 2017 @@ -0,0 +1,95 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-49 -> Tez vertex scope-56,Tez vertex scope-57, +Tez vertex scope-57 -> Tez vertex scope-53, +Tez vertex scope-53 -> Tez vertex scope-56, +Tez vertex scope-56 + +Tez vertex scope-49 +# Plan on vertex +a: Split - scope-63 +| | +| a2: Store(file:///tmp/pigoutput/a2:org.apache.pig.builtin.PigStorage) - scope-15 +| | +| |---a2: Filter[bag] - scope-11 +| | | +| | Equal To[boolean] - scope-14 +| | | +| | |---Project[int][0] - scope-12 +| | | +| | |---Constant(4) - scope-13 +| | +| d: BuildBloom Rearrange[tuple]{int}(false) - scope-36 -> [ scope-56, scope-57] +| | | +| | Project[int][0] - scope-37 +| | +| |---a1: Filter[bag] - scope-26 +| | | +| | Equal To[boolean] - scope-29 +| | | +| | |---Project[int][0] - scope-27 +| | | +| | |---Constant(3) - scope-28 +| +|---a: New For Each(false,false)[bag] - scope-7 + | | + | Cast[int] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-57 +# Combine plan on edge <scope-49> +Local Rearrange[tuple]{int}(false) - scope-62 -> scope-57 +| | +| Project[int][0] - scope-61 +| 
+|---Package(BloomPackager)[tuple]{int} - scope-60 +# Plan on vertex +POValueOutputTez - scope-59 -> [scope-53] +| +|---Package(BloomPackager)[tuple]{int} - scope-58 +Tez vertex scope-53 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{int}(false) - scope-34 <- scope-57 -> scope-56 +| | +| Project[int][0] - scope-35 +| +|---b: New For Each(false,false)[bag] - scope-23 + | | + | Cast[int] - scope-18 + | | + | |---Project[bytearray][0] - scope-17 + | | + | Cast[int] - scope-21 + | | + | |---Project[bytearray][1] - scope-20 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-16 +Tez vertex scope-56 +# Plan on vertex +e: Store(file:///tmp/pigoutput/e:org.apache.pig.builtin.PigStorage) - scope-48 +| +|---e: New For Each(false,false,false)[bag] - scope-47 + | | + | Project[int][2] - scope-41 + | | + | Project[int][3] - scope-43 + | | + | Project[int][1] - scope-45 + | + |---d: New For Each(true,true)[tuple] - scope-40 + | | + | Project[bag][1] - scope-38 + | | + | Project[bag][2] - scope-39 + | + |---d: Package(Packager)[tuple]{int} - scope-33 Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7-KeyToReducer.gld URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7-KeyToReducer.gld?rev=1780431&view=auto ============================================================================== --- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7-KeyToReducer.gld (added) +++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7-KeyToReducer.gld Thu Jan 26 17:40:35 2017 @@ -0,0 +1,95 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-43 -> Tez vertex scope-45,Tez vertex scope-47,Tez vertex 
scope-51,Tez vertex scope-52, +Tez vertex scope-52 -> Tez vertex scope-45,Tez vertex scope-47, +Tez vertex scope-45 -> Tez vertex scope-51, +Tez vertex scope-47 -> Tez vertex scope-51, +Tez vertex scope-51 + +Tez vertex scope-43 +# Plan on vertex +a: Split - scope-58 +| | +| e: BuildBloom Rearrange[tuple]{int}(false) - scope-36 -> [ scope-51, scope-52] +| | | +| | Project[int][0] - scope-37 +| | +| |---d: Filter[bag] - scope-23 +| | | +| | Greater Than[boolean] - scope-26 +| | | +| | |---Project[int][0] - scope-24 +| | | +| | |---Constant(10) - scope-25 +| | +| POValueOutputTez - scope-44 -> [scope-45, scope-47] +| +|---a: New For Each(false,false)[bag] - scope-7 + | | + | Cast[int] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-52 +# Combine plan on edge <scope-43> +Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52 +| | +| Project[int][0] - scope-56 +| +|---Package(BloomPackager)[tuple]{int} - scope-55 +# Plan on vertex +POValueOutputTez - scope-54 -> [scope-45, scope-47] +| +|---Package(BloomPackager)[tuple]{int} - scope-53 +Tez vertex scope-45 +# Plan on vertex +e: BloomFilter Rearrange[tuple]{int}(false) - scope-32 <- scope-52 -> scope-51 +| | +| Project[int][0] - scope-33 +| +|---b: Filter[bag] - scope-11 + | | + | Less Than[boolean] - scope-14 + | | + | |---Project[int][0] - scope-12 + | | + | |---Constant(5) - scope-13 + | + |---POValueInputTez - scope-46 <- scope-43 +Tez vertex scope-47 +# Plan on vertex +e: BloomFilter Rearrange[tuple]{int}(false) - scope-34 <- scope-52 -> scope-51 +| | +| Project[int][0] - scope-35 +| +|---c: Filter[bag] - scope-17 + | | + | Equal To[boolean] - scope-20 + | | + | |---Project[int][0] - scope-18 + | | + | |---Constant(10) - scope-19 + | + |---POValueInputTez - scope-48 <- scope-43 +Tez vertex scope-51 +# Plan on vertex +e: 
Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-42 +| +|---e: New For Each(true,true,true)[tuple] - scope-41 + | | + | Project[bag][1] - scope-38 + | | + | Project[bag][2] - scope-39 + | | + | Project[bag][3] - scope-40 + | + |---e: Package(Packager)[tuple]{int} - scope-31 Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7.gld URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7.gld?rev=1780431&view=auto ============================================================================== --- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7.gld (added) +++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7.gld Thu Jan 26 17:40:35 2017 @@ -0,0 +1,95 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-43 -> Tez vertex scope-45,Tez vertex scope-47,Tez vertex scope-51,Tez vertex scope-52, +Tez vertex scope-52 -> Tez vertex scope-45,Tez vertex scope-47, +Tez vertex scope-45 -> Tez vertex scope-51, +Tez vertex scope-47 -> Tez vertex scope-51, +Tez vertex scope-51 + +Tez vertex scope-43 +# Plan on vertex +a: Split - scope-58 +| | +| e: BuildBloom Rearrange[tuple]{int}(false) - scope-36 -> [ scope-51, scope-52] +| | | +| | Project[int][0] - scope-37 +| | +| |---d: Filter[bag] - scope-23 +| | | +| | Greater Than[boolean] - scope-26 +| | | +| | |---Project[int][0] - scope-24 +| | | +| | |---Constant(10) - scope-25 +| | +| POValueOutputTez - scope-44 -> [scope-45, scope-47] +| +|---a: New For Each(false,false)[bag] - scope-7 + | | + | Cast[int] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---a: 
Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-52 +# Combine plan on edge <scope-43> +Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52 +| | +| Project[int][0] - scope-56 +| +|---Package(BloomPackager)[tuple]{int} - scope-55 +# Plan on vertex +POValueOutputTez - scope-54 -> [scope-45, scope-47] +| +|---Package(BloomPackager)[tuple]{int} - scope-53 +Tez vertex scope-45 +# Plan on vertex +e: BloomFilter Rearrange[tuple]{int}(false) - scope-32 <- scope-52 -> scope-51 +| | +| Project[int][0] - scope-33 +| +|---b: Filter[bag] - scope-11 + | | + | Less Than[boolean] - scope-14 + | | + | |---Project[int][0] - scope-12 + | | + | |---Constant(5) - scope-13 + | + |---POValueInputTez - scope-46 <- scope-43 +Tez vertex scope-47 +# Plan on vertex +e: BloomFilter Rearrange[tuple]{int}(false) - scope-34 <- scope-52 -> scope-51 +| | +| Project[int][0] - scope-35 +| +|---c: Filter[bag] - scope-17 + | | + | Equal To[boolean] - scope-20 + | | + | |---Project[int][0] - scope-18 + | | + | |---Constant(10) - scope-19 + | + |---POValueInputTez - scope-48 <- scope-43 +Tez vertex scope-51 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-42 +| +|---e: New For Each(true,true,true)[tuple] - scope-41 + | | + | Project[bag][1] - scope-38 + | | + | Project[bag][2] - scope-39 + | | + | Project[bag][3] - scope-40 + | + |---e: Package(Packager)[tuple]{int} - scope-31 Modified: pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java?rev=1780431&r1=1780430&r2=1780431&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java (original) +++ pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java Thu Jan 26 17:40:35 2017 @@ -88,6 +88,7 @@ public class TestTezCompiler { pc.getProperties().remove(PigConfiguration.PIG_OPT_MULTIQUERY); 
pc.getProperties().remove(PigConfiguration.PIG_TEZ_OPT_UNION); pc.getProperties().remove(PigConfiguration.PIG_EXEC_NO_SECONDARY_KEY); + pc.getProperties().remove(PigConfiguration.PIG_BLOOMJOIN_STRATEGY); pigServer = new PigServer(pc); } @@ -178,6 +179,125 @@ public class TestTezCompiler { } @Test + public void testBloomJoin() throws Exception { + String query = + "a = load 'file:///tmp/input1' as (x, y:int);" + + "b = load 'file:///tmp/input2' as (x, z:int);" + + "c = load 'file:///tmp/input2' as (x, w:int);" + + "d = join b by x, a by x, c by x using 'bloom';" + + "e = foreach d generate a::x as x, y, z, w;" + + "store e into 'file:///tmp/pigoutput';"; + + run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1.gld"); + resetScope(); + setProperty(PigConfiguration.PIG_BLOOMJOIN_STRATEGY, "reduce"); + run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1-KeyToReducer.gld"); + } + + @Test + public void testBloomJoinLeftOuter() throws Exception { + String query = + "a = load 'file:///tmp/input1' as (x:chararray, y:int);" + + "b = load 'file:///tmp/input2' as (x:chararray, z:int);" + + "d = join a by x left, b by x using 'bloom';" + + "e = foreach d generate a::x as x, y, z;" + + "store e into 'file:///tmp/pigoutput';"; + + run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2.gld"); + resetScope(); + setProperty(PigConfiguration.PIG_BLOOMJOIN_STRATEGY, "reduce"); + run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2-KeyToReducer.gld"); + } + + @Test + public void testBloomJoinUnion() throws Exception { + // Left input from a union + String query = + "a = load 'file:///tmp/input1' as (x:int, y:int);" + + "b = load 'file:///tmp/input2' as (x:int, z:int);" + + "c = load 'file:///tmp/input3' as (x:int, z:int);" + + "b = union b, c;" + + "d = join a by x, b by x using 'bloom';" + + "e = foreach d generate a::x as x, y, z;" + + "store e into 'file:///tmp/pigoutput';"; + + run(query, 
"test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3.gld"); + resetScope(); + setProperty(PigConfiguration.PIG_BLOOMJOIN_STRATEGY, "reduce"); + run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3-KeyToReducer.gld"); + setProperty(PigConfiguration.PIG_BLOOMJOIN_STRATEGY, null); + + resetScope(); + // Right input from a union + query = + "a = load 'file:///tmp/input1' as (x:int, y:int);" + + "b = load 'file:///tmp/input2' as (x:int, z:int);" + + "c = load 'file:///tmp/input3' as (x:int, z:int);" + + "b = union b, c;" + + "d = join b by x, a by x using 'bloom';" + + "e = foreach d generate a::x as x, y, z;" + + "store e into 'file:///tmp/pigoutput';"; + + // Needs shared edges and PIG-3856 to be a more optimal plan + run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4.gld"); + resetScope(); + setProperty(PigConfiguration.PIG_BLOOMJOIN_STRATEGY, "reduce"); + run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4-KeyToReducer.gld"); + } + + @Test + public void testBloomJoinSplit() throws Exception { + // Left input from a split + String query = + "a = load 'file:///tmp/input1' as (x:int, y:int);" + + "b = load 'file:///tmp/input2' as (x:int, z:int);" + + "a1 = filter a by x == 3;" +
+ "a2 = filter a by x == 4;" + + "d = join b by x, a1 by x using 'bloom';" + + "e = foreach d generate a1::x as x, y, z;" + + "store a2 into 'file:///tmp/pigoutput/a2';" + + "store e into 'file:///tmp/pigoutput/e';"; + + run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6.gld"); + resetScope(); + setProperty(PigConfiguration.PIG_BLOOMJOIN_STRATEGY, "reduce"); + run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-6-KeyToReducer.gld"); + } + + @Test + public void testBloomSelfJoin() throws Exception { + String query = + "a = load 'file:///tmp/input1' as (x:int, y:int);" + + "b = filter a by x < 5;" + + "c = filter a by x == 10;" + + "d = filter a by x > 10;" + + "e = join b by x, c by x, d by x using 'bloom';" + + "store e into 'file:///tmp/pigoutput';"; + + run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7.gld"); + resetScope(); + setProperty(PigConfiguration.PIG_BLOOMJOIN_STRATEGY, "reduce"); + run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-7-KeyToReducer.gld"); + } + + @Test public void testSelfJoin() throws Exception { String query = "a = load 'file:///tmp/input1' as (x:int, y:int);" +
