Modified: pig/branches/spark/test/org/apache/pig/test/TestPlanGeneration.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestPlanGeneration.java?rev=1784237&r1=1784236&r2=1784237&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestPlanGeneration.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestPlanGeneration.java Fri Feb 24 08:19:42 2017 @@ -20,6 +20,9 @@ import static org.junit.Assert.assertNot import static org.junit.Assert.assertNull; import java.io.IOException; +import java.util.List; + +import junit.framework.Assert; import org.apache.hadoop.mapreduce.Job; import org.apache.pig.ExecType; @@ -36,15 +39,22 @@ import org.apache.pig.backend.hadoop.exe import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POSort; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore; import org.apache.pig.builtin.PigStorage; +import org.apache.pig.builtin.mock.Storage; +import org.apache.pig.builtin.mock.Storage.Data; +import static org.apache.pig.builtin.mock.Storage.*; import org.apache.pig.data.DataType; +import org.apache.pig.data.Tuple; import org.apache.pig.impl.PigContext; import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.pig.impl.util.Utils; import org.apache.pig.newplan.Operator; +import org.apache.pig.newplan.logical.expression.CastExpression; import org.apache.pig.newplan.logical.expression.LogicalExpression; +import org.apache.pig.newplan.logical.expression.ProjectExpression; import org.apache.pig.newplan.logical.relational.LOCogroup; import org.apache.pig.newplan.logical.relational.LOFilter; import org.apache.pig.newplan.logical.relational.LOForEach; +import org.apache.pig.newplan.logical.relational.LOGenerate; import org.apache.pig.newplan.logical.relational.LOLoad; import org.apache.pig.newplan.logical.relational.LOSort; import org.apache.pig.newplan.logical.relational.LOStore; @@ -61,8 +71,8 @@ public class TestPlanGeneration { private static PigServer ps; @BeforeClass - public static void setUp() throws ExecException { - ps = new PigServer(ExecType.LOCAL); + public static void setUp() throws Exception { + ps = new PigServer(Util.getLocalTestMode()); pc = ps.getPigContext(); pc.connect(); } @@ -311,4 +321,218 @@ public class TestPlanGeneration { assertNotNull(((PartitionedLoader)loLoad.getLoadFunc()).getPartFilter()); assertEquals("b", loStore.getAlias()); } + + @Test + // See PIG-2315 + public void testForEachWithCast1() throws Exception { + // A cast ForEach is inserted to take care of the user schema + String query = "A = load 'foo' as (a, b:int);\n" + + "B = foreach A generate a as a0:chararray, b as b:int;\n" + + "store B into 'output';"; + + LogicalPlan lp = Util.parse(query, pc); + Util.optimizeNewLP(lp); + + LOLoad loLoad = (LOLoad)lp.getSources().get(0); + LOForEach loForEach1 = (LOForEach)lp.getSuccessors(loLoad).get(0); + LOForEach loForEach2 = (LOForEach)lp.getSuccessors(loForEach1).get(0); + // before a0 is typecasted to chararray, it should be bytearray + assertEquals(DataType.BYTEARRAY, loForEach1.getSchema().getField(0).type); + // type of b should stay as int + assertEquals(DataType.INTEGER, loForEach1.getSchema().getField(1).type); + assertEquals("B", loForEach2.getAlias()); + LOGenerate generate = (LOGenerate)loForEach2.getInnerPlan().getSinks().get(0); + CastExpression cast = (CastExpression)generate.getOutputPlans().get(0).getSources().get(0); + Assert.assertTrue(cast.getType()==DataType.CHARARRAY); + assertEquals(loForEach2.getSchema().getField(0).alias, "a0"); + Assert.assertTrue(lp.getSuccessors(loForEach2).get(0) instanceof LOStore); + } + + @Test + // See PIG-2315 + public void testForEachWithCast2() throws Exception { + // No additional cast ForEach will be inserted, but schema should match + String query = "A = load 'foo' as (a, b);\n" + + "B = foreach A generate (chararray)a as a0:chararray;\n" + + "store B into 'output';"; + + LogicalPlan lp = Util.parse(query, pc); + Util.optimizeNewLP(lp); + + LOLoad loLoad = (LOLoad)lp.getSources().get(0); + LOForEach loForEach = (LOForEach)lp.getSuccessors(loLoad).get(0); + assertEquals(loForEach.getSchema().getField(0).alias, "a0"); + Assert.assertTrue(lp.getSuccessors(loForEach).get(0) instanceof LOStore); + } + + @Test + // See PIG-2315 + public void testForEachWithCast3() throws Exception { + // No additional cast ForEach will be inserted, but schema should match + String query = "A = load 'foo' as (a, b);\n" + + "B = foreach A generate (chararray)a as a0:int;\n" + + "store B into 'output';"; + + LogicalPlan lp = Util.parse(query, pc); + Util.optimizeNewLP(lp); + + LOLoad loLoad = (LOLoad)lp.getSources().get(0); + LOForEach loForEach1 = (LOForEach)lp.getSuccessors(loLoad).get(0); + LOGenerate generate1 = (LOGenerate)loForEach1.getInnerPlan().getSinks().get(0); + CastExpression cast1 = (CastExpression)generate1.getOutputPlans().get(0).getSources().get(0); + Assert.assertTrue(cast1.getType()==DataType.CHARARRAY); + //before a0 is typecasted to int, it should be chararray + Assert.assertEquals(DataType.CHARARRAY, loForEach1.getSchema().getField(0).type); + LOForEach loForEach2 = (LOForEach)lp.getSuccessors(loForEach1).get(0); + LOGenerate generate2 = (LOGenerate)loForEach2.getInnerPlan().getSinks().get(0); + CastExpression cast2 = (CastExpression)generate2.getOutputPlans().get(0).getSources().get(0); + Assert.assertTrue(cast2.getType()==DataType.INTEGER); + Assert.assertTrue(lp.getSuccessors(loForEach2).get(0) instanceof LOStore); + } + + @Test + // See PIG-2315 + public void testForEachWithCast4() throws Exception { + // No additional cast ForEach will be inserted + String query = "a = load 'foo' as (nb1:bag{}, nb2:chararray);\n" + + "b = foreach a generate flatten(nb1) as (year, name), nb2;\n" + + "store b into 'output';"; + + LogicalPlan lp = Util.parse(query, pc); + Util.optimizeNewLP(lp); + + LOLoad loLoad = (LOLoad)lp.getSources().get(0); + LOForEach loForEach = (LOForEach)lp.getSuccessors(loLoad).get(0); + Assert.assertTrue(lp.getSuccessors(loForEach).get(0) instanceof LOStore); + } + + @Test + // See PIG-2315 + public void testForEachWithCast5() throws Exception { + // cast ForEach will be inserted + String query = "a = load 'foo' as (nb1:bag{}, nb2:chararray);\n" + + "b = foreach a generate flatten(nb1) as (year, name:chararray), nb2 as nb2:chararray;\n" + + "store b into 'output';"; + + LogicalPlan lp = Util.parse(query, pc); + Util.optimizeNewLP(lp); + + LOLoad loLoad = (LOLoad)lp.getSources().get(0); + LOForEach loForEach1 = (LOForEach)lp.getSuccessors(loLoad).get(0); + // flattened "name" field should be bytearray before typecasted to chararray + Assert.assertEquals(DataType.BYTEARRAY, loForEach1.getSchema().getField(1).type); + LOForEach loForEach2 = (LOForEach)lp.getSuccessors(loForEach1).get(0); + LOGenerate generate = (LOGenerate)loForEach2.getInnerPlan().getSinks().get(0); + Assert.assertTrue(generate.getOutputPlans().get(0).getSources().get(0) instanceof ProjectExpression); + CastExpression cast = (CastExpression)generate.getOutputPlans().get(1).getSources().get(0); + Assert.assertTrue(cast.getType()==DataType.CHARARRAY); + Assert.assertTrue(generate.getOutputPlans().get(2).getSources().get(0) instanceof ProjectExpression); + } + + @Test + // See PIG-2315 + public void testForEachWithCast6() throws Exception { + // no cast ForEach will be inserted + String query = "a = load 'foo' as (nb1:bag{(year,name)}, nb2);\n" + + "b = foreach a generate flatten(nb1) as (year, name2), nb2;\n" + + "store b into 'output';"; + + LogicalPlan lp = Util.parse(query, pc); + Util.optimizeNewLP(lp); + + LOLoad loLoad = (LOLoad)lp.getSources().get(0); + LOForEach loForEach = (LOForEach)lp.getSuccessors(loLoad).get(0); + assertEquals(loForEach.getSchema().getField(1).alias, "name2"); + Assert.assertTrue(lp.getSuccessors(loForEach).get(0) instanceof LOStore); + } + + @Test + // See PIG-2315 + public void testForEachWithCast7() throws Exception { + // no cast ForEach will be inserted, since we don't know the size of outputs + // in first inner plan + String query = "a = load 'foo' as (nb1:bag{}, nb2:bag{});\n" + + "b = foreach a generate flatten(nb1), flatten(nb2) as (year, name);\n" + + "store b into 'output';"; + + LogicalPlan lp = Util.parse(query, pc); + Util.optimizeNewLP(lp); + + LOLoad loLoad = (LOLoad)lp.getSources().get(0); + LOForEach loForEach = (LOForEach)lp.getSuccessors(loLoad).get(0); + Assert.assertTrue(lp.getSuccessors(loForEach).get(0) instanceof LOStore); + } + + @Test + // See PIG-2315 + public void testAsType1() throws Exception { + Data data = Storage.resetData(ps); + data.set("input", tuple(0.1), tuple(1.2), tuple(2.3)); + + String query = + "A = load 'input' USING mock.Storage() as (a1:double);\n" + + "B = FOREACH A GENERATE a1 as (a2:int);\n" + + "store B into 'out' using mock.Storage;" ; + + Util.registerMultiLineQuery(ps, query); + List<Tuple> list = data.get("out"); + // Without PIG-2315, this failed with (0.1), (1.2), (2.3) + List<Tuple> expectedRes = + Util.getTuplesFromConstantTupleStrings( + new String[] {"(0)", "(1)", "(2)"}); + Util.checkQueryOutputsAfterSort(list, expectedRes); + } + + @Test + // See PIG-2315 + public void testAsType2() throws Exception { + Data data = Storage.resetData(ps); + data.set("input", tuple("a"), tuple("b"), tuple("c")); + + String query = + "A = load 'input' USING mock.Storage(); \n" + + "A2 = FOREACH A GENERATE 12345 as (a2:chararray); \n" + + "B = load 'input' USING mock.Storage(); \n" + + "B2 = FOREACH A GENERATE '12345' as (b2:chararray); \n" + + "C = union A2, B2;\n" + + "D = distinct C;\n" + + "store D into 'out' using mock.Storage;" ; + + Util.registerMultiLineQuery(ps, query); + List<Tuple> list = data.get("out"); + // Without PIG-2315, this produced TWO 12345. + // One by chararray and another by int. + List<Tuple> expectedRes = + Util.getTuplesFromConstantTupleStrings( + new String[] {"('12345')"}); + Util.checkQueryOutputsAfterSort(list, expectedRes); + } + + @Test + // See PIG-4933 + public void testAsWithByteArrayCast() throws Exception { + Data data = Storage.resetData(ps); + data.set("input_testAsWithByteArrayCast", "t1:(f1:bytearray, f2:bytearray), f3:chararray", + tuple(tuple(1,5), "a"), + tuple(tuple(2,4), "b"), + tuple(tuple(3,3), "c") ); + + String query = + "A = load 'input_testAsWithByteArrayCast' USING mock.Storage();\n" + + "B = FOREACH A GENERATE t1 as (t2:(newf1, newf2:float)), f3;" + + "store B into 'out' using mock.Storage;" ; + + // This will call typecast of (bytearray,float) on a tuple + // bytearray2bytearray should be no-op. + // Without pig-4933 patch on POCast, + // this typecast was producing empty results + + Util.registerMultiLineQuery(ps, query); + List<Tuple> list = data.get("out"); + String[] expectedRes = + new String[] {"((1,5.0),a)","((2,4.0),b)","((3,3.0),c)"}; + for( int i=0; i < list.size(); i++ ) { + Assert.assertEquals(expectedRes[i], list.get(i).toString()); + } + } }
Modified: pig/branches/spark/test/org/apache/pig/test/TestPruneColumn.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestPruneColumn.java?rev=1784237&r1=1784236&r2=1784237&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestPruneColumn.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestPruneColumn.java Fri Feb 24 08:19:42 2017 @@ -1397,13 +1397,13 @@ public class TestPruneColumn { } @Test - public void testRelayFlattenMap() throws Exception { + public void testFlattenMapCantPruneKeys() throws Exception { pigServer.registerQuery("A = load '"+ Util.generateURI(tmpFile3.toString(), pigServer.getPigContext()) - + "' as (a0, a1:map[]);"); + + "' as (a0, a1:map[int]);"); pigServer.registerQuery("B = foreach A generate flatten(a1);"); - pigServer.registerQuery("C = foreach B generate a1#'key1';"); - + pigServer.registerQuery("B1 = filter B by a1::key == 'key1';"); + pigServer.registerQuery("C = foreach B1 generate a1::value;"); Iterator<Tuple> iter = pigServer.openIterator("C"); assertTrue(iter.hasNext()); @@ -1418,8 +1418,7 @@ public class TestPruneColumn { assertFalse(iter.hasNext()); - assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $0", - "Map key required for A: $1->[key1]"})); + assertTrue(checkLogFileMessage(new String[]{"Columns pruned for A: $0"})); } @Test Modified: pig/branches/spark/test/org/apache/pig/test/TestRegisterParser.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestRegisterParser.java?rev=1784237&r1=1784236&r2=1784237&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestRegisterParser.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestRegisterParser.java Fri Feb 24 08:19:42 2017 @@ -30,6 +30,7 @@ import java.io.IOException; import java.io.Writer; import java.net.URI; import java.net.URISyntaxException; +import java.util.Properties; import org.apache.pig.ExecType; import org.apache.pig.impl.PigContext; @@ -42,6 +43,9 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; +import org.apache.pig.impl.util.PropertiesUtil; +import org.apache.hadoop.fs.LocalFileSystem; + public class TestRegisterParser { private PigServer pigServer; @@ -49,7 +53,12 @@ public class TestRegisterParser { @Before public void setUp() throws Exception { - pigServer = new PigServer(ExecType.LOCAL); + Properties properties = PropertiesUtil.loadDefaultProperties(); + properties.setProperty("fs.s3.impl", LocalFileSystem.class.getName()); + properties.setProperty("fs.s3n.impl", LocalFileSystem.class.getName()); + properties.setProperty("fs.s3a.impl", LocalFileSystem.class.getName()); + + pigServer = new PigServer(ExecType.LOCAL, properties); // Generate test jar files for (int i = 1; i <= 5; i++) { @@ -107,6 +116,34 @@ public class TestRegisterParser { } } + @Test + public void testResolveForVariousFileSystemSchemes() throws URISyntaxException, IOException, ParserException { + URI[] list = new URI[6]; + list[0] = new URI("file://test.jar"); + list[1] = new URI("hdfs://test.jar"); + list[2] = new URI("s3://test.jar"); + list[3] = new URI("s3n://test.jar"); + list[4] = new URI("s3a://test.jar"); + list[5] = new URI("test.jar"); + + RegisterResolver registerResolver = new RegisterResolver(pigServer); + for (URI uri : list) { + URI[] resolvedUris = registerResolver.resolve(uri); + Assert.assertEquals(1, resolvedUris.length); + Assert.assertEquals(uri, resolvedUris[0]); + } + } + + @Test(expected = ParserException.class) + public void testResolveParseException() throws URISyntaxException, IOException, ParserException { + new RegisterResolver(pigServer).resolve(new URI("abc://test.jar")); + } + + @Test(expected = URISyntaxException.class) + public void testResolveURISyntaxException() throws URISyntaxException, IOException, ParserException { + new RegisterResolver(pigServer).resolve(new URI("123://test.jar")); + } + // Throw error when a scripting language and namespace is specified for a jar @Test(expected = ParserException.class) public void testRegisterJarException1() throws IOException, ParserException { Modified: pig/branches/spark/test/org/apache/pig/test/TestScriptUDF.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestScriptUDF.java?rev=1784237&r1=1784236&r2=1784237&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestScriptUDF.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestScriptUDF.java Fri Feb 24 08:19:42 2017 @@ -247,7 +247,11 @@ public class TestScriptUDF{ Assert.assertTrue(t.get(0).toString().equals(System.getenv(input[0]))); Assert.assertTrue(iter.hasNext()); t = iter.next(); - Assert.assertTrue(t.get(0).toString().equals(System.getenv(input[1]))); + if (System.getenv(input[1]) != null) { // JAVA_HOME is set, t.get(0) is not null + Assert.assertTrue(t.get(0).toString().equals(System.getenv(input[1]))); + } else { // JAVA_HOME is not set, t.get(0) is null + Assert.assertNull(t.get(0)); + } Assert.assertFalse(iter.hasNext()); } Modified: pig/branches/spark/test/org/apache/pig/test/TestSkewedJoin.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestSkewedJoin.java?rev=1784237&r1=1784236&r2=1784237&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestSkewedJoin.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestSkewedJoin.java Fri Feb 24 08:19:42 2017 @@ -65,6 +65,7 @@ public class TestSkewedJoin { private static final String INPUT_FILE5 = "SkewedJoinInput5.txt"; private static final String INPUT_FILE6 = "SkewedJoinInput6.txt"; private static final String INPUT_FILE7 = "SkewedJoinInput7.txt"; + private static final String INPUT_FILE8 = "SkewedJoinInput8.txt"; private static final String TEST_DIR = Util.getTestDirectory(TestSkewedJoin.class); private static final String INPUT_DIR = TEST_DIR + Path.SEPARATOR + "input"; private static final String OUTPUT_DIR = TEST_DIR + Path.SEPARATOR + "output"; @@ -173,6 +174,11 @@ public class TestSkewedJoin { } w7.close(); + //Empty file + PrintWriter w8 = new PrintWriter(new FileWriter(INPUT_DIR + "/" + INPUT_FILE8)); + w8.close(); + + Util.copyFromLocalToCluster(cluster, INPUT_DIR + "/" + INPUT_FILE1, INPUT_FILE1); Util.copyFromLocalToCluster(cluster, INPUT_DIR + "/" + INPUT_FILE2, INPUT_FILE2); Util.copyFromLocalToCluster(cluster, INPUT_DIR + "/" + INPUT_FILE3, INPUT_FILE3); @@ -180,6 +186,7 @@ public class TestSkewedJoin { Util.copyFromLocalToCluster(cluster, INPUT_DIR + "/" + INPUT_FILE5, INPUT_FILE5); Util.copyFromLocalToCluster(cluster, INPUT_DIR + "/" + INPUT_FILE6, INPUT_FILE6); Util.copyFromLocalToCluster(cluster, INPUT_DIR + "/" + INPUT_FILE7, INPUT_FILE7); + Util.copyFromLocalToCluster(cluster, INPUT_DIR + "/" + INPUT_FILE8, INPUT_FILE8); } private static void deleteFiles() throws IOException { @@ -187,6 +194,21 @@ public class TestSkewedJoin { } @Test + public void testSkewedJoinMapLeftEmpty() throws IOException{ + pigServer.registerQuery("A = LOAD '" + INPUT_FILE8 + "' as (idM:[]);"); + pigServer.registerQuery("B = LOAD '" + INPUT_FILE1 + "' as (id, name, n);"); + pigServer.registerQuery("C = join A by idM#'id', B by id using 'skewed' PARALLEL 2;"); + Iterator<Tuple> iter = pigServer.openIterator("C"); + int count = 0; + while(iter.hasNext()) { + count++; + iter.next(); + } + assertEquals(0, count); + } + + + @Test public void testSkewedJoinWithGroup() throws IOException{ pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' as (id, name, n);"); pigServer.registerQuery("B = LOAD '" + INPUT_FILE2 + "' as (id, name);"); Modified: pig/branches/spark/test/org/apache/pig/test/TestStreamingLocal.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestStreamingLocal.java?rev=1784237&r1=1784236&r2=1784237&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestStreamingLocal.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestStreamingLocal.java Fri Feb 24 08:19:42 2017 @@ -18,6 +18,7 @@ package org.apache.pig.test; import java.io.File; +import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; @@ -372,4 +373,41 @@ public class TestStreamingLocal { Util.checkQueryOutputs(pigServer.openIterator("OP"), expectedResults); } } + + @Test + // Perl script with a syntax error, See PIG-4976 + public void testNegativeScriptSyntaxError() throws IOException { + + for( int numinput : new int [] {10, 9999} ) { + String[] inputStrings = new String[numinput]; + for (int i=0;i<numinput;i++) { + inputStrings[i] = Integer.toString(i); + } + File input = Util.createInputFile("tmp", "", inputStrings); + // Perl script + String[] script = + new String[] { + "#!/usr/bin/perl", + "syntax error", + }; + File command1 = Util.createInputFile("script", "pl", script); + String query = + "define CMD `perl " + command1.getName() + "` output('foo')" + + "ship ('" + Util.encodeEscape(command1.toString()) + "');"; + boolean succeeded=true; + try { + pigServer.registerQuery( query ); + pigServer.registerQuery("A = load '" + + Util.generateURI(input.toString(), + pigServer.getPigContext()) + + "' using PigStorage();"); + pigServer.registerQuery("B = stream A through CMD;"); + pigServer.openIterator("B"); + } catch(Exception ex) { + succeeded=false; + } + Assert.assertFalse("Job with " + numinput + " lines input did not fail.", succeeded); + } + } + } Modified: pig/branches/spark/test/org/apache/pig/test/TestTypeCheckingValidatorNewLP.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestTypeCheckingValidatorNewLP.java?rev=1784237&r1=1784236&r2=1784237&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestTypeCheckingValidatorNewLP.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestTypeCheckingValidatorNewLP.java Fri Feb 24 08:19:42 2017 @@ -39,8 +39,10 @@ import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Properties; import junit.framework.Assert; @@ -55,6 +57,8 @@ import org.apache.pig.builtin.PigStorage import org.apache.pig.data.DataBag; import org.apache.pig.data.DataByteArray; import org.apache.pig.data.DataType; +import org.apache.pig.data.DefaultTuple; +import org.apache.pig.data.NonSpillableDataBag; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; import org.apache.pig.impl.PigContext; @@ -2908,12 +2912,12 @@ public class TestTypeCheckingValidatorNe @Test public void testUnionLineageDifferentSchemaFail() throws Throwable { - String query = "a = load 'a' using PigStorage('a') as (field1, field2: float, field3: chararray );" - + "b = load 'a' using org.apache.pig.test.PigStorageWithDifferentCaster('b') as (field4, field5, field6: chararray, field7 );" - + "c = union a , b;" + String query = "a = load 'a' using PigStorage('a') as (field1, field2: float, field3: chararray );\n" + + "b = load 'a' using org.apache.pig.test.PigStorageWithDifferentCaster('b') as (field4, field5, field6: chararray, field7 );\n" + + "c = union a , b;\n" + "d = foreach c generate $3 + 2.0 ;"; - checkWarning(query, CAST_LOAD_NOT_FOUND); + checkWarning(query, CAST_LOAD_NOT_FOUND + " to double at <line 4,"); } private void checkWarning(String query, String warnMsg) throws FrontendException { @@ -2955,12 +2959,12 @@ public class TestTypeCheckingValidatorNe public void testUnionLineageMixSchemaFail() throws Throwable { // different loader caster associated with each input, so can't determine // which one to use on union output - String query = "a = load 'a' using PigStorage('a') as (field1, field2: float, field3: chararray );" - + "b = load 'a' using org.apache.pig.test.PigStorageWithDifferentCaster('b');" - + "c = union a , b;" + String query = "a = load 'a' using PigStorage('a') as (field1, field2: float, field3: chararray );\n" + + "b = load 'a' using org.apache.pig.test.PigStorageWithDifferentCaster('b');\n" + + "c = union a , b;\n" + "d = foreach c generate $3 + 2.0 ;"; - checkWarning(query, CAST_LOAD_NOT_FOUND); + checkWarning(query, CAST_LOAD_NOT_FOUND + " to double at <line 4,"); } @Test @@ -3302,12 +3306,12 @@ public class TestTypeCheckingValidatorNe @Test public void testCrossLineageNoSchemaFail() throws Throwable { - String query = "a = load 'a' using PigStorage('a');" - + "b = load 'a' using org.apache.pig.test.PigStorageWithDifferentCaster('b');" - + "c = cross a , b;" + String query = "a = load 'a' using PigStorage('a');\n" + + "b = load 'a' using org.apache.pig.test.PigStorageWithDifferentCaster('b');\n" + + "c = cross a , b;\n" + "d = foreach c generate $1 + 2.0 ;"; - checkWarning(query, CAST_LOAD_NOT_FOUND); + checkWarning(query, CAST_LOAD_NOT_FOUND + " to double at <line 4,"); } @Test @@ -3323,12 +3327,12 @@ public class TestTypeCheckingValidatorNe @Test public void testCrossLineageMixSchemaFail() throws Throwable { - String query = "a = load 'a' using PigStorage('a') as (field1, field2: float, field3: chararray );" - + "b = load 'a' using org.apache.pig.test.PigStorageWithDifferentCaster('b');" - + "c = cross a , b;" + String query = "a = load 'a' using PigStorage('a') as (field1, field2: float, field3: chararray );\n" + + "b = load 'a' using org.apache.pig.test.PigStorageWithDifferentCaster('b');\n" + + "c = cross a , b;\n" + "d = foreach c generate $3 + 2.0 ;"; - checkWarning(query, CAST_LOAD_NOT_FOUND); + checkWarning(query, CAST_LOAD_NOT_FOUND + " to double at <line 4,"); } @Test @@ -3357,12 +3361,12 @@ public class TestTypeCheckingValidatorNe public void testJoinLineageNoSchemaFail() throws Throwable { //this test case should change when we decide on what flattening a tuple or bag //with null schema results in a foreach flatten and hence a join - String query = "a = load 'a' using PigStorage('a');" - + "b = load 'a' using org.apache.pig.test.PigStorageWithDifferentCaster();" - + "c = join a by $0, b by $0;" + String query = "a = load 'a' using PigStorage('a');\n" + + "b = load 'a' using org.apache.pig.test.PigStorageWithDifferentCaster();\n" + + "c = join a by $0, b by $0;\n" + "d = foreach c generate $1 + 2.0 ;"; - checkWarning(query, CAST_LOAD_NOT_FOUND); + checkWarning(query, CAST_LOAD_NOT_FOUND + " to double at <line 4,"); } @Test @@ -3378,12 +3382,12 @@ public class TestTypeCheckingValidatorNe public void testJoinLineageMixSchemaFail() throws Throwable { //this test case should change when we decide on what flattening a tuple or bag //with null schema results in a foreach flatten and hence a join - String query = "a = load 'a' using PigStorage('a') as (field1, field2: float, field3: chararray );" - + "b = load 'a' using org.apache.pig.test.PigStorageWithDifferentCaster();" - + "c = join a by field1, b by $0;" + String query = "a = load 'a' using PigStorage('a') as (field1, field2: float, field3: chararray );\n" + + "b = load 'a' using org.apache.pig.test.PigStorageWithDifferentCaster();\n" + + "c = join a by field1, b by $0;\n" + "d = foreach c generate $3 + 2.0 ;"; - checkWarning(query, CAST_LOAD_NOT_FOUND); + checkWarning(query, CAST_LOAD_NOT_FOUND + " to double at <line 4,"); } @Test @@ -3867,12 +3871,12 @@ public class TestTypeCheckingValidatorNe */ @Test public void testLineageMultipleLoader3() throws FrontendException { - String query = "A = LOAD 'data1' USING PigStorage() AS (u, v, w);" - + "B = LOAD 'data2' USING TextLoader() AS (x, y);" - + "C = COGROUP A BY u, B by x;" - + "D = FOREACH C GENERATE (chararray)group;"; + String query = "A = LOAD 'data1' USING PigStorage() AS (u, v, w);\n" + + "B = LOAD 'data2' USING TextLoader() AS (x, y);\n" + + "C = COGROUP A BY u, B by x;\n" + + "D = FOREACH C GENERATE (chararray)group;\n"; - checkWarning(query, CAST_LOAD_NOT_FOUND); + checkWarning(query, CAST_LOAD_NOT_FOUND + " to chararray at <line 4,"); } /** @@ -4063,12 +4067,12 @@ public class TestTypeCheckingValidatorNe @Test public void testUDFNoInnerSchema() throws FrontendException { - String query = "a= load '1.txt';" + String query = "a= load '1.txt' using PigStorage(':') ;" + "b = foreach a generate "+TestUDFTupleNullInnerSchema.class.getName()+"($0);" + "c = foreach b generate flatten($0);" + "d = foreach c generate $0 + 1;"; - checkLastForeachCastLoadFunc(query, null, 0); + checkLastForeachCastLoadFunc(query, "PigStorage(':')"); } //see PIG-1990 @@ -4118,4 +4122,56 @@ public class TestTypeCheckingValidatorNe " corresponding column in earlier relation(s) in the statement"; Util.checkExceptionMessage(query, "c", msg); } + //see PIG-4734 + public static class GenericToMap extends EvalFunc<Map<String, Double>> { + @Override + public Map exec(Tuple input) throws IOException { + Map<String, Double> output = new HashMap<String, Double>(); + output.put((String)input.get(0), (Double)input.get(1)); + return output; + } + } + @Test + public void testBinCondCompatMap() throws Exception { + String query = + "a = load 'studenttab10k' as (name:chararray, gpa:double);" + + "b = foreach a generate gpa, TOMAP(name, gpa) as m1, " + + GenericToMap.class.getName() + "(name, gpa) as m2;" + + "c = foreach b generate (gpa>3? m1 : m2);"; + createAndProcessLPlan(query); + } + public static class GenericToTuple extends EvalFunc<Tuple> { + @Override + public Tuple exec(Tuple input) throws IOException { + return input; + } + } + @Test + public void testBinCondCompatTuple() throws Exception { + String query = + "a = load 'studenttab10k' as (name:chararray, gpa:double);" + + "b = foreach a generate gpa, TOTUPLE(name, gpa) as t1, " + + GenericToTuple.class.getName() + "(name, gpa) as t2;" + + "c = foreach b generate (gpa>3? t1 : t2);"; + createAndProcessLPlan(query); + } + public static class GenericToBag extends EvalFunc<DataBag> { + @Override + public DataBag exec(Tuple input) throws IOException { + DataBag bag = new NonSpillableDataBag(1); + Tuple t = new DefaultTuple(); + t.append(input.get(0)); + bag.add(t); + return bag; + } + } + @Test + public void testBinCondCompatBag() throws Exception { + String query = + "a = load 'studenttab10k' as (name:chararray, gpa:double);" + + "b = foreach a generate gpa, TOBAG(name) as b1, " + + GenericToBag.class.getName() + "(name) as b2;" + + "c = foreach b generate (gpa>3? b1 : b2);"; + createAndProcessLPlan(query); + } } Modified: pig/branches/spark/test/org/apache/pig/test/TestUnionOnSchema.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestUnionOnSchema.java?rev=1784237&r1=1784236&r2=1784237&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TestUnionOnSchema.java (original) +++ pig/branches/spark/test/org/apache/pig/test/TestUnionOnSchema.java Fri Feb 24 08:19:42 2017 @@ -96,8 +96,6 @@ public class TestUnionOnSchema { /** * Test UNION ONSCHEMA on two inputs with same schema - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaSameSchema() throws Exception { @@ -128,8 +126,6 @@ public class TestUnionOnSchema { /** * Test UNION ONSCHEMA with operations after the union - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaFilter() throws Exception { @@ -161,8 +157,6 @@ public class TestUnionOnSchema { /** * Test UNION ONSCHEMA with operations after the union - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaSuccOps() throws Exception { @@ -194,8 +188,6 @@ public class TestUnionOnSchema { /** * Test UNION ONSCHEMA with cast from bytearray to another type - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaCastOnByteArray() throws Exception { @@ -223,8 +215,6 @@ public class TestUnionOnSchema { /** * Test UNION ONSCHEMA where a common column has additional 'namespace' part * in the column name in one of the inputs - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaScopedColumnName() throws Exception { @@ -266,8 +256,6 @@ public class TestUnionOnSchema { /** * Test UNION ONSCHEMA where a common column has additional 'namespace' part * in the column name in both the inputs - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaScopedColumnNameBothInp1() throws Exception { @@ -302,8 +290,6 @@ public class TestUnionOnSchema { /** * Test UNION ONSCHEMA where a common column has additional 'namespace' part * in the column name in both the inputs - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaScopedColumnNameBothInp2() throws Exception { @@ -340,8 +326,6 @@ public class TestUnionOnSchema { * Test UNION ONSCHEMA where a common column has additional 'namespace' part * in the column name in one of the inputs. * Negative test case - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaScopedColumnNameNeg() throws Exception { @@ -366,8 +350,6 @@ public class TestUnionOnSchema { /** * Test UNION ONSCHEMA on two inputs with same column names, but different * numeric types - test type promotion - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaDiffNumType() throws Exception { @@ -396,8 +378,6 @@ public class TestUnionOnSchema { /** * Test UNION ONSCHEMA on two inputs with no common columns - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaNoCommonCols() throws Exception { @@ -424,8 +404,6 @@ public class TestUnionOnSchema { /** * Test UNION ONSCHEMA on two inputs , one input with additional columns - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaAdditionalColumn() throws Exception { @@ -498,8 +476,6 @@ public class TestUnionOnSchema { /** * Test UNION ONSCHEMA on 3 inputs - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchema3Inputs() throws Exception { @@ -533,8 +509,6 @@ public class TestUnionOnSchema { /** * Test UNION ONSCHEMA with bytearray type - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaByteArrayConversions() throws Exception { @@ -572,8 +546,6 @@ public class TestUnionOnSchema { /** * negative test - test error on no schema - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaNoSchema() throws Exception { @@ -597,8 +569,6 @@ public class TestUnionOnSchema { /** * negative test - test error on null alias in one of the FieldSchema - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaNullAliasInFieldSchema() throws Exception { @@ -640,8 +610,6 @@ public class TestUnionOnSchema { /** * test union with incompatible types in schema - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaIncompatibleTypes() throws Exception { @@ -650,7 +618,15 @@ public class TestUnionOnSchema { + "l2 = load '" + INP_FILE_2NUMS + "' as (x : long, y : float);" + "u = union onschema l1, l2;"; - checkSchemaEquals(query, "x : long, y : bytearray"); + checkSchemaEx(query, "Cannot cast from chararray to bytearray"); + + //without "onschema" + query = + " l1 = load '" + INP_FILE_2NUMS + "' as (x : long, y : chararray);" + + "l2 = load '" + INP_FILE_2NUMS + "' as (x : long, y : float);" + + "u = union l1, l2;"; + + checkSchemaEx(query, "Cannot cast from chararray to bytearray"); @@ -659,8 +635,15 @@ public class TestUnionOnSchema { + "l2 = load '" + INP_FILE_2NUMS + "' as (x : map[ ], y : chararray);" + "u = union onschema l1, l2;" ; - checkSchemaEquals(query, "x : bytearray, y : chararray"); + checkSchemaEx(query, "Cannot cast from long to bytearray"); + query = + " l1 = load '" + INP_FILE_2NUMS + "' as (x : long, y : chararray);" + + "l2 = load '" + INP_FILE_2NUMS + "' as (x : map[ ], y : chararray);" + + "u = union l1, l2;" + ; + checkSchemaEx(query, "Cannot cast from long to bytearray"); + // bag column with different internal column types query = " l1 = load '" + INP_FILE_2NUMS @@ -708,8 +691,6 @@ public class TestUnionOnSchema { /** * Test UNION ONSCHEMA with input relation having udfs - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaInputUdfs() throws Exception { @@ -745,8 +726,6 @@ public class TestUnionOnSchema { /** * Test UNION ONSCHEMA with udf whose default type is different from * final type - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaUdfTypeEvolution() throws Exception { @@ -797,8 +776,6 @@ public class TestUnionOnSchema { /** * Test UNION ONSCHEMA with udf whose default type is different from * final type - where udf is not in immediate input of union - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaUdfTypeEvolution2() throws Exception { @@ -869,8 +846,6 @@ public class TestUnionOnSchema { /** * Test UNION ONSCHEMA with input relation having column names with multiple * level of namespace in their names - * @throws IOException - * @throws ParserException */ @Test public void testUnionOnSchemaScopeMulti() throws Exception { @@ -916,8 +891,6 @@ public class TestUnionOnSchema { /** * Test query with a union-onschema having another as input - * @throws IOException - * @throws ParserException */ @Test public void testTwoUnions() throws Exception { Added: pig/branches/spark/test/org/apache/pig/test/TezMiniCluster.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TezMiniCluster.java?rev=1784237&view=auto ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/TezMiniCluster.java (added) +++ pig/branches/spark/test/org/apache/pig/test/TezMiniCluster.java Fri Feb 24 08:19:42 2017 @@ -0,0 +1,200 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.test; + +import java.io.File; +import java.io.FileFilter; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.Map.Entry; + +import org.apache.commons.io.filefilter.RegexFileFilter; +import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.mapreduce.v2.MiniMRYarnCluster; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.pig.ExecType; +import org.apache.pig.PigConfiguration; +import org.apache.pig.backend.hadoop.executionengine.Launcher; +import org.apache.pig.backend.hadoop.executionengine.tez.TezExecType; +import org.apache.pig.backend.hadoop.executionengine.tez.TezLauncher; +import org.apache.pig.backend.hadoop.executionengine.tez.TezSessionManager; +import org.apache.tez.dag.api.TezConfiguration; +import org.apache.tez.mapreduce.hadoop.MRJobConfig; +import org.apache.tez.runtime.library.api.TezRuntimeConfiguration; + +public class TezMiniCluster extends MiniGenericCluster { + private static final File CONF_DIR = new File("build/classes"); + private static final File TEZ_LIB_DIR = new File("build/ivy/lib/Pig"); + private static final File TEZ_CONF_FILE = new File(CONF_DIR, "tez-site.xml"); + private static final File CORE_CONF_FILE = new File(CONF_DIR, "core-site.xml"); + private static final File HDFS_CONF_FILE = new File(CONF_DIR, "hdfs-site.xml"); + private static final File MAPRED_CONF_FILE = new File(CONF_DIR, "mapred-site.xml"); + private static final File YARN_CONF_FILE = new File(CONF_DIR, "yarn-site.xml"); + private static final ExecType TEZ = new TezExecType(); + + protected MiniMRYarnCluster m_mr = null; + private Configuration m_dfs_conf = null; + private Configuration m_mr_conf = null; + + @Override + public ExecType getExecType() { + return TEZ; + } + + @Override + public void setupMiniDfsAndMrClusters() { + try { + deleteConfFiles(); + CONF_DIR.mkdirs(); + + // Build mini DFS cluster + Configuration hdfsConf = new Configuration(); + m_dfs = new MiniDFSCluster.Builder(hdfsConf) + .numDataNodes(2) + .format(true) + .racks(null) + .build(); + m_fileSys = m_dfs.getFileSystem(); + m_dfs_conf = m_dfs.getConfiguration(0); + //Create user home directory + m_fileSys.mkdirs(m_fileSys.getWorkingDirectory()); + + // Write core-site.xml + Configuration core_site = new Configuration(false); + core_site.set(FileSystem.FS_DEFAULT_NAME_KEY, m_dfs_conf.get(FileSystem.FS_DEFAULT_NAME_KEY)); + core_site.writeXml(new FileOutputStream(CORE_CONF_FILE)); + + Configuration hdfs_site = new Configuration(false); + for (Entry<String, String> conf : m_dfs_conf) { + if (ArrayUtils.contains(m_dfs_conf.getPropertySources(conf.getKey()), "programatically")) { + hdfs_site.set(conf.getKey(), m_dfs_conf.getRaw(conf.getKey())); + } + } + hdfs_site.writeXml(new FileOutputStream(HDFS_CONF_FILE)); + + // Build mini YARN cluster + m_mr = new MiniMRYarnCluster("PigMiniCluster", 2); + m_mr.init(m_dfs_conf); + m_mr.start(); + m_mr_conf = m_mr.getConfig(); + File libDir = new File(System.getProperty("ivy.lib.dir", "build/ivy/lib/Pig")); + File classesDir = new File(System.getProperty("build.classes", "build/classes")); + File testClassesDir = new File(System.getProperty("test.build.classes", "test/build/classes")); + String classpath = libDir.getAbsolutePath() + "/*" + + File.pathSeparator + classesDir.getAbsolutePath() + + File.pathSeparator + testClassesDir.getAbsolutePath(); + m_mr_conf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH, classpath); + m_mr_conf.set(MRJobConfig.MAP_JAVA_OPTS, "-Xmx512m"); + m_mr_conf.set(MRJobConfig.REDUCE_JAVA_OPTS, "-Xmx512m"); + + Configuration mapred_site = new Configuration(false); + Configuration yarn_site = new Configuration(false); + for (Entry<String, String> conf : m_mr_conf) { + if (ArrayUtils.contains(m_mr_conf.getPropertySources(conf.getKey()), "programatically")) { + if (conf.getKey().contains("yarn")) { + yarn_site.set(conf.getKey(), m_mr_conf.getRaw(conf.getKey())); + } else if (!conf.getKey().startsWith("dfs")){ + mapred_site.set(conf.getKey(), m_mr_conf.getRaw(conf.getKey())); + } + } + } + + mapred_site.writeXml(new FileOutputStream(MAPRED_CONF_FILE)); + yarn_site.writeXml(new FileOutputStream(YARN_CONF_FILE)); + + // Write tez-site.xml + Configuration tez_conf = new Configuration(false); + // TODO PIG-3659 - Remove this once memory management is fixed + tez_conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, "20"); + tez_conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, "false"); + tez_conf.set("tez.lib.uris", "hdfs:///tez,hdfs:///tez/lib"); + // Set to a lower value so that tests don't get stuck for long because of 1 AM running at a time + tez_conf.set(TezConfiguration.TEZ_SESSION_AM_DAG_SUBMIT_TIMEOUT_SECS, "20"); + // Lower the max task attempts to 2 so that negative tests fail + // faster. By default, tasks retry 4 times + tez_conf.set(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, "2"); + tez_conf.writeXml(new FileOutputStream(TEZ_CONF_FILE)); + + // Copy tez jars to hdfs + m_fileSys.mkdirs(new Path("/tez/lib")); + FileFilter fileFilter = new RegexFileFilter("tez-.+\\.jar$"); + File[] tezJars = TEZ_LIB_DIR.listFiles(fileFilter); + for (int i = 0; i < tezJars.length; i++) { + if (tezJars[i].getName().startsWith("tez-api")) { + m_fileSys.copyFromLocalFile( + new Path(tezJars[i].getAbsoluteFile().toString()), + new Path("/tez")); + } else { + m_fileSys.copyFromLocalFile( + new Path(tezJars[i].getAbsoluteFile().toString()), + new Path("/tez/lib")); + } + } + + m_conf = m_mr_conf; + // Turn FetchOptimizer off so that we can actually test Tez + m_conf.set(PigConfiguration.PIG_OPT_FETCH, System.getProperty("test.opt.fetch", "false")); + + System.setProperty("junit.hadoop.conf", CONF_DIR.getPath()); + System.setProperty("hadoop.log.dir", "build/test/logs"); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + protected void shutdownMiniDfsAndMrClusters() { + TezSessionManager.shutdown(); + super.shutdownMiniDfsAndMrClusters(); + } + + @Override + protected void shutdownMiniMrClusters() { + deleteConfFiles(); + if (m_mr != null) { + m_mr.stop(); + m_mr = null; + } + } + + private void deleteConfFiles() { + if(TEZ_CONF_FILE.exists()) { + TEZ_CONF_FILE.delete(); + } + if(CORE_CONF_FILE.exists()) { + CORE_CONF_FILE.delete(); + } + if(HDFS_CONF_FILE.exists()) { + HDFS_CONF_FILE.delete(); + } + if(MAPRED_CONF_FILE.exists()) { + MAPRED_CONF_FILE.delete(); + } + if(YARN_CONF_FILE.exists()) { + YARN_CONF_FILE.delete(); + } + } + + static public Launcher getLauncher() { + return new TezLauncher(); + } +} Modified: pig/branches/spark/test/org/apache/pig/test/Util.java URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/Util.java?rev=1784237&r1=1784236&r2=1784237&view=diff ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/Util.java (original) +++ pig/branches/spark/test/org/apache/pig/test/Util.java Fri Feb 24 08:19:42 2017 @@ -480,6 +480,19 @@ public class Util { fs.delete(new Path(fileName), true); } + /** + * Deletes a dfs file from the MiniCluster DFS quietly + * + * @param miniCluster the MiniCluster where the file should be deleted + * @param fileName the path of the file to be deleted + */ + public static void deleteQuietly(MiniGenericCluster miniCluster, String fileName) { + try { + deleteFile(miniCluster, fileName); + } catch (IOException ignored) { + } + } + static public void deleteFile(PigContext pigContext, String fileName) throws IOException { Configuration conf = ConfigurationUtil.toConfiguration( @@ -658,13 +671,10 @@ public class Util { } } - static private String getMkDirCommandForHadoop2_0(String fileName) { - if (org.apache.pig.impl.util.Utils.isHadoop23() || org.apache.pig.impl.util.Utils.isHadoop2()) { - Path parentDir = new Path(fileName).getParent(); - String mkdirCommand = parentDir.getName().isEmpty() ? "" : "fs -mkdir -p " + parentDir + "\n"; - return mkdirCommand; - } - return ""; + static private String getFSMkDirCommand(String fileName) { + Path parentDir = new Path(fileName).getParent(); + String mkdirCommand = parentDir.getName().isEmpty() ? "" : "fs -mkdir -p " + parentDir + "\n"; + return mkdirCommand; } /** @@ -686,7 +696,7 @@ public class Util { fileNameOnCluster = fileNameOnCluster.replace('\\','/'); } PigServer ps = new PigServer(cluster.getExecType(), cluster.getProperties()); - String script = getMkDirCommandForHadoop2_0(fileNameOnCluster) + "fs -put " + localFileName + " " + fileNameOnCluster; + String script = getFSMkDirCommand(fileNameOnCluster) + "fs -put " + localFileName + " " + fileNameOnCluster; GruntParser parser = new GruntParser(new StringReader(script), ps); parser.setInteractive(false); try { @@ -847,7 +857,23 @@ public class Util { } public static File createFile(String[] data) throws Exception{ - File f = File.createTempFile("tmp", ""); + return createFile(null,data); + } + + public static File createFile(String filePath, String[] data) throws Exception { + File f; + if( null == filePath || filePath.isEmpty() ) { + f = File.createTempFile("tmp", ""); + } else { + f = new File(filePath); + } + + if (f.getParent() != null && !(new File(f.getParent())).exists()) { + (new File(f.getParent())).mkdirs(); + } + + f.deleteOnExit(); + PrintWriter pw = new PrintWriter(f); for (int i=0; i<data.length; i++){ pw.println(data[i]); @@ -918,14 +944,7 @@ public class Util { MapRedUtil.checkLeafIsStore(pp, pc); MapReduceLauncher launcher = new MapReduceLauncher(); - - java.lang.reflect.Method compile = launcher.getClass() - .getDeclaredMethod("compile", - new Class[] { PhysicalPlan.class, PigContext.class }); - - compile.setAccessible(true); - - return (MROperPlan) compile.invoke(launcher, new Object[] { pp, pc }); + return launcher.compile(pp,pc); } public static MROperPlan buildMRPlan(String query, PigContext pc) throws Exception { Added: pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1-KeyToReducer.gld URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1-KeyToReducer.gld?rev=1784237&view=auto ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1-KeyToReducer.gld (added) +++ pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1-KeyToReducer.gld Fri Feb 24 08:19:42 2017 @@ -0,0 +1,91 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-48 -> Tez vertex scope-49,Tez vertex scope-50, +Tez vertex scope-50 -> Tez vertex scope-46,Tez vertex scope-47, +Tez vertex scope-46 -> Tez vertex scope-49, +Tez vertex scope-47 -> Tez vertex scope-49, +Tez vertex scope-49 + +Tez vertex scope-48 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{bytearray}(false) - scope-30 -> [ scope-49, scope-50] +| | +| Project[bytearray][0] - scope-31 +| +|---c: New For Each(false,false)[bag] - scope-20 + | | + | Project[bytearray][0] - scope-15 + | | + | Cast[int] - scope-18 + | | + | |---Project[bytearray][1] - scope-17 + | + |---c: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-14 +Tez vertex scope-50 +# Combine plan on edge <scope-48> +Local Rearrange[tuple]{int}(false) - scope-55 -> scope-50 +| | +| Project[int][0] - scope-54 +| +|---Package(BloomPackager)[tuple]{int} - scope-53 +# Plan on vertex +POValueOutputTez - scope-52 -> [scope-46, scope-47] +| +|---Package(BloomPackager)[tuple]{int} - scope-51 +Tez vertex scope-46 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{bytearray}(false) - scope-26 <- scope-50 -> scope-49 +| | +| Project[bytearray][0] - scope-27 +| +|---b: New For Each(false,false)[bag] - scope-6 + | | + | Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-4 + | | + | |---Project[bytearray][1] - scope-3 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-47 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{bytearray}(false) - scope-28 <- scope-50 -> scope-49 +| | +| Project[bytearray][0] - scope-29 +| +|---a: New For Each(false,false)[bag] - scope-13 + | | + | Project[bytearray][0] - scope-8 + | | + | Cast[int] - scope-11 + | | + | |---Project[bytearray][1] - scope-10 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-7 +Tez vertex scope-49 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-45 +| +|---e: New For Each(false,false,false,false)[bag] - scope-44 + | | + | Project[bytearray][2] - scope-36 + | | + | Project[int][3] - scope-38 + | | + | Project[int][1] - scope-40 + | | + | Project[int][5] - scope-42 + | + |---d: New For Each(true,true,true)[tuple] - scope-35 + | | + | Project[bag][1] - scope-32 + | | + | Project[bag][2] - scope-33 + | | + | Project[bag][3] - scope-34 + | + |---d: Package(Packager)[tuple]{bytearray} - scope-25 Added: pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1.gld URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1.gld?rev=1784237&view=auto ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1.gld (added) +++ pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-1.gld Fri Feb 24 08:19:42 2017 @@ -0,0 +1,91 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-48 -> Tez vertex scope-49,Tez vertex scope-50, +Tez vertex scope-50 -> Tez vertex scope-46,Tez vertex scope-47, +Tez vertex scope-46 -> Tez vertex scope-49, +Tez vertex scope-47 -> Tez vertex scope-49, +Tez vertex scope-49 + +Tez vertex scope-48 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{bytearray}(false) - scope-30 -> [ scope-49, scope-50] +| | +| Project[bytearray][0] - scope-31 +| +|---c: New For Each(false,false)[bag] - scope-20 + | | + | Project[bytearray][0] - scope-15 + | | + | Cast[int] - scope-18 + | | + | |---Project[bytearray][1] - scope-17 + | + |---c: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-14 +Tez vertex scope-50 +# Combine plan on edge <scope-48> +Local Rearrange[tuple]{int}(false) - scope-55 -> scope-50 +| | +| Project[int][0] - scope-54 +| +|---Package(BloomPackager)[tuple]{int} - scope-53 +# Plan on vertex +POValueOutputTez - scope-52 -> [scope-46, scope-47] +| +|---Package(BloomPackager)[tuple]{int} - scope-51 +Tez vertex scope-46 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{bytearray}(false) - scope-26 <- scope-50 -> scope-49 +| | +| Project[bytearray][0] - scope-27 +| +|---b: New For Each(false,false)[bag] - scope-6 + | | + | Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-4 + | | + | |---Project[bytearray][1] - scope-3 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-47 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{bytearray}(false) - scope-28 <- scope-50 -> scope-49 +| | +| Project[bytearray][0] - scope-29 +| +|---a: New For Each(false,false)[bag] - scope-13 + | | + | Project[bytearray][0] - scope-8 + | | + | Cast[int] - scope-11 + | | + | |---Project[bytearray][1] - scope-10 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-7 +Tez vertex scope-49 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-45 +| +|---e: New For Each(false,false,false,false)[bag] - scope-44 + | | + | Project[bytearray][2] - scope-36 + | | + | Project[int][3] - scope-38 + | | + | Project[int][1] - scope-40 + | | + | Project[int][5] - scope-42 + | + |---d: New For Each(true,true,true)[tuple] - scope-35 + | | + | Project[bag][1] - scope-32 + | | + | Project[bag][2] - scope-33 + | | + | Project[bag][3] - scope-34 + | + |---d: Package(Packager)[tuple]{bytearray} - scope-25 Added: pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2-KeyToReducer.gld URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2-KeyToReducer.gld?rev=1784237&view=auto ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2-KeyToReducer.gld (added) +++ pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2-KeyToReducer.gld Fri Feb 24 08:19:42 2017 @@ -0,0 +1,83 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-39 -> Tez vertex scope-41,Tez vertex scope-42, +Tez vertex scope-42 -> Tez vertex scope-40, +Tez vertex scope-40 -> Tez vertex scope-41, +Tez vertex scope-41 + +Tez vertex scope-39 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{chararray}(false) - scope-20 -> [ scope-41, scope-42] +| | +| Project[chararray][0] - scope-21 +| +|---a: New For Each(false,false)[bag] - scope-7 + | | + | Cast[chararray] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-42 +# Combine plan on edge <scope-39> +Local Rearrange[tuple]{int}(false) - scope-47 -> scope-42 +| | +| Project[int][0] - scope-46 +| +|---Package(BloomPackager)[tuple]{int} - scope-45 +# Plan on vertex +POValueOutputTez - scope-44 -> [scope-40] +| +|---Package(BloomPackager)[tuple]{int} - scope-43 +Tez vertex scope-40 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{chararray}(false) - scope-22 <- scope-42 -> scope-41 +| | +| Project[chararray][0] - scope-23 +| +|---b: New For Each(false,false)[bag] - scope-15 + | | + | Cast[chararray] - scope-10 + | | + | |---Project[bytearray][0] - scope-9 + | | + | Cast[int] - scope-13 + | | + | |---Project[bytearray][1] - scope-12 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-8 +Tez vertex scope-41 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-38 +| +|---e: New For Each(false,false,false)[bag] - scope-37 + | | + | Project[chararray][0] - scope-31 + | | + | Project[int][1] - scope-33 + | | + | Project[int][3] - scope-35 + | + |---d: New For Each(true,true)[tuple] - scope-30 + | | + | Project[bag][1] - scope-24 + | | + | POBinCond[bag] - scope-29 + | | + | |---Project[bag][2] - scope-25 + | | + | |---POUserFunc(org.apache.pig.builtin.IsEmpty)[boolean] - scope-27 + | | | + | | |---Project[bag][2] - scope-26 + | | + | |---Constant({(,)}) - scope-28 + | + |---d: Package(Packager)[tuple]{chararray} - scope-19 Added: pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2.gld URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2.gld?rev=1784237&view=auto ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2.gld (added) +++ pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-2.gld Fri Feb 24 08:19:42 2017 @@ -0,0 +1,83 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-39 -> Tez vertex scope-41,Tez vertex scope-42, +Tez vertex scope-42 -> Tez vertex scope-40, +Tez vertex scope-40 -> Tez vertex scope-41, +Tez vertex scope-41 + +Tez vertex scope-39 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{chararray}(false) - scope-20 -> [ scope-41, scope-42] +| | +| Project[chararray][0] - scope-21 +| +|---a: New For Each(false,false)[bag] - scope-7 + | | + | Cast[chararray] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-42 +# Combine plan on edge <scope-39> +Local Rearrange[tuple]{int}(false) - scope-47 -> scope-42 +| | +| Project[int][0] - scope-46 +| +|---Package(BloomPackager)[tuple]{int} - scope-45 +# Plan on vertex +POValueOutputTez - scope-44 -> [scope-40] +| +|---Package(BloomPackager)[tuple]{int} - scope-43 +Tez vertex scope-40 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{chararray}(false) - scope-22 <- scope-42 -> scope-41 +| | +| Project[chararray][0] - scope-23 +| +|---b: New For Each(false,false)[bag] - scope-15 + | | + | Cast[chararray] - scope-10 + | | + | |---Project[bytearray][0] - scope-9 + | | + | Cast[int] - scope-13 + | | + | |---Project[bytearray][1] - scope-12 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-8 +Tez vertex scope-41 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-38 +| +|---e: New For Each(false,false,false)[bag] - scope-37 + | | + | Project[chararray][0] - scope-31 + | | + | Project[int][1] - scope-33 + | | + | Project[int][3] - scope-35 + | + |---d: New For Each(true,true)[tuple] - scope-30 + | | + | Project[bag][1] - scope-24 + | | + | POBinCond[bag] - scope-29 + | | + | |---Project[bag][2] - scope-25 + | | + | |---POUserFunc(org.apache.pig.builtin.IsEmpty)[boolean] - scope-27 + | | | + | | |---Project[bag][2] - scope-26 + | | + | |---Constant({(,)}) - scope-28 + | + |---d: Package(Packager)[tuple]{chararray} - scope-19 Added: pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3-KeyToReducer.gld URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3-KeyToReducer.gld?rev=1784237&view=auto ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3-KeyToReducer.gld (added) +++ pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3-KeyToReducer.gld Fri Feb 24 08:19:42 2017 @@ -0,0 +1,105 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-45 -> Tez vertex group scope-58,Tez vertex group scope-59, +Tez vertex scope-46 -> Tez vertex group scope-58,Tez vertex group scope-59, +Tez vertex group scope-59 -> Tez vertex scope-52, +Tez vertex scope-52 -> Tez vertex scope-44, +Tez vertex scope-44 -> Tez vertex scope-51, +Tez vertex group scope-58 -> Tez vertex scope-51, +Tez vertex scope-51 + +Tez vertex scope-45 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{int}(false) - scope-60 -> [ scope-51, scope-52] +| | +| Project[int][0] - scope-61 +| +|---b: New For Each(false,false)[bag] - scope-15 + | | + | Cast[int] - scope-10 + | | + | |---Project[bytearray][0] - scope-9 + | | + | Cast[int] - scope-13 + | | + | |---Project[bytearray][1] - scope-12 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-8 +Tez vertex scope-46 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{int}(false) - scope-62 -> [ scope-51, scope-52] +| | +| Project[int][0] - scope-63 +| +|---c: New For Each(false,false)[bag] - scope-23 + | | + | Cast[int] - scope-18 + | | + | |---Project[bytearray][0] - scope-17 + | | + | Cast[int] - scope-21 + | | + | |---Project[bytearray][1] - scope-20 + | + |---c: Load(file:///tmp/input3:org.apache.pig.builtin.PigStorage) - scope-16 +Tez vertex group scope-59 <- [scope-45, scope-46] -> scope-52 +# No plan on vertex group +Tez vertex scope-52 +# Combine plan on edge <scope-45> +Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52 +| | +| Project[int][0] - scope-56 +| +|---Package(BloomPackager)[tuple]{int} - scope-55 +# Combine plan on edge <scope-46> +Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52 +| | +| Project[int][0] - scope-56 +| +|---Package(BloomPackager)[tuple]{int} - scope-55 +# Plan on vertex +POValueOutputTez - scope-54 -> [scope-44] +| +|---Package(BloomPackager)[tuple]{int} - scope-53 +Tez vertex scope-44 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{int}(false) - scope-29 <- scope-52 -> scope-51 +| | +| Project[int][0] - scope-30 +| +|---a: New For Each(false,false)[bag] - scope-7 + | | + | Cast[int] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex group scope-58 <- [scope-45, scope-46] -> scope-51 +# No plan on vertex group +Tez vertex scope-51 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-43 +| +|---e: New For Each(false,false,false)[bag] - scope-42 + | | + | Project[int][0] - scope-36 + | | + | Project[int][1] - scope-38 + | | + | Project[int][3] - scope-40 + | + |---d: New For Each(true,true)[tuple] - scope-35 + | | + | Project[bag][1] - scope-33 + | | + | Project[bag][2] - scope-34 + | + |---d: Package(Packager)[tuple]{int} - scope-28 Added: pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3.gld URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3.gld?rev=1784237&view=auto ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3.gld (added) +++ pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-3.gld Fri Feb 24 08:19:42 2017 @@ -0,0 +1,105 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-45 -> Tez vertex group scope-58,Tez vertex group scope-59, +Tez vertex scope-46 -> Tez vertex group scope-58,Tez vertex group scope-59, +Tez vertex group scope-59 -> Tez vertex scope-52, +Tez vertex scope-52 -> Tez vertex scope-44, +Tez vertex scope-44 -> Tez vertex scope-51, +Tez vertex group scope-58 -> Tez vertex scope-51, +Tez vertex scope-51 + +Tez vertex scope-45 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{int}(false) - scope-60 -> [ scope-51, scope-52] +| | +| Project[int][0] - scope-61 +| +|---b: New For Each(false,false)[bag] - scope-15 + | | + | Cast[int] - scope-10 + | | + | |---Project[bytearray][0] - scope-9 + | | + | Cast[int] - scope-13 + | | + | |---Project[bytearray][1] - scope-12 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-8 +Tez vertex scope-46 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{int}(false) - scope-62 -> [ scope-51, scope-52] +| | +| Project[int][0] - scope-63 +| +|---c: New For Each(false,false)[bag] - scope-23 + | | + | Cast[int] - scope-18 + | | + | |---Project[bytearray][0] - scope-17 + | | + | Cast[int] - scope-21 + | | + | |---Project[bytearray][1] - scope-20 + | + |---c: Load(file:///tmp/input3:org.apache.pig.builtin.PigStorage) - scope-16 +Tez vertex group scope-59 <- [scope-45, scope-46] -> scope-52 +# No plan on vertex group +Tez vertex scope-52 +# Combine plan on edge <scope-45> +Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52 +| | +| Project[int][0] - scope-56 +| +|---Package(BloomPackager)[tuple]{int} - scope-55 +# Combine plan on edge <scope-46> +Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52 +| | +| Project[int][0] - scope-56 +| +|---Package(BloomPackager)[tuple]{int} - scope-55 +# Plan on vertex +POValueOutputTez - scope-54 -> [scope-44] +| +|---Package(BloomPackager)[tuple]{int} - scope-53 +Tez vertex scope-44 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{int}(false) - scope-29 <- scope-52 -> scope-51 +| | +| Project[int][0] - scope-30 +| +|---a: New For Each(false,false)[bag] - scope-7 + | | + | Cast[int] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex group scope-58 <- [scope-45, scope-46] -> scope-51 +# No plan on vertex group +Tez vertex scope-51 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-43 +| +|---e: New For Each(false,false,false)[bag] - scope-42 + | | + | Project[int][0] - scope-36 + | | + | Project[int][1] - scope-38 + | | + | Project[int][3] - scope-40 + | + |---d: New For Each(true,true)[tuple] - scope-35 + | | + | Project[bag][1] - scope-33 + | | + | Project[bag][2] - scope-34 + | + |---d: Package(Packager)[tuple]{int} - scope-28 Added: pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4-KeyToReducer.gld URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4-KeyToReducer.gld?rev=1784237&view=auto ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4-KeyToReducer.gld (added) +++ pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4-KeyToReducer.gld Fri Feb 24 08:19:42 2017 @@ -0,0 +1,97 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-44 -> Tez vertex scope-46, +Tez vertex scope-45 -> Tez vertex scope-46, +Tez vertex scope-50 -> Tez vertex scope-51,Tez vertex scope-52, +Tez vertex scope-52 -> Tez vertex scope-46, +Tez vertex scope-46 -> Tez vertex scope-51, +Tez vertex scope-51 + +Tez vertex scope-44 +# Plan on vertex +POValueOutputTez - scope-48 -> [scope-46] +| +|---b: New For Each(false,false)[bag] - scope-7 + | | + | Cast[int] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-45 +# Plan on vertex +POValueOutputTez - scope-49 -> [scope-46] +| +|---c: New For Each(false,false)[bag] - scope-15 + | | + | Cast[int] - scope-10 + | | + | |---Project[bytearray][0] - scope-9 + | | + | Cast[int] - scope-13 + | | + | |---Project[bytearray][1] - scope-12 + | + |---c: Load(file:///tmp/input3:org.apache.pig.builtin.PigStorage) - scope-8 +Tez vertex scope-50 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{int}(false) - scope-31 -> [ scope-51, scope-52] +| | +| Project[int][0] - scope-32 +| +|---a: New For Each(false,false)[bag] - scope-24 + | | + | Cast[int] - scope-19 + | | + | |---Project[bytearray][0] - scope-18 + | | + | Cast[int] - scope-22 + | | + | |---Project[bytearray][1] - scope-21 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-17 +Tez vertex scope-52 +# Combine plan on edge <scope-50> +Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52 +| | +| Project[int][0] - scope-56 +| +|---Package(BloomPackager)[tuple]{int} - scope-55 +# Plan on vertex +POValueOutputTez - scope-54 -> [scope-46] +| +|---Package(BloomPackager)[tuple]{int} - scope-53 +Tez vertex scope-46 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{int}(false) - scope-29 <- scope-52 -> scope-51 +| | +| Project[int][0] - scope-30 +| +|---POShuffledValueInputTez - scope-47 <- [scope-44, scope-45] +Tez vertex scope-51 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-43 +| +|---e: New For Each(false,false,false)[bag] - scope-42 + | | + | Project[int][2] - scope-36 + | | + | Project[int][3] - scope-38 + | | + | Project[int][1] - scope-40 + | + |---d: New For Each(true,true)[tuple] - scope-35 + | | + | Project[bag][1] - scope-33 + | | + | Project[bag][2] - scope-34 + | + |---d: Package(Packager)[tuple]{int} - scope-28 Added: pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4.gld URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4.gld?rev=1784237&view=auto ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4.gld (added) +++ pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-4.gld Fri Feb 24 08:19:42 2017 @@ -0,0 +1,97 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-44 -> Tez vertex scope-46, +Tez vertex scope-45 -> Tez vertex scope-46, +Tez vertex scope-50 -> Tez vertex scope-51,Tez vertex scope-52, +Tez vertex scope-52 -> Tez vertex scope-46, +Tez vertex scope-46 -> Tez vertex scope-51, +Tez vertex scope-51 + +Tez vertex scope-44 +# Plan on vertex +POValueOutputTez - scope-48 -> [scope-46] +| +|---b: New For Each(false,false)[bag] - scope-7 + | | + | Cast[int] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-45 +# Plan on vertex +POValueOutputTez - scope-49 -> [scope-46] +| +|---c: New For Each(false,false)[bag] - scope-15 + | | + | Cast[int] - scope-10 + | | + | |---Project[bytearray][0] - scope-9 + | | + | Cast[int] - scope-13 + | | + | |---Project[bytearray][1] - scope-12 + | + |---c: Load(file:///tmp/input3:org.apache.pig.builtin.PigStorage) - scope-8 +Tez vertex scope-50 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{int}(false) - scope-31 -> [ scope-51, scope-52] +| | +| Project[int][0] - scope-32 +| +|---a: New For Each(false,false)[bag] - scope-24 + | | + | Cast[int] - scope-19 + | | + | |---Project[bytearray][0] - scope-18 + | | + | Cast[int] - scope-22 + | | + | |---Project[bytearray][1] - scope-21 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-17 +Tez vertex scope-52 +# Combine plan on edge <scope-50> +Local Rearrange[tuple]{int}(false) - scope-57 -> scope-52 +| | +| Project[int][0] - scope-56 +| +|---Package(BloomPackager)[tuple]{int} - scope-55 +# Plan on vertex +POValueOutputTez - scope-54 -> [scope-46] +| +|---Package(BloomPackager)[tuple]{int} - scope-53 +Tez vertex scope-46 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{int}(false) - scope-29 <- scope-52 -> scope-51 +| | +| Project[int][0] - scope-30 +| +|---POShuffledValueInputTez - scope-47 <- [scope-44, scope-45] +Tez vertex scope-51 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-43 +| +|---e: New For Each(false,false,false)[bag] - scope-42 + | | + | Project[int][2] - scope-36 + | | + | Project[int][3] - scope-38 + | | + | Project[int][1] - scope-40 + | + |---d: New For Each(true,true)[tuple] - scope-35 + | | + | Project[bag][1] - scope-33 + | | + | Project[bag][2] - scope-34 + | + |---d: Package(Packager)[tuple]{int} - scope-28 Added: pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5-KeyToReducer.gld URL: http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5-KeyToReducer.gld?rev=1784237&view=auto ============================================================================== --- pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5-KeyToReducer.gld (added) +++ pig/branches/spark/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-BloomJoin-5-KeyToReducer.gld Fri Feb 24 08:19:42 2017 @@ -0,0 +1,107 @@ +#-------------------------------------------------- +# There are 1 DAGs in the session +#-------------------------------------------------- +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-60 -> Tez vertex scope-61,Tez vertex scope-62, +Tez vertex scope-62 -> Tez vertex scope-54,Tez vertex scope-58, +Tez vertex scope-54 -> Tez vertex scope-58,Tez vertex scope-61, +Tez vertex scope-58 -> Tez vertex scope-61, +Tez vertex scope-61 + +Tez vertex scope-60 +# Plan on vertex +d: BuildBloom Rearrange[tuple]{int}(false) - scope-38 -> [ scope-61, scope-62] +| | +| Project[int][0] - scope-39 +| +|---b: New For Each(false,false)[bag] - scope-28 + | | + | Cast[int] - scope-23 + | | + | |---Project[bytearray][0] - scope-22 + | | + | Cast[int] - scope-26 + | | + | |---Project[bytearray][1] - scope-25 + | + |---b: Load(file:///tmp/input2:org.apache.pig.builtin.PigStorage) - scope-21 +Tez vertex scope-62 +# Combine plan on edge <scope-60> +Local Rearrange[tuple]{int}(false) - scope-67 -> scope-62 +| | +| Project[int][0] - scope-66 +| +|---Package(BloomPackager)[tuple]{int} - scope-65 +# Plan on vertex +POValueOutputTez - scope-64 -> [scope-54, scope-58] +| +|---Package(BloomPackager)[tuple]{int} - scope-63 +Tez vertex scope-54 +# Plan on vertex +a: Split - scope-68 +| | +| d: BloomFilter Rearrange[tuple]{int}(false) - scope-34 <- scope-62 -> scope-61 +| | | +| | Project[int][0] - scope-35 +| | +| |---a1: Filter[bag] - scope-11 +| | | +| | Equal To[boolean] - scope-14 +| | | +| | |---Project[int][0] - scope-12 +| | | +| | |---Constant(3) - scope-13 +| | +| POValueOutputTez - scope-55 -> [scope-58] +| +|---a: New For Each(false,false)[bag] - scope-7 + | | + | Cast[int] - scope-2 + | | + | |---Project[bytearray][0] - scope-1 + | | + | Cast[int] - scope-5 + | | + | |---Project[bytearray][1] - scope-4 + | + |---a: Load(file:///tmp/input1:org.apache.pig.builtin.PigStorage) - scope-0 +Tez vertex scope-58 +# Plan on vertex +d: BloomFilter Rearrange[tuple]{int}(false) - scope-36 <- scope-62 -> scope-61 +| | +| Project[int][0] - scope-37 +| +|---a2: Filter[bag] - scope-17 + | | + | Equal To[boolean] - scope-20 + | | + | |---Project[int][0] - scope-18 + | | + | |---Constant(4) - scope-19 + | + |---POValueInputTez - scope-59 <- scope-54 +Tez vertex scope-61 +# Plan on vertex +e: Store(file:///tmp/pigoutput:org.apache.pig.builtin.PigStorage) - scope-53 +| +|---e: New For Each(false,false,false,false)[bag] - scope-52 + | | + | Project[int][0] - scope-44 + | | + | Project[int][1] - scope-46 + | | + | Project[int][3] - scope-48 + | | + | Project[int][5] - scope-50 + | + |---d: New For Each(true,true,true)[tuple] - scope-43 + | | + | Project[bag][1] - scope-40 + | | + | Project[bag][2] - scope-41 + | | + | Project[bag][3] - scope-42 + | + |---d: Package(Packager)[tuple]{int} - scope-33