[ 
https://issues.apache.org/jira/browse/PIG-4276?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14504032#comment-14504032
 ] 

liyunzhang_intel commented on PIG-4276:
---------------------------------------

[~mohitsabharwal]:
In other places, you introduce an ORDER BY command to sort the GROUP BY or 
DISTINCT output. I don't think it is a good idea to change the original script. I 
think the following is better (you can also ask Rohini or other Pig committers 
for their suggestions):
{code}
 @Test
    public void testNestedPlan() throws Exception{
        int LOOP_COUNT = 10;
        File tmpFile = Util.createTempFileDelOnExit("test", "txt");
        PrintStream ps = new PrintStream(new FileOutputStream(tmpFile));
        for(int i = 0; i < LOOP_COUNT; i++) {
            for(int j=0;j<LOOP_COUNT;j+=2){
                ps.println(i+"\t"+j);
                ps.println(i+"\t"+j);
            }
        }
        ps.close();

        pigServer.registerQuery("A = LOAD '"
                + Util.generateURI(tmpFile.toString(), pigContext) + "';");
        pigServer.registerQuery("B = group A by $0;");
        String query = "C = foreach B {"
        + "C1 = filter A by $0 > -1;"
        + "C2 = distinct C1;"
        + "C3 = distinct A;"
        + "generate (int)group," + Identity.class.getName() +"(*), COUNT(C2), 
SUM(C2.$1)," +  TitleNGrams.class.getName() + "(C3), MAX(C3.$1), C2;"
        + "};";

        pigServer.registerQuery(query);
        Iterator<Tuple> iter = pigServer.openIterator("C");
        if(!iter.hasNext()) Assert.fail("No output found");
        int numIdentity = 0;
        while(iter.hasNext()){
            Tuple t = iter.next();
            Assert.assertEquals((Integer)numIdentity, (Integer)t.get(0));
            Assert.assertEquals((Long)5L, (Long)t.get(2));
            Assert.assertEquals(LOOP_COUNT*2.0, (Double)t.get(3), 0.01);
            Assert.assertEquals(8.0, (Double)t.get(5), 0.01);
            Assert.assertEquals(5L, ((DataBag)t.get(6)).size());
            Assert.assertEquals(7, t.size());
            ++numIdentity;
        }
        Assert.assertEquals(LOOP_COUNT, numIdentity);
    }
{code}

can be rewritten as:
{code}
@Test
    public void testNestedPlan() throws Exception{
        int LOOP_COUNT = 10;
        File tmpFile = Util.createTempFileDelOnExit("test", "txt");
        PrintStream ps = new PrintStream(new FileOutputStream(tmpFile));
        for(int i = 0; i < LOOP_COUNT; i++) {
            for(int j=0;j<LOOP_COUNT;j+=2){
                ps.println(i+"\t"+j);
                ps.println(i+"\t"+j);
            }
        }
        ps.close();
        pigServer.registerQuery("A = LOAD '"
                + Util.generateURI(tmpFile.toString(), pigContext) + "';");
        pigServer.registerQuery("B = group A by $0;");
        String query = "C = foreach B {"
        + "C1 = filter A by $0 > -1;"
        + "C2 = distinct C1;"
        + "C3 = distinct A;"
        + "generate (int)group," + Identity.class.getName() +"(*), COUNT(C2), 
SUM(C2.$1)," +  TitleNGrams.class.getName() + "(C3), MAX(C3.$1), C2;"
        + "};";

        pigServer.registerQuery(query);
        Iterator<Tuple> iter = pigServer.openIterator("C");
        if(!iter.hasNext()) Assert.fail("No output found");
        int numIdentity = 0;
        List<String> expectedStrResults = new ArrayList<String>();
        for(int i=0;i<LOOP_COUNT;i++){
            StringBuilder sb = new StringBuilder();
            sb.append("(").append(numIdentity).append(",");
            sb.append("5L").append(",");
            sb.append(Double.toString(LOOP_COUNT*2.0)).append(",");
            sb.append("8.0").append(",");
            sb.append("5L").append(",");
            sb.append("7").append(")");
            expectedStrResults.add(sb.toString());
            ++numIdentity;
        }

        List<Tuple> expectedResults = 
Util.getTuplesFromConstantTupleStrings(expectedStrResults.toArray(new 
String[0]));
        List<Tuple> actualResults = new ArrayList<Tuple>();
                while(iter.hasNext()){
                        Tuple t = iter.next();
                        Tuple actualTuple = 
TupleFactory.getInstance().newTuple(6);
                        actualTuple.set(0,t.get(0));
                        actualTuple.set(1,(Long)t.get(2));
                        actualTuple.set(2,(Double)t.get(3));
                        actualTuple.set(3,(Double)t.get(5));
                        actualTuple.set(4,((DataBag)t.get(6)).size());
                        actualTuple.set(5,t.size());
                        actualResults.add(actualTuple);
                }
      Util.checkQueryOutputsAfterSort(actualResults.iterator(), 
expectedResults);
      Assert.assertEquals(LOOP_COUNT, numIdentity);
    }
{code}


> Fix ordering related failures in TestEvalPipeline for Spark
> -----------------------------------------------------------
>
>                 Key: PIG-4276
>                 URL: https://issues.apache.org/jira/browse/PIG-4276
>             Project: Pig
>          Issue Type: Sub-task
>          Components: spark
>            Reporter: liyunzhang_intel
>            Assignee: Mohit Sabharwal
>             Fix For: spark-branch
>
>         Attachments: PIG-4276.patch, 
> TEST-org.apache.pig.test.TestEvalPipeline.txt
>
>
> error log is attached



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to