[
https://issues.apache.org/jira/browse/PIG-4276?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14504032#comment-14504032
]
liyunzhang_intel commented on PIG-4276:
---------------------------------------
[~mohitsabharwal]:
In other places, you introduce an ORDER BY command to sort the GROUP BY or
DISTINCT output. I don't think it is a good idea to change the original script. I
think the following is better (you can also ask Rohini or other Pig committers
for their suggestions):
{code}
/**
 * Runs a nested FOREACH plan (filter, two distincts and several aggregates) over
 * grouped input and checks selected fields of every output tuple.
 *
 * NOTE(review): the first assertion in the result loop compares the group key with
 * a running counter, so this version only passes when the groups are returned in
 * ascending key order (0, 1, 2, ...).
 *
 * Any exception raised while preparing the input or running the query propagates
 * to the caller.
 */
@Test
public void testNestedPlan() throws Exception{
int LOOP_COUNT = 10;
File tmpFile = Util.createTempFileDelOnExit("test", "txt");
PrintStream ps = new PrintStream(new FileOutputStream(tmpFile));
// Input data: for each i in [0, LOOP_COUNT) and each even j in [0, LOOP_COUNT),
// the row "i<TAB>j" is written twice, i.e. 5 distinct rows per i, each duplicated.
for(int i = 0; i < LOOP_COUNT; i++) {
for(int j=0;j<LOOP_COUNT;j+=2){
ps.println(i+"\t"+j);
ps.println(i+"\t"+j);
}
}
ps.close();
pigServer.registerQuery("A = LOAD '"
+ Util.generateURI(tmpFile.toString(), pigContext) + "';");
// Group on the first column (the i value written above).
pigServer.registerQuery("B = group A by $0;");
// Nested plan: C1 keeps rows with $0 > -1 (every row written above has i >= 0),
// C2 and C3 are the distinct rows of C1 and A; the generate emits seven fields.
// (The string literal below is wrapped across two lines by the mail formatting.)
String query = "C = foreach B {"
+ "C1 = filter A by $0 > -1;"
+ "C2 = distinct C1;"
+ "C3 = distinct A;"
+ "generate (int)group," + Identity.class.getName() +"(*), COUNT(C2),
SUM(C2.$1)," + TitleNGrams.class.getName() + "(C3), MAX(C3.$1), C2;"
+ "};";
pigServer.registerQuery(query);
Iterator<Tuple> iter = pigServer.openIterator("C");
if(!iter.hasNext()) Assert.fail("No output found");
// Counts the tuples seen so far; also used as the expected group key of the
// current tuple, which is what makes this test depend on output order.
int numIdentity = 0;
while(iter.hasNext()){
Tuple t = iter.next();
// Field 0: the group key, expected to equal the running counter.
Assert.assertEquals((Integer)numIdentity, (Integer)t.get(0));
// Field 2: COUNT(C2) -- 5 distinct rows per group.
Assert.assertEquals((Long)5L, (Long)t.get(2));
// Field 3: SUM(C2.$1) -- 0+2+4+6+8 = 20.0 = LOOP_COUNT*2.0.
Assert.assertEquals(LOOP_COUNT*2.0, (Double)t.get(3), 0.01);
// Field 5: MAX(C3.$1) -- the largest j written is 8.
Assert.assertEquals(8.0, (Double)t.get(5), 0.01);
// Field 6: the C2 bag itself, expected to hold 5 tuples.
Assert.assertEquals(5L, ((DataBag)t.get(6)).size());
// The generate clause lists seven expressions.
Assert.assertEquals(7, t.size());
++numIdentity;
}
// One output tuple per distinct i value.
Assert.assertEquals(LOOP_COUNT, numIdentity);
}
{code}
can be rewritten as:
{code}
@Test
public void testNestedPlan() throws Exception{
int LOOP_COUNT = 10;
File tmpFile = Util.createTempFileDelOnExit("test", "txt");
PrintStream ps = new PrintStream(new FileOutputStream(tmpFile));
for(int i = 0; i < LOOP_COUNT; i++) {
for(int j=0;j<LOOP_COUNT;j+=2){
ps.println(i+"\t"+j);
ps.println(i+"\t"+j);
}
}
ps.close();
pigServer.registerQuery("A = LOAD '"
+ Util.generateURI(tmpFile.toString(), pigContext) + "';");
pigServer.registerQuery("B = group A by $0;");
String query = "C = foreach B {"
+ "C1 = filter A by $0 > -1;"
+ "C2 = distinct C1;"
+ "C3 = distinct A;"
+ "generate (int)group," + Identity.class.getName() +"(*), COUNT(C2),
SUM(C2.$1)," + TitleNGrams.class.getName() + "(C3), MAX(C3.$1), C2;"
+ "};";
pigServer.registerQuery(query);
Iterator<Tuple> iter = pigServer.openIterator("C");
if(!iter.hasNext()) Assert.fail("No output found");
int numIdentity = 0;
List<String> expectedStrResults = new ArrayList<String>();
for(int i=0;i<LOOP_COUNT;i++){
StringBuilder sb = new StringBuilder();
sb.append("(").append(numIdentity).append(",");
sb.append("5L").append(",");
sb.append(Double.toString(LOOP_COUNT*2.0)).append(",");
sb.append("8.0").append(",");
sb.append("5L").append(",");
sb.append("7").append(")");
expectedStrResults.add(sb.toString());
++numIdentity;
}
List<Tuple> expectedResults =
Util.getTuplesFromConstantTupleStrings(expectedStrResults.toArray(new
String[0]));
List<Tuple> actualResults = new ArrayList<Tuple>();
while(iter.hasNext()){
Tuple t = iter.next();
Tuple actualTuple =
TupleFactory.getInstance().newTuple(6);
actualTuple.set(0,t.get(0));
actualTuple.set(1,(Long)t.get(2));
actualTuple.set(2,(Double)t.get(3));
actualTuple.set(3,(Double)t.get(5));
actualTuple.set(4,((DataBag)t.get(6)).size());
actualTuple.set(5,t.size());
actualResults.add(actualTuple);
}
Util.checkQueryOutputsAfterSort(actualResults.iterator(),
expectedResults);
Assert.assertEquals(LOOP_COUNT, numIdentity);
}
{code}
> Fix ordering related failures in TestEvalPipeline for Spark
> -----------------------------------------------------------
>
> Key: PIG-4276
> URL: https://issues.apache.org/jira/browse/PIG-4276
> Project: Pig
> Issue Type: Sub-task
> Components: spark
> Reporter: liyunzhang_intel
> Assignee: Mohit Sabharwal
> Fix For: spark-branch
>
> Attachments: PIG-4276.patch,
> TEST-org.apache.pig.test.TestEvalPipeline.txt
>
>
> error log is attached
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)