[
https://issues.apache.org/jira/browse/PIG-4166?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14188590#comment-14188590
]
Brian Johnson commented on PIG-4166:
------------------------------------
testMapsideGroupByOneColumn fails with the following stack trace:
```
org.apache.pig.impl.logicalLayer.FrontendException: ERROR 1066: Unable to open iterator for alias C
at org.apache.pig.PigServer.openIterator(PigServer.java:923)
at org.apache.pig.test.TestCollectedGroup.testMapsideGroupByOneColumn(TestCollectedGroup.java:199)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:271)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:70)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:50)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:238)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:63)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:236)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:53)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:229)
at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
at org.junit.runners.ParentRunner.run(ParentRunner.java:309)
at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:50)
at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:467)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:683)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:390)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:197)
Caused by: org.apache.pig.PigException: ERROR 1002: Unable to store alias C
at org.apache.pig.PigServer.storeEx(PigServer.java:1026)
at org.apache.pig.PigServer.store(PigServer.java:985)
at org.apache.pig.PigServer.openIterator(PigServer.java:898)
... 27 more
Caused by: org.apache.pig.backend.hadoop.executionengine.JobCreationException: ERROR 2017: Internal error creating job configuration.
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.getJob(JobControlCompiler.java:998)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.compile(JobControlCompiler.java:323)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher.launchPig(MapReduceLauncher.java:196)
at org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.launchPig(HExecutionEngine.java:280)
at org.apache.pig.PigServer.launchPlan(PigServer.java:1378)
at org.apache.pig.PigServer.executeCompiledLogicalPlan(PigServer.java:1363)
at org.apache.pig.PigServer.storeEx(PigServer.java:1022)
... 29 more
Caused by: java.lang.NullPointerException
at java.io.File.<init>(File.java:277)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler.getJob(JobControlCompiler.java:628)
... 35 more
```
> Collected group drops last record when combined with merge join
> ---------------------------------------------------------------
>
> Key: PIG-4166
> URL: https://issues.apache.org/jira/browse/PIG-4166
> Project: Pig
> Issue Type: Bug
> Affects Versions: 0.12.0
> Reporter: Brian Johnson
> Fix For: 0.15.0
>
>
> If the final two keys in each relation join, they will never make it to the
> final output. The reason is that POMergeJoin does a read-ahead and
> POCollectedGroup doesn't call processInput when this.parentPlan.endOfAllInput
> == true. This prevents the final join from being output because POMergeJoin
> never sees endOfAllInput == true.
> {code}
> diff --git a/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POCollectedGroup.java b/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POCollectedGroup.java
> index c355d1d..8fd44fa 100644
> --- a/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POCollectedGroup.java
> +++ b/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POCollectedGroup.java
> @@ -127,28 +127,30 @@ public class POCollectedGroup extends PhysicalOperator {
> @Override
> public Result getNextTuple() throws ExecException {
>
> - // Since the output is buffered, we need to flush the last
> - // set of records when the close method is called by mapper.
> - if (this.parentPlan.endOfAllInput) {
> - if (outputBag != null) {
> - Tuple tup = mTupleFactory.newTuple(2);
> - tup.set(0, prevKey);
> - tup.set(1, outputBag);
> - outputBag = null;
> - return new Result(POStatus.STATUS_OK, tup);
> - }
> -
> - return new Result(POStatus.STATUS_EOP, null);
> - }
> +
>
> Result inp = null;
> Result res = null;
>
> while (true) {
> inp = processInput();
> +
> if (inp.returnStatus == POStatus.STATUS_EOP ||
> inp.returnStatus == POStatus.STATUS_ERR) {
> - break;
> + // Since the output is buffered, we need to flush the last
> + // set of records when the close method is called by mapper.
> + if (this.parentPlan.endOfAllInput) {
> + if (outputBag != null) {
> + Tuple tup = mTupleFactory.newTuple(2);
> + tup.set(0, prevKey);
> + tup.set(1, outputBag);
> + outputBag = null;
> + return new Result(POStatus.STATUS_OK, tup);
> + }
> +
> + return new Result(POStatus.STATUS_EOP, null);
> + } else
> + break;
> }
>
> if (inp.returnStatus == POStatus.STATUS_NULL) {
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)