Author: olga Date: Mon Dec 21 20:42:35 2009 New Revision: 892972 URL: http://svn.apache.org/viewvc?rev=892972&view=rev Log: PIG-1159: merge join right side table does not support comma separated paths (rding via olgan)
Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/IndexableLoadFunc.java hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=892972&r1=892971&r2=892972&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Mon Dec 21 20:42:35 2009 @@ -68,6 +68,9 @@ BUG FIXES +PIG-1159: merge join right side table does not support comma seperated paths +(rding via olgan) + PIG-1158: pig command line -M option doesn't support table union correctly (comma seperated paths) (rding via olgan) Modified: hadoop/pig/trunk/src/org/apache/pig/IndexableLoadFunc.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/IndexableLoadFunc.java?rev=892972&r1=892971&r2=892972&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/IndexableLoadFunc.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/IndexableLoadFunc.java Mon Dec 21 20:42:35 2009 @@ -33,9 +33,10 @@ * The sequence of calls made from the pig runtime are: * * {@link IndexableLoadFunc#initialize(Configuration)} - * IndexableLoadFunc.bindTo(filename, bufferedPositionedInputStream, 0, LONG.MAX_VALUE); - * (the bufferedPositionedInputStream is a decorator around the underlying - * DFS input stream) + * IndexableLoadFunc.bindTo(filename, null, 0, LONG.MAX_VALUE); + * (it's the IndexableLoad's responsibility to create the underlying + * DFS input stream since indexer should have all the information + * required) * IndexableLoadFunc.seekNear(keys); * A series of IndexableLoadFunc.getNext(); calls to perform the join * IndexableLoadFunc.close(); Modified: 
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java?rev=892972&r1=892971&r2=892972&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java Mon Dec 21 20:42:35 2009 @@ -395,14 +395,17 @@ rightLoader = (IndexableLoadFunc)PigContext.instantiateFuncFromSpec(rightLoaderFuncSpec); pc = (PigContext)ObjectSerializer.deserialize(PigMapReduce.sJobConf.get("pig.pigContext")); pc.connect(); - InputStream is = FileLocalizer.open(rightInputFileName, pc); + // Pass signature of the loader to rightLoader PigMapReduce.sJobConf.set("pig.loader.signature", signature); rightLoader.initialize(PigMapReduce.sJobConf); - // the main purpose of this bindTo call is supply the input file name + + // the purpose of this bindTo call is supply the input file name // to the right loader - in the case of Pig's DefaultIndexableLoader - // this is really not used since the index has all information required - rightLoader.bindTo(rightInputFileName, new BufferedPositionedInputStream(is), 0, Long.MAX_VALUE); + // this is really not used since the index has all information required. + // It's responsibility of the right loader to create InputStream from which + // it reads data. + rightLoader.bindTo(rightInputFileName, null, 0, Long.MAX_VALUE); rightLoader.seekNear( firstLeftKey instanceof Tuple ? 
(Tuple)firstLeftKey : mTupleFactory.newTuple(firstLeftKey)); } Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java?rev=892972&r1=892971&r2=892972&view=diff ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java Mon Dec 21 20:42:35 2009 @@ -540,6 +540,18 @@ Assert.assertTrue(shjSch == null); } + @Test + public void testMergeJoinWithCommaSeparatedFilePaths() throws IOException{ + + pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "';"); + pigServer.registerQuery("B = LOAD 'temp_file,righinput_file' using " + + DummyIndexableLoader.class.getName() + "();"); + + pigServer.registerQuery("C = join A by $0, B by $0 using \"merge\";"); + + Iterator<Tuple> iter = pigServer.openIterator("C"); + Assert.assertFalse(iter.hasNext()); + } /** * A dummy loader which implements {@link IndexableLoadFunc} to test