Author: olga
Date: Mon Dec 21 20:42:35 2009
New Revision: 892972
URL: http://svn.apache.org/viewvc?rev=892972&view=rev
Log:
PIG-1159: merge join right side table does not support comma seperated paths
(rding via olgan)
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/IndexableLoadFunc.java
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java
hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=892972&r1=892971&r2=892972&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Mon Dec 21 20:42:35 2009
@@ -68,6 +68,9 @@
BUG FIXES
+PIG-1159: merge join right side table does not support comma seperated paths
+(rding via olgan)
+
PIG-1158: pig command line -M option doesn't support table union correctly
(comma seperated paths) (rding via olgan)
Modified: hadoop/pig/trunk/src/org/apache/pig/IndexableLoadFunc.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/IndexableLoadFunc.java?rev=892972&r1=892971&r2=892972&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/IndexableLoadFunc.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/IndexableLoadFunc.java Mon Dec 21 20:42:35 2009
@@ -33,9 +33,10 @@
* The sequence of calls made from the pig runtime are:
*
* {@link IndexableLoadFunc#initialize(Configuration)}
- * IndexableLoadFunc.bindTo(filename, bufferedPositionedInputStream, 0, LONG.MAX_VALUE);
- * (the bufferedPositionedInputStream is a decorator around the underlying
- * DFS input stream)
+ * IndexableLoadFunc.bindTo(filename, null, 0, LONG.MAX_VALUE);
+ * (it's the IndexableLoad's responsibility to create the underlying
+ * DFS input stream since indexer should have all the information
+ * required)
* IndexableLoadFunc.seekNear(keys);
* A series of IndexableLoadFunc.getNext(); calls to perform the join
* IndexableLoadFunc.close();
Modified:
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java?rev=892972&r1=892971&r2=892972&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java Mon Dec 21 20:42:35 2009
@@ -395,14 +395,17 @@
rightLoader =
(IndexableLoadFunc)PigContext.instantiateFuncFromSpec(rightLoaderFuncSpec);
pc =
(PigContext)ObjectSerializer.deserialize(PigMapReduce.sJobConf.get("pig.pigContext"));
pc.connect();
- InputStream is = FileLocalizer.open(rightInputFileName, pc);
+
// Pass signature of the loader to rightLoader
PigMapReduce.sJobConf.set("pig.loader.signature", signature);
rightLoader.initialize(PigMapReduce.sJobConf);
- // the main purpose of this bindTo call is supply the input file name
+
+ // the purpose of this bindTo call is supply the input file name
// to the right loader - in the case of Pig's DefaultIndexableLoader
- // this is really not used since the index has all information required
- rightLoader.bindTo(rightInputFileName, new BufferedPositionedInputStream(is), 0, Long.MAX_VALUE);
+ // this is really not used since the index has all information required.
+ // It's responsibility of the right loader to create InputStream from which
+ // it reads data.
+ rightLoader.bindTo(rightInputFileName, null, 0, Long.MAX_VALUE);
rightLoader.seekNear(
firstLeftKey instanceof Tuple ? (Tuple)firstLeftKey :
mTupleFactory.newTuple(firstLeftKey));
}
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java?rev=892972&r1=892971&r2=892972&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java Mon Dec 21 20:42:35 2009
@@ -540,6 +540,18 @@
Assert.assertTrue(shjSch == null);
}
+ @Test
+ public void testMergeJoinWithCommaSeparatedFilePaths() throws IOException{
+
+ pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "';");
+ pigServer.registerQuery("B = LOAD 'temp_file,righinput_file' using " +
+ DummyIndexableLoader.class.getName() + "();");
+
+ pigServer.registerQuery("C = join A by $0, B by $0 using \"merge\";");
+
+ Iterator<Tuple> iter = pigServer.openIterator("C");
+ Assert.assertFalse(iter.hasNext());
+ }
/**
* A dummy loader which implements {@link IndexableLoadFunc} to test