svn commit: r893259 - in /hadoop/pig/branches/branch-0.6: ./ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ src/org/apache/pig/impl/builtin/ test/org/apache/pig/test/

2009-12-22 Thread olga
Author: olga
Date: Tue Dec 22 18:22:20 2009
New Revision: 893259

URL: http://svn.apache.org/viewvc?rev=893259&view=rev
Log:
PIG-1143: Poisson Sample Loader should compute the number of samples required
only once (sriranjan via olgan)

Modified:
hadoop/pig/branches/branch-0.6/CHANGES.txt

hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java

hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java

hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SliceWrapper.java

hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java

hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPoissonSampleLoader.java
hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestSkewedJoin.java
hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/Util.java

Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/CHANGES.txt?rev=893259&r1=893258&r2=893259&view=diff
==
--- hadoop/pig/branches/branch-0.6/CHANGES.txt (original)
+++ hadoop/pig/branches/branch-0.6/CHANGES.txt Tue Dec 22 18:22:20 2009
@@ -141,6 +141,9 @@
 
 BUG FIXES
 
+PIG-1143: Poisson Sample Loader should compute the number of samples required
+only once (sriranjan via olgan)
+
 PIG-1135: skewed join partitioner returns negative partition index  (yinghe 
via olgan)
 
 PIG-1134: Skewed Join sampling job overwhelms the name node (sriranjan via 
olgan)

Modified: 
hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java?rev=893259&r1=893258&r2=893259&view=diff
==
--- 
hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
 (original)
+++ 
hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
 Tue Dec 22 18:22:20 2009
@@ -1401,6 +1401,7 @@
}

FileSpec fSpec = getTempFileSpec();
+   System.err.println(#...@#@ fSpec:  + 
fSpec.getFileName());
MapReduceOper mro = compiledInputs[0];
POStore str = getStore();
str.setSFile(fSpec);
@@ -1845,7 +1846,7 @@
String inputFile = lFile.getFileName();
 
return getSamplingJob(sort, prevJob, transformPlans, lFile, 
sampleFile, rp, null, 
-   
PartitionSkewedKeys.class.getName(), new String[]{per, mc, inputFile}, 
RandomSampleLoader.class.getName());
+   
PartitionSkewedKeys.class.getName(), new String[]{per, mc, inputFile}, 
PoissonSampleLoader.class.getName());
}catch(Exception e) {
throw new PlanException(e);
}

Modified: 
hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java?rev=893259&r1=893258&r2=893259&view=diff
==
--- 
hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java
 (original)
+++ 
hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java
 Tue Dec 22 18:22:20 2009
@@ -20,6 +20,7 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Properties;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -50,11 +51,14 @@
 import 
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.SliceWrapper;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.builtin.SampleLoader;
+import org.apache.pig.impl.io.FileLocalizer;
 import org.apache.pig.impl.io.FileSpec;
 import org.apache.pig.impl.io.ValidatingInputFileSpec;
 import org.apache.pig.impl.plan.OperatorKey;
 import org.apache.pig.impl.util.ObjectSerializer;
 import org.apache.pig.impl.util.Pair;
+import org.apache.pig.impl.util.UDFContext;
 
 public class PigInputFormat implements InputFormatText, Tuple,
JobConfigurable {
@@ -239,6 +243,20 @@
if ((spec.getSlicer() instanceof PigSlicer)) {


svn commit: r893279 - in /hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test: TestBatchAliases.java TestMultiQuery.java

2009-12-22 Thread pradeepkth
Author: pradeepkth
Date: Tue Dec 22 19:24:09 2009
New Revision: 893279

URL: http://svn.apache.org/viewvc?rev=893279&view=rev
Log:
simple fixes in unit tests failing after recent merge since file:filename is 
no longer supported in map reduce mode

Modified:

hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestBatchAliases.java

hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestMultiQuery.java

Modified: 
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestBatchAliases.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestBatchAliases.java?rev=893279&r1=893278&r2=893279&view=diff
==
--- 
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestBatchAliases.java
 (original)
+++ 
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestBatchAliases.java
 Tue Dec 22 19:24:09 2009
@@ -37,6 +37,7 @@
 
 private PigServer myPig;
 
+@Override
 @Before
 public void setUp() throws Exception {
 cluster.setProperty(opt.multiquery, +true);
@@ -44,21 +45,24 @@
 deleteOutputFiles();
 }
 
+@Override
 @After
 public void tearDown() throws Exception {
 myPig = null;
+Util.deleteFile(cluster, passwd);
 }
 
 @Test
-public void testBatchAliases() {
+public void testBatchAliases() throws IOException {
 
 // test case: key ('group') isn't part of foreach output
 // and keys have the same type.
-
+Util.copyFromLocalToCluster(cluster, 
+test/org/apache/pig/test/data/passwd, passwd);
 try {
 myPig.setBatchOn();
 
-myPig.registerQuery(a = load 
'file:test/org/apache/pig/test/data/passwd'  +
+myPig.registerQuery(a = load 'passwd'  +
 using PigStorage(':') as (uname:chararray, 
passwd:chararray, uid:int, gid:int););
 myPig.registerQuery(b = group a by uid;);
 myPig.registerQuery(c = group a by gid;);

Modified: 
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestMultiQuery.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestMultiQuery.java?rev=893279&r1=893278&r2=893279&view=diff
==
--- 
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestMultiQuery.java
 (original)
+++ 
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestMultiQuery.java
 Tue Dec 22 19:24:09 2009
@@ -235,11 +235,10 @@
 
 @Test
 public void testMultiQueryJiraPig1108() {
-
 try {
 myPig.setBatchOn();
 
-myPig.registerQuery(a = load 
'file:test/org/apache/pig/test/data/passwd'  
+myPig.registerQuery(a = load 'passwd'  
 + using PigStorage(':') as (uname:chararray, 
passwd:chararray, uid:int, gid:int););
 myPig.registerQuery(split a into plan1 if (uid  5), plan2 if ( 
uid  5););
 myPig.registerQuery(b = group plan1 by uname;);




[Pig Wiki] Update of Pig070IncompatibleChanges by OlgaN

2009-12-22 Thread Apache Wiki
Dear Wiki user,

You have subscribed to a wiki page or wiki category on Pig Wiki for change 
notification.

The Pig070IncompatibleChanges page has been changed by OlgaN.
http://wiki.apache.org/pig/Pig070IncompatibleChanges?action=diff&rev1=7&rev2=8

--

  
  We will have a different approach for streaming optimization if that 
functionality is necessary.
  
+ == Access to Local Files from Map-Reduce Mode ==
+ 
+ In earlier versions of Pig, you could access a local file from map-reduce 
mode by prepending file: to the file location:
+ 
+ {{{
+ A = load 'file:/mydir/myfile';
+ ...
+ }}}
+ 
+ When Pig processed this statement, it would first copy the data to DFS and 
then import it into the execution pipeline.
+ 
+ In Pig 0.7.0, this is no longer supported. If you still need this 
functionality, add the copy step to your script manually:
+ 
+ {{{
+ fs -copyFromLocal src dist
+ A = load 'dist';
+ 
+ }}}
+ 
  == Open Questions ==
  
  Q: Should String-Text conversion be part of this release?