Author: pradeepkth
Date: Mon Dec 21 20:05:34 2009
New Revision: 892954

URL: http://svn.apache.org/viewvc?rev=892954&view=rev
Log:
PIG-1149: Allow instantiation of SampleLoaders with parametrized LoadFuncs
(dvryaboy via pradeepkth)

Modified:
    hadoop/pig/branches/load-store-redesign/CHANGES.txt
    hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/SampleLoader.java
    hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestPoissonSampleLoader.java

Modified: hadoop/pig/branches/load-store-redesign/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/CHANGES.txt?rev=892954&r1=892953&r2=892954&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/CHANGES.txt (original)
+++ hadoop/pig/branches/load-store-redesign/CHANGES.txt Mon Dec 21 20:05:34 2009
@@ -45,6 +45,9 @@
 
 IMPROVEMENTS
 
+PIG-1149: Allow instantiation of SampleLoaders with parametrized LoadFuncs
+(dvryaboy via pradeepkth)
+
 PIG-1162: Pig 0.6.0 - UDF doc (chandec via olgan)
 
 PIG-1163: Pig/Zebra 0.6.0 release (chandec via olgan)

Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/SampleLoader.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/SampleLoader.java?rev=892954&r1=892953&r2=892954&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/SampleLoader.java (original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/SampleLoader.java Mon Dec 21 20:05:34 2009
@@ -40,24 +40,25 @@
 
     // number of samples to be sampled
     protected int numSamples;
-    
+
     protected LoadFunc loader;
-    
+
     // RecordReader used by the underlying loader
     private RecordReader<?, ?> recordReader= null;
-    
+
     public SampleLoader(String funcSpec) {
-       loader = (LoadFunc)PigContext.instantiateFuncFromSpec(funcSpec);
+        funcSpec = funcSpec.replaceAll("\\\\'", "'");
+        loader = (LoadFunc)PigContext.instantiateFuncFromSpec(funcSpec);
     }
-    
+
     public void setNumSamples(int n) {
-       numSamples = n;
+        numSamples = n;
     }
-    
+
     public int getNumSamples() {
-       return numSamples;
+        return numSamples;
     }
-    
+
     @Override
     public InputFormat<?,?> getInputFormat() throws IOException {
         return loader.getInputFormat();
@@ -70,22 +71,22 @@
             throw new IOException("Error getting input",e);
         }
     }
-    
+
     public void computeSamples(ArrayList<Pair<FileSpec, Boolean>> inputs, 
             PigContext pc) throws ExecException {
     }
-    
+
     @Override
     public LoadCaster getLoadCaster() throws IOException {
         return loader.getLoadCaster();
     }
-    
+
     @Override
     public String relativeToAbsolutePath(String location, Path curDir)
-            throws IOException {
+    throws IOException {
         return loader.relativeToAbsolutePath(location, curDir);
     }
-    
+
     @Override
     public void prepareToRead(RecordReader reader, PigSplit split) throws 
IOException {
         loader.prepareToRead(reader, split);

Modified: hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestPoissonSampleLoader.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestPoissonSampleLoader.java?rev=892954&r1=892953&r2=892954&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestPoissonSampleLoader.java (original)
+++ hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestPoissonSampleLoader.java Mon Dec 21 20:05:34 2009
@@ -19,27 +19,22 @@
 
 
 import java.io.*;
-import java.util.Iterator;
 import java.util.ArrayList;
+import java.util.Iterator;
 
-import junit.framework.Assert;
 import junit.framework.TestCase;
 
-import org.apache.pig.EvalFunc;
 import org.apache.pig.ExecType;
 import org.apache.pig.FuncSpec;
 import org.apache.pig.PigServer;
 import org.apache.pig.backend.executionengine.ExecException;
-import org.apache.pig.builtin.BinStorage;
 import org.apache.pig.builtin.PigStorage;
-import org.apache.pig.data.BagFactory;
-import org.apache.pig.data.DataBag;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.builtin.PoissonSampleLoader;
 import org.apache.pig.impl.util.Pair;
-import org.apache.pig.test.utils.TestHelper;
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Test;
 import org.apache.pig.impl.io.FileSpec;
 
 
@@ -48,64 +43,85 @@
 
     private PigServer pigServer;
     private MiniCluster cluster = MiniCluster.buildCluster();
-    
+
     public TestPoissonSampleLoader() throws ExecException, IOException{
-        pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
+        pigServer = new PigServer(ExecType.LOCAL, cluster.getProperties());
         
pigServer.getPigContext().getProperties().setProperty("pig.skewedjoin.reduce.maxtuple",
 "5");     
         
pigServer.getPigContext().getProperties().setProperty("pig.skewedjoin.reduce.memusage",
 "0.0001");
         
pigServer.getPigContext().getProperties().setProperty("mapred.child.java.opts", 
"-Xmx512m");
 
         
pigServer.getPigContext().getProperties().setProperty("pig.mapsplits.count", 
"5");
     }
-    
-    
+
+
     @Before
     public void setUp() throws Exception {
         createFiles();
     }
 
     private void createFiles() throws IOException {
-       PrintWriter w = new PrintWriter(new FileWriter(INPUT_FILE1));
-               
-       int k = 0;
-       for(int j=0; j<100; j++) {                              
-               w.println("100\tapple1\taaa" + k);
-           k++;
-           w.println("200\torange1\tbbb" + k);
-           k++;
-           w.println("300\tstrawberry\tccc" + k);
-           k++;                            
-       }
-       
-       w.close();
-       
-       Util.copyFromLocalToCluster(cluster, INPUT_FILE1, INPUT_FILE1);
+        PrintWriter w = new PrintWriter(new FileWriter(INPUT_FILE1));
+
+        int k = 0;
+        for(int j=0; j<100; j++) {
+            w.println("100:apple1:aaa" + k);
+            k++;
+            w.println("200:orange1:bbb" + k);
+            k++;
+            w.println("300:strawberry:ccc" + k);
+            k++;                           
+        }
+
+        w.close();
+
+        Util.copyFromLocalToCluster(cluster, INPUT_FILE1, INPUT_FILE1);
     }
-    
-    
+
+
     @After
     public void tearDown() throws Exception {
-       new File(INPUT_FILE1).delete();
-       
+        new File(INPUT_FILE1).delete();
+
         Util.deleteFile(cluster, INPUT_FILE1);
     }
-    
-    
+
+    @Test
     public void testComputeSamples() throws IOException{
-               FileSpec fs = new FileSpec(INPUT_FILE1, new 
FuncSpec(PigStorage.class.getName()));
-               
-               ArrayList<Pair<FileSpec, Boolean>> inputs = new 
ArrayList<Pair<FileSpec, Boolean> >();
-               inputs.add(new Pair<FileSpec, Boolean>(fs, true));
-               
+        FileSpec fs = new FileSpec(INPUT_FILE1, new 
FuncSpec(PigStorage.class.getName()));
+
+        ArrayList<Pair<FileSpec, Boolean>> inputs = new 
ArrayList<Pair<FileSpec, Boolean> >();
+        inputs.add(new Pair<FileSpec, Boolean>(fs, true));
+
         // Use 100 as a default value;
         PoissonSampleLoader ps = new PoissonSampleLoader((new 
FuncSpec(PigStorage.class.getName())).toString(), "100");
 
         // Get the number of samples for the file
         ps.computeSamples(inputs, pigServer.getPigContext());
-        
+
         if (ps.getNumSamples() != 3) {
-               fail("Compute samples returned the wrong number of samples");
+            fail("Compute samples returned the wrong number of samples");
         }
     }
-       
+
+    /*
+     * FIXME This currently tests for 5 elements because PoissonSampleLoader
+     * only produces a single sample for the test data, and the last sample has
+     * extra information appended in PoissonSampleLoader. 
+     * 
+     * This is incorrect. The proper number of samples should be > 1, and therefore
+     * the first sample should only have 3 elements.
+     * 
+     * See PIG-1062 and PIG-1149 for more information.
+     * 
+     */
+    @Test
+    public void testInstantiation() throws IOException {
+        pigServer.registerQuery("A = Load '"+INPUT_FILE1+"' Using 
PoissonSampleLoader('PigStorage(\\\\\\':\\\\\\')', '100');");
+        Iterator<Tuple> iter = pigServer.openIterator("A");
+        assertTrue(iter.hasNext());
+        assertEquals(5, iter.next().size());
+    }
+
+
+
 }
\ No newline at end of file


Reply via email to