Author: olga
Date: Fri May 23 15:18:16 2008
New Revision: 659689

URL: http://svn.apache.org/viewvc?rev=659689&view=rev
Log:
PIG-85: allowing control characters as field delimiters in PigStorage

Modified:
    incubator/pig/trunk/CHANGES.txt
    
incubator/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapreduceExec/MapReduceLauncher.java
    
incubator/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapreduceExec/PigOutputFormat.java
    incubator/pig/trunk/src/org/apache/pig/data/Tuple.java
    
incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
    incubator/pig/trunk/test/org/apache/pig/test/TestStore.java

Modified: incubator/pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/incubator/pig/trunk/CHANGES.txt?rev=659689&r1=659688&r2=659689&view=diff
==============================================================================
--- incubator/pig/trunk/CHANGES.txt (original)
+++ incubator/pig/trunk/CHANGES.txt Fri May 23 15:18:16 2008
@@ -297,3 +297,5 @@
     PIG-236: Fix properties so that values specified via the command line (-D) are not ignored (pkamath via gates).
 
     PIG-198: integration with hadoop 17
+
+    PIG-85: allowing control characters as delimiters for PigStorage

Modified: 
incubator/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapreduceExec/MapReduceLauncher.java
URL: 
http://svn.apache.org/viewvc/incubator/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapreduceExec/MapReduceLauncher.java?rev=659689&r1=659688&r2=659689&view=diff
==============================================================================
--- 
incubator/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapreduceExec/MapReduceLauncher.java
 (original)
+++ 
incubator/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapreduceExec/MapReduceLauncher.java
 Fri May 23 15:18:16 2008
@@ -216,7 +216,8 @@
             conf.set("pig.inputs", ObjectSerializer.serialize(pom.inputFileSpecs));
             
             conf.setOutputPath(new Path(pom.outputFileSpec.getFileName()));
-            conf.set("pig.storeFunc", pom.outputFileSpec.getFuncSpec());
+            conf.set("pig.storeFunc",
+                     ObjectSerializer.serialize(pom.outputFileSpec.getFuncSpec()));
 
             // Setup the DistributedCache for this job
             setupDistributedCache(pom.pigContext, conf, pom.properties, 

Modified: 
incubator/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapreduceExec/PigOutputFormat.java
URL: 
http://svn.apache.org/viewvc/incubator/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapreduceExec/PigOutputFormat.java?rev=659689&r1=659688&r2=659689&view=diff
==============================================================================
--- 
incubator/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapreduceExec/PigOutputFormat.java
 (original)
+++ 
incubator/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapreduceExec/PigOutputFormat.java
 Fri May 23 15:18:16 2008
@@ -35,6 +35,7 @@
 import org.apache.pig.builtin.PigStorage;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.util.ObjectSerializer;
 import org.apache.tools.bzip2r.BZip2Constants;
 import org.apache.tools.bzip2r.CBZip2OutputStream;
 
@@ -51,7 +52,7 @@
     public PigRecordWriter getRecordWriter(FileSystem fs, JobConf job, Path outputDir, String name, Progressable progress)
             throws IOException {
         StoreFunc store;
-        String storeFunc = job.get("pig.storeFunc", "");
+        String storeFunc = (String) ObjectSerializer.deserialize(job.get("pig.storeFunc", "")) ;
         if (storeFunc.length() == 0) {
             store = new PigStorage();
         } else {

Modified: incubator/pig/trunk/src/org/apache/pig/data/Tuple.java
URL: 
http://svn.apache.org/viewvc/incubator/pig/trunk/src/org/apache/pig/data/Tuple.java?rev=659689&r1=659688&r2=659689&view=diff
==============================================================================
--- incubator/pig/trunk/src/org/apache/pig/data/Tuple.java (original)
+++ incubator/pig/trunk/src/org/apache/pig/data/Tuple.java Fri May 23 15:18:16 
2008
@@ -69,18 +69,32 @@
      * 
      * @param textLine
      *            the line containing fields of data
-     * @param delimiter
-     *            a regular expression of the form specified by String.split(). If null, the default
-     *            delimiter "[,\t]" will be used.
+     * @param delimiter 
+     *              the delimiter (normal string, NO REGEX!!)
      */
     public Tuple(String textLine, String delimiter) {
         if (delimiter == null) {
             delimiter = defaultDelimiter;
         }
-        String[] splitString = textLine.split(delimiter, -1);
-        fields = new ArrayList<Datum>(splitString.length);
-        for (int i = 0; i < splitString.length; i++) {
-            fields.add(new DataAtom(splitString[i]));
+        
+        fields = new ArrayList<Datum>() ;
+        int delimSize = delimiter.length() ;
+        boolean done = false ;
+        
+        int lastIdx = 0 ;
+        
+        while (!done) {
+            int newIdx = textLine.indexOf(delimiter, lastIdx) ;
+            if (newIdx != (-1)) {
+                String token = textLine.substring(lastIdx, newIdx) ;
+                fields.add(new DataAtom(token));
+                lastIdx = newIdx + delimSize  ;
+            }
+            else {
+                String token = textLine.substring(lastIdx) ;
+                fields.add(new DataAtom(token));
+                done = true ;
+            }
         }
     }
 

Modified: 
incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
URL: 
http://svn.apache.org/viewvc/incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=659689&r1=659688&r2=659689&view=diff
==============================================================================
--- 
incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt 
(original)
+++ 
incubator/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt 
Fri May 23 15:18:16 2008
@@ -727,14 +727,14 @@
        (
        LOOKAHEAD(2)
        (
-       t = <QUOTEDSTRING> {sb.append(t.image);}
+       t = <QUOTEDSTRING> {sb.append(StringUtils.unescapeInputString(t.image));}
        | 
     t = <NUMBER> {sb.append(t.image);}
     )
     ( 
         "," 
         (
-        t = <QUOTEDSTRING> {sb.append(t.image);}
+        t = <QUOTEDSTRING> {sb.append(StringUtils.unescapeInputString(t.image));}
         | 
         t = <NUMBER> {sb.append(t.image);}
         )

Modified: incubator/pig/trunk/test/org/apache/pig/test/TestStore.java
URL: 
http://svn.apache.org/viewvc/incubator/pig/trunk/test/org/apache/pig/test/TestStore.java?rev=659689&r1=659688&r2=659689&view=diff
==============================================================================
--- incubator/pig/trunk/test/org/apache/pig/test/TestStore.java (original)
+++ incubator/pig/trunk/test/org/apache/pig/test/TestStore.java Fri May 23 
15:18:16 2008
@@ -110,5 +110,23 @@
                tmpFile2 = "'" + FileLocalizer.getTemporaryPath(null, pigServer.getPigContext()).toString() + "'";
                f.delete();
        }
-       
-}
+
+
+    public void testDelimiter() throws IOException{
+        System.out.println("Temp files: " + tmpFile1 + ", " + tmpFile2);
+        pigServer.registerQuery("A = load " + fileName + ";");
+        pigServer.store("A", tmpFile1, "PigStorage('\u0001')");
+        pigServer.registerQuery("B = load " + tmpFile1 + "using PigStorage('\\u0001') ;");
+        pigServer.registerQuery("C = foreach B generate $0, $1;");
+        pigServer.store("C", tmpFile2);
+        pigServer.registerQuery("E = load " + tmpFile2 + ";");
+        Iterator<Tuple> iter = pigServer.openIterator("E");
+        int i =0;
+        while (iter.hasNext()) {
+            Tuple t = iter.next();
+            assertEquals(t.getAtomField(0).numval().intValue(),i);
+            assertEquals(t.getAtomField(1).numval().intValue(),i); i++; 
+        }
+    }
+
+ }


Reply via email to