Author: daijy
Date: Tue Jan  6 01:06:26 2015
New Revision: 1649702

URL: http://svn.apache.org/r1649702
Log:
PIG-4213: CSVExcelStorage not quoting texts containing \r (CR) when storing

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
    pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java

Modified: pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1649702&r1=1649701&r2=1649702&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Tue Jan  6 01:06:26 2015
@@ -30,6 +30,8 @@ PIG-4333: Split BigData tests into multi
  
 BUG FIXES
 
+PIG-4213: CSVExcelStorage not quoting texts containing \r (CR) when storing (alfonso.nishikawa via daijy)
+
 PIG-2647: Split Combining drops splits with empty getLocations() (tmwoodruff 
via daijy)
 
 PIG-4294: Enable unit test "TestNestedForeach" for spark (kellyzly via rohini)

Modified: 
pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java?rev=1649702&r1=1649701&r2=1649702&view=diff
==============================================================================
--- 
pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
 (original)
+++ 
pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
 Tue Jan  6 01:06:26 2015
@@ -103,6 +103,7 @@ public class CSVExcelStorage extends Pig
     public static enum Headers { DEFAULT, READ_INPUT_HEADER, 
SKIP_INPUT_HEADER, WRITE_OUTPUT_HEADER, SKIP_OUTPUT_HEADER }
 
     protected final static byte LINEFEED = '\n';
+    protected final static byte CARRIAGE_RETURN = '\r';
     protected final static byte DOUBLE_QUOTE = '"';
     protected final static byte RECORD_DEL = LINEFEED;
 
@@ -293,6 +294,7 @@ public class CSVExcelStorage extends Pig
 
         ArrayList<Object> mProtoTuple = new ArrayList<Object>();
         int embeddedNewlineIndex = -1;
+        int embeddedCarriageReturn = -1;
         String fieldStr = null;
         // For good debug messages:
         int fieldCounter = -1;
@@ -315,13 +317,15 @@ public class CSVExcelStorage extends Pig
             // If any field delimiters are in the field, or if we did replace
             // any double quotes with a pair of double quotes above,
             // or if the string includes a newline character (LF:\n:0x0A)
+            //               or includes a carriage return (CR:\r:0x0D)
             // and we are to allow newlines in fields,
             // then the entire field must be enclosed in double quotes:
             embeddedNewlineIndex =  fieldStr.indexOf(LINEFEED);
+            embeddedCarriageReturn = fieldStr.indexOf(CARRIAGE_RETURN);
             
             if ((fieldStr.indexOf(fieldDelimiter) != -1) || 
                 (fieldStr.indexOf(DOUBLE_QUOTE) != -1) ||
-                (multilineTreatment == Multiline.YES) && (embeddedNewlineIndex != -1))  {
+                (multilineTreatment == Multiline.YES) && (embeddedNewlineIndex != -1 || embeddedCarriageReturn != -1))  {
                 fieldStr = "\"" + fieldStr + "\"";
             }
             

Modified: 
pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java?rev=1649702&r1=1649701&r2=1649702&view=diff
==============================================================================
--- 
pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java
 (original)
+++ 
pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java
 Tue Jan  6 01:06:26 2015
@@ -19,6 +19,7 @@
 package org.apache.pig.piggybank.test.storage;
 
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Iterator;
@@ -27,10 +28,11 @@ import java.util.Properties;
 import junit.framework.Assert;
 
 import org.apache.commons.lang.StringUtils;
-
 import org.apache.pig.ExecType;
 import org.apache.pig.PigServer;
 import 
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration;
+import org.apache.pig.builtin.mock.Storage;
+import org.apache.pig.builtin.mock.Storage.Data;
 import org.apache.pig.data.DataByteArray;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.tools.parameters.ParseException;
@@ -58,6 +60,7 @@ public class TestCSVExcelStorage  {
         "1st Field,\"A poem that continues\n" +
         "for several lines\n" +
         "do we\n" +
+        "(even with \r)" +
         "handle that?\",Good,Fairy\n";
 
     String[] testStrCommaArray =
@@ -69,7 +72,7 @@ public class TestCSVExcelStorage  {
             "\"Conrad\nEmil\",Dinger,40",
                 "Emil,\"\nDinger\",40",
                 "Quote problem,\"My \"\"famous\"\"\nsong\",60",
-            "1st Field,\"A poem that continues\nfor several lines\ndo 
we\nhandle that?\",Good,Fairy",
+            "1st Field,\"A poem that continues\nfor several lines\ndo 
we\n(even with \r)handle that?\",Good,Fairy",
     };
 
     @SuppressWarnings("serial")
@@ -83,7 +86,7 @@ public class TestCSVExcelStorage  {
             add(Util.createTuple(new String[] {"Conrad\nEmil", "Dinger", 
"40"}));
             add(Util.createTuple(new String[] {"Emil", "\nDinger", "40"}));
             add(Util.createTuple(new String[] {"Quote problem", "My 
\"famous\"\nsong", "60"}));
-            add(Util.createTuple(new String[] {"1st Field", "A poem that 
continues\nfor several lines\ndo we\nhandle that?", "Good", "Fairy"}));
+            add(Util.createTuple(new String[] {"1st Field", "A poem that 
continues\nfor several lines\ndo we\n(even with \n)handle that?", "Good", 
"Fairy"}));
         }
     };
 
@@ -104,7 +107,8 @@ public class TestCSVExcelStorage  {
             add(Util.createTuple(new String[] {"1st Field", "A poem that 
continues"}));
             add(Util.createTuple(new String[] {"for several lines"}));
             add(Util.createTuple(new String[] {"do we"}));
-            add(Util.createTuple(new String[] {"handle that?,Good,Fairy"})); 
// Trailing double quote eats rest of line
+            add(Util.createTuple(new String[] {"(even with "}));
+            add(Util.createTuple(new String[] {")handle that?,Good,Fairy"})); 
// Trailing double quote eats rest of line
         }
     };
 
@@ -161,6 +165,7 @@ public class TestCSVExcelStorage  {
                 "1,,,,\"",
                 "qwe",
                 "rty\", uiop",
+                "1,10,2.718,3.14159,\"abc\rdef\",uiop",
                 "1,,,,\"qwe,rty\",uiop",
                 "1,,,,\"q\"\"wert\"\"y\", uiop",
                 "1,,,,qwerty,\"u\"\"io\"\"p\""
@@ -338,6 +343,7 @@ public class TestCSVExcelStorage  {
             "(1,10,,3.15159,,uiop)",             // extra field (input data 
has "moose" after "uiop")
             "(1,,2.718,,qwerty,uiop)",           // quoted regular fields 
(2.718, qwerty, and uiop in quotes)
             "(1,,,,\nqwe\nrty, uiop)",           // newlines in quotes
+            "(1,10,2.718,3.14159,abc\ndef,uiop)",// After LOAD \r => \n 
(PIG-4213)
             "(1,,,,qwe,rty,uiop)",               // commas in quotes
             "(1,,,,q\"wert\"y, uiop)",           // quotes in quotes
             "(1,,,,qwerty,u\"io\"p)"             // quotes in quotes at the 
end of a line
@@ -384,6 +390,8 @@ public class TestCSVExcelStorage  {
             "(1,,,,\")",                            // since we are just using 
TextLoader for verification
             "(qwe)",                                // it treats the 
linebreaks as meaning separate records
             "(rty\", uiop)",                        // but as shown in the 
load() test, CSVExcelStorage will read these properly
+            "(1,10,2.718,3.14159,\"abc)",
+            "(def\",uiop)",
             "(1,,,,\"qwe,rty\",uiop)",
             "(1,,,,\"q\"\"wert\"\"y\", uiop)",
             "(1,,,,qwerty,\"u\"\"io\"\"p\")"
@@ -426,4 +434,48 @@ public class TestCSVExcelStorage  {
 
         Assert.assertEquals(StringUtils.join(expected, "\n"), 
StringUtils.join(data, "\n"));
     }
+    
+    // Test that STORE stores CR (\r) quoted/unquoted in yes_multiline/no_multiline
+    @Test
+    public void storeCR() throws IOException {
+        ArrayList<Tuple> inputTuples = new ArrayList<Tuple>();
+        inputTuples.add(Storage.tuple(1,"text","a line\rand another line to 
write"));
+        String expected = "1,text,\"a line\rand another line to write\"\n";
+        String expectedNoMultiline = "1,text,a line\rand another line to 
write\n";
+
+        // Prepare the input using mock.Storage() since this will not 
interpret \r
+        Data data = Storage.resetData(pig);
+        data.set("inputTuples", inputTuples);
+
+        // Test for quoted when YES_MULTILINE
+        // Execute
+        String testOut = dataDir + "csv_cr_quoted_output_yes_multiline" ;
+        String script = "A = load 'inputTuples' USING mock.Storage() as 
(f1:int, f2:chararray, f3:chararray);" +
+                "STORE A INTO '" + testOut + "' USING " +
+                "org.apache.pig.piggybank.storage.CSVExcelStorage(',', 
'YES_MULTILINE', 'UNIX');";
+        Util.registerMultiLineQuery(pig, script);
+        // Load result
+        FileInputStream resultFile = new FileInputStream(testOut + 
"/part-m-00000");
+        byte[] actualBytes = new byte[resultFile.available()];
+        resultFile.read(actualBytes);
+        resultFile.close();
+        String actual = new String(actualBytes);
+        Assert.assertEquals(expected, actual);
+
+        // Test for unquoted when NO_MULTILINE
+        // Execute
+        testOut = dataDir + "csv_cr_quoted_output_no_multiline" ;
+        script = "A = load 'inputTuples' USING mock.Storage() as (f1:int, 
f2:chararray, f3:chararray);" +
+                "STORE A INTO '" + testOut + "' USING " +
+                "org.apache.pig.piggybank.storage.CSVExcelStorage(',', 
'NO_MULTILINE', 'UNIX');";
+        Util.registerMultiLineQuery(pig, script);
+        // Load result
+        resultFile = new FileInputStream(testOut + "/part-m-00000");
+        actualBytes = new byte[resultFile.available()];
+        resultFile.read(actualBytes);
+        resultFile.close();
+        actual = new String(actualBytes);
+        Assert.assertEquals(expectedNoMultiline, actual);
+    }
+
 }


Reply via email to