[ 
https://issues.apache.org/jira/browse/PIG-2556?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Peter Welch updated PIG-2556:
-----------------------------

    Labels: newbie patch  (was: )
    Status: Patch Available  (was: Open)

Index: 
src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java
===================================================================
--- 
src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java    
    (revision 1294285)
+++ 
src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java    
    (working copy)
@@ -49,6 +49,7 @@
     
     String testFileCommaName = "testFileComma.csv";
     String testFileTabName = "testFileTab.csv";
+    String testFileNewlines = "testFileNewlines.csv";
 
     String testStrComma = 
        "John,Doe,10\n" +
@@ -124,7 +125,36 @@
                add(Util.createTuple(new String[] {"Frank","Clean","70"}));
        }
     };
-    
+
+    String[] testFileNewlinesArray = new String[] {
+            "One,Two,Three",
+            "123,\"\nSecond line\nThird line\", \"456\""  // notice that the 
space after the comma but before the quote
+            // is considered to be part of the 3rd field.  TBD if that's 
correct.
+    };
+
+    @SuppressWarnings("serial")
+       ArrayList<Tuple> testStrNewlinesResultTuples =
+       new ArrayList<Tuple>() {
+       {
+               add(Util.createTuple(new String[] {"One","Two","Three"}));
+               add(Util.createTuple(new String[] {"123", "\nSecond line\nThird 
line"," 456"}));
+       }
+    };
+
+
+    @Test
+    public void testNewline() throws IOException {
+
+        // Read the test file:
+        String script =
+               "a = LOAD '" + testFileNewlines + "' " +
+               "USING org.apache.pig.piggybank.storage.CSVExcelStorage(',', 
'YES_MULTILINE');";
+        Util.registerMultiLineQuery(pigServer, script);
+        compareExpectedActual(testStrNewlinesResultTuples, "a");
+
+
+    }
+
     public TestCSVExcelStorage() throws ExecException, IOException {
 
         pigServer = new PigServer(ExecType.LOCAL);
@@ -135,6 +165,7 @@
         
         Util.createLocalInputFile(testFileCommaName, testStrCommaArray);
         Util.createLocalInputFile(testFileTabName, testStrTabArray);
+        Util.createLocalInputFile(testFileNewlines, testFileNewlinesArray);
     }
 
     @Test
@@ -148,7 +179,7 @@
         assertEquals(Util.createTuple(new String[] {"foo", "bar", "baz"}), 
it.next());
     }
    
-    @Test 
+    @Test
     public void testQuotedCommas() throws IOException {
         String inputFileName = "TestCSVExcelStorage-quotedcommas.txt";
         Util.createLocalInputFile(inputFileName, new String[] 
{"\"foo,bar,baz\"", "fee,foe,fum"});
Index: src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
===================================================================
--- src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java 
(revision 1294285)
+++ src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java 
(working copy)
@@ -622,6 +622,10 @@
                                // that entire field is quoted:
                                getNextInQuotedField = true;
                                evenQuotesSeen = true;
+                if (i == recordLen - 1) {
+                    fieldBuffer.put(b);
+                                       sawEmbeddedRecordDelimiter = true;
+                }
                        } else if (b == FIELD_DEL) {
                                readField(fieldBuffer, getNextFieldID++); // 
end of the field
                        } else {

                
> CSVExcelStorage load: quoted field with newline as first character sees 
> newline as record end 
> ----------------------------------------------------------------------------------------------
>
>                 Key: PIG-2556
>                 URL: https://issues.apache.org/jira/browse/PIG-2556
>             Project: Pig
>          Issue Type: Bug
>          Components: piggybank
>    Affects Versions: 0.9.1
>            Reporter: Peter Welch
>              Labels: patch, newbie
>
> Loading a record that contains a newline as the first character of a quoted 
> field is broken: the loader interprets the quoted newline as the record 
> delimiter.  I've identified and fixed the bug and added a new test case that 
> exposes it.  I'll post a patch soon.

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators: 
https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

Reply via email to