[
https://issues.apache.org/jira/browse/PIG-2556?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Peter Welch updated PIG-2556:
-----------------------------
Labels: newbie patch (was: )
Status: Patch Available (was: Open)
Index: src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java
===================================================================
--- src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java (revision 1294285)
+++ src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java (working copy)
@@ -49,6 +49,7 @@
String testFileCommaName = "testFileComma.csv";
String testFileTabName = "testFileTab.csv";
+ String testFileNewlines = "testFileNewlines.csv";
String testStrComma =
"John,Doe,10\n" +
@@ -124,7 +125,36 @@
add(Util.createTuple(new String[] {"Frank","Clean","70"}));
}
};
-
+
+ String[] testFileNewlinesArray = new String[] {
+ "One,Two,Three",
+ "123,\"\nSecond line\nThird line\", \"456\"" // notice that the space after the comma but before the quote
+ // is considered to be part of the 3rd field. TBD if that's correct.
+ };
+
+ @SuppressWarnings("serial")
+ ArrayList<Tuple> testStrNewlinesResultTuples =
+ new ArrayList<Tuple>() {
+ {
+ add(Util.createTuple(new String[] {"One","Two","Three"}));
+ add(Util.createTuple(new String[] {"123", "\nSecond line\nThird line"," 456"}));
+ }
+ };
+
+
+ @Test
+ public void testNewline() throws IOException {
+
+ // Read the test file:
+ String script =
+ "a = LOAD '" + testFileNewlines + "' " +
+ "USING org.apache.pig.piggybank.storage.CSVExcelStorage(',', 'YES_MULTILINE');";
+ Util.registerMultiLineQuery(pigServer, script);
+ compareExpectedActual(testStrNewlinesResultTuples, "a");
+
+
+ }
+
public TestCSVExcelStorage() throws ExecException, IOException {
pigServer = new PigServer(ExecType.LOCAL);
@@ -135,6 +165,7 @@
Util.createLocalInputFile(testFileCommaName, testStrCommaArray);
Util.createLocalInputFile(testFileTabName, testStrTabArray);
+ Util.createLocalInputFile(testFileNewlines, testFileNewlinesArray);
}
@Test
@@ -148,7 +179,7 @@
assertEquals(Util.createTuple(new String[] {"foo", "bar", "baz"}),
it.next());
}
- @Test
+ @Test
public void testQuotedCommas() throws IOException {
String inputFileName = "TestCSVExcelStorage-quotedcommas.txt";
Util.createLocalInputFile(inputFileName, new String[]
{"\"foo,bar,baz\"", "fee,foe,fum"});
Index: src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
===================================================================
--- src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java (revision 1294285)
+++ src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java (working copy)
@@ -622,6 +622,10 @@
// that entire field is quoted:
getNextInQuotedField = true;
evenQuotesSeen = true;
+ if (i == recordLen - 1) {
+ fieldBuffer.put(b);
+ sawEmbeddedRecordDelimiter = true;
+ }
} else if (b == FIELD_DEL) {
readField(fieldBuffer, getNextFieldID++); // end of the field
} else {
> CSVExcelStorage load: quoted field with newline as first character sees
> newline as record end
> ----------------------------------------------------------------------------------------------
>
> Key: PIG-2556
> URL: https://issues.apache.org/jira/browse/PIG-2556
> Project: Pig
> Issue Type: Bug
> Components: piggybank
> Affects Versions: 0.9.1
> Reporter: Peter Welch
> Labels: patch, newbie
>
> Loading a record that contains a newline as the first character in a quoted
> field is broken. The loader interprets the quoted newline as the record
> delimiter. I've identified and fixed the bug and added a new testcase to
> expose it. I'll post a patch soon.
--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators:
https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira