Author: daijy
Date: Fri Nov 6 20:03:44 2015
New Revision: 1713020
URL: http://svn.apache.org/viewvc?rev=1713020&view=rev
Log:
PIG-4689: CSV Writes incorrect header if two CSV files are created in one script
Modified:
pig/trunk/CHANGES.txt
pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java
Modified: pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1713020&r1=1713019&r2=1713020&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Nov 6 20:03:44 2015
@@ -69,6 +69,8 @@ PIG-4639: Add better parser for Apache H
BUG FIXES
+PIG-4689: CSV Writes incorrect header if two CSV files are created in one script (nielsbasjes via daijy)
+
PIG-4727: Incorrect types table for AVG in docs (nsmith via daijy)
PIG-4725: Typo in FrontendException messages "Incompatable" (nsmith via daijy)
Modified:
pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
URL:
http://svn.apache.org/viewvc/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java?rev=1713020&r1=1713019&r2=1713020&view=diff
==============================================================================
--- pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java (original)
+++ pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java Fri Nov 6 20:03:44 2015
@@ -671,6 +671,11 @@ public class CSVExcelStorage extends Pig
}
@Override
+ public void setStoreFuncUDFContextSignature(String signature) {
+ this.udfContextSignature = signature;
+ }
+
+ @Override
public List<OperatorSet> getFeatures() {
return Arrays.asList(LoadPushDown.OperatorSet.PROJECTION);
}
Modified:
pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java
URL:
http://svn.apache.org/viewvc/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java?rev=1713020&r1=1713019&r2=1713020&view=diff
==============================================================================
--- pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java (original)
+++ pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java Fri Nov 6 20:03:44 2015
@@ -40,6 +40,9 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
+import static org.apache.pig.builtin.mock.Storage.resetData;
+import static org.apache.pig.builtin.mock.Storage.tuple;
+
public class TestCSVExcelStorage {
Properties props = new Properties();
@@ -484,4 +487,71 @@ public class TestCSVExcelStorage {
Assert.assertEquals(expectedNoMultiline, actual);
}
+ // Test to validate that each CSV file gets the correct header if they are run at the same time (PIG-4689)
+ @Test
+ public void storeTwoFilesWithDifferentHeaders() throws IOException, ParseException {
+ pig.setBatchOn(); // Very important to reproduce this bug
+
+ Storage.Data data = resetData(pig);
+
+ String fooOutFileName = createOutputFileName();
+ data.set(
+ "foo",
+ "foo_1:chararray",
+ tuple("A")
+ );
+ pig.registerQuery(
+ "foo = LOAD 'foo' USING mock.Storage();"
+ );
+ pig.registerQuery(
+ "STORE foo INTO '" + fooOutFileName + "' " +
+ "USING org.apache.pig.piggybank.storage.CSVExcelStorage(',', 'YES_MULTILINE', 'UNIX', 'WRITE_OUTPUT_HEADER');"
+ );
+
+ String barOutFileName = createOutputFileName();
+ data.set(
+ "bar",
+ "bar_1:chararray, bar_2:chararray",
+ tuple("B","C")
+ );
+ pig.registerQuery(
+ "bar = LOAD 'bar' USING mock.Storage();"
+ );
+ pig.registerQuery(
+ "STORE bar INTO '" + barOutFileName + "' " +
+ "USING org.apache.pig.piggybank.storage.CSVExcelStorage(',', 'YES_MULTILINE', 'UNIX', 'WRITE_OUTPUT_HEADER');"
+ );
+
+ pig.executeBatch();
+
+ // -----
+
+ pig.registerQuery(
+ "fooCsv = load '" + fooOutFileName + "' ;"
+ );
+
+ Iterator<Tuple> fooCsv = pig.openIterator("fooCsv");
+ String[] expectedFooCsv = {
+ // header should be written because we used the 'WRITE_OUTPUT_HEADER' argument
+ "(foo_1)",
+ "(A)"
+ };
+
+ Assert.assertEquals(StringUtils.join(expectedFooCsv, "\n"), StringUtils.join(fooCsv, "\n"));
+
+ // -----
+
+ pig.registerQuery(
+ "barCsv = load '" + barOutFileName + "' ;"
+ );
+ Iterator<Tuple> barCsv = pig.openIterator("barCsv");
+ String[] expectedbarCsv = {
+ // header should be written because we used the 'WRITE_OUTPUT_HEADER' argument
+ "(bar_1,bar_2)",
+ "(B,C)"
+ };
+
+ Assert.assertEquals(StringUtils.join(expectedbarCsv, "\n"), StringUtils.join(barCsv, "\n"));
+ }
+
}