This is an automated email from the ASF dual-hosted git repository.
siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new c1dea5951f [8835] Fix for CSV files surrounding space (#9028)
c1dea5951f is described below
commit c1dea5951fd513d77b479fcd44c77be81f757ec9
Author: Ravishankar <[email protected]>
AuthorDate: Fri Jul 8 03:31:13 2022 +0530
[8835] Fix for CSV files surrounding space (#9028)
---
.../plugin/inputformat/csv/CSVRecordReader.java | 2 +-
.../inputformat/csv/CSVRecordExtractorTest.java | 53 +++++++++++++++++++++-
2 files changed, 53 insertions(+), 2 deletions(-)
diff --git
a/pinot-plugins/pinot-input-format/pinot-csv/src/main/java/org/apache/pinot/plugin/inputformat/csv/CSVRecordReader.java
b/pinot-plugins/pinot-input-format/pinot-csv/src/main/java/org/apache/pinot/plugin/inputformat/csv/CSVRecordReader.java
index 471dee9ea4..a95ee47036 100644
---
a/pinot-plugins/pinot-input-format/pinot-csv/src/main/java/org/apache/pinot/plugin/inputformat/csv/CSVRecordReader.java
+++
b/pinot-plugins/pinot-input-format/pinot-csv/src/main/java/org/apache/pinot/plugin/inputformat/csv/CSVRecordReader.java
@@ -80,7 +80,7 @@ public class CSVRecordReader implements RecordReader {
}
}
char delimiter = config.getDelimiter();
- format = format.withDelimiter(delimiter);
+ format =
format.withDelimiter(delimiter).withIgnoreSurroundingSpaces(true);
String csvHeader = config.getHeader();
if (csvHeader == null) {
format = format.withHeader();
diff --git
a/pinot-plugins/pinot-input-format/pinot-csv/src/test/java/org/apache/pinot/plugin/inputformat/csv/CSVRecordExtractorTest.java
b/pinot-plugins/pinot-input-format/pinot-csv/src/test/java/org/apache/pinot/plugin/inputformat/csv/CSVRecordExtractorTest.java
index d23fbd0dd7..4dea94f398 100644
---
a/pinot-plugins/pinot-input-format/pinot-csv/src/test/java/org/apache/pinot/plugin/inputformat/csv/CSVRecordExtractorTest.java
+++
b/pinot-plugins/pinot-input-format/pinot-csv/src/test/java/org/apache/pinot/plugin/inputformat/csv/CSVRecordExtractorTest.java
@@ -105,6 +105,57 @@ public class CSVRecordExtractorTest extends
AbstractRecordExtractorTest {
}
}
+  @Test
+  public void testRemovingSurroundingSpaces() throws IOException {
+    CSVRecordReaderConfig csvRecordReaderConfig = new CSVRecordReaderConfig();
+
+    // Unquoted header and values padded with spaces; the reader must trim them.
+    File spaceFile = new File(_tempDir, "space.csv");
+    try (BufferedWriter writer = new BufferedWriter(new FileWriter(spaceFile))) {
+      writer.write("col1 ,col2\n");
+      writer.write(" value11, value12");
+    }
+
+    CSVRecordReader csvRecordReader = new CSVRecordReader();
+    HashSet<String> fieldsToRead = new HashSet<>();
+    fieldsToRead.add("col1");
+    fieldsToRead.add("col2");
+    csvRecordReader.init(spaceFile, fieldsToRead, csvRecordReaderConfig);
+    GenericRow genericRow = new GenericRow();
+    csvRecordReader.rewind();
+
+    // The padded "col1 " header resolves to "col1" and both values lose padding.
+    Assert.assertTrue(csvRecordReader.hasNext());
+    csvRecordReader.next(genericRow);
+    Assert.assertEquals(genericRow.getValue("col1"), "value11");
+    Assert.assertEquals(genericRow.getValue("col2"), "value12");
+  }
+
+  @Test
+  public void testIgnoringSurroundingSpaces() throws IOException {
+    CSVRecordReaderConfig csvRecordReaderConfig = new CSVRecordReaderConfig();
+
+    // Quoted values: a space inside the quotes is data and must be preserved.
+    File spaceFile = new File(_tempDir, "space.csv");
+    try (BufferedWriter writer = new BufferedWriter(new FileWriter(spaceFile))) {
+      writer.write("col1 ,col2\n");
+      writer.write("\"value11\",\" value12\"");
+    }
+
+    CSVRecordReader csvRecordReader = new CSVRecordReader();
+    HashSet<String> fieldsToRead = new HashSet<>();
+    fieldsToRead.add("col1");
+    fieldsToRead.add("col2");
+    csvRecordReader.init(spaceFile, fieldsToRead, csvRecordReaderConfig);
+    GenericRow genericRow = new GenericRow();
+    csvRecordReader.rewind();
+
+    // Only the padded "col1 " header is trimmed; " value12" keeps its space.
+    Assert.assertTrue(csvRecordReader.hasNext());
+    csvRecordReader.next(genericRow);
+    Assert.assertEquals(genericRow.getValue("col1"), "value11");
+    Assert.assertEquals(genericRow.getValue("col2"), " value12");
+  }
/**
* Check if we can parse a CSV file that has escaped comma characters within fields.
*/
@@ -135,6 +186,6 @@ public class CSVRecordExtractorTest extends
AbstractRecordExtractorTest {
Assert.assertTrue(csvRecordReader.hasNext());
csvRecordReader.next(genericRow);
Assert.assertEquals(genericRow.getValue("first"), "string1");
- Assert.assertEquals(genericRow.getValue("second"), " string2, string3");
+ Assert.assertEquals(genericRow.getValue("second"), "string2, string3");
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]