This is an automated email from the ASF dual-hosted git repository.
siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new c08342be84 Handle null string in CSV decoder (#9340)
c08342be84 is described below
commit c08342be840aee510e2dc30f39f9c3ad65e69e21
Author: Kartik Khare <[email protected]>
AuthorDate: Wed Sep 7 15:03:37 2022 +0530
Handle null string in CSV decoder (#9340)
Co-authored-by: Kartik Khare <[email protected]>
---
.../plugin/inputformat/csv/CSVMessageDecoder.java | 6 ++++++
.../inputformat/csv/CSVMessageDecoderTest.java | 22 ++++++++++++++++++++++
2 files changed, 28 insertions(+)
diff --git
a/pinot-plugins/pinot-input-format/pinot-csv/src/main/java/org/apache/pinot/plugin/inputformat/csv/CSVMessageDecoder.java
b/pinot-plugins/pinot-input-format/pinot-csv/src/main/java/org/apache/pinot/plugin/inputformat/csv/CSVMessageDecoder.java
index a535adc1fa..a049eefdac 100644
---
a/pinot-plugins/pinot-input-format/pinot-csv/src/main/java/org/apache/pinot/plugin/inputformat/csv/CSVMessageDecoder.java
+++
b/pinot-plugins/pinot-input-format/pinot-csv/src/main/java/org/apache/pinot/plugin/inputformat/csv/CSVMessageDecoder.java
@@ -48,6 +48,7 @@ public class CSVMessageDecoder implements
StreamMessageDecoder<byte[]> {
private static final String CONFIG_COMMENT_MARKER = "commentMarker";
private static final String CONFIG_CSV_ESCAPE_CHARACTER = "escapeCharacter";
private static final String CONFIG_CSV_MULTI_VALUE_DELIMITER =
"multiValueDelimiter";
+ public static final String NULL_STRING_VALUE = "nullStringValue";
private CSVFormat _format;
private CSVRecordExtractor _recordExtractor;
@@ -111,6 +112,11 @@ public class CSVMessageDecoder implements
StreamMessageDecoder<byte[]> {
format =
format.withEscape(props.get(CONFIG_CSV_ESCAPE_CHARACTER).charAt(0));
}
+ String nullString = props.get(NULL_STRING_VALUE);
+ if (nullString != null) {
+ format = format.withNullString(nullString);
+ }
+
_format = format;
_recordExtractor = new CSVRecordExtractor();
diff --git
a/pinot-plugins/pinot-input-format/pinot-csv/src/test/java/org/apache/pinot/plugin/inputformat/csv/CSVMessageDecoderTest.java
b/pinot-plugins/pinot-input-format/pinot-csv/src/test/java/org/apache/pinot/plugin/inputformat/csv/CSVMessageDecoderTest.java
index ae407ee914..1dde1f37e7 100644
---
a/pinot-plugins/pinot-input-format/pinot-csv/src/test/java/org/apache/pinot/plugin/inputformat/csv/CSVMessageDecoderTest.java
+++
b/pinot-plugins/pinot-input-format/pinot-csv/src/test/java/org/apache/pinot/plugin/inputformat/csv/CSVMessageDecoderTest.java
@@ -29,6 +29,7 @@ import org.testng.annotations.Test;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertNull;
public class CSVMessageDecoderTest {
@@ -129,6 +130,27 @@ public class CSVMessageDecoderTest {
assertEquals(destination.getValue("subjects"), "mat;hs");
}
+ @Test
+ public void testNullString()
+ throws Exception {
+ Map<String, String> decoderProps = getStandardDecoderProps();
+ decoderProps.put("header", "name;age;gender;subjects");
+ decoderProps.put("delimiter", ";");
+ decoderProps.put("nullStringValue", "null");
+ CSVMessageDecoder messageDecoder = new CSVMessageDecoder();
+ messageDecoder.init(decoderProps, ImmutableSet.of("name", "age", "gender",
"subjects"), "");
+ String incomingRecord = "Alice;null;F;null";
+ GenericRow destination = new GenericRow();
+ messageDecoder.decode(incomingRecord.getBytes(StandardCharsets.UTF_8),
destination);
+ assertNotNull(destination.getValue("name"));
+ assertNull(destination.getValue("age"));
+ assertNotNull(destination.getValue("gender"));
+ assertNull(destination.getValue("subjects"));
+
+ assertEquals(destination.getValue("name"), "Alice");
+ assertEquals(destination.getValue("gender"), "F");
+ }
+
@Test
public void testDefaultProps()
throws Exception {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]