This is an automated email from the ASF dual-hosted git repository.

joewitt pushed a commit to branch support/nifi-1.16 in repository https://gitbox.apache.org/repos/asf/nifi.git
commit 51efe1f00fbfb2adf7d6721e1697a92e3bf56cb8 Author: Paul Grey <[email protected]> AuthorDate: Wed Apr 6 12:46:06 2022 -0400 NIFI-9884 - JacksonCSVRecordReader ignores specified encoding NIFI-9884 - JacksonCSVRecordReader ignores specified encoding; test case for ISO-8859-1 Signed-off-by: Matthew Burgess <[email protected]> This closes #5941 --- .../apache/nifi/csv/JacksonCSVRecordReader.java | 2 +- .../nifi/csv/TestJacksonCSVRecordReader.java | 27 +++++++++++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/JacksonCSVRecordReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/JacksonCSVRecordReader.java index f3c3acce8e..d9c1f8d99b 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/JacksonCSVRecordReader.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/JacksonCSVRecordReader.java @@ -59,7 +59,7 @@ public class JacksonCSVRecordReader extends AbstractCSVRecordReader { final String dateFormat, final String timeFormat, final String timestampFormat, final String encoding) throws IOException { super(logger, schema, hasHeader, ignoreHeader, dateFormat, timeFormat, timestampFormat); - final Reader reader = new InputStreamReader(new BOMInputStream(in)); + final Reader reader = new InputStreamReader(new BOMInputStream(in), encoding); CsvSchema.Builder csvSchemaBuilder = CsvSchema.builder() .setColumnSeparator(csvFormat.getDelimiter()) diff --git 
a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestJacksonCSVRecordReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestJacksonCSVRecordReader.java index 8b7787f9f8..422b4496f3 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestJacksonCSVRecordReader.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestJacksonCSVRecordReader.java @@ -34,6 +34,7 @@ import java.io.ByteArrayInputStream; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -69,7 +70,7 @@ public class TestJacksonCSVRecordReader { fields.add(new RecordField("name", RecordFieldType.STRING.getDataType())); final RecordSchema schema = new SimpleRecordSchema(fields); - try (final InputStream bais = new ByteArrayInputStream(text.getBytes()); + try (final InputStream bais = new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8)); final JacksonCSVRecordReader reader = new JacksonCSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false, RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(), "UTF-8")) { @@ -80,6 +81,30 @@ public class TestJacksonCSVRecordReader { } } + @Test + public void testISO8859() throws IOException, MalformedRecordException { + final String text = "name\nÄËÖÜ"; + final byte[] bytesUTF = text.getBytes(StandardCharsets.UTF_8); + final byte[] bytes8859 = text.getBytes(StandardCharsets.ISO_8859_1); + 
assertEquals(13, bytesUTF.length, "expected size=13 for UTF-8 representation of test data"); + assertEquals(9, bytes8859.length, "expected size=9 for ISO-8859-1 representation of test data"); + + final List<RecordField> fields = new ArrayList<>(); + fields.add(new RecordField("name", RecordFieldType.STRING.getDataType())); + final RecordSchema schema = new SimpleRecordSchema(fields); + + try (final InputStream bais = new ByteArrayInputStream(text.getBytes(StandardCharsets.ISO_8859_1)); + final JacksonCSVRecordReader reader = new JacksonCSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false, + RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(), + StandardCharsets.ISO_8859_1.name())) { + + final Record record = reader.nextRecord(); + final String name = (String)record.getValue("name"); + + assertEquals("ÄËÖÜ", name); + } + } + @Test public void testDate() throws IOException, MalformedRecordException { final String dateValue = "1983-11-30";
