Repository: nifi Updated Branches: refs/heads/master 518670dbf -> e68fdca51
NIFI-944 Added support and unit tests for escaped characters in ConvertCSVToAvro processor properties. This closes #87. Reviewed by Tony Kurc <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/nifi/repo Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/e68fdca5 Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/e68fdca5 Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/e68fdca5 Branch: refs/heads/master Commit: e68fdca517eac53700c0f38bbaa7a893cfc28d9c Parents: 518670d Author: Joe <[email protected]> Authored: Tue Oct 20 23:48:54 2015 -0400 Committer: Tony Kurc <[email protected]> Committed: Wed Oct 21 00:01:38 2015 -0400 ---------------------------------------------------------------------- .../nifi/processors/kite/ConvertCSVToAvro.java | 17 ++++++++- .../processors/kite/TestCSVToAvroProcessor.java | 40 ++++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nifi/blob/e68fdca5/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/main/java/org/apache/nifi/processors/kite/ConvertCSVToAvro.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/main/java/org/apache/nifi/processors/kite/ConvertCSVToAvro.java b/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/main/java/org/apache/nifi/processors/kite/ConvertCSVToAvro.java index 6c20a8f..ea84daa 100644 --- a/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/main/java/org/apache/nifi/processors/kite/ConvertCSVToAvro.java +++ b/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/main/java/org/apache/nifi/processors/kite/ConvertCSVToAvro.java @@ -30,6 +30,7 @@ import org.apache.avro.Schema; import org.apache.avro.file.CodecFactory; import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericData.Record; +import 
org.apache.commons.lang3.StringEscapeUtils; import org.apache.nifi.annotation.documentation.CapabilityDescription; import org.apache.nifi.annotation.documentation.Tags; import org.apache.nifi.annotation.lifecycle.OnScheduled; @@ -53,6 +54,7 @@ import org.kitesdk.data.spi.DefaultConfiguration; import org.kitesdk.data.spi.filesystem.CSVFileReader; import org.kitesdk.data.spi.filesystem.CSVProperties; + import static org.apache.nifi.processor.util.StandardValidators.createLongValidator; @Tags({"kite", "csv", "avro"}) @@ -66,11 +68,15 @@ public class ConvertCSVToAvro extends AbstractKiteProcessor { @Override public ValidationResult validate(String subject, String input, ValidationContext context) { + // Allows special, escaped characters as input, which is then unescaped and converted to a single character. + // Examples for special characters: \t (or \u0009), \f. + input = unescapeString(input); + return new ValidationResult.Builder() .subject(subject) .input(input) - .explanation("Only single characters are supported") - .valid(input.length() == 1) + .explanation("Only non-null single characters are supported") + .valid(input.length() == 1 && input.charAt(0) != 0) .build(); } }; @@ -295,4 +301,11 @@ public class ConvertCSVToAvro extends AbstractKiteProcessor { session.transfer(incomingCSV, FAILURE); } } + + private static String unescapeString(String input) { + if (input.length() > 1) { + input = StringEscapeUtils.unescapeJava(input); + } + return input; + } } http://git-wip-us.apache.org/repos/asf/nifi/blob/e68fdca5/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/test/java/org/apache/nifi/processors/kite/TestCSVToAvroProcessor.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/test/java/org/apache/nifi/processors/kite/TestCSVToAvroProcessor.java 
b/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/test/java/org/apache/nifi/processors/kite/TestCSVToAvroProcessor.java index 43dea6e..0cde23c 100644 --- a/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/test/java/org/apache/nifi/processors/kite/TestCSVToAvroProcessor.java +++ b/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/test/java/org/apache/nifi/processors/kite/TestCSVToAvroProcessor.java @@ -48,9 +48,49 @@ public class TestCSVToAvroProcessor { public static final String FAILURE_CONTENT = "" + ",blue,\n"; // invalid, ID is missing + public static final String TSV_CONTENT = "" + + "1\tgreen\n" + + "\tblue\t\n" + // invalid, ID is missing + "2\tgrey\t12.95"; + public static final String FAILURE_SUMMARY = "" + "Field id: cannot make \"long\" value: '': Field id type:LONG pos:0 not set and has no default value"; + /** + * Basic test for tab separated files, similar to #test + * @throws IOException + */ + @Test + public void testTabSeparatedConversion() throws IOException { + TestRunner runner = TestRunners.newTestRunner(ConvertCSVToAvro.class); + runner.assertNotValid(); + runner.setProperty(ConvertCSVToAvro.SCHEMA, SCHEMA.toString()); + runner.setProperty(ConvertCSVToAvro.DELIMITER, "\\t"); + runner.assertValid(); + + runner.enqueue(streamFor(TSV_CONTENT)); + runner.run(); + + long converted = runner.getCounterValue("Converted records"); + long errors = runner.getCounterValue("Conversion errors"); + Assert.assertEquals("Should convert 2 rows", 2, converted); + Assert.assertEquals("Should reject 1 row", 1, errors); + + runner.assertTransferCount("success", 1); + runner.assertTransferCount("failure", 0); + runner.assertTransferCount("incompatible", 1); + + MockFlowFile incompatible = runner.getFlowFilesForRelationship("incompatible").get(0); + String failureContent = new String(runner.getContentAsByteArray(incompatible), + StandardCharsets.UTF_8); + + Assert.assertEquals("Should reject an invalid string and double", + TSV_CONTENT, 
failureContent); + Assert.assertEquals("Should accumulate error messages", + FAILURE_SUMMARY, incompatible.getAttribute("errors")); + } + + @Test public void testBasicConversion() throws IOException { TestRunner runner = TestRunners.newTestRunner(ConvertCSVToAvro.class);
