NIFI-4185: Minor tweaks to the XML Record Reader's documentation and error handling
This closes #2587. Project: http://git-wip-us.apache.org/repos/asf/nifi/repo Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/0e736f59 Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/0e736f59 Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/0e736f59 Branch: refs/heads/master Commit: 0e736f59fdc29db471cecb4c7a885bf378e9ae71 Parents: d21bd38 Author: Mark Payne <[email protected]> Authored: Mon Apr 23 13:08:42 2018 -0400 Committer: Mark Payne <[email protected]> Committed: Mon Apr 23 14:41:17 2018 -0400 ---------------------------------------------------------------------- .../java/org/apache/nifi/xml/XMLReader.java | 37 +++++++++++------ .../java/org/apache/nifi/xml/TestXMLReader.java | 42 ++++++++------------ 2 files changed, 40 insertions(+), 39 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nifi/blob/0e736f59/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/xml/XMLReader.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/xml/XMLReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/xml/XMLReader.java index 73ee75d..d8216df 100755 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/xml/XMLReader.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/xml/XMLReader.java @@ -45,24 +45,26 @@ import java.util.Map; "XML data, embedded in an enclosing root tag.") public class XMLReader extends 
SchemaRegistryService implements RecordReaderFactory { - public static final AllowableValue RECORD_SINGLE = new AllowableValue("false"); - public static final AllowableValue RECORD_ARRAY = new AllowableValue("true"); - public static final AllowableValue RECORD_EVALUATE = new AllowableValue("${xml.stream.is.array}","Use attribute xml.stream.is.array"); + public static final AllowableValue RECORD_SINGLE = new AllowableValue("false", "false", + "Each FlowFile will consist of a single record without any sort of \"wrapper\"."); + public static final AllowableValue RECORD_ARRAY = new AllowableValue("true", "true", + "Each FlowFile will consist of zero or more records. The outer-most XML element is expected to be a \"wrapper\" and will be ignored."); + public static final AllowableValue RECORD_EVALUATE = new AllowableValue("${xml.stream.is.array}", "Use attribute 'xml.stream.is.array'", + "Whether to treat a FlowFile as a single Record or an array of multiple Records is determined by the value of the 'xml.stream.is.array' attribute. " + + "If the value of the attribute is 'true' (case-insensitive), then the XML Reader will treat the FlowFile as a series of Records with the outer element being ignored. " + + "If the value of the attribute is 'false' (case-insensitive), then the FlowFile is treated as a single Record and no wrapper element is assumed. " + + "If the attribute is missing or its value is anything other than 'true' or 'false', then an Exception will be thrown and no records will be parsed."); public static final PropertyDescriptor RECORD_FORMAT = new PropertyDescriptor.Builder() .name("record_format") .displayName("Expect Records as Array") - .description("This property defines whether the reader expects a single record an array of records. If the property is " + - "set to \"true\", the reader expects an array of records and the outer element of the XML will be treated as a " + - "wrapper for the records. 
If the property is set to \"false\", the reader expects a single record for each FlowFile " + - "(without wrapper-element). If the property is set to \"Use attribute xml.stream.is.array\", the attribute " + - "\"xml.stream.is.array\" will be evaluated for each FlowFile whether to treat its content as array " + - "of records (in the case of \"true\") or as single record (in the case of \"false\".") - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .description("This property defines whether the reader expects a FlowFile to consist of a single Record or a series of Records with a \"wrapper element\". Because XML does not " + + "provide for a way to read a series of XML documents from a stream directly, it is common to combine many XML documents by concatenating them and then wrapping the entire " + + "XML blob with a \"wrapper element\". This property dictates whether the reader expects a FlowFile to consist of a single Record or a series of Records with a \"wrapper element\" " + + "that will be ignored.") .allowableValues(RECORD_SINGLE, RECORD_ARRAY, RECORD_EVALUATE) .defaultValue(RECORD_SINGLE.getValue()) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .required(false) + .required(true) .build(); public static final PropertyDescriptor ATTRIBUTE_PREFIX = new PropertyDescriptor.Builder() @@ -122,7 +124,16 @@ public class XMLReader extends SchemaRegistryService implements RecordReaderFact final String contentFieldName = context.getProperty(CONTENT_FIELD_NAME).isSet() ? 
context.getProperty(CONTENT_FIELD_NAME).evaluateAttributeExpressions(variables).getValue().trim() : null; - final boolean isArray = Boolean.parseBoolean(context.getProperty(RECORD_FORMAT).evaluateAttributeExpressions(variables).getValue()); + final boolean isArray; + final String recordFormat = context.getProperty(RECORD_FORMAT).evaluateAttributeExpressions(variables).getValue().trim(); + if ("true".equalsIgnoreCase(recordFormat)) { + isArray = true; + } else if ("false".equalsIgnoreCase(recordFormat)) { + isArray = false; + } else { + throw new IOException("Cannot parse XML Records because the '" + RECORD_FORMAT.getDisplayName() + "' property evaluates to '" + + recordFormat + "', which is neither 'true' nor 'false'"); + } return new XMLRecordReader(in, schema, isArray, attributePrefix, contentFieldName, dateFormat, timeFormat, timestampFormat, logger); } http://git-wip-us.apache.org/repos/asf/nifi/blob/0e736f59/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestXMLReader.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestXMLReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestXMLReader.java index e1c767e..e2e11e5 100755 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestXMLReader.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestXMLReader.java @@ -17,12 +17,7 @@ package org.apache.nifi.xml; -import 
org.apache.nifi.reporting.InitializationException; -import org.apache.nifi.schema.access.SchemaAccessUtils; -import org.apache.nifi.util.MockFlowFile; -import org.apache.nifi.util.TestRunner; -import org.apache.nifi.util.TestRunners; -import org.junit.Test; +import static junit.framework.TestCase.assertEquals; import java.io.FileInputStream; import java.io.IOException; @@ -30,10 +25,15 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Paths; import java.util.Arrays; -import java.util.HashMap; +import java.util.Collections; import java.util.List; -import static junit.framework.TestCase.assertEquals; +import org.apache.nifi.reporting.InitializationException; +import org.apache.nifi.schema.access.SchemaAccessUtils; +import org.apache.nifi.util.MockFlowFile; +import org.apache.nifi.util.TestRunner; +import org.apache.nifi.util.TestRunners; +import org.junit.Test; public class TestXMLReader { @@ -66,9 +66,7 @@ public class TestXMLReader { runner.enableControllerService(reader); InputStream is = new FileInputStream("src/test/resources/xml/people.xml"); - runner.enqueue(is, new HashMap<String,String>() {{ - put(EVALUATE_IS_ARRAY, "true"); - }}); + runner.enqueue(is, Collections.singletonMap(EVALUATE_IS_ARRAY, "true")); runner.run(); List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS); @@ -86,9 +84,7 @@ public class TestXMLReader { runner.enableControllerService(reader); InputStream is = new FileInputStream("src/test/resources/xml/people.xml"); - runner.enqueue(is, new HashMap<String,String>() {{ - put(EVALUATE_IS_ARRAY, "true"); - }}); + runner.enqueue(is, Collections.singletonMap(EVALUATE_IS_ARRAY, "true")); runner.run(); List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS); @@ -106,13 +102,11 @@ public class TestXMLReader { runner.enableControllerService(reader); InputStream is = new FileInputStream("src/test/resources/xml/person.xml"); - 
runner.enqueue(is, new HashMap<String,String>() {{ - put(EVALUATE_IS_ARRAY, "true"); - }}); + runner.enqueue(is, Collections.singletonMap(EVALUATE_IS_ARRAY, "true")); runner.run(); List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS); - List<String> records = Arrays.asList((new String(runner.getContentAsByteArray(flowFile.get(0)))).split("\n")); + List<String> records = Arrays.asList(new String(runner.getContentAsByteArray(flowFile.get(0))).split("\n")); assertEquals(1, records.size()); } @@ -127,13 +121,11 @@ public class TestXMLReader { runner.enableControllerService(reader); InputStream is = new FileInputStream("src/test/resources/xml/people.xml"); - runner.enqueue(is, new HashMap<String,String>() {{ - put(ATTRIBUTE_PREFIX, "ATTR_"); - }}); + runner.enqueue(is, Collections.singletonMap(ATTRIBUTE_PREFIX, "ATTR_")); runner.run(); List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS); - List<String> records = Arrays.asList((new String(runner.getContentAsByteArray(flowFile.get(0)))).split("\n")); + List<String> records = Arrays.asList(new String(runner.getContentAsByteArray(flowFile.get(0))).split("\n")); assertEquals(4, records.size()); assertEquals("MapRecord[{COUNTRY=USA, ATTR_ID=P1, NAME=Cleve Butler, AGE=42}]", records.get(0)); @@ -152,13 +144,11 @@ public class TestXMLReader { runner.enableControllerService(reader); InputStream is = new FileInputStream("src/test/resources/xml/people_tag_in_characters.xml"); - runner.enqueue(is, new HashMap<String,String>() {{ - put(CONTENT_NAME, "CONTENT"); - }}); + runner.enqueue(is, Collections.singletonMap(CONTENT_NAME, "CONTENT")); runner.run(); List<MockFlowFile> flowFile = runner.getFlowFilesForRelationship(TestXMLReaderProcessor.SUCCESS); - List<String> records = Arrays.asList((new String(runner.getContentAsByteArray(flowFile.get(0)))).split("\n")); + List<String> records = Arrays.asList(new 
String(runner.getContentAsByteArray(flowFile.get(0))).split("\n")); assertEquals(5, records.size()); assertEquals("MapRecord[{ID=P1, NAME=MapRecord[{CONTENT=Cleve Butler, ATTR=attr content, INNER=inner content}], AGE=42}]", records.get(0));
