http://git-wip-us.apache.org/repos/asf/nifi/blob/68c592ea/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/DataTypeUtils.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/DataTypeUtils.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/DataTypeUtils.java deleted file mode 100644 index de207f4..0000000 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/DataTypeUtils.java +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.nifi.serialization; - -import java.util.ArrayList; -import java.util.Date; -import java.util.List; -import java.util.Map; - -import org.apache.nifi.serialization.record.DataType; -import org.apache.nifi.serialization.record.RecordField; -import org.apache.nifi.serialization.record.RecordFieldType; -import org.apache.nifi.serialization.record.RecordSchema; - -public class DataTypeUtils { - - public static Double toDouble(final Object value, final Double defaultValue) { - if (value == null) { - return null; - } - - if (value instanceof Number) { - return ((Number) value).doubleValue(); - } - - if (value instanceof String) { - return Double.parseDouble((String) value); - } - - return defaultValue; - } - - public static Float toFloat(final Object value, final Float defaultValue) { - if (value == null) { - return null; - } - - if (value instanceof Number) { - return ((Number) value).floatValue(); - } - - if (value instanceof String) { - return Float.parseFloat((String) value); - } - - return defaultValue; - } - - public static Long toLong(final Object value, final Long defaultValue) { - if (value == null) { - return null; - } - - if (value instanceof Number) { - return ((Number) value).longValue(); - } - - if (value instanceof String) { - return Long.parseLong((String) value); - } - - return defaultValue; - } - - - - public static Integer toInteger(final Object value, final Integer defaultValue) { - if (value == null) { - return null; - } - - if (value instanceof Number) { - return ((Number) value).intValue(); - } - - if (value instanceof String) { - return Integer.parseInt((String) value); - } - - return defaultValue; - } - - - /** - * Deduces the type of RecordFieldType that should be used for a value of the given type, - * or returns <code>null</code> if the value is null - * - * @param value the value whose type should be deduced - * @return the type of RecordFieldType that should be used for a value of the given type, - * or 
<code>null</code> if the value is null - */ - public static DataType inferDataType(final Object value) { - if (value == null) { - return null; - } - - if (value instanceof String) { - return RecordFieldType.STRING.getDataType(); - } - if (value instanceof Long) { - return RecordFieldType.LONG.getDataType(); - } - if (value instanceof Integer) { - return RecordFieldType.INT.getDataType(); - } - if (value instanceof Double) { - return RecordFieldType.DOUBLE.getDataType(); - } - if (value instanceof Float) { - return RecordFieldType.FLOAT.getDataType(); - } - if (value instanceof Boolean) { - return RecordFieldType.BOOLEAN.getDataType(); - } - if (value instanceof Byte) { - return RecordFieldType.BYTE.getDataType(); - } - if (value instanceof Character) { - return RecordFieldType.CHAR.getDataType(); - } - if (value instanceof Short) { - return RecordFieldType.SHORT.getDataType(); - } - if (value instanceof Date) { - return RecordFieldType.DATE.getDataType(); - } - if (value instanceof Object[] || value instanceof List) { - return RecordFieldType.ARRAY.getDataType(); - } - if (value instanceof Map) { - @SuppressWarnings("unchecked") - final Map<String, Object> map = (Map<String, Object>) value; - final RecordSchema childSchema = determineSchema(map); - return RecordFieldType.RECORD.getDataType(childSchema); - } - - return RecordFieldType.RECORD.getDataType(); - } - - public static RecordSchema determineSchema(final Map<String, Object> valueMap) { - final List<RecordField> fields = new ArrayList<>(valueMap.size()); - for (final Map.Entry<String, Object> entry : valueMap.entrySet()) { - final DataType valueType = inferDataType(entry.getValue()); - final String fieldName = entry.getKey(); - final RecordField field = new RecordField(fieldName, valueType); - fields.add(field); - } - return new SimpleRecordSchema(fields); - } -}
http://git-wip-us.apache.org/repos/asf/nifi/blob/68c592ea/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/DateTimeTextRecordSetWriter.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/DateTimeTextRecordSetWriter.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/DateTimeTextRecordSetWriter.java new file mode 100644 index 0000000..5545090 --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/DateTimeTextRecordSetWriter.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.serialization; + +import java.util.Arrays; +import java.util.List; + +import org.apache.nifi.annotation.lifecycle.OnEnabled; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.controller.AbstractControllerService; +import org.apache.nifi.controller.ConfigurationContext; + +public abstract class DateTimeTextRecordSetWriter extends AbstractControllerService { + + private volatile String dateFormat; + private volatile String timeFormat; + private volatile String timestampFormat; + + @Override + protected List<PropertyDescriptor> getSupportedPropertyDescriptors() { + return Arrays.asList(DateTimeUtils.DATE_FORMAT, DateTimeUtils.TIME_FORMAT, DateTimeUtils.TIMESTAMP_FORMAT); + } + + @OnEnabled + public void captureValues(final ConfigurationContext context) { + this.dateFormat = context.getProperty(DateTimeUtils.DATE_FORMAT).getValue(); + this.timeFormat = context.getProperty(DateTimeUtils.TIME_FORMAT).getValue(); + this.timestampFormat = context.getProperty(DateTimeUtils.TIMESTAMP_FORMAT).getValue(); + } + + protected String getDateFormat() { + return dateFormat; + } + + protected String getTimeFormat() { + return timeFormat; + } + + protected String getTimestampFormat() { + return timestampFormat; + } +} http://git-wip-us.apache.org/repos/asf/nifi/blob/68c592ea/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/DateTimeUtils.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/DateTimeUtils.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/DateTimeUtils.java new file mode 100644 index 
0000000..d5ab8c5 --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/DateTimeUtils.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.serialization; + +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.serialization.record.RecordFieldType; + +public class DateTimeUtils { + public static final PropertyDescriptor DATE_FORMAT = new PropertyDescriptor.Builder() + .name("Date Format") + .description("Specifies the format to use when reading/writing Date fields") + .expressionLanguageSupported(false) + .defaultValue(RecordFieldType.DATE.getDefaultFormat()) + .addValidator(new SimpleDateFormatValidator()) + .required(true) + .build(); + + public static final PropertyDescriptor TIME_FORMAT = new PropertyDescriptor.Builder() + .name("Time Format") + .description("Specifies the format to use when reading/writing Time fields") + .expressionLanguageSupported(false) + .defaultValue(RecordFieldType.TIME.getDefaultFormat()) + .addValidator(new SimpleDateFormatValidator()) + .required(true) + .build(); + + public static final PropertyDescriptor TIMESTAMP_FORMAT = new PropertyDescriptor.Builder() + .name("Timestamp Format") + .description("Specifies the format to use when reading/writing Timestamp fields") + .expressionLanguageSupported(false) + .defaultValue(RecordFieldType.TIMESTAMP.getDefaultFormat()) + .addValidator(new SimpleDateFormatValidator()) + .required(true) + .build(); +} http://git-wip-us.apache.org/repos/asf/nifi/blob/68c592ea/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/SchemaRegistryRecordReader.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/SchemaRegistryRecordReader.java 
b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/SchemaRegistryRecordReader.java new file mode 100644 index 0000000..ee25e64 --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/SchemaRegistryRecordReader.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.serialization; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import org.apache.nifi.annotation.lifecycle.OnEnabled; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.components.PropertyValue; +import org.apache.nifi.components.ValidationContext; +import org.apache.nifi.components.ValidationResult; +import org.apache.nifi.controller.AbstractControllerService; +import org.apache.nifi.controller.ConfigurationContext; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.schemaregistry.services.SchemaRegistry; +import org.apache.nifi.serialization.record.RecordSchema; + +public abstract class SchemaRegistryRecordReader extends AbstractControllerService { + + protected static final PropertyDescriptor REQUIRED_SCHEMA_REGISTRY = new PropertyDescriptor.Builder() + .name("Schema Registry") + .description("Specifies the Controller Service to use for the Schema Registry") + .identifiesControllerService(SchemaRegistry.class) + .required(true) + .build(); + + protected static final PropertyDescriptor OPTIONAL_SCHEMA_REGISTRY = new PropertyDescriptor.Builder() + .fromPropertyDescriptor(REQUIRED_SCHEMA_REGISTRY) + .required(false) + .build(); + + protected static final PropertyDescriptor REQUIRED_SCHEMA_NAME = new PropertyDescriptor.Builder() + .name("Schema Name") + .description("Name of the Schema that is stored in the Schema Registry") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .required(true) + .build(); + + protected static final PropertyDescriptor OPTIONAL_SCHEMA_NAME = new PropertyDescriptor.Builder() + .fromPropertyDescriptor(REQUIRED_SCHEMA_NAME) + .required(false) + .build(); + + + private volatile SchemaRegistry schemaRegistry; + private volatile PropertyValue schemaName; + + @Override + protected 
List<PropertyDescriptor> getSupportedPropertyDescriptors() { + final List<PropertyDescriptor> properties = new ArrayList<>(2); + if (isSchemaRequired()) { + properties.add(REQUIRED_SCHEMA_REGISTRY); + properties.add(REQUIRED_SCHEMA_NAME); + } else { + properties.add(OPTIONAL_SCHEMA_REGISTRY); + properties.add(OPTIONAL_SCHEMA_NAME); + } + + return properties; + } + + @OnEnabled + public void storeRegistryValues(final ConfigurationContext context) { + schemaRegistry = context.getProperty(REQUIRED_SCHEMA_REGISTRY).asControllerService(SchemaRegistry.class); + schemaName = context.getProperty(REQUIRED_SCHEMA_NAME); + } + + public RecordSchema getSchema(final FlowFile flowFile) { + final String evaluatedSchemaName = schemaName.evaluateAttributeExpressions(flowFile).getValue(); + final RecordSchema schema = schemaRegistry.retrieveSchema(evaluatedSchemaName); + return schema; + } + + @Override + protected Collection<ValidationResult> customValidate(final ValidationContext validationContext) { + if (validationContext.getProperty(OPTIONAL_SCHEMA_REGISTRY).isSet() && !validationContext.getProperty(OPTIONAL_SCHEMA_NAME).isSet()) { + return Collections.singleton(new ValidationResult.Builder() + .subject("Schema Registry") + .explanation("If the Schema Registry is configured, the Schema name must also be configured") + .valid(false) + .build()); + } + + return Collections.emptyList(); + } + + protected boolean isSchemaRequired() { + return true; + } +} http://git-wip-us.apache.org/repos/asf/nifi/blob/68c592ea/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/UserTypeOverrideRowReader.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/UserTypeOverrideRowReader.java 
b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/UserTypeOverrideRowReader.java deleted file mode 100644 index be0b8ad..0000000 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/serialization/UserTypeOverrideRowReader.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.nifi.serialization; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; - -import org.apache.nifi.annotation.lifecycle.OnEnabled; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.controller.AbstractControllerService; -import org.apache.nifi.controller.ConfigurationContext; -import org.apache.nifi.serialization.DataTypeValidator; -import org.apache.nifi.serialization.record.DataType; -import org.apache.nifi.serialization.record.RecordFieldType; - -public abstract class UserTypeOverrideRowReader extends AbstractControllerService { - private volatile Map<String, DataType> fieldTypeOverrides; - - @Override - protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) { - return new PropertyDescriptor.Builder() - .name(propertyDescriptorName) - .dynamic(true) - .addValidator(new DataTypeValidator()) - .build(); - } - - @OnEnabled - public void createFieldTypeOverrides(final ConfigurationContext context) { - final Map<String, DataType> overrides = new HashMap<>(context.getProperties().size()); - for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) { - if (!entry.getKey().isDynamic()) { - continue; - } - - final String fieldName = entry.getKey().getName(); - final String dataTypeName = entry.getValue(); - if (dataTypeName == null) { - continue; - } - - final DataType dataType; - final String[] splits = dataTypeName.split("\\:"); - if (splits.length == 2) { - final RecordFieldType fieldType = RecordFieldType.of(splits[0]); - final String format = splits[1]; - dataType = fieldType.getDataType(format); - } else { - final RecordFieldType fieldType = RecordFieldType.of(dataTypeName); - dataType = fieldType.getDataType(); - } - - overrides.put(fieldName, dataType); - } - - this.fieldTypeOverrides = Collections.unmodifiableMap(overrides); - } - - protected Map<String, DataType> getFieldTypeOverrides() { - 
return fieldTypeOverrides; - } -} http://git-wip-us.apache.org/repos/asf/nifi/blob/68c592ea/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/text/FreeFormTextRecordSetWriter.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/text/FreeFormTextRecordSetWriter.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/text/FreeFormTextRecordSetWriter.java index 07da00e..598a8c4 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/text/FreeFormTextRecordSetWriter.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/text/FreeFormTextRecordSetWriter.java @@ -33,10 +33,10 @@ import org.apache.nifi.processor.util.StandardValidators; import org.apache.nifi.serialization.RecordSetWriter; import org.apache.nifi.serialization.RecordSetWriterFactory; -@Tags({"text", "freeform", "expression", "language", "el", "resultset", "writer", "serialize"}) -@CapabilityDescription("Writes the contents of a Database ResultSet as free-form text. The configured " +@Tags({"text", "freeform", "expression", "language", "el", "record", "recordset", "resultset", "writer", "serialize"}) +@CapabilityDescription("Writes the contents of a RecordSet as free-form text. The configured " + "text is able to make use of the Expression Language to reference each of the columns that are available " - + "in the ResultSet. Each record in the ResultSet will be separated by a single newline character.") + + "in a Record. 
Each record in the RecordSet will be separated by a single newline character.") public class FreeFormTextRecordSetWriter extends AbstractControllerService implements RecordSetWriterFactory { static final PropertyDescriptor TEXT = new PropertyDescriptor.Builder() .name("Text") http://git-wip-us.apache.org/repos/asf/nifi/blob/68c592ea/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.csv.CSVReader/additionalDetails.html ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.csv.CSVReader/additionalDetails.html b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.csv.CSVReader/additionalDetails.html index e6dfd0c..fe748a7 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.csv.CSVReader/additionalDetails.html +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.csv.CSVReader/additionalDetails.html @@ -17,83 +17,103 @@ <head> <meta charset="utf-8"/> <title>CSVReader</title> - <link rel="stylesheet" href="../../css/component-usage.css" type="text/css"/> + <link rel="stylesheet" href="/nifi-docs/css/component-usage.css" type="text/css"/> </head> <body> <p> The CSVReader Controller Service, expects input in such a way that the first line of a FlowFile specifies the name of each column in the data. Following the first line, the rest of the FlowFile is expected to be valid CSV data from which - to form appropriate Records. 
By default, the schema for a FlowFile is inferred by extracting the name of each column from - the first line of the CSV and assumes that all columns are of type <code>string</code>. Of course, we may want to treat some - columns as a data type other than <code>string</code>. This can be accomplished by adding a user-defined property where the - name of the property is the same as the name of a CSV column and the value of the property is the data type to use. + to form appropriate Records. The reader allows for customization of the CSV Format, such as which character should be used + to separate CSV fields, which character should be used for quoting and when to quote fields, which character should denote + a comment, etc. </p> - - <p> - When specifying a data type for a field, the following values are valid: - </p> - - <ul> - <li><b>string</b></li> - <li><b>boolean</b></li> - <li><b>byte</b></li> - <li><b>char</b></li> - <li><b>short</b></li> - <li><b>int</b></li> - <li><b>bigint</b></li> - <li><b>long</b></li> - <li><b>float</b></li> - <li><b>double</b></li> - <li><b>date</b> - A date with no time field. By default, the format used is <code>yyyy-MM-dd</code>. This can be overridden - by adding a colon (:) followed by the desired format. For example: <code>date:MM/dd/yyyy</code>. The format to use is - that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> - SimpleDateFormat Patterns</a> for more information). - </li> - <li><b>time</b> - A time with no date field. By default, the format used is <code>HH:mm:ss</code>. This can be overridden - by adding a colon (:) followed by the desired format. For example: <code>time:hh:mm:ss a</code>. The format to use is - that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> - SimpleDateFormat Patterns</a> for more information). 
- </li> - <li><b>timestamp</b> - A field that represents both a date and time. By default, the format used is - <code>yyyy-MM-dd HH:mm:ss</code>. This can be overridden by adding a colon (:) followed by the desired format. For example: - <code>MM/dd/yyyy hh:mm:ss a</code>. The format to use is - that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> - SimpleDateFormat Patterns</a> for more information).</li> - <li><b>object</b> - <i>This data type does not apply to CSV data.</i></li> - <li><b>array</b> - <i>This data type does not apply to CSV data.</i></li> - </ul> - + + + <h2>Schemas and Type Coercion</h2> + + <p> + When a record is parsed from incoming data, it is separated into fields. Each of these fields is then looked up against the + configured schema (by field name) in order to determine what the type of the data should be. If the field is not present in + the schema, that field is omitted from the Record. If the field is found in the schema, the data type of the received data + is compared against the data type specified in the schema. If the types match, the value of that field is used as-is. If the + schema indicates that the field should be of a different type, then the Controller Service will attempt to coerce the data + into the type specified by the schema. If the field cannot be coerced into the specified type, an Exception will be thrown. 
+ </p> + + <p> + The following rules apply when attempting to coerce a field value from one data type to another: + </p> + + <ul> + <li>Any data type can be coerced into a String type.</li> + <li>Any numeric data type (Byte, Short, Int, Long, Float, Double) can be coerced into any other numeric data type.</li> + <li>Any numeric value can be coerced into a Date, Time, or Timestamp type, by assuming that the Long value is the number of + milliseconds since epoch (Midnight GMT, January 1, 1970).</li> + <li>A String value can be coerced into a Date, Time, or Timestamp type, if its format matches the configured "Date Format," "Time Format," + or "Timestamp Format."</li> + <li>A String value can be coerced into a numeric value if the value is of the appropriate type. For example, the String value + <code>8</code> can be coerced into any numeric type. However, the String value <code>8.2</code> can be coerced into a Double or Float + type but not an Integer.</li> + <li>A String value of "true" or "false" (regardless of case) can be coerced into a Boolean value.</li> + <li>A String value that is not empty can be coerced into a Char type. If the String contains more than 1 character, the first character is used + and the rest of the characters are ignored.</li> + <li>Any "date/time" type (Date, Time, Timestamp) can be coerced into any other "date/time" type.</li> + <li>Any "date/time" type can be coerced into a Long type, representing the number of milliseconds since epoch (Midnight GMT, January 1, 1970).</li> + <li>Any "date/time" type can be coerced into a String. The format of the String is whatever DateFormat is configured for the corresponding + property (Date Format, Time Format, Timestamp Format property).</li> + </ul> + + <p> + If none of the above rules apply when attempting to coerce a value from one data type to another, the coercion will fail and an Exception + will be thrown. 
+ </p> + + + + <h2>Examples</h2> + <p> As an example, consider a FlowFile whose contents consists of the following: </p> - + <code> - id, name, balance, notes<br /> - 1, John, 48.23, "Our very<br /> + id, name, balance, join_date, notes<br /> + 1, John, 48.23, 04/03/2007, "Our very<br /> first customer!"<br /> - 2, Jane, 1245.89,<br /> - 3, Frank Franklin, "48481.29",<br /> + 2, Jane, 1245.89, 08/22/2009,<br /> + 3, Frank Franklin, "48481.29", 04/04/2016,<br /> </code> <p> - Additionally, let's consider that this Controller Service is configured with the following user-defined properties: + Additionally, let's consider that this Controller Service is configured with the Schema Registry pointing to an AvroSchemaRegistry and the schema is + configured as the following: </p> - <table> - <head> - <th>Property Name</th> - <th>Property Value</th> - </head> - <body> - <tr> - <td>balance</td> - <td><code>float</code></td> - </tr> - </body> - </table> - + <code> + <pre> + { + "namespace": "nifi", + "name": "balances", + "type": "record", + "fields": [ + { "name": "id", "type": "int" }, + { "name": "name", "type": "string" }, + { "name": "balance", "type": "double" }, + { "name": "join_date", "type": { + "type": "int", + "logicalType": "date" + } }, + { "name": "notes", "type": "string" } + ] + } + </pre> + </code> + <p> + In the example above, we see that the 'join_date' column is a Date type. In order for the CSV Reader to be able to properly parse a value as a date, + we need to provide the reader with the date format to use. In this example, we would configure the Date Format property to be <code>MM/dd/yyyy</code> + to indicate that it is a two-digit month, followed by a two-digit day, followed by a four-digit year - each separated by a slash. In this case, the result will be that this FlowFile consists of 3 different records. 
The first record will contain the following values: </p> @@ -116,6 +136,10 @@ first customer!"<br /> <td>48.23</td> </tr> <tr> + <td>join_date</td> + <td>04/03/2007</td> + </tr> + <tr> <td>notes</td> <td>Our very<br />first customer!</td> </tr> @@ -145,6 +169,10 @@ first customer!"<br /> <td>1245.89</td> </tr> <tr> + <td>join_date</td> + <td>08/22/2009</td> + </tr> + <tr> <td>notes</td> <td></td> </tr> @@ -174,6 +202,10 @@ first customer!"<br /> <td>48481.29</td> </tr> <tr> + <td>join_date</td> + <td>04/04/2016</td> + </tr> + <tr> <td>notes</td> <td></td> </tr> http://git-wip-us.apache.org/repos/asf/nifi/blob/68c592ea/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.grok.GrokReader/additionalDetails.html ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.grok.GrokReader/additionalDetails.html b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.grok.GrokReader/additionalDetails.html index 3a41f47..97423ec 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.grok.GrokReader/additionalDetails.html +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.grok.GrokReader/additionalDetails.html @@ -17,18 +17,23 @@ <head> <meta charset="utf-8"/> <title>GrokReader</title> - <link rel="stylesheet" href="../../css/component-usage.css" type="text/css"/> + <link rel="stylesheet" href="/nifi-docs/css/component-usage.css" type="text/css"/> </head> <body> <p> - 
The GrokReader Controller Service, provides a means for parsing and structuring input that is + The GrokReader Controller Service provides a means for parsing and structuring input that is made up of unstructured text, such as log files. Grok allows users to add a naming construct to Regular Expressions such that they can be composed in order to create expressions that are easier - to manage and work with. This Controller Service consists of one Required Property and one Optional - Property. The Optional Property is named <code>Grok Pattern File</code> and specifies the filename of + to manage and work with. This Controller Service consists of one Required Property and a few Optional + Properties. The <code>Grok Pattern File</code> property specifies the filename of a file that contains Grok Patterns that can be used for parsing log data. If not specified, a default - patterns file will be used. Its contains are provided below. + patterns file will be used. Its contents are provided below. There are also properties for specifying + the schema to use when parsing data. The schema is not required. However, when data is parsed + a Record is created that contains all of the fields present in the Grok Expression (explained below), + and all fields are of type String. If a schema is chosen, the field can be declared to be a different, + compatible type, such as number. Additionally, if the schema does not contain one of the fields in the + parsed data, that field will be ignored. This can be used to filter out fields that are not of interest. </p> <p> @@ -48,45 +53,49 @@ to a field named <code>STACK_TRACE</code>. Otherwise, the line is appended to the last field defined in the Grok Expression. This is done because typically the last field is a 'message' type of field, which can consist of new-lines. </p> - - <p> - By default, all fields that are extracted are considered to be of type <code>string</code>. 
This can be overridden - by adding a user-defined property where the name of the property matches the name of the field that is present in the - configured Grok Expression. The value of the user-defined property is the data type to use. - When specifying a data type for a field, the following values are valid: - </p> - - <ul> - <li><b>string</b></li> - <li><b>boolean</b></li> - <li><b>byte</b></li> - <li><b>char</b></li> - <li><b>short</b></li> - <li><b>int</b></li> - <li><b>bigint</b></li> - <li><b>long</b></li> - <li><b>float</b></li> - <li><b>double</b></li> - <li><b>date</b> - A date with no time field. By default, the format used is <code>yyyy-MM-dd</code>. This can be overridden - by adding a colon (:) followed by the desired format. For example: <code>date:MM/dd/yyyy</code>. The format to use is - that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> - SimpleDateFormat Patterns</a> for more information). - </li> - <li><b>time</b> - A time with no date field. By default, the format used is <code>HH:mm:ss</code>. This can be overridden - by adding a colon (:) followed by the desired format. For example: <code>time:hh:mm:ss a</code>. The format to use is - that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> - SimpleDateFormat Patterns</a> for more information). - </li> - <li><b>timestamp</b> - A field that represents both a date and time. By default, the format used is - <code>yyyy-MM-dd HH:mm:ss</code>. This can be overridden by adding a colon (:) followed by the desired format. For example: - <code>MM/dd/yyyy hh:mm:ss a</code>. 
The format to use is - that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> - SimpleDateFormat Patterns</a> for more information).</li> - <li><b>object</b> - <i>This data type does not apply to CSV data.</i></li> - <li><b>array</b> - <i>This data type does not apply to CSV data.</i></li> - </ul> - - + + + <h2>Schemas and Type Coercion</h2> + + <p> + When a record is parsed from incoming data, it is separated into fields. Each of these fields is then looked up against the + configured schema (by field name) in order to determine what the type of the data should be. If the field is not present in + the schema, that field is omitted from the Record. If the field is found in the schema, the data type of the received data + is compared against the data type specified in the schema. If the types match, the value of that field is used as-is. If the + schema indicates that the field should be of a different type, then the Controller Service will attempt to coerce the data + into the type specified by the schema. If the field cannot be coerced into the specified type, an Exception will be thrown. + </p> + + <p> + The following rules apply when attempting to coerce a field value from one data type to another: + </p> + + <ul> + <li>Any data type can be coerced into a String type.</li> + <li>Any numeric data type (Byte, Short, Int, Long, Float, Double) can be coerced into any other numeric data type.</li> + <li>Any numeric value can be coerced into a Date, Time, or Timestamp type, by assuming that the Long value is the number of + milliseconds since epoch (Midnight GMT, January 1, 1970).</li> + <li>A String value can be coerced into a Date, Time, or Timestamp type, if its format matches the configured "Date Format," "Time Format," + or "Timestamp Format."</li> + <li>A String value can be coerced into a numeric value if the value is of the appropriate type. 
For example, the String value + <code>8</code> can be coerced into any numeric type. However, the String value <code>8.2</code> can be coerced into a Double or Float + type but not an Integer.</li> + <li>A String value of "true" or "false" (regardless of case) can be coerced into a Boolean value.</li> + <li>A String value that is not empty can be coerced into a Char type. If the String contains more than 1 character, the first character is used + and the rest of the characters are ignored.</li> + <li>Any "date/time" type (Date, Time, Timestamp) can be coerced into any other "date/time" type.</li> + <li>Any "date/time" type can be coerced into a Long type, representing the number of milliseconds since epoch (Midnight GMT, January 1, 1970).</li> + <li>Any "date/time" type can be coerced into a String. The format of the String is whatever DateFormat is configured for the corresponding + property (Date Format, Time Format, Timestamp Format property).</li> + </ul> + + <p> + If none of the above rules apply when attempting to coerce a value from one data type to another, the coercion will fail and an Exception + will be thrown. 
+ </p> + + + <h2> Examples </h2> http://git-wip-us.apache.org/repos/asf/nifi/blob/68c592ea/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonPathReader/additionalDetails.html ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonPathReader/additionalDetails.html b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonPathReader/additionalDetails.html index 2b69f7e..aceb54d 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonPathReader/additionalDetails.html +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonPathReader/additionalDetails.html @@ -17,15 +17,15 @@ <head> <meta charset="utf-8"/> <title>JsonPathReader</title> - <link rel="stylesheet" href="../../css/component-usage.css" type="text/css"/> + <link rel="stylesheet" href="/nifi-docs/css/component-usage.css" type="text/css"/> </head> <body> <p> The JsonPathReader Controller Service, parses FlowFiles that are in the JSON format. User-defined properties - specify how to extract all relevant fields from the JSON in order to create a row-oriented record. The Controller + specify how to extract all relevant fields from the JSON in order to create a Record. The Controller Service will not be valid unless at least one JSON Path is provided. 
Unlike the - <a href="../org.apache.nifi.json.FlatJsonReader/additionalDetails.html">FlatJsonReader</a> Controller Service, this + <a href="../org.apache.nifi.json.JsonTreeReader/additionalDetails.html">JsonTreeReader</a> Controller Service, this service will return a record that contains only those fields that have been configured via JSON Path. </p> @@ -39,129 +39,143 @@ Supplying a JSON Path is accomplished by adding a user-defined property where the name of the property becomes the name of the field in the Record that is returned. The value of the property must be a valid JSON Path expression. This JSON Path will be evaluated against each top-level JSON Object in the FlowFile, and the result will be the value of the field whose - name is specified by the property name. By default, the type of each field is inferred automatically based on the values of - the first JSON Object encountered for the FlowFile. This can be overridden by changing the name of the user-defined property - by adding a colon (:) and specifying the data type. For example: <code>balance:double</code> or <code>dob:date:MM/dd/yyyy</code>. - In this case, the data type and option format are not included in the field name. So for the aforementioned examples, we would - end up with field names <code>balance</code> and <code>dob</code>. + name is specified by the property name. If any JSON Path is given but no field is present in the Schema with the proper name, + then the field will be skipped. </p> <p> - When specifying a data type for a field, the following values are valid: - </p> - - <ul> - <li><b>string</b></li> - <li><b>boolean</b></li> - <li><b>byte</b></li> - <li><b>char</b></li> - <li><b>short</b></li> - <li><b>int</b></li> - <li><b>bigint</b></li> - <li><b>long</b></li> - <li><b>float</b></li> - <li><b>double</b></li> - <li><b>date</b> - A date with no time field. By default, the format used is <code>yyyy-MM-dd</code>. 
This can be overridden - by adding a colon (:) followed by the desired format. For example: <code>date:MM/dd/yyyy</code>. The format to use is - that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> - SimpleDateFormat Patterns</a> for more information). - </li> - <li><b>time</b> - A time with no date field. By default, the format used is <code>HH:mm:ss</code>. This can be overridden - by adding a colon (:) followed by the desired format. For example: <code>time:hh:mm:ss a</code>. The format to use is - that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> - SimpleDateFormat Patterns</a> for more information). - </li> - <li><b>timestamp</b> - A field that represents both a date and time. By default, the format used is - <code>yyyy-MM-dd HH:mm:ss</code>. This can be overridden by adding a colon (:) followed by the desired format. For example: - <code>MM/dd/yyyy hh:mm:ss a</code>. The format to use is - that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> - SimpleDateFormat Patterns</a> for more information).</li> - <li><b>object</b> - The value will be returned as a <code>Map<String, Object></code>. The types of the values in the Map - are always inferred. The type used for the values may not be the same for each record. For example, consider the following - JSON array: - <br /><br /> - <code> - [{ - id: 17, - name: "John", - child: { - id: "1" - }, - siblingIds: [4, "8"] - }, - <br />{ - id: 98, - name: "Jane", - child: { - id: 2 - }, - siblingIds: [] - }] - </code> - <br /><br /> - In this case, the <code>child</code> element would be inferred to be of type <code>object</code>. 
Since nested types - are inferred on a per-record basis, for the first record, the <code>child</code> field would return a <code>Map</code> - where the value of the <code>id</code> entry is a <code>string</code>. However, for the second record, the <code>child</code> - field would return a <code>Map</code> where the value of the <code>id</code> entry is an <code>int</code>. - <br /> - Moreover, the <code>siblingIds</code> of the John will be an <code>array</code> where the first element is an <code>int</code> - and the second element is a <code>string</code>. The <code>siblingIds</code> of Jane will be an empty array. - </li> - <li><b>array</b> - An array of values. The types of the values are always inferred and may not be the same for each element - in the array, or for two arrays from different JSON objects.</li> - </ul> + This Controller Service must be configured with a schema. Each JSON Path that is evaluated and is found in the "root level" + of the schema will produce a Field in the Record. I.e., the schema should match the Record that is created by evaluating all + of the JSON Paths. It should not match the "incoming JSON" that is read from the FlowFile. + </p> + + + <h2>Schemas and Type Coercion</h2> + + <p> + When a record is parsed from incoming data, it is separated into fields. Each of these fields is then looked up against the + configured schema (by field name) in order to determine what the type of the data should be. If the field is not present in + the schema, that field is omitted from the Record. If the field is found in the schema, the data type of the received data + is compared against the data type specified in the schema. If the types match, the value of that field is used as-is. If the + schema indicates that the field should be of a different type, then the Controller Service will attempt to coerce the data + into the type specified by the schema. If the field cannot be coerced into the specified type, an Exception will be thrown. 
+ </p> + + <p> + The following rules apply when attempting to coerce a field value from one data type to another: + </p> + + <ul> + <li>Any data type can be coerced into a String type.</li> + <li>Any numeric data type (Byte, Short, Int, Long, Float, Double) can be coerced into any other numeric data type.</li> + <li>Any numeric value can be coerced into a Date, Time, or Timestamp type, by assuming that the Long value is the number of + milliseconds since epoch (Midnight GMT, January 1, 1970).</li> + <li>A String value can be coerced into a Date, Time, or Timestamp type, if its format matches the configured "Date Format," "Time Format," + or "Timestamp Format."</li> + <li>A String value can be coerced into a numeric value if the value is of the appropriate type. For example, the String value + <code>8</code> can be coerced into any numeric type. However, the String value <code>8.2</code> can be coerced into a Double or Float + type but not an Integer.</li> + <li>A String value of "true" or "false" (regardless of case) can be coerced into a Boolean value.</li> + <li>A String value that is not empty can be coerced into a Char type. If the String contains more than 1 character, the first character is used + and the rest of the characters are ignored.</li> + <li>Any "date/time" type (Date, Time, Timestamp) can be coerced into any other "date/time" type.</li> + <li>Any "date/time" type can be coerced into a Long type, representing the number of milliseconds since epoch (Midnight GMT, January 1, 1970).</li> + <li>Any "date/time" type can be coerced into a String. The format of the String is whatever DateFormat is configured for the corresponding + property (Date Format, Time Format, Timestamp Format property).</li> + </ul> + + <p> + If none of the above rules apply when attempting to coerce a value from one data type to another, the coercion will fail and an Exception + will be thrown. 
+ </p> + + + <h2>Examples</h2> <p> As an example, consider a FlowFile whose content contains the following JSON: </p> <code> - [{ - id: 17, - name: "John", - child: { - id: "1" - }, - siblingIds: [4, "8"] - }, - <br />{ - id: 98, - name: "Jane", - child: { - id: 2 - }, - gender: "F", - siblingIds: [] - }] + <pre> +[{ + "id": 17, + "name": "John", + "child": { + "id": "1" + }, + "siblingIds": [4, 8], + "siblings": [ + { "name": "Jeremy", "id": 4 }, + { "name": "Julia", "id": 8} + ] + }, + { + "id": 98, + "name": "Jane", + "child": { + "id": 2 + }, + "gender": "F", + "siblingIds": [], + "siblings": [] + }] + </pre> + </code> + + <p> + And the following schema has been configured: + </p> + + <code> + <pre> +{ + "namespace": "nifi", + "name": "person", + "type": "record", + "fields": [ + { "name": "id", "type": "int" }, + { "name": "name", "type": "string" }, + { "name": "childId", "type": "long" }, + { "name": "gender", "type": "string" }, + { "name": "siblingNames", "type": { + "type": "array", + "items": "string" + }} + ] +} + </pre> </code> <p> If we configure this Controller Service with the following user-defined properties: <table> - <head> + <tr> <th>Property Name</th> <th>Property Value</th> - </head> - <body> - <tr> - <td>id</td> - <td><code>$.id</code></td> - </tr> - <tr> - <td>name</td> - <td><code>$.name</code></td> - </tr> - <tr> - <td>childId:long</td> - <td><code>$.child.id</code></td> - </tr> - <tr> - <td>gender:string</td> - <td><code>$.gender</code></td> - </tr> - </body> + </tr> + <tr> + <td>id</td> + <td><code>$.id</code></td> + </tr> + <tr> + <td>name</td> + <td><code>$.name</code></td> + </tr> + <tr> + <td>childId</td> + <td><code>$.child.id</code></td> + </tr> + <tr> + <td>gender</td> + <td><code>$.gender</code></td> + </tr> + <tr> + <td>siblingNames</td> + <td><code>$.siblings[*].name</code></td> + </tr> </table> </p> @@ -169,28 +183,30 @@ In this case, the FlowFile will generate two Records. 
The first record will consist of the following key/value pairs: <table> - <head> - <th>Field Name</th> - <th>Field Value</th> - </head> - <body> - <tr> - <td>id</td> - <td>17</td> - </tr> - <tr> - <td>name</td> - <td>John</td> - </tr> - <tr> - <td>childId</td> - <td>1</td> - </tr> - <tr> - <td>gender</td> - <td><i>null</i></td> - </tr> - </body> + <tr> + <th>Field Name</th> + <th>Field Value</th> + </tr> + <tr> + <td>id</td> + <td>17</td> + </tr> + <tr> + <td>name</td> + <td>John</td> + </tr> + <tr> + <td>childId</td> + <td>1</td> + </tr> + <tr> + <td>gender</td> + <td><i>null</i></td> + </tr> + <tr> + <td>siblingNames</td> + <td><i>array of two elements: </i><code>Jeremy</code><i> and </i><code>Julia</code></td> + </tr> </table> </p> @@ -198,28 +214,30 @@ The second record will consist of the following key/value pairs: <table> - <head> + <tr> <th>Field Name</th> <th>Field Value</th> - </head> - <body> - <tr> - <td>id</td> - <td>98</td> - </tr> - <tr> - <td>name</td> - <td>Jane</td> - </tr> - <tr> - <td>childId</td> - <td>2</td> - </tr> - <tr> - <td>gender</td> - <td>F</td> - </tr> - </body> + </tr> + <tr> + <td>id</td> + <td>98</td> + </tr> + <tr> + <td>name</td> + <td>Jane</td> + </tr> + <tr> + <td>childId</td> + <td>2</td> + </tr> + <tr> + <td>gender</td> + <td>F</td> + </tr> + <tr> + <td>siblingNames</td> + <td><i>empty array</i></td> + </tr> </table> </p> http://git-wip-us.apache.org/repos/asf/nifi/blob/68c592ea/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonTreeReader/additionalDetails.html ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonTreeReader/additionalDetails.html 
b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonTreeReader/additionalDetails.html index 7d6be7a..90980d1 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonTreeReader/additionalDetails.html +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonTreeReader/additionalDetails.html @@ -17,86 +17,223 @@ <head> <meta charset="utf-8"/> <title>JsonTreeReader</title> - <link rel="stylesheet" href="../../css/component-usage.css" type="text/css"/> + <link rel="stylesheet" href="/nifi-docs/css/component-usage.css" type="text/css"/> </head> <body> <p> - The JsonTreeReader Controller Service, by default, derives the schema for a FlowFile - based on the first JSON Object in the FlowFile. For each field found, the data type - is inferred. However, the type of a field can be overridden by adding a user-defined property to - the Controller Service. The name of the property should be the same as the name of the - JSON field. The value of the property denotes the data type of the corresponding field. - If no JSON field is found with a matching name, then a field will be added to the schema, - and a <code>null</code> value will be used for any record for which the JSON field - is not present. If a field is found with a matching name, but the type is different, - the Controller Service will attempt to coerce the value into the user-defined type. If unable - to do so, an Exception will be thrown. + The JsonTreeReader Controller Service reads a JSON Object and creates a Record object for the entire + JSON Object tree. The Controller Service must be configured with a Schema that describes the structure + of the JSON data. 
If any field exists in the JSON that is not in the schema, that field will be skipped. + If the schema contains a field for which no JSON field exists, a null value will be used in the Record. </p> <p> - When specifying a data type for a field, the following values are valid: + If the root element of the JSON is a JSON Array, each JSON Object within that array will be treated as + its own separate Record. If the root element is a JSON Object, the JSON will all be treated as a single + Record. </p> + + + <h2>Schemas and Type Coercion</h2> + + <p> + When a record is parsed from incoming data, it is separated into fields. Each of these fields is then looked up against the + configured schema (by field name) in order to determine what the type of the data should be. If the field is not present in + the schema, that field is omitted from the Record. If the field is found in the schema, the data type of the received data + is compared against the data type specified in the schema. If the types match, the value of that field is used as-is. If the + schema indicates that the field should be of a different type, then the Controller Service will attempt to coerce the data + into the type specified by the schema. If the field cannot be coerced into the specified type, an Exception will be thrown. 
+ </p> + + <p> + The following rules apply when attempting to coerce a field value from one data type to another: + </p> + + <ul> + <li>Any data type can be coerced into a String type.</li> + <li>Any numeric data type (Byte, Short, Int, Long, Float, Double) can be coerced into any other numeric data type.</li> + <li>Any numeric value can be coerced into a Date, Time, or Timestamp type, by assuming that the Long value is the number of + milliseconds since epoch (Midnight GMT, January 1, 1970).</li> + <li>A String value can be coerced into a Date, Time, or Timestamp type, if its format matches the configured "Date Format," "Time Format," + or "Timestamp Format."</li> + <li>A String value can be coerced into a numeric value if the value is of the appropriate type. For example, the String value + <code>8</code> can be coerced into any numeric type. However, the String value <code>8.2</code> can be coerced into a Double or Float + type but not an Integer.</li> + <li>A String value of "true" or "false" (regardless of case) can be coerced into a Boolean value.</li> + <li>A String value that is not empty can be coerced into a Char type. If the String contains more than 1 character, the first character is used + and the rest of the characters are ignored.</li> + <li>Any "date/time" type (Date, Time, Timestamp) can be coerced into any other "date/time" type.</li> + <li>Any "date/time" type can be coerced into a Long type, representing the number of milliseconds since epoch (Midnight GMT, January 1, 1970).</li> + <li>Any "date/time" type can be coerced into a String. The format of the String is whatever DateFormat is configured for the corresponding + property (Date Format, Time Format, Timestamp Format property).</li> + </ul> + + <p> + If none of the above rules apply when attempting to coerce a value from one data type to another, the coercion will fail and an Exception + will be thrown. 
+ </p> + + + <h2>Examples</h2> + + <p> + As an example, consider that the following JSON is read: + </p> +<code> +<pre> +[{ + "id": 17, + "name": "John", + "child": { + "id": "1" + }, + "dob": "10-29-1982", + "siblings": [ + { "name": "Jeremy", "id": 4 }, + { "name": "Julia", "id": 8} + ] + }, + { + "id": 98, + "name": "Jane", + "child": { + "id": 2 + }, + "dob": "08-30-1984", + "gender": "F", + "siblingIds": [], + "siblings": [] + }] +</pre> +</code> + + <p> + Also, consider that the schema that is configured for this JSON is as follows (assuming that the AvroSchemaRegistry + Controller Service is chosen to denote the Schema): + </p> + +<code> +<pre> +{ + "namespace": "nifi", + "name": "person", + "type": "record", + "fields": [ + { "name": "id", "type": "int" }, + { "name": "name", "type": "string" }, + { "name": "gender", "type": "string" }, + { "name": "dob", "type": { + "type": "int", + "logicalType": "date" + }}, + { "name": "siblings", "type": { + "type": "array", + "items": { + "type": "record", + "fields": [ + { "name": "name", "type": "string" } + ] + } + }} + ] +} +</pre> +</code> + + <p> + Let us also assume that this Controller Service is configured with the "Date Format" property set to "MM-dd-yyyy", as this + matches the date format used for our JSON data. This will result in the JSON creating two separate records, because the root + element is a JSON array with two elements. 
+ </p> + + <p> + The first Record will consist of the following values: + </p> + + <table> + <tr> + <th>Field Name</th> + <th>Field Value</th> + </tr> + <tr> + <td>id</td> + <td>17</td> + </tr> + <tr> + <td>name</td> + <td>John</td> + </tr> + <tr> + <td>gender</td> + <td><i>null</i></td> + </tr> + <tr> + <td>dob</td> + <td>10-29-1982</td> + </tr> + <tr> + <td>siblings</td> + <td> + <i>array with two elements, each of which is itself a Record:</i> + <br /> + <table> + <tr> + <th>Field Name</th> + <th>Field Value</th> + </tr> + <tr> + <td>name</td> + <td>Jeremy</td> + </tr> + </table> + <br /> + <i>and:</i> + <br /> + <table> + <tr> + <th>Field Name</th> + <th>Field Value</th> + </tr> + <tr> + <td>name</td> + <td>Julia</td> + </tr> + </table> + </td> + </tr> + </table> + + <p> + The second Record will consist of the following values: + </p> + + <table> + <tr> + <th>Field Name</th> + <th>Field Value</th> + </tr> + <tr> + <td>id</td> + <td>98</td> + </tr> + <tr> + <td>name</td> + <td>Jane</td> + </tr> + <tr> + <td>gender</td> + <td>F</td> + </tr> + <tr> + <td>dob</td> + <td>08-30-1984</td> + </tr> + <tr> + <td>siblings</td> + <td><i>empty array</i></td> + </tr> + </table> - <ul> - <li><b>string</b></li> - <li><b>boolean</b></li> - <li><b>byte</b></li> - <li><b>char</b></li> - <li><b>short</b></li> - <li><b>int</b></li> - <li><b>bigint</b></li> - <li><b>long</b></li> - <li><b>float</b></li> - <li><b>double</b></li> - <li><b>date</b> - A date with no time field. By default, the format used is <code>yyyy-MM-dd</code>. 
This can be overridden - by adding a colon (:) followed by the desired format. For example: <code>time:hh:mm:ss a</code>. The format to use is - that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> - SimpleDateFormat Patterns</a> for more information). - </li> - <li><b>timestamp</b> - A field that represents both a date and time. By default, the format used is - <code>yyyy-MM-dd HH:mm:ss</code>. This can be overridden by adding a colon (:) followed by the desired format. For example: - <code>MM/dd/yyyy hh:mm:ss a</code>. The format to use is - that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> - SimpleDateFormat Patterns</a> for more information).</li> - <li><b>object</b> - The value will be returned as a <code>Map<String, Object></code>. The types of the values in the Map - are always inferred. The type used for the values may not be the same for each record. For example, consider the following - JSON array: - <br /><br /> - <code> - [{ - id: 17, - name: "John", - child: { - id: "1" - }, - siblingIds: [4, "8"] - }, - <br />{ - id: 98, - name: "Jane", - child: { - id: 2 - }, - siblingIds: [] - }] - </code> - <br /><br /> - In this case, the <code>child</code> element would be inferred to be of type <code>object</code>. Since nested types - are inferred on a per-record basis, for the first record, the <code>child</code> field would return a <code>Map</code> - where the value of the <code>id</code> entry is a <code>string</code>. However, for the second record, the <code>child</code> - field would return a <code>Map</code> where the value of the <code>id</code> entry is an <code>int</code>. - <br /> - Moreover, the <code>siblingIds</code> of the John will be an <code>array</code> where the first element is an <code>int</code> - and the second element is a <code>string</code>. 
The <code>siblingIds</code> of Jane will be an empty array. - </li> - <li><b>array</b> - An array of values. The types of the values are always inferred and may not be the same for each element - in the array, or for two arrays from different JSON objects.</li> - </ul> </body> </html> http://git-wip-us.apache.org/repos/asf/nifi/blob/68c592ea/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/avro/TestAvroRecordReader.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/avro/TestAvroRecordReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/avro/TestAvroRecordReader.java index 2ec3441..dfbd721 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/avro/TestAvroRecordReader.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/avro/TestAvroRecordReader.java @@ -23,15 +23,20 @@ import static org.junit.Assert.assertNull; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.TimeZone; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; @@ -52,46 +57,100 @@ import 
org.junit.Test; public class TestAvroRecordReader { + + @Test + public void testLogicalTypes() throws IOException, ParseException, MalformedRecordException { + final Schema schema = new Schema.Parser().parse(new File("src/test/resources/avro/logical-types.avsc")); + + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + + final String expectedTime = "2017-04-04 14:20:33.000"; + final DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); + df.setTimeZone(TimeZone.getTimeZone("gmt")); + final long timeLong = df.parse(expectedTime).getTime(); + + final long secondsSinceMidnight = 33 + (20 * 60) + (14 * 60 * 60); + final long millisSinceMidnight = secondsSinceMidnight * 1000L; + + + final byte[] serialized; + final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema); + try (final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter); + final DataFileWriter<GenericRecord> writer = dataFileWriter.create(schema, baos)) { + + final GenericRecord record = new GenericData.Record(schema); + record.put("timeMillis", millisSinceMidnight); + record.put("timeMicros", millisSinceMidnight * 1000L); + record.put("timestampMillis", timeLong); + record.put("timestampMicros", timeLong * 1000L); + record.put("date", 17261); + + writer.append(record); + writer.flush(); + + serialized = baos.toByteArray(); + } + + try (final InputStream in = new ByteArrayInputStream(serialized)) { + final AvroRecordReader reader = new AvroRecordReader(in); + final RecordSchema recordSchema = reader.getSchema(); + + assertEquals(RecordFieldType.TIME, recordSchema.getDataType("timeMillis").get().getFieldType()); + assertEquals(RecordFieldType.TIME, recordSchema.getDataType("timeMicros").get().getFieldType()); + assertEquals(RecordFieldType.TIMESTAMP, recordSchema.getDataType("timestampMillis").get().getFieldType()); + assertEquals(RecordFieldType.TIMESTAMP, recordSchema.getDataType("timestampMicros").get().getFieldType()); + 
assertEquals(RecordFieldType.DATE, recordSchema.getDataType("date").get().getFieldType()); + + final Record record = reader.nextRecord(); + assertEquals(new java.sql.Time(millisSinceMidnight), record.getValue("timeMillis")); + assertEquals(new java.sql.Time(millisSinceMidnight), record.getValue("timeMicros")); + assertEquals(new java.sql.Timestamp(timeLong), record.getValue("timestampMillis")); + assertEquals(new java.sql.Timestamp(timeLong), record.getValue("timestampMicros")); + assertEquals(new java.sql.Date(timeLong).toString(), record.getValue("date").toString()); + } + } + @Test + @SuppressWarnings({"unchecked", "rawtypes"}) public void testDataTypes() throws IOException, MalformedRecordException { final List<Field> accountFields = new ArrayList<>(); - accountFields.add(new Field("accountId", Schema.create(Type.LONG), null, null)); - accountFields.add(new Field("accountName", Schema.create(Type.STRING), null, null)); + accountFields.add(new Field("accountId", Schema.create(Type.LONG), null, (Object) null)); + accountFields.add(new Field("accountName", Schema.create(Type.STRING), null, (Object) null)); final Schema accountSchema = Schema.createRecord("account", null, null, false); accountSchema.setFields(accountFields); final List<Field> catFields = new ArrayList<>(); - catFields.add(new Field("catTailLength", Schema.create(Type.INT), null, null)); - catFields.add(new Field("catName", Schema.create(Type.STRING), null, null)); + catFields.add(new Field("catTailLength", Schema.create(Type.INT), null, (Object) null)); + catFields.add(new Field("catName", Schema.create(Type.STRING), null, (Object) null)); final Schema catSchema = Schema.createRecord("cat", null, null, false); catSchema.setFields(catFields); final List<Field> dogFields = new ArrayList<>(); - dogFields.add(new Field("dogTailLength", Schema.create(Type.INT), null, null)); - dogFields.add(new Field("dogName", Schema.create(Type.STRING), null, null)); + dogFields.add(new Field("dogTailLength", 
Schema.create(Type.INT), null, (Object) null)); + dogFields.add(new Field("dogName", Schema.create(Type.STRING), null, (Object) null)); final Schema dogSchema = Schema.createRecord("dog", null, null, false); dogSchema.setFields(dogFields); final List<Field> fields = new ArrayList<>(); - fields.add(new Field("name", Schema.create(Type.STRING), null, null)); - fields.add(new Field("age", Schema.create(Type.INT), null, null)); - fields.add(new Field("balance", Schema.create(Type.DOUBLE), null, null)); - fields.add(new Field("rate", Schema.create(Type.FLOAT), null, null)); - fields.add(new Field("debt", Schema.create(Type.BOOLEAN), null, null)); - fields.add(new Field("nickname", Schema.create(Type.NULL), null, null)); - fields.add(new Field("binary", Schema.create(Type.BYTES), null, null)); - fields.add(new Field("fixed", Schema.createFixed("fixed", null, null, 5), null, null)); - fields.add(new Field("map", Schema.createMap(Schema.create(Type.STRING)), null, null)); - fields.add(new Field("array", Schema.createArray(Schema.create(Type.LONG)), null, null)); - fields.add(new Field("account", accountSchema, null, null)); + fields.add(new Field("name", Schema.create(Type.STRING), null, (Object) null)); + fields.add(new Field("age", Schema.create(Type.INT), null, (Object) null)); + fields.add(new Field("balance", Schema.create(Type.DOUBLE), null, (Object) null)); + fields.add(new Field("rate", Schema.create(Type.FLOAT), null, (Object) null)); + fields.add(new Field("debt", Schema.create(Type.BOOLEAN), null, (Object) null)); + fields.add(new Field("nickname", Schema.create(Type.NULL), null, (Object) null)); + fields.add(new Field("binary", Schema.create(Type.BYTES), null, (Object) null)); + fields.add(new Field("fixed", Schema.createFixed("fixed", null, null, 5), null, (Object) null)); + fields.add(new Field("map", Schema.createMap(Schema.create(Type.STRING)), null, (Object) null)); + fields.add(new Field("array", Schema.createArray(Schema.create(Type.LONG)), null, 
(Object) null)); + fields.add(new Field("account", accountSchema, null, (Object) null)); fields.add(new Field("desiredbalance", Schema.createUnion( // test union of NULL and other type with no value Arrays.asList(Schema.create(Type.NULL), Schema.create(Type.DOUBLE))), - null, null)); + null, (Object) null)); fields.add(new Field("dreambalance", Schema.createUnion( // test union of NULL and other type with a value Arrays.asList(Schema.create(Type.NULL), Schema.create(Type.DOUBLE))), - null, null)); - fields.add(new Field("favAnimal", Schema.createUnion(Arrays.asList(catSchema, dogSchema)), null, null)); - fields.add(new Field("otherFavAnimal", Schema.createUnion(Arrays.asList(catSchema, dogSchema)), null, null)); + null, (Object) null)); + fields.add(new Field("favAnimal", Schema.createUnion(Arrays.asList(catSchema, dogSchema)), null, (Object) null)); + fields.add(new Field("otherFavAnimal", Schema.createUnion(Arrays.asList(catSchema, dogSchema)), null, (Object) null)); final Schema schema = Schema.createRecord("record", null, null, false); schema.setFields(fields); @@ -103,6 +162,12 @@ public class TestAvroRecordReader { map.put("greeting", "hello"); map.put("salutation", "good-bye"); + final List<RecordField> mapFields = new ArrayList<>(); + mapFields.add(new RecordField("greeting", RecordFieldType.STRING.getDataType())); + mapFields.add(new RecordField("salutation", RecordFieldType.STRING.getDataType())); + final RecordSchema mapSchema = new SimpleRecordSchema(mapFields); + final Record expectedRecord = new MapRecord(mapSchema, (Map) map); + final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema); try (final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter); final DataFileWriter<GenericRecord> writer = dataFileWriter.create(schema, baos)) { @@ -168,9 +233,9 @@ public class TestAvroRecordReader { assertEquals(0.045F, values[3]); assertEquals(false, values[4]); assertEquals(null, values[5]); - 
assertArrayEquals("binary".getBytes(StandardCharsets.UTF_8), (byte[]) values[6]); - assertArrayEquals("fixed".getBytes(StandardCharsets.UTF_8), (byte[]) values[7]); - assertEquals(map, values[8]); + assertArrayEquals(toObjectArray("binary".getBytes(StandardCharsets.UTF_8)), (Object[]) values[6]); + assertArrayEquals(toObjectArray("fixed".getBytes(StandardCharsets.UTF_8)), (Object[]) values[7]); + assertEquals(expectedRecord, values[8]); assertArrayEquals(new Object[] {1L, 2L}, (Object[]) values[9]); final Map<String, Object> accountValues = new HashMap<>(); @@ -215,6 +280,14 @@ public class TestAvroRecordReader { } } + private Object[] toObjectArray(final byte[] bytes) { + final Object[] array = new Object[bytes.length]; + for (int i = 0; i < bytes.length; i++) { + array[i] = Byte.valueOf(bytes[i]); + } + return array; + } + public static enum Status { GOOD, BAD; }
