http://git-wip-us.apache.org/repos/asf/nifi/blob/a88d3bfa/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/text/FreeFormTextWriter.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/text/FreeFormTextWriter.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/text/FreeFormTextWriter.java new file mode 100644 index 0000000..781f41f --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/text/FreeFormTextWriter.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.text; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.Charset; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import org.apache.nifi.components.PropertyValue; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.serialization.RecordSetWriter; +import org.apache.nifi.serialization.WriteResult; +import org.apache.nifi.serialization.record.Record; +import org.apache.nifi.serialization.record.RecordSchema; +import org.apache.nifi.serialization.record.RecordSet; + +public class FreeFormTextWriter implements RecordSetWriter { + private static final byte NEW_LINE = (byte) '\n'; + private final PropertyValue propertyValue; + private final Charset charset; + + public FreeFormTextWriter(final PropertyValue textPropertyValue, final Charset characterSet) { + propertyValue = textPropertyValue; + charset = characterSet; + } + + @Override + public WriteResult write(final RecordSet recordSet, final OutputStream out) throws IOException { + int count = 0; + + try { + final RecordSchema schema = recordSet.getSchema(); + final String[] colNames = getColumnNames(schema); + + Record record; + while ((record = recordSet.next()) != null) { + count++; + write(record, out, colNames); + } + } catch (final Exception e) { + throw new ProcessException(e); + } + + return WriteResult.of(count, Collections.emptyMap()); + } + + private String[] getColumnNames(final RecordSchema schema) { + final int numCols = schema.getFieldCount(); + final String[] columnNames = new String[numCols]; + for (int i = 0; i < numCols; i++) { + columnNames[i] = schema.getField(i).getFieldName(); + } + + return columnNames; + } + + @Override + public WriteResult write(final Record record, final OutputStream out) throws IOException { + write(record, out, getColumnNames(record.getSchema())); + return WriteResult.of(1, Collections.emptyMap()); + } + + private void write(final Record 
record, final OutputStream out, final String[] columnNames) throws IOException { + final int numCols = columnNames.length; + final Map<String, String> values = new HashMap<>(numCols); + for (int i = 0; i < numCols; i++) { + final String columnName = columnNames[i]; + final String columnValue = record.getAsString(columnName); + values.put(columnName, columnValue); + } + + final String evaluated = propertyValue.evaluateAttributeExpressions(values).getValue(); + out.write(evaluated.getBytes(charset)); + out.write(NEW_LINE); + } + + @Override + public String getMimeType() { + return "text/plain"; + } +}
http://git-wip-us.apache.org/repos/asf/nifi/blob/a88d3bfa/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService new file mode 100644 index 0000000..628dbe5 --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/META-INF/services/org.apache.nifi.controller.ControllerService @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +org.apache.nifi.avro.AvroReader +org.apache.nifi.avro.AvroRecordSetWriter + +org.apache.nifi.json.JsonTreeReader +org.apache.nifi.json.JsonPathReader +org.apache.nifi.json.JsonRecordSetWriter + +org.apache.nifi.csv.CSVReader +org.apache.nifi.csv.CSVRecordSetWriter + +org.apache.nifi.grok.GrokReader + +org.apache.nifi.text.FreeFormTextRecordSetWriter \ No newline at end of file http://git-wip-us.apache.org/repos/asf/nifi/blob/a88d3bfa/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/default-grok-patterns.txt ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/default-grok-patterns.txt b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/default-grok-patterns.txt new file mode 100644 index 0000000..4b110e8 --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/default-grok-patterns.txt @@ -0,0 +1,115 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + + +# Log Levels +LOGLEVEL ([Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?)|FINE|FINER|FINEST|CONFIG + +# Syslog Dates: Month Day HH:MM:SS +SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME} +PROG (?:[\w._/%-]+) +SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])? +SYSLOGHOST %{IPORHOST} +SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}> +HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT} + +# Months: January, Feb, 3, 03, 12, December +MONTH \b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b +MONTHNUM (?:0?[1-9]|1[0-2]) +MONTHNUM2 (?:0[1-9]|1[0-2]) +MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]) + +# Days: Monday, Tue, Thu, etc... +DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?) + +# Years? +YEAR (?>\d\d){1,2} +HOUR (?:2[0123]|[01]?[0-9]) +MINUTE (?:[0-5][0-9]) +# '60' is a leap second in most time standards and thus is valid. +SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?) +TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9]) + +# datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it) +DATE_US_MONTH_DAY_YEAR %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR} +DATE_US_YEAR_MONTH_DAY %{YEAR}[/-]%{MONTHNUM}[/-]%{MONTHDAY} +DATE_US %{DATE_US_MONTH_DAY_YEAR}|%{DATE_US_YEAR_MONTH_DAY} +DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR} +ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE})) +ISO8601_SECOND (?:%{SECOND}|60) +TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}? 
+DATE %{DATE_US}|%{DATE_EU} +DATESTAMP %{DATE}[- ]%{TIME} +TZ (?:[PMCE][SD]T|UTC) +DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ} +DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE} +DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR} +DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND} + + +POSINT \b(?:[1-9][0-9]*)\b +NONNEGINT \b(?:[0-9]+)\b +WORD \b\w+\b +NOTSPACE \S+ +SPACE \s* +DATA .*? +GREEDYDATA .* +QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``)) +UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12} + +USERNAME [a-zA-Z0-9._-]+ +USER %{USERNAME} +INT (?:[+-]?(?:[0-9]+)) +BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+))) +NUMBER (?:%{BASE10NUM}) +BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+)) +BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b + +# Networking +MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC}) +CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4}) +WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}) +COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2}) +IPV6 
((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5 ]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)? +IPV4 (?<![0-9])(?:(?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2}))(?![0-9]) +IP (?:%{IPV6}|%{IPV4}) +HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b) +HOST %{HOSTNAME} +IPORHOST (?:%{HOSTNAME}|%{IP}) +HOSTPORT %{IPORHOST}:%{POSINT} + +# paths +PATH (?:%{UNIXPATH}|%{WINPATH}) +UNIXPATH (?>/(?>[\w_%!$@:.,-]+|\\.)*)+ +TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+)) +WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+ +URIPROTO [A-Za-z]+(\+[A-Za-z+]+)? +URIHOST %{IPORHOST}(?::%{POSINT:port})? +# uripath comes loosely from RFC1738, but mostly from what Firefox +# doesn't turn into %XX +URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%_\-]*)+ +#URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)? 
+URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]]* +URIPATHPARAM %{URIPATH}(?:%{URIPARAM})? +URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})? + +# Shortcuts +QS %{QUOTEDSTRING} + +# Log formats +SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}: +COMMONAPACHELOG %{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-) +COMBINEDAPACHELOG %{COMMONAPACHELOG} %{QS:referrer} %{QS:agent} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/nifi/blob/a88d3bfa/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.csv.CSVReader/additionalDetails.html ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.csv.CSVReader/additionalDetails.html b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.csv.CSVReader/additionalDetails.html new file mode 100644 index 0000000..e6dfd0c --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.csv.CSVReader/additionalDetails.html @@ -0,0 +1,185 @@ +<!DOCTYPE html> +<html lang="en"> + <!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> + <head> + <meta charset="utf-8"/> + <title>CSVReader</title> + <link rel="stylesheet" href="../../css/component-usage.css" type="text/css"/> + </head> + + <body> + <p> + The CSVReader Controller Service, expects input in such a way that the first line of a FlowFile specifies the name of + each column in the data. Following the first line, the rest of the FlowFile is expected to be valid CSV data from which + to form appropriate Records. By default, the schema for a FlowFile is inferred by extracting the name of each column from + the first line of the CSV and assumes that all columns are of type <code>string</code>. Of course, we may want to treat some + columns as a data type other than <code>string</code>. This can be accomplished by adding a user-defined property where the + name of the property is the same as the name of a CSV column and the value of the property is the data type to use. + </p> + + <p> + When specifying a data type for a field, the following values are valid: + </p> + + <ul> + <li><b>string</b></li> + <li><b>boolean</b></li> + <li><b>byte</b></li> + <li><b>char</b></li> + <li><b>short</b></li> + <li><b>int</b></li> + <li><b>bigint</b></li> + <li><b>long</b></li> + <li><b>float</b></li> + <li><b>double</b></li> + <li><b>date</b> - A date with no time field. By default, the format used is <code>yyyy-MM-dd</code>. 
This can be overridden + by adding a colon (:) followed by the desired format. For example: <code>date:MM/dd/yyyy</code>. The format to use is + that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> + SimpleDateFormat Patterns</a> for more information). + </li> + <li><b>time</b> - A time with no date field. By default, the format used is <code>HH:mm:ss</code>. This can be overridden + by adding a colon (:) followed by the desired format. For example: <code>time:hh:mm:ss a</code>. The format to use is + that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> + SimpleDateFormat Patterns</a> for more information). + </li> + <li><b>timestamp</b> - A field that represents both a date and time. By default, the format used is + <code>yyyy-MM-dd HH:mm:ss</code>. This can be overridden by adding a colon (:) followed by the desired format. For example: + <code>timestamp:MM/dd/yyyy hh:mm:ss a</code>.
The format to use is + that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> + SimpleDateFormat Patterns</a> for more information).</li> + <li><b>object</b> - <i>This data type does not apply to CSV data.</i></li> + <li><b>array</b> - <i>This data type does not apply to CSV data.</i></li> + </ul> + + <p> + As an example, consider a FlowFile whose contents consist of the following: + </p> + + <code> + id, name, balance, notes<br /> + 1, John, 48.23, "Our very<br /> +first customer!"<br /> + 2, Jane, 1245.89,<br /> + 3, Frank Franklin, "48481.29",<br /> + </code> + + <p> + Additionally, let's consider that this Controller Service is configured with the following user-defined properties: + </p> + + <table> + <head> + <th>Property Name</th> + <th>Property Value</th> + </head> + <body> + <tr> + <td>balance</td> + <td><code>float</code></td> + </tr> + </body> + </table> + + <p> + In this case, the result will be that this FlowFile consists of 3 different records.
The first record will contain the following values: + </p> + + <table> + <head> + <th>Field Name</th> + <th>Field Value</th> + </head> + <body> + <tr> + <td>id</td> + <td>1</td> + </tr> + <tr> + <td>name</td> + <td>John</td> + </tr> + <tr> + <td>balance</td> + <td>48.23</td> + </tr> + <tr> + <td>notes</td> + <td>Our very<br />first customer!</td> + </tr> + </body> + </table> + + <p> + The second record will contain the following values: + </p> + + <table> + <head> + <th>Field Name</th> + <th>Field Value</th> + </head> + <body> + <tr> + <td>id</td> + <td>2</td> + </tr> + <tr> + <td>name</td> + <td>Jane</td> + </tr> + <tr> + <td>balance</td> + <td>1245.89</td> + </tr> + <tr> + <td>notes</td> + <td></td> + </tr> + </body> + </table> + + <p> + The third record will contain the following values: + </p> + + <table> + <head> + <th>Field Name</th> + <th>Field Value</th> + </head> + <body> + <tr> + <td>id</td> + <td>3</td> + </tr> + <tr> + <td>name</td> + <td>Frank Franklin</td> + </tr> + <tr> + <td>balance</td> + <td>48481.29</td> + </tr> + <tr> + <td>notes</td> + <td></td> + </tr> + </body> + </table> + + + </body> +</html> http://git-wip-us.apache.org/repos/asf/nifi/blob/a88d3bfa/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.grok.GrokReader/additionalDetails.html ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.grok.GrokReader/additionalDetails.html b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.grok.GrokReader/additionalDetails.html new file mode 100644 index 0000000..3a41f47 --- /dev/null +++ 
b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.grok.GrokReader/additionalDetails.html @@ -0,0 +1,396 @@ +<!DOCTYPE html> +<html lang="en"> + <!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> + <head> + <meta charset="utf-8"/> + <title>GrokReader</title> + <link rel="stylesheet" href="../../css/component-usage.css" type="text/css"/> + </head> + + <body> + <p> + The GrokReader Controller Service provides a means for parsing and structuring input that is + made up of unstructured text, such as log files. Grok allows users to add a naming construct to + Regular Expressions such that they can be composed in order to create expressions that are easier + to manage and work with. This Controller Service consists of one Required Property and one Optional + Property. The Optional Property is named <code>Grok Pattern File</code> and specifies the filename of + a file that contains Grok Patterns that can be used for parsing log data. If not specified, a default + patterns file will be used. Its contents are provided below. + </p> + + <p> + The Required Property is named <code>Grok Expression</code> and specifies how to parse each + incoming record.
This is done by providing a Grok Expression such as: + <code>%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} \[%{DATA:thread}\] %{DATA:class} %{GREEDYDATA:message}</code>. + This Expression will parse Apache NiFi log messages. This is accomplished by specifying that a line begins + with the <code>TIMESTAMP_ISO8601</code> pattern (which is a Regular Expression defined in the default + Grok Patterns File). The value that matches this pattern is then given the name <code>timestamp</code>. As a result, + the value that matches this pattern will be assigned to a field named <code>timestamp</code> in the Record that + is produced by this Controller Service. + </p> + + <p> + If a line is encountered in the FlowFile that does not match the configured Grok Expression, it is assumed that the line + is part of the previous message. If the line is the start of a stack trace, then the entire stack trace is read in and assigned + to a field named <code>STACK_TRACE</code>. Otherwise, the line is appended to the last field defined in the Grok Expression. This + is done because typically the last field is a 'message' type of field, which can consist of new-lines. + </p> + + <p> + By default, all fields that are extracted are considered to be of type <code>string</code>. This can be overridden + by adding a user-defined property where the name of the property matches the name of the field that is present in the + configured Grok Expression. The value of the user-defined property is the data type to use. + When specifying a data type for a field, the following values are valid: + </p> + + <ul> + <li><b>string</b></li> + <li><b>boolean</b></li> + <li><b>byte</b></li> + <li><b>char</b></li> + <li><b>short</b></li> + <li><b>int</b></li> + <li><b>bigint</b></li> + <li><b>long</b></li> + <li><b>float</b></li> + <li><b>double</b></li> + <li><b>date</b> - A date with no time field. By default, the format used is <code>yyyy-MM-dd</code>.
This can be overridden + by adding a colon (:) followed by the desired format. For example: <code>date:MM/dd/yyyy</code>. The format to use is + that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> + SimpleDateFormat Patterns</a> for more information). + </li> + <li><b>time</b> - A time with no date field. By default, the format used is <code>HH:mm:ss</code>. This can be overridden + by adding a colon (:) followed by the desired format. For example: <code>time:hh:mm:ss a</code>. The format to use is + that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> + SimpleDateFormat Patterns</a> for more information). + </li> + <li><b>timestamp</b> - A field that represents both a date and time. By default, the format used is + <code>yyyy-MM-dd HH:mm:ss</code>. This can be overridden by adding a colon (:) followed by the desired format. For example: + <code>timestamp:MM/dd/yyyy hh:mm:ss a</code>.
The format to use is + that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> + SimpleDateFormat Patterns</a> for more information).</li> + <li><b>object</b> - <i>This data type does not apply to Grok data.</i></li> + <li><b>array</b> - <i>This data type does not apply to Grok data.</i></li> + </ul> + + + <h2> + Examples + </h2> + + <p> + As an example, consider that this Controller Service is configured with the following properties: + </p> + + <table> + <head> + <th>Property Name</th> + <th>Property Value</th> + </head> + <body> + <tr> + <td>Grok Expression</td> + <td><code>%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} \[%{DATA:thread}\] %{DATA:class} %{GREEDYDATA:message}</code></td> + </tr> + </body> + </table> + + <p> + Additionally, let's consider a FlowFile whose contents consist of the following: + </p> + + <code><pre> +2016-08-04 13:26:32,473 INFO [Leader Election Notification Thread-1] o.a.n.c.l.e.CuratorLeaderElectionManager org.apache.nifi.controller.leader.election.CuratorLeaderElectionManager$ElectionListener@1fa27ea5 has been interrupted; no longer leader for role 'Cluster Coordinator' +2016-08-04 13:26:32,474 ERROR [Leader Election Notification Thread-2] o.apache.nifi.controller.FlowController One +Two +Three +org.apache.nifi.exception.UnitTestException: Testing to ensure we are able to capture stack traces + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at
org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [na:1.8.0_45] + at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) [na:1.8.0_45] + at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) [na:1.8.0_45] + at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) [na:1.8.0_45] + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [na:1.8.0_45] + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [na:1.8.0_45] + at java.lang.Thread.run(Thread.java:745) [na:1.8.0_45] +Caused by: org.apache.nifi.exception.UnitTestException: Testing to ensure we are able to capture stack traces + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + ... 
12 common frames omitted +2016-08-04 13:26:35,475 WARN [Curator-Framework-0] org.apache.curator.ConnectionState Connection attempt unsuccessful after 3008 (greater than max timeout of 3000). Resetting connection and trying again with a new connection. + </pre></code> + + <p> + In this case, the result will be that this FlowFile consists of 3 different records. The first record will contain the following values: + </p> + + <table> + <head> + <th>Field Name</th> + <th>Field Value</th> + </head> + <body> + <tr> + <td>timestamp</td> + <td>2016-08-04 13:26:32,473</td> + </tr> + <tr> + <td>level</td> + <td>INFO</td> + </tr> + <tr> + <td>thread</td> + <td>Leader Election Notification Thread-1</td> + </tr> + <tr> + <td>class</td> + <td>o.a.n.c.l.e.CuratorLeaderElectionManager</td> + </tr> + <tr> + <td>message</td> + <td>org.apache.nifi.controller.leader.election.CuratorLeaderElectionManager$ElectionListener@1fa27ea5 has been interrupted; no longer leader for role 'Cluster Coordinator'</td> + </tr> + <tr> + <td>STACK_TRACE</td> + <td><i>null</i></td> + </tr> + </body> + </table> + + <p> + The second record will contain the following values: + </p> + + <table> + <head> + <th>Field Name</th> + <th>Field Value</th> + </head> + <body> + <tr> + <td>timestamp</td> + <td>2016-08-04 13:26:32,474</td> + </tr> + <tr> + <td>level</td> + <td>ERROR</td> + </tr> + <tr> + <td>thread</td> + <td>Leader Election Notification Thread-2</td> + </tr> + <tr> + <td>class</td> + <td>o.apache.nifi.controller.FlowController</td> + </tr> + <tr> + <td>message</td> + <td>One<br /> +Two<br /> +Three</td> + </tr> + <tr> + <td>STACK_TRACE</td> + <td> +<pre> +org.apache.nifi.exception.UnitTestException: Testing to ensure we are able to capture stack traces + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at 
org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [na:1.8.0_45] + at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) [na:1.8.0_45] + at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) [na:1.8.0_45] + at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) [na:1.8.0_45] + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [na:1.8.0_45] + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [na:1.8.0_45] + at java.lang.Thread.run(Thread.java:745) [na:1.8.0_45] +Caused by: org.apache.nifi.exception.UnitTestException: Testing to ensure we are able to capture stack traces + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at 
org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + at org.apache.nifi.cluster.coordination.node.NodeClusterCoordinator.getElectedActiveCoordinatorAddress(NodeClusterCoordinator.java:185) + ... 12 common frames omitted +</pre></td> + </tr> + </body> + </table> + + <p> + The third record will contain the following values: + </p> + + <table> + <head> + <th>Field Name</th> + <th>Field Value</th> + </head> + <body> + <tr> + <td>timestamp</td> + <td>2016-08-04 13:26:35,475</td> + </tr> + <tr> + <td>level</td> + <td>WARN</td> + </tr> + <tr> + <td>thread</td> + <td>Curator-Framework-0</td> + </tr> + <tr> + <td>class</td> + <td>org.apache.curator.ConnectionState</td> + </tr> + <tr> + <td>message</td> + <td>Connection attempt unsuccessful after 3008 (greater than max timeout of 3000). Resetting connection and trying again with a new connection.</td> + </tr> + <tr> + <td>STACK_TRACE</td> + <td><i>null</i></td> + </tr> + </body> + </table> + + + <h2> + </h2> + + <h2>Default Patterns</h2> + + <p> + The following patterns are available in the default Grok Pattern File: + </p> + + <code> + <pre> +# Log Levels +LOGLEVEL ([Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?)|FINE|FINER|FINEST|CONFIG + +# Syslog Dates: Month Day HH:MM:SS +SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME} +PROG (?:[\w._/%-]+) +SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])? 
+SYSLOGHOST %{IPORHOST} +SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}> +HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT} + +# Months: January, Feb, 3, 03, 12, December +MONTH \b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b +MONTHNUM (?:0?[1-9]|1[0-2]) +MONTHNUM2 (?:0[1-9]|1[0-2]) +MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]) + +# Days: Monday, Tue, Thu, etc... +DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?) + +# Years? +YEAR (?>\d\d){1,2} +HOUR (?:2[0123]|[01]?[0-9]) +MINUTE (?:[0-5][0-9]) +# '60' is a leap second in most time standards and thus is valid. +SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?) +TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9]) + +# datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it) +DATE_US_MONTH_DAY_YEAR %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR} +DATE_US_YEAR_MONTH_DAY %{YEAR}[/-]%{MONTHNUM}[/-]%{MONTHDAY} +DATE_US %{DATE_US_MONTH_DAY_YEAR}|%{DATE_US_YEAR_MONTH_DAY} +DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR} +ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE})) +ISO8601_SECOND (?:%{SECOND}|60) +TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}? +DATE %{DATE_US}|%{DATE_EU} +DATESTAMP %{DATE}[- ]%{TIME} +TZ (?:[PMCE][SD]T|UTC) +DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ} +DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE} +DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR} +DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND} + + +POSINT \b(?:[1-9][0-9]*)\b +NONNEGINT \b(?:[0-9]+)\b +WORD \b\w+\b +NOTSPACE \S+ +SPACE \s* +DATA .*? 
+GREEDYDATA .* +QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``)) +UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12} + +USERNAME [a-zA-Z0-9._-]+ +USER %{USERNAME} +INT (?:[+-]?(?:[0-9]+)) +BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+))) +NUMBER (?:%{BASE10NUM}) +BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+)) +BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b + +# Networking +MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC}) +CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4}) +WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}) +COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2}) +IPV6 ((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5 ]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)? 
+IPV4 (?<![0-9])(?:(?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2}))(?![0-9]) +IP (?:%{IPV6}|%{IPV4}) +HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b) +HOST %{HOSTNAME} +IPORHOST (?:%{HOSTNAME}|%{IP}) +HOSTPORT %{IPORHOST}:%{POSINT} + +# paths +PATH (?:%{UNIXPATH}|%{WINPATH}) +UNIXPATH (?>/(?>[\w_%!$@:.,-]+|\\.)*)+ +TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+)) +WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+ +URIPROTO [A-Za-z]+(\+[A-Za-z+]+)? +URIHOST %{IPORHOST}(?::%{POSINT:port})? +# uripath comes loosely from RFC1738, but mostly from what Firefox +# doesn't turn into %XX +URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%_\-]*)+ +#URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)? +URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]]* +URIPATHPARAM %{URIPATH}(?:%{URIPARAM})? +URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})? 
+ +# Shortcuts +QS %{QUOTEDSTRING} + +# Log formats +SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}: +COMMONAPACHELOG %{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-) +COMBINEDAPACHELOG %{COMMONAPACHELOG} %{QS:referrer} %{QS:agent} + </pre> + </code> + + </body> +</html> http://git-wip-us.apache.org/repos/asf/nifi/blob/a88d3bfa/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonPathReader/additionalDetails.html ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonPathReader/additionalDetails.html b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonPathReader/additionalDetails.html new file mode 100644 index 0000000..2b69f7e --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonPathReader/additionalDetails.html @@ -0,0 +1,227 @@ +<!DOCTYPE html> +<html lang="en"> + <!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> + <head> + <meta charset="utf-8"/> + <title>JsonPathReader</title> + <link rel="stylesheet" href="../../css/component-usage.css" type="text/css"/> + </head> + + <body> + <p> + The JsonPathReader Controller Service parses FlowFiles that are in the JSON format. User-defined properties + specify how to extract all relevant fields from the JSON in order to create a row-oriented record. The Controller + Service will not be valid unless at least one JSON Path is provided. Unlike the + <a href="../org.apache.nifi.json.JsonTreeReader/additionalDetails.html">JsonTreeReader</a> Controller Service, this + service will return a record that contains only those fields that have been configured via JSON Path. + </p> + + <p> + If the root of the FlowFile's JSON is a JSON Array, each JSON Object found in that array will be treated as a separate + Record, not as a single record made up of an array. If the root of the FlowFile's JSON is a JSON Object, it will be + evaluated as a single Record. + </p> + + <p> + Supplying a JSON Path is accomplished by adding a user-defined property where the name of the property becomes the name + of the field in the Record that is returned. The value of the property must be a valid JSON Path expression. This JSON Path + will be evaluated against each top-level JSON Object in the FlowFile, and the result will be the value of the field whose + name is specified by the property name. By default, the type of each field is inferred automatically based on the values of + the first JSON Object encountered for the FlowFile. 
This can be overridden by changing the name of the user-defined property + by adding a colon (:) and specifying the data type. For example: <code>balance:double</code> or <code>dob:date:MM/dd/yyyy</code>. + In this case, the data type and optional format are not included in the field name. So for the aforementioned examples, we would + end up with field names <code>balance</code> and <code>dob</code>. + </p> + + <p> + When specifying a data type for a field, the following values are valid: + </p> + + <ul> + <li><b>string</b></li> + <li><b>boolean</b></li> + <li><b>byte</b></li> + <li><b>char</b></li> + <li><b>short</b></li> + <li><b>int</b></li> + <li><b>bigint</b></li> + <li><b>long</b></li> + <li><b>float</b></li> + <li><b>double</b></li> + <li><b>date</b> - A date with no time field. By default, the format used is <code>yyyy-MM-dd</code>. This can be overridden + by adding a colon (:) followed by the desired format. For example: <code>date:MM/dd/yyyy</code>. The format to use is + that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> + SimpleDateFormat Patterns</a> for more information). + </li> + <li><b>time</b> - A time with no date field. By default, the format used is <code>HH:mm:ss</code>. This can be overridden + by adding a colon (:) followed by the desired format. For example: <code>time:hh:mm:ss a</code>. The format to use is + that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> + SimpleDateFormat Patterns</a> for more information). + </li> + <li><b>timestamp</b> - A field that represents both a date and time. By default, the format used is + <code>yyyy-MM-dd HH:mm:ss</code>. This can be overridden by adding a colon (:) followed by the desired format. For example: + <code>timestamp:MM/dd/yyyy hh:mm:ss a</code>. 
The format to use is + that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> + SimpleDateFormat Patterns</a> for more information).</li> + <li><b>object</b> - The value will be returned as a <code>Map&lt;String, Object&gt;</code>. The types of the values in the Map + are always inferred. The type used for the values may not be the same for each record. For example, consider the following + JSON array: + <br /><br /> + <code> + [{ + "id": 17, + "name": "John", + "child": { + "id": "1" + }, + "siblingIds": [4, "8"] + }, + <br />{ + "id": 98, + "name": "Jane", + "child": { + "id": 2 + }, + "siblingIds": [] + }] + </code> + <br /><br /> + In this case, the <code>child</code> element would be inferred to be of type <code>object</code>. Since nested types + are inferred on a per-record basis, for the first record, the <code>child</code> field would return a <code>Map</code> + where the value of the <code>id</code> entry is a <code>string</code>. However, for the second record, the <code>child</code> + field would return a <code>Map</code> where the value of the <code>id</code> entry is an <code>int</code>. + <br /> + Moreover, the <code>siblingIds</code> of John will be an <code>array</code> where the first element is an <code>int</code> + and the second element is a <code>string</code>. The <code>siblingIds</code> of Jane will be an empty array. + </li> + <li><b>array</b> - An array of values. 
The types of the values are always inferred and may not be the same for each element + in the array, or for two arrays from different JSON objects.</li> + </ul> + + + <p> + As an example, consider a FlowFile whose content contains the following JSON: + </p> + + <code> + [{ + "id": 17, + "name": "John", + "child": { + "id": "1" + }, + "siblingIds": [4, "8"] + }, + <br />{ + "id": 98, + "name": "Jane", + "child": { + "id": 2 + }, + "gender": "F", + "siblingIds": [] + }] + </code> + + <p> + If we configure this Controller Service with the following user-defined properties: + + <table> + <thead> + <th>Property Name</th> + <th>Property Value</th> + </thead> + <tbody> + <tr> + <td>id</td> + <td><code>$.id</code></td> + </tr> + <tr> + <td>name</td> + <td><code>$.name</code></td> + </tr> + <tr> + <td>childId:long</td> + <td><code>$.child.id</code></td> + </tr> + <tr> + <td>gender:string</td> + <td><code>$.gender</code></td> + </tr> + </tbody> + </table> + </p> + + <p> + In this case, the FlowFile will generate two Records. 
The first record will consist of the following key/value pairs: + + <table> + <thead> + <th>Field Name</th> + <th>Field Value</th> + </thead> + <tbody> + <tr> + <td>id</td> + <td>17</td> + </tr> + <tr> + <td>name</td> + <td>John</td> + </tr> + <tr> + <td>childId</td> + <td>1</td> + </tr> + <tr> + <td>gender</td> + <td><i>null</i></td> + </tr> + </tbody> + </table> + </p> + + <p> + The second record will consist of the following key/value pairs: + + <table> + <thead> + <th>Field Name</th> + <th>Field Value</th> + </thead> + <tbody> + <tr> + <td>id</td> + <td>98</td> + </tr> + <tr> + <td>name</td> + <td>Jane</td> + </tr> + <tr> + <td>childId</td> + <td>2</td> + </tr> + <tr> + <td>gender</td> + <td>F</td> + </tr> + </tbody> + </table> + </p> + + </body> +</html> http://git-wip-us.apache.org/repos/asf/nifi/blob/a88d3bfa/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonTreeReader/additionalDetails.html ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonTreeReader/additionalDetails.html b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonTreeReader/additionalDetails.html new file mode 100644 index 0000000..7d6be7a --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/resources/docs/org.apache.nifi.json.JsonTreeReader/additionalDetails.html @@ -0,0 +1,102 @@ +<!DOCTYPE html> +<html lang="en"> + <!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. 
See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> + <head> + <meta charset="utf-8"/> + <title>JsonTreeReader</title> + <link rel="stylesheet" href="../../css/component-usage.css" type="text/css"/> + </head> + + <body> + <p> + The JsonTreeReader Controller Service, by default, derives the schema for a FlowFile + based on the first JSON Object in the FlowFile. For each field found, the data type + is inferred. However, the type of a field can be overridden by adding a user-defined property to + the Controller Service. The name of the property should be the same as the name of the + JSON field. The value of the property denotes the data type of the corresponding field. + If no JSON field is found with a matching name, then a field will be added to the schema, + and a <code>null</code> value will be used for any record for which the JSON field + is not present. If a field is found with a matching name, but the type is different, + the Controller Service will attempt to coerce the value into the user-defined type. If unable + to do so, an Exception will be thrown. 
+ </p> + + <p> + When specifying a data type for a field, the following values are valid: + </p> + + <ul> + <li><b>string</b></li> + <li><b>boolean</b></li> + <li><b>byte</b></li> + <li><b>char</b></li> + <li><b>short</b></li> + <li><b>int</b></li> + <li><b>bigint</b></li> + <li><b>long</b></li> + <li><b>float</b></li> + <li><b>double</b></li> + <li><b>date</b> - A date with no time field. By default, the format used is <code>yyyy-MM-dd</code>. This can be overridden + by adding a colon (:) followed by the desired format. For example: <code>date:MM/dd/yyyy</code>. The format to use is + that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> + SimpleDateFormat Patterns</a> for more information). + </li> + <li><b>time</b> - A time with no date field. By default, the format used is <code>HH:mm:ss</code>. This can be overridden + by adding a colon (:) followed by the desired format. For example: <code>time:hh:mm:ss a</code>. The format to use is + that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> + SimpleDateFormat Patterns</a> for more information). + </li> + <li><b>timestamp</b> - A field that represents both a date and time. By default, the format used is + <code>yyyy-MM-dd HH:mm:ss</code>. This can be overridden by adding a colon (:) followed by the desired format. For example: + <code>timestamp:MM/dd/yyyy hh:mm:ss a</code>. The format to use is + that of Java's SimpleDateFormat (see <a href="http://docs.oracle.com/javase/8/docs/api/java/text/SimpleDateFormat.html"> + SimpleDateFormat Patterns</a> for more information).</li> + <li><b>object</b> - The value will be returned as a <code>Map&lt;String, Object&gt;</code>. The types of the values in the Map + are always inferred. The type used for the values may not be the same for each record. 
For example, consider the following + JSON array: + <br /><br /> + <code> + [{ + "id": 17, + "name": "John", + "child": { + "id": "1" + }, + "siblingIds": [4, "8"] + }, + <br />{ + "id": 98, + "name": "Jane", + "child": { + "id": 2 + }, + "siblingIds": [] + }] + </code> + <br /><br /> + In this case, the <code>child</code> element would be inferred to be of type <code>object</code>. Since nested types + are inferred on a per-record basis, for the first record, the <code>child</code> field would return a <code>Map</code> + where the value of the <code>id</code> entry is a <code>string</code>. However, for the second record, the <code>child</code> + field would return a <code>Map</code> where the value of the <code>id</code> entry is an <code>int</code>. + <br /> + Moreover, the <code>siblingIds</code> of John will be an <code>array</code> where the first element is an <code>int</code> + and the second element is a <code>string</code>. The <code>siblingIds</code> of Jane will be an empty array. + </li> + <li><b>array</b> - An array of values. 
The types of the values are always inferred and may not be the same for each element + in the array, or for two arrays from different JSON objects.</li> + </ul> + </body> +</html> http://git-wip-us.apache.org/repos/asf/nifi/blob/a88d3bfa/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/avro/TestAvroRecordReader.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/avro/TestAvroRecordReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/avro/TestAvroRecordReader.java new file mode 100644 index 0000000..2ec3441 --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/avro/TestAvroRecordReader.java @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.avro; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.avro.Schema; +import org.apache.avro.Schema.Field; +import org.apache.avro.Schema.Type; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.DatumWriter; +import org.apache.nifi.serialization.MalformedRecordException; +import org.apache.nifi.serialization.SimpleRecordSchema; +import org.apache.nifi.serialization.record.MapRecord; +import org.apache.nifi.serialization.record.Record; +import org.apache.nifi.serialization.record.RecordField; +import org.apache.nifi.serialization.record.RecordFieldType; +import org.apache.nifi.serialization.record.RecordSchema; +import org.junit.Test; + +public class TestAvroRecordReader { + + @Test + public void testDataTypes() throws IOException, MalformedRecordException { + final List<Field> accountFields = new ArrayList<>(); + accountFields.add(new Field("accountId", Schema.create(Type.LONG), null, null)); + accountFields.add(new Field("accountName", Schema.create(Type.STRING), null, null)); + final Schema accountSchema = Schema.createRecord("account", null, null, false); + accountSchema.setFields(accountFields); + + final List<Field> catFields = new ArrayList<>(); + catFields.add(new Field("catTailLength", Schema.create(Type.INT), null, null)); + catFields.add(new Field("catName", Schema.create(Type.STRING), null, null)); + final Schema 
catSchema = Schema.createRecord("cat", null, null, false); + catSchema.setFields(catFields); + + final List<Field> dogFields = new ArrayList<>(); + dogFields.add(new Field("dogTailLength", Schema.create(Type.INT), null, null)); + dogFields.add(new Field("dogName", Schema.create(Type.STRING), null, null)); + final Schema dogSchema = Schema.createRecord("dog", null, null, false); + dogSchema.setFields(dogFields); + + final List<Field> fields = new ArrayList<>(); + fields.add(new Field("name", Schema.create(Type.STRING), null, null)); + fields.add(new Field("age", Schema.create(Type.INT), null, null)); + fields.add(new Field("balance", Schema.create(Type.DOUBLE), null, null)); + fields.add(new Field("rate", Schema.create(Type.FLOAT), null, null)); + fields.add(new Field("debt", Schema.create(Type.BOOLEAN), null, null)); + fields.add(new Field("nickname", Schema.create(Type.NULL), null, null)); + fields.add(new Field("binary", Schema.create(Type.BYTES), null, null)); + fields.add(new Field("fixed", Schema.createFixed("fixed", null, null, 5), null, null)); + fields.add(new Field("map", Schema.createMap(Schema.create(Type.STRING)), null, null)); + fields.add(new Field("array", Schema.createArray(Schema.create(Type.LONG)), null, null)); + fields.add(new Field("account", accountSchema, null, null)); + fields.add(new Field("desiredbalance", Schema.createUnion( // test union of NULL and other type with no value + Arrays.asList(Schema.create(Type.NULL), Schema.create(Type.DOUBLE))), + null, null)); + fields.add(new Field("dreambalance", Schema.createUnion( // test union of NULL and other type with a value + Arrays.asList(Schema.create(Type.NULL), Schema.create(Type.DOUBLE))), + null, null)); + fields.add(new Field("favAnimal", Schema.createUnion(Arrays.asList(catSchema, dogSchema)), null, null)); + fields.add(new Field("otherFavAnimal", Schema.createUnion(Arrays.asList(catSchema, dogSchema)), null, null)); + + final Schema schema = Schema.createRecord("record", null, null, 
false); + schema.setFields(fields); + + final byte[] source; + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + + final Map<String, String> map = new HashMap<>(); + map.put("greeting", "hello"); + map.put("salutation", "good-bye"); + + final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema); + try (final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter); + final DataFileWriter<GenericRecord> writer = dataFileWriter.create(schema, baos)) { + + final GenericRecord record = new GenericData.Record(schema); + record.put("name", "John"); + record.put("age", 33); + record.put("balance", 1234.56D); + record.put("rate", 0.045F); + record.put("debt", false); + record.put("binary", ByteBuffer.wrap("binary".getBytes(StandardCharsets.UTF_8))); + record.put("fixed", new GenericData.Fixed(Schema.create(Type.BYTES), "fixed".getBytes(StandardCharsets.UTF_8))); + record.put("map", map); + record.put("array", Arrays.asList(1L, 2L)); + record.put("dreambalance", 10_000_000.00D); + + final GenericRecord accountRecord = new GenericData.Record(accountSchema); + accountRecord.put("accountId", 83L); + accountRecord.put("accountName", "Checking"); + record.put("account", accountRecord); + + final GenericRecord catRecord = new GenericData.Record(catSchema); + catRecord.put("catTailLength", 1); + catRecord.put("catName", "Meow"); + record.put("otherFavAnimal", catRecord); + + final GenericRecord dogRecord = new GenericData.Record(dogSchema); + dogRecord.put("dogTailLength", 14); + dogRecord.put("dogName", "Fido"); + record.put("favAnimal", dogRecord); + + writer.append(record); + } + + source = baos.toByteArray(); + + try (final InputStream in = new ByteArrayInputStream(source)) { + final AvroRecordReader reader = new AvroRecordReader(in); + final RecordSchema recordSchema = reader.getSchema(); + assertEquals(15, recordSchema.getFieldCount()); + + assertEquals(RecordFieldType.STRING, 
recordSchema.getDataType("name").get().getFieldType()); + assertEquals(RecordFieldType.INT, recordSchema.getDataType("age").get().getFieldType()); + assertEquals(RecordFieldType.DOUBLE, recordSchema.getDataType("balance").get().getFieldType()); + assertEquals(RecordFieldType.FLOAT, recordSchema.getDataType("rate").get().getFieldType()); + assertEquals(RecordFieldType.BOOLEAN, recordSchema.getDataType("debt").get().getFieldType()); + assertEquals(RecordFieldType.RECORD, recordSchema.getDataType("nickname").get().getFieldType()); + assertEquals(RecordFieldType.ARRAY, recordSchema.getDataType("binary").get().getFieldType()); + assertEquals(RecordFieldType.ARRAY, recordSchema.getDataType("fixed").get().getFieldType()); + assertEquals(RecordFieldType.RECORD, recordSchema.getDataType("map").get().getFieldType()); + assertEquals(RecordFieldType.ARRAY, recordSchema.getDataType("array").get().getFieldType()); + assertEquals(RecordFieldType.RECORD, recordSchema.getDataType("account").get().getFieldType()); + assertEquals(RecordFieldType.DOUBLE, recordSchema.getDataType("desiredbalance").get().getFieldType()); + assertEquals(RecordFieldType.DOUBLE, recordSchema.getDataType("dreambalance").get().getFieldType()); + assertEquals(RecordFieldType.CHOICE, recordSchema.getDataType("favAnimal").get().getFieldType()); + assertEquals(RecordFieldType.CHOICE, recordSchema.getDataType("otherFavAnimal").get().getFieldType()); + + final Object[] values = reader.nextRecord().getValues(); + assertEquals(15, values.length); + assertEquals("John", values[0]); + assertEquals(33, values[1]); + assertEquals(1234.56D, values[2]); + assertEquals(0.045F, values[3]); + assertEquals(false, values[4]); + assertEquals(null, values[5]); + assertArrayEquals("binary".getBytes(StandardCharsets.UTF_8), (byte[]) values[6]); + assertArrayEquals("fixed".getBytes(StandardCharsets.UTF_8), (byte[]) values[7]); + assertEquals(map, values[8]); + assertArrayEquals(new Object[] {1L, 2L}, (Object[]) values[9]); + + 
final Map<String, Object> accountValues = new HashMap<>(); + accountValues.put("accountName", "Checking"); + accountValues.put("accountId", 83L); + + final List<RecordField> accountRecordFields = new ArrayList<>(); + accountRecordFields.add(new RecordField("accountId", RecordFieldType.LONG.getDataType())); + accountRecordFields.add(new RecordField("accountName", RecordFieldType.STRING.getDataType())); + + final RecordSchema accountRecordSchema = new SimpleRecordSchema(accountRecordFields); + final Record mapRecord = new MapRecord(accountRecordSchema, accountValues); + + assertEquals(mapRecord, values[10]); + + assertNull(values[11]); + assertEquals(10_000_000.0D, values[12]); + + final Map<String, Object> dogMap = new HashMap<>(); + dogMap.put("dogName", "Fido"); + dogMap.put("dogTailLength", 14); + + final List<RecordField> dogRecordFields = new ArrayList<>(); + dogRecordFields.add(new RecordField("dogTailLength", RecordFieldType.INT.getDataType())); + dogRecordFields.add(new RecordField("dogName", RecordFieldType.STRING.getDataType())); + final RecordSchema dogRecordSchema = new SimpleRecordSchema(dogRecordFields); + final Record dogRecord = new MapRecord(dogRecordSchema, dogMap); + + assertEquals(dogRecord, values[13]); + + final Map<String, Object> catMap = new HashMap<>(); + catMap.put("catName", "Meow"); + catMap.put("catTailLength", 1); + + final List<RecordField> catRecordFields = new ArrayList<>(); + catRecordFields.add(new RecordField("catTailLength", RecordFieldType.INT.getDataType())); + catRecordFields.add(new RecordField("catName", RecordFieldType.STRING.getDataType())); + final RecordSchema catRecordSchema = new SimpleRecordSchema(catRecordFields); + final Record catRecord = new MapRecord(catRecordSchema, catMap); + + assertEquals(catRecord, values[14]); + } + } + + public static enum Status { + GOOD, BAD; + } +} 
http://git-wip-us.apache.org/repos/asf/nifi/blob/a88d3bfa/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVRecordReader.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVRecordReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVRecordReader.java new file mode 100644 index 0000000..1e53d89 --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVRecordReader.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */

package org.apache.nifi.csv;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.serialization.MalformedRecordException;
import org.apache.nifi.serialization.record.DataType;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.junit.Assert;
import org.junit.Test;
import org.mockito.Mockito;

/**
 * Tests for {@link CSVRecordReader}: each test reads a CSV fixture from
 * {@code src/test/resources/csv}, checks the schema the reader reports and
 * then checks the values of every record it returns.
 */
public class TestCSVRecordReader {
    private final DataType stringDataType = RecordFieldType.STRING.getDataType();
    private final DataType doubleDataType = RecordFieldType.DOUBLE.getDataType();
    private final DataType timeDataType = RecordFieldType.TIME.getDataType();

    /**
     * Reads a fixture that yields exactly one record. Besides the "balance"
     * override, a TIME override is registered for a column named "other",
     * which is not among the field names the schema is expected to report.
     */
    @Test
    public void testSimpleParse() throws IOException, MalformedRecordException {
        final Map<String, DataType> typeOverrides = balanceAsDouble();
        typeOverrides.put("other", timeDataType);

        try (final InputStream in = open("src/test/resources/csv/single-bank-account.csv")) {
            final CSVRecordReader csvReader = new CSVRecordReader(in, null, typeOverrides);
            verifyFields(csvReader.getSchema());

            assertNextRecord(csvReader, "1", "John Doe", 4750.89D, "123 My Street", "My City", "MS", "11111", "USA");
            assertNull(csvReader.nextRecord());
        }
    }

    /**
     * Reads a fixture that yields two records followed by end of input.
     */
    @Test
    public void testMultipleRecords() throws IOException, MalformedRecordException {
        final Map<String, DataType> typeOverrides = balanceAsDouble();

        try (final InputStream in = open("src/test/resources/csv/multi-bank-account.csv")) {
            final CSVRecordReader csvReader = new CSVRecordReader(in, null, typeOverrides);
            verifyFields(csvReader.getSchema());

            assertNextRecord(csvReader, "1", "John Doe", 4750.89D, "123 My Street", "My City", "MS", "11111", "USA");
            assertNextRecord(csvReader, "2", "Jane Doe", 4820.09D, "321 Your Street", "Your City", "NY", "33333", "USA");
            assertNull(csvReader.nextRecord());
        }
    }

    /**
     * Same expectations as {@link #testMultipleRecords()}, but against the
     * extra-white-space fixture and with a mocked {@link ComponentLog}
     * handed to the reader instead of {@code null}.
     */
    @Test
    public void testExtraWhiteSpace() throws IOException, MalformedRecordException {
        final Map<String, DataType> typeOverrides = balanceAsDouble();

        try (final InputStream in = open("src/test/resources/csv/extra-white-space.csv")) {
            final CSVRecordReader csvReader = new CSVRecordReader(in, Mockito.mock(ComponentLog.class), typeOverrides);
            verifyFields(csvReader.getSchema());

            assertNextRecord(csvReader, "1", "John Doe", 4750.89D, "123 My Street", "My City", "MS", "11111", "USA");
            assertNextRecord(csvReader, "2", "Jane Doe", 4820.09D, "321 Your Street", "Your City", "NY", "33333", "USA");
            assertNull(csvReader.nextRecord());
        }
    }

    /**
     * Asserts that the schema lists the eight expected field names, and that
     * every field is a string except "balance", which must be a double.
     */
    private void verifyFields(final RecordSchema schema) {
        assertEquals(
            Arrays.asList("id", "name", "balance", "address", "city", "state", "zipCode", "country"),
            schema.getFieldNames());

        final List<DataType> expectedTypes = Arrays.asList(stringDataType, stringDataType, doubleDataType,
            stringDataType, stringDataType, stringDataType, stringDataType, stringDataType);
        assertEquals(expectedTypes, schema.getDataTypes());
    }

    /** Returns a fresh, mutable override map that types the "balance" column as a double. */
    private Map<String, DataType> balanceAsDouble() {
        final Map<String, DataType> typeOverrides = new HashMap<>();
        typeOverrides.put("balance", doubleDataType);
        return typeOverrides;
    }

    /** Opens the file at the given path; the caller is responsible for closing the stream. */
    private static InputStream open(final String path) throws IOException {
        return new FileInputStream(new File(path));
    }

    /** Pulls the next record from the reader and asserts that its values equal {@code expected}. */
    private static void assertNextRecord(final CSVRecordReader csvReader, final Object... expected)
            throws IOException, MalformedRecordException {
        final Object[] actual = csvReader.nextRecord().getValues();
        Assert.assertArrayEquals(expected, actual);
    }
}

http://git-wip-us.apache.org/repos/asf/nifi/blob/a88d3bfa/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestWriteCSVResult.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestWriteCSVResult.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestWriteCSVResult.java new file mode 100644 index 0000000..04f8479 --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestWriteCSVResult.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */

package org.apache.nifi.csv;

import static org.junit.Assert.assertEquals;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.sql.Date;
import java.sql.Time;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.DataType;
import org.apache.nifi.serialization.record.MapRecord;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.RecordSet;
import org.junit.Test;


/**
 * Test for {@link WriteCSVResult}: writes one record to an in-memory stream
 * and compares the resulting text with the expected CSV lines.
 */
public class TestWriteCSVResult {

    /**
     * Builds a schema with one field per {@link RecordFieldType} (named after
     * the lower-cased enum constant), writes a single record through a
     * {@link WriteCSVResult} configured with the default date, time and
     * timestamp formats, and asserts that the output is exactly a quoted
     * header line followed by the expected quoted value line.
     */
    @Test
    public void testDataTypes() throws IOException {
        final WriteCSVResult csvWriter = new WriteCSVResult(
            RecordFieldType.DATE.getDefaultFormat(),
            RecordFieldType.TIME.getDefaultFormat(),
            RecordFieldType.TIMESTAMP.getDefaultFormat());

        // One schema field and one quoted header column per field type, in enum declaration order.
        final List<RecordField> schemaFields = new ArrayList<>();
        final List<String> quotedColumns = new ArrayList<>();
        for (final RecordFieldType type : RecordFieldType.values()) {
            final DataType columnType;
            if (type == RecordFieldType.CHOICE) {
                // A CHOICE data type is created from the list of types it may hold.
                final List<DataType> choices = new ArrayList<>();
                choices.add(RecordFieldType.INT.getDataType());
                choices.add(RecordFieldType.LONG.getDataType());
                columnType = type.getDataType(choices);
            } else {
                columnType = type.getDataType();
            }

            schemaFields.add(new RecordField(type.name().toLowerCase(), columnType));
            quotedColumns.add("\"" + type.name().toLowerCase() + "\"");
        }
        final RecordSchema schema = new SimpleRecordSchema(schemaFields);

        final long now = System.currentTimeMillis();
        final Map<String, Object> fieldValues = new HashMap<>();
        fieldValues.put("string", "string");
        fieldValues.put("boolean", true);
        fieldValues.put("byte", (byte) 1);
        fieldValues.put("char", 'c');
        fieldValues.put("short", (short) 8);
        fieldValues.put("int", 9);
        fieldValues.put("bigint", BigInteger.valueOf(8L));
        fieldValues.put("long", 8L);
        fieldValues.put("float", 8.0F);
        fieldValues.put("double", 8.0D);
        fieldValues.put("date", new Date(now));
        fieldValues.put("time", new Time(now));
        fieldValues.put("timestamp", new Timestamp(now));
        fieldValues.put("object", null);
        fieldValues.put("choice", 48L);
        fieldValues.put("array", null);

        final Record singleRecord = new MapRecord(schema, fieldValues);
        final RecordSet recordSet = RecordSet.of(schema, singleRecord);

        final String csvText;
        try (final ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
            csvWriter.write(recordSet, buffer);
            csvText = new String(buffer.toByteArray(), StandardCharsets.UTF_8);
        }

        final String[] lines = csvText.split("\n");
        assertEquals(2, lines.length);
        assertEquals(String.join(",", quotedColumns), lines[0]);

        // The two null-valued fields are expected as empty, unquoted columns around "48".
        final String expectedLine =
            "\"string\",\"true\",\"1\",\"c\",\"8\",\"9\",\"8\",\"8\",\"8.0\",\"8.0\","
                + quotedTimeColumn(RecordFieldType.DATE.getDefaultFormat(), now)
                + quotedTimeColumn(RecordFieldType.TIME.getDefaultFormat(), now)
                + quotedTimeColumn(RecordFieldType.TIMESTAMP.getDefaultFormat(), now)
                + ",\"48\",";

        assertEquals(expectedLine, lines[1]);
    }

    /**
     * Formats {@code millis} with the given {@link SimpleDateFormat} pattern
     * and returns it wrapped in double quotes and followed by a comma.
     */
    private static String quotedTimeColumn(final String pattern, final long millis) {
        return "\"" + new SimpleDateFormat(pattern).format(millis) + "\"" + ",";
    }

}
