Hi,

I'm including a template here that reproduces the problem I'm having.  I
generate a one-record CSV and try to convert it to a JSON file with a
nested section for phones.  The phone section of the JSON output comes out as
null instead of containing the phone attributes.
If we can resolve this problem, I would also like to map the phone1 and phone2
attributes to phone[0] ... phone[n].

Can this be completed with the record writers?

On Thu, Feb 8, 2018 at 10:25 PM, Charlie Frasure <charliefras...@gmail.com>
wrote:

> Hi,
>
> I'm having trouble taking a flat record and building an avro schema that
> lets me nest it.
> I found this example schema, but after using a ConvertRecord or
> UpdateRecord, I receive the equivalent of [{"parent" = null}]
>
> {"type": "record", "name": "CustomerInput", "namespace":
> "org.apache.example", "fields": [
>         {"name": "id", "type": "string"},
>         {"name": "companyName", "type": ["null", "string"], "default":
> null},
>         {"name": "revenue", "type": ["null", "string"], "default": null},
>         {"name" : "parent", "type" : [ "null",
>             {"type" : "record", "name" : "parent", "fields" : [
>                 {"name" : "name", "type" : ["null", "string"], "default" :
> null},
>                 {"name" : "id", "type" : "string"}
>             ] }
>         ], "default" : null }
> ]}
>
> This example found online is meant to union a person schema with an
> address schema, creating an "Address" type, but this resulted in a "Could
> not parse incoming data" error.
>
> [
>   {"type": "record", "name": "Address", "fields": [
>       {"name": "streetaddress", "type": "string"},
>       {"name": "city", "type": "string"}
>   ]},
>   {"type": "record", "name": "person", "fields": [
>       {"name": "firstname","type": "string"},
>       {"name": "lastname", "type": "string"},
>       {"name": "address", "type": "Address"}
>   ]}
> ]
>
> I would also like to be able to include multiple such sub-records, so
> that I could have 0 to n addresses, each with a distinct address type,
> associated with one person.
> Is this possible with the record processors, or do I need to focus on Jolt
> / ConvertAvro?
>
> Thanks,
> Charlie
>
>
>
<?xml version="1.0" ?>
<template encoding-version="1.1">
  <description></description>
  <groupId>e4b4ef98-8c04-3dd5-80ee-05e81eaddad0</groupId>
  <name>Flat to nested</name>
  <snippet>
    <processGroups>
      <id>540530b7-99bf-3cd1-0000-000000000000</id>
      <parentGroupId>dddba225-0d20-3983-0000-000000000000</parentGroupId>
      <position>
        <x>0.0</x>
        <y>0.0</y>
      </position>
      <comments></comments>
      <contents>
        <!-- Connection for the "failure" relationship: from source processor
             e4bb3737-d97c-3806-0000-000000000000 (named UpdateRecord in this template)
             to destination processor 4e2c7357-e7c4-37c9-0000-000000000000 (named
             LogAttribute). Back pressure thresholds are 10000 objects / 1 GB and
             the FlowFile expiration is 0 sec. -->
        <connections>
          <id>931d1d4e-5b71-3dc8-0000-000000000000</id>
          <parentGroupId>540530b7-99bf-3cd1-0000-000000000000</parentGroupId>
          <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
          <backPressureObjectThreshold>10000</backPressureObjectThreshold>
          <bends>
            <x>1221.632568359375</x>
            <y>1276.725341796875</y>
          </bends>
          <destination>
            <groupId>540530b7-99bf-3cd1-0000-000000000000</groupId>
            <id>4e2c7357-e7c4-37c9-0000-000000000000</id>
            <type>PROCESSOR</type>
          </destination>
          <flowFileExpiration>0 sec</flowFileExpiration>
          <labelIndex>0</labelIndex>
          <name></name>
          <selectedRelationships>failure</selectedRelationships>
          <source>
            <groupId>540530b7-99bf-3cd1-0000-000000000000</groupId>
            <id>e4bb3737-d97c-3806-0000-000000000000</id>
            <type>PROCESSOR</type>
          </source>
          <zIndex>0</zIndex>
        </connections>
        <!-- Connection for the "success" relationship: from source processor
             e4bb3737-d97c-3806-0000-000000000000 (named UpdateRecord in this template)
             to destination processor 4e2c7357-e7c4-37c9-0000-000000000000 (named
             LogAttribute). Same destination as the "failure" connection, so both
             outcomes of UpdateRecord end up at the same processor. -->
        <connections>
          <id>dd1ade75-7aec-321f-0000-000000000000</id>
          <parentGroupId>540530b7-99bf-3cd1-0000-000000000000</parentGroupId>
          <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
          <backPressureObjectThreshold>10000</backPressureObjectThreshold>
          <bends>
            <x>1012.83984375</x>
            <y>1278.0989990234375</y>
          </bends>
          <destination>
            <groupId>540530b7-99bf-3cd1-0000-000000000000</groupId>
            <id>4e2c7357-e7c4-37c9-0000-000000000000</id>
            <type>PROCESSOR</type>
          </destination>
          <flowFileExpiration>0 sec</flowFileExpiration>
          <labelIndex>1</labelIndex>
          <name></name>
          <selectedRelationships>success</selectedRelationships>
          <source>
            <groupId>540530b7-99bf-3cd1-0000-000000000000</groupId>
            <id>e4bb3737-d97c-3806-0000-000000000000</id>
            <type>PROCESSOR</type>
          </source>
          <zIndex>0</zIndex>
        </connections>
        <!-- Connection for the "success" relationship: from source processor
             cef8de6f-8b6f-3b81-0000-000000000000 (named "Some flat CSV
             GenerateFlowFile" in this template) to destination processor
             e4bb3737-d97c-3806-0000-000000000000 (named UpdateRecord). This is the
             first hop of the flow; no bend points are defined. -->
        <connections>
          <id>1d67ccea-0ab9-3aaa-0000-000000000000</id>
          <parentGroupId>540530b7-99bf-3cd1-0000-000000000000</parentGroupId>
          <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
          <backPressureObjectThreshold>10000</backPressureObjectThreshold>
          <destination>
            <groupId>540530b7-99bf-3cd1-0000-000000000000</groupId>
            <id>e4bb3737-d97c-3806-0000-000000000000</id>
            <type>PROCESSOR</type>
          </destination>
          <flowFileExpiration>0 sec</flowFileExpiration>
          <labelIndex>1</labelIndex>
          <name></name>
          <selectedRelationships>success</selectedRelationships>
          <source>
            <groupId>540530b7-99bf-3cd1-0000-000000000000</groupId>
            <id>cef8de6f-8b6f-3b81-0000-000000000000</id>
            <type>PROCESSOR</type>
          </source>
          <zIndex>0</zIndex>
        </connections>
        <!-- Controller service "JsonRecordSetWriter (nested)": the record writer
             referenced by the UpdateRecord processor's record-writer property
             (id 709c2b2f-91ea-3896-0000-000000000000).
             Explicitly set properties: Schema Write Strategy = full-schema-attribute,
             schema-access-strategy = schema-text-property,
             schema-text = ${avro.schema.nested}, Pretty Print JSON = true.
             All other properties carry no value in this export. -->
        <controllerServices>
          <id>709c2b2f-91ea-3896-0000-000000000000</id>
          <parentGroupId>540530b7-99bf-3cd1-0000-000000000000</parentGroupId>
          <bundle>
            <artifact>nifi-record-serialization-services-nar</artifact>
            <group>org.apache.nifi</group>
            <version>1.4.0</version>
          </bundle>
          <comments></comments>
          <!-- Property descriptors: one entry per property this service exposes. -->
          <descriptors>
            <entry>
              <key>Schema Write Strategy</key>
              <value>
                <name>Schema Write Strategy</name>
              </value>
            </entry>
            <entry>
              <key>schema-access-strategy</key>
              <value>
                <name>schema-access-strategy</name>
              </value>
            </entry>
            <entry>
              <key>schema-registry</key>
              <value>
                <identifiesControllerService>org.apache.nifi.schemaregistry.services.SchemaRegistry</identifiesControllerService>
                <name>schema-registry</name>
              </value>
            </entry>
            <entry>
              <key>schema-name</key>
              <value>
                <name>schema-name</name>
              </value>
            </entry>
            <entry>
              <key>schema-text</key>
              <value>
                <name>schema-text</name>
              </value>
            </entry>
            <entry>
              <key>Date Format</key>
              <value>
                <name>Date Format</name>
              </value>
            </entry>
            <entry>
              <key>Time Format</key>
              <value>
                <name>Time Format</name>
              </value>
            </entry>
            <entry>
              <key>Timestamp Format</key>
              <value>
                <name>Timestamp Format</name>
              </value>
            </entry>
            <entry>
              <key>Pretty Print JSON</key>
              <value>
                <name>Pretty Print JSON</name>
              </value>
            </entry>
          </descriptors>
          <name>JsonRecordSetWriter (nested)</name>
          <persistsState>false</persistsState>
          <!-- Configured values; an entry without a value element is unset here. -->
          <properties>
            <entry>
              <key>Schema Write Strategy</key>
              <value>full-schema-attribute</value>
            </entry>
            <entry>
              <key>schema-access-strategy</key>
              <value>schema-text-property</value>
            </entry>
            <entry>
              <key>schema-registry</key>
            </entry>
            <entry>
              <key>schema-name</key>
            </entry>
            <!-- The schema text is an expression, not a literal schema. The name
                 matches the avro.schema.nested property defined on the
                 GenerateFlowFile processor in this template.
                 NOTE(review): presumably that property reaches the writer as a
                 FlowFile attribute; confirm against the NiFi documentation. -->
            <entry>
              <key>schema-text</key>
              <value>${avro.schema.nested}</value>
            </entry>
            <entry>
              <key>Date Format</key>
            </entry>
            <entry>
              <key>Time Format</key>
            </entry>
            <entry>
              <key>Timestamp Format</key>
            </entry>
            <entry>
              <key>Pretty Print JSON</key>
              <value>true</value>
            </entry>
          </properties>
          <state>ENABLED</state>
          <type>org.apache.nifi.json.JsonRecordSetWriter</type>
        </controllerServices>
        <!-- Controller service "CSVReader (flat)": the record reader referenced by
             the UpdateRecord processor's record-reader property
             (id 7ef070a5-cd50-3060-0000-000000000000).
             The only property with an explicit value is Skip Header Line = true;
             every other property, including schema-access-strategy and
             schema-text, carries no value in this export.
             NOTE(review): the schema the reader applies to the six CSV columns
             therefore depends on defaults that are not visible here; confirm
             which schema access strategy is actually in effect. -->
        <controllerServices>
          <id>7ef070a5-cd50-3060-0000-000000000000</id>
          <parentGroupId>540530b7-99bf-3cd1-0000-000000000000</parentGroupId>
          <bundle>
            <artifact>nifi-record-serialization-services-nar</artifact>
            <group>org.apache.nifi</group>
            <version>1.4.0</version>
          </bundle>
          <comments></comments>
          <!-- Property descriptors: one entry per property this service exposes. -->
          <descriptors>
            <entry>
              <key>schema-access-strategy</key>
              <value>
                <name>schema-access-strategy</name>
              </value>
            </entry>
            <entry>
              <key>schema-registry</key>
              <value>
                <identifiesControllerService>org.apache.nifi.schemaregistry.services.SchemaRegistry</identifiesControllerService>
                <name>schema-registry</name>
              </value>
            </entry>
            <entry>
              <key>schema-name</key>
              <value>
                <name>schema-name</name>
              </value>
            </entry>
            <entry>
              <key>schema-text</key>
              <value>
                <name>schema-text</name>
              </value>
            </entry>
            <entry>
              <key>Date Format</key>
              <value>
                <name>Date Format</name>
              </value>
            </entry>
            <entry>
              <key>Time Format</key>
              <value>
                <name>Time Format</name>
              </value>
            </entry>
            <entry>
              <key>Timestamp Format</key>
              <value>
                <name>Timestamp Format</name>
              </value>
            </entry>
            <entry>
              <key>CSV Format</key>
              <value>
                <name>CSV Format</name>
              </value>
            </entry>
            <entry>
              <key>Value Separator</key>
              <value>
                <name>Value Separator</name>
              </value>
            </entry>
            <entry>
              <key>Skip Header Line</key>
              <value>
                <name>Skip Header Line</name>
              </value>
            </entry>
            <entry>
              <key>ignore-csv-header</key>
              <value>
                <name>ignore-csv-header</name>
              </value>
            </entry>
            <entry>
              <key>Quote Character</key>
              <value>
                <name>Quote Character</name>
              </value>
            </entry>
            <entry>
              <key>Escape Character</key>
              <value>
                <name>Escape Character</name>
              </value>
            </entry>
            <entry>
              <key>Comment Marker</key>
              <value>
                <name>Comment Marker</name>
              </value>
            </entry>
            <entry>
              <key>Null String</key>
              <value>
                <name>Null String</name>
              </value>
            </entry>
            <entry>
              <key>Trim Fields</key>
              <value>
                <name>Trim Fields</name>
              </value>
            </entry>
          </descriptors>
          <name>CSVReader (flat)</name>
          <persistsState>false</persistsState>
          <!-- Configured values; an entry without a value element is unset here. -->
          <properties>
            <entry>
              <key>schema-access-strategy</key>
            </entry>
            <entry>
              <key>schema-registry</key>
            </entry>
            <entry>
              <key>schema-name</key>
            </entry>
            <entry>
              <key>schema-text</key>
            </entry>
            <entry>
              <key>Date Format</key>
            </entry>
            <entry>
              <key>Time Format</key>
            </entry>
            <entry>
              <key>Timestamp Format</key>
            </entry>
            <entry>
              <key>CSV Format</key>
            </entry>
            <entry>
              <key>Value Separator</key>
            </entry>
            <!-- The generated CSV text starts with a header line
                 (person,gender,phone1,...), which this setting refers to. -->
            <entry>
              <key>Skip Header Line</key>
              <value>true</value>
            </entry>
            <entry>
              <key>ignore-csv-header</key>
            </entry>
            <entry>
              <key>Quote Character</key>
            </entry>
            <entry>
              <key>Escape Character</key>
            </entry>
            <entry>
              <key>Comment Marker</key>
            </entry>
            <entry>
              <key>Null String</key>
            </entry>
            <entry>
              <key>Trim Fields</key>
            </entry>
          </properties>
          <state>ENABLED</state>
          <type>org.apache.nifi.csv.CSVReader</type>
        </controllerServices>
        <!-- Processor "Some flat CSV GenerateFlowFile"
             (id cef8de6f-8b6f-3b81-0000-000000000000): the start of the flow.
             Its custom text is a CSV header line plus a single data row with the
             columns person, gender, phone1, phone1type, phone2, phone2type.
             It also defines the extra property avro.schema.nested, which holds the
             nested Avro schema text that the JSON writer refers to as
             ${avro.schema.nested}.
             Scheduling period is 999 days and the exported state is STOPPED. -->
        <processors>
          <id>cef8de6f-8b6f-3b81-0000-000000000000</id>
          <parentGroupId>540530b7-99bf-3cd1-0000-000000000000</parentGroupId>
          <position>
            <x>943.6695223509159</x>
            <y>906.5872054425058</y>
          </position>
          <bundle>
            <artifact>nifi-standard-nar</artifact>
            <group>org.apache.nifi</group>
            <version>1.4.0</version>
          </bundle>
          <config>
            <bulletinLevel>WARN</bulletinLevel>
            <comments></comments>
            <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
            <!-- Property descriptors: one entry per property of this processor,
                 including the user-added avro.schema.nested. -->
            <descriptors>
              <entry>
                <key>File Size</key>
                <value>
                  <name>File Size</name>
                </value>
              </entry>
              <entry>
                <key>Batch Size</key>
                <value>
                  <name>Batch Size</name>
                </value>
              </entry>
              <entry>
                <key>Data Format</key>
                <value>
                  <name>Data Format</name>
                </value>
              </entry>
              <entry>
                <key>Unique FlowFiles</key>
                <value>
                  <name>Unique FlowFiles</name>
                </value>
              </entry>
              <entry>
                <key>generate-ff-custom-text</key>
                <value>
                  <name>generate-ff-custom-text</name>
                </value>
              </entry>
              <entry>
                <key>character-set</key>
                <value>
                  <name>character-set</name>
                </value>
              </entry>
              <entry>
                <key>avro.schema.nested</key>
                <value>
                  <name>avro.schema.nested</name>
                </value>
              </entry>
            </descriptors>
            <executionNode>ALL</executionNode>
            <lossTolerant>false</lossTolerant>
            <penaltyDuration>30 sec</penaltyDuration>
            <properties>
              <entry>
                <key>File Size</key>
                <value>0B</value>
              </entry>
              <entry>
                <key>Batch Size</key>
                <value>1</value>
              </entry>
              <entry>
                <key>Data Format</key>
                <value>Text</value>
              </entry>
              <entry>
                <key>Unique FlowFiles</key>
                <value>false</value>
              </entry>
              <!-- The flat input: header line followed by one record. The line
                   break inside the value is part of the CSV content. -->
              <entry>
                <key>generate-ff-custom-text</key>
                <value>person,gender,phone1,phone1type,phone2,phone2type
john,m,123-456-7890,mobile,234-567-8901,home</value>
              </entry>
              <entry>
                <key>character-set</key>
                <value>UTF-8</value>
              </entry>
              <!-- Target (nested) Avro schema: record "people" with person, gender
                   and a "phone" field whose type is a one-member union holding a
                   record with phone1, phone1type, phone2 and phone2type.
                   NOTE(review): the flat CSV has no "phone" column, and this
                   schema models a single record rather than a list, so the
                   phone[0]..phone[n] layout asked for in the email would
                   presumably need an array-of-records type; confirm against the
                   Avro specification and the intended output. -->
              <entry>
                <key>avro.schema.nested</key>
                <value>{"type": "record", "name": "people", "namespace": "nifi", "fields": [
        {"name": "person", "type": "string"},
        {"name": "gender", "type": ["null", "string"], "default": null},
        {"name" : "phone", "type" : [ 
			{"type" : "record", "name" : "phone", "fields" : [ 
				{"name" : "phone1", "type" : ["null", "string"], "default" : null}, 
				{"name" : "phone1type", "type" : "string"},
				{"name" : "phone2", "type" : ["null", "string"], "default" : null}, 
				{"name" : "phone2type", "type" : "string"} 
			] }
		]}
]}
</value>
              </entry>
            </properties>
            <runDurationMillis>0</runDurationMillis>
            <schedulingPeriod>999 days</schedulingPeriod>
            <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
            <yieldDuration>1 sec</yieldDuration>
          </config>
          <name>Some flat CSV GenerateFlowFile</name>
          <relationships>
            <autoTerminate>false</autoTerminate>
            <name>success</name>
          </relationships>
          <state>STOPPED</state>
          <style></style>
          <type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
        </processors>
        <!-- Processor "UpdateRecord" (id e4bb3737-d97c-3806-0000-000000000000):
             the flat-to-nested conversion step of this template.
             record-reader points at controller service 7ef070a5-... (CSVReader (flat)),
             record-writer points at 709c2b2f-... (JsonRecordSetWriter (nested)).
             The single update rule is the property //phone2 with the value 'none'
             under replacement-value-strategy = literal-value.
             NOTE(review): no rule here writes to a "phone" field, while the writer
             schema nests phone1/phone2 under "phone"; that is presumably why the
             JSON output shows phone as null. Populating it would likely need
             replacement-value-strategy = record-path-value with rules such as
             /phone/phone1 taken from /phone1. Whether UpdateRecord 1.4.0 can
             create the missing parent record is unverified; TODO confirm. -->
        <processors>
          <id>e4bb3737-d97c-3806-0000-000000000000</id>
          <parentGroupId>540530b7-99bf-3cd1-0000-000000000000</parentGroupId>
          <position>
            <x>948.6800000015248</x>
            <y>1105.328500308678</y>
          </position>
          <bundle>
            <artifact>nifi-standard-nar</artifact>
            <group>org.apache.nifi</group>
            <version>1.4.0</version>
          </bundle>
          <config>
            <bulletinLevel>WARN</bulletinLevel>
            <comments></comments>
            <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
            <!-- Property descriptors, including the user-added //phone2 rule. -->
            <descriptors>
              <entry>
                <key>record-reader</key>
                <value>
                  <identifiesControllerService>org.apache.nifi.serialization.RecordReaderFactory</identifiesControllerService>
                  <name>record-reader</name>
                </value>
              </entry>
              <entry>
                <key>record-writer</key>
                <value>
                  <identifiesControllerService>org.apache.nifi.serialization.RecordSetWriterFactory</identifiesControllerService>
                  <name>record-writer</name>
                </value>
              </entry>
              <entry>
                <key>replacement-value-strategy</key>
                <value>
                  <name>replacement-value-strategy</name>
                </value>
              </entry>
              <entry>
                <key>//phone2</key>
                <value>
                  <name>//phone2</name>
                </value>
              </entry>
            </descriptors>
            <executionNode>ALL</executionNode>
            <lossTolerant>false</lossTolerant>
            <penaltyDuration>30 sec</penaltyDuration>
            <properties>
              <entry>
                <key>record-reader</key>
                <value>7ef070a5-cd50-3060-0000-000000000000</value>
              </entry>
              <entry>
                <key>record-writer</key>
                <value>709c2b2f-91ea-3896-0000-000000000000</value>
              </entry>
              <entry>
                <key>replacement-value-strategy</key>
                <value>literal-value</value>
              </entry>
              <!-- NOTE(review): the value includes the single quotes. With the
                   literal-value strategy they are presumably written as part of
                   the field value; confirm whether that is intended. -->
              <entry>
                <key>//phone2</key>
                <value>'none'</value>
              </entry>
            </properties>
            <runDurationMillis>0</runDurationMillis>
            <schedulingPeriod>0 sec</schedulingPeriod>
            <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
            <yieldDuration>1 sec</yieldDuration>
          </config>
          <name>UpdateRecord</name>
          <!-- Both relationships are connected to the LogAttribute processor by
               the connections defined earlier in this template. -->
          <relationships>
            <autoTerminate>false</autoTerminate>
            <name>failure</name>
          </relationships>
          <relationships>
            <autoTerminate>false</autoTerminate>
            <name>success</name>
          </relationships>
          <state>STOPPED</state>
          <style></style>
          <type>org.apache.nifi.processors.standard.UpdateRecord</type>
        </processors>
        <!-- Processor "LogAttribute" (id 4e2c7357-e7c4-37c9-0000-000000000000):
             the destination of both UpdateRecord connections in this template.
             Configured with Log Level = info, Log Payload = false,
             attributes-to-log-regex = .* and character-set = UTF-8.
             The exported state is DISABLED, and its "success" relationship is not
             auto-terminated. -->
        <processors>
          <id>4e2c7357-e7c4-37c9-0000-000000000000</id>
          <parentGroupId>540530b7-99bf-3cd1-0000-000000000000</parentGroupId>
          <position>
            <x>948.3079413533465</x>
            <y>1326.7243007240004</y>
          </position>
          <bundle>
            <artifact>nifi-standard-nar</artifact>
            <group>org.apache.nifi</group>
            <version>1.4.0</version>
          </bundle>
          <config>
            <bulletinLevel>WARN</bulletinLevel>
            <comments></comments>
            <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
            <!-- Property descriptors: one entry per property of this processor. -->
            <descriptors>
              <entry>
                <key>Log Level</key>
                <value>
                  <name>Log Level</name>
                </value>
              </entry>
              <entry>
                <key>Log Payload</key>
                <value>
                  <name>Log Payload</name>
                </value>
              </entry>
              <entry>
                <key>Attributes to Log</key>
                <value>
                  <name>Attributes to Log</name>
                </value>
              </entry>
              <entry>
                <key>attributes-to-log-regex</key>
                <value>
                  <name>attributes-to-log-regex</name>
                </value>
              </entry>
              <entry>
                <key>Attributes to Ignore</key>
                <value>
                  <name>Attributes to Ignore</name>
                </value>
              </entry>
              <entry>
                <key>attributes-to-ignore-regex</key>
                <value>
                  <name>attributes-to-ignore-regex</name>
                </value>
              </entry>
              <entry>
                <key>Log prefix</key>
                <value>
                  <name>Log prefix</name>
                </value>
              </entry>
              <entry>
                <key>character-set</key>
                <value>
                  <name>character-set</name>
                </value>
              </entry>
            </descriptors>
            <executionNode>ALL</executionNode>
            <lossTolerant>false</lossTolerant>
            <penaltyDuration>30 sec</penaltyDuration>
            <!-- Configured values; an entry without a value element is unset here. -->
            <properties>
              <entry>
                <key>Log Level</key>
                <value>info</value>
              </entry>
              <!-- Payload logging is off, so the converted JSON content itself is
                   not written to the log by this processor. -->
              <entry>
                <key>Log Payload</key>
                <value>false</value>
              </entry>
              <entry>
                <key>Attributes to Log</key>
              </entry>
              <entry>
                <key>attributes-to-log-regex</key>
                <value>.*</value>
              </entry>
              <entry>
                <key>Attributes to Ignore</key>
              </entry>
              <entry>
                <key>attributes-to-ignore-regex</key>
              </entry>
              <entry>
                <key>Log prefix</key>
              </entry>
              <entry>
                <key>character-set</key>
                <value>UTF-8</value>
              </entry>
            </properties>
            <runDurationMillis>0</runDurationMillis>
            <schedulingPeriod>0 sec</schedulingPeriod>
            <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
            <yieldDuration>1 sec</yieldDuration>
          </config>
          <name>LogAttribute</name>
          <relationships>
            <autoTerminate>false</autoTerminate>
            <name>success</name>
          </relationships>
          <state>DISABLED</state>
          <style></style>
          <type>org.apache.nifi.processors.standard.LogAttribute</type>
        </processors>
      </contents>
      <name>FlatToNested</name>
    </processGroups>
  </snippet>
  <timestamp>02/09/2018 16:25:17 GMT</timestamp>
</template>

Reply via email to