I don't really understand what you're asking for...
The NiFi template is attached;
the Avro schema is:
{
"name": "aRecord",
"type": "record",
"namespace": "a",
"fields": [
{
"name": "a",
"type": {
"name": "bRecord",
"type":"record",
"fields": [
{ "name": "b", "type": "string"}
]
}
}
]
}
and the JSON file that is incorrectly validated is:
{"a":{}}
In the given flow it is reported as valid, although the required field b is
missing. ConvertJsonToAvro, on the other hand, rejects the very same JSON
using the very same Avro schema.
Is this all you need? If not, what do you need from me? I probably don't
have a 'reproducible repository' -- I don't even know what that is.
Martin.
2017-12-06 11:07 GMT+01:00 Juan Pablo Gardella <[email protected]>
:
> Could you share a reproducible repo or files?
>
> El mié., 6 de dic. de 2017 07:00, Martin Mucha <[email protected]>
> escribió:
>
>> Hi,
>>
>> I have JSON like:
>>
>> {
>> "a": {
>> "b": "1"
>> }
>> }
>>
>> and corresponding avro schema (written for the sake of this e-mail, need
>> not to be 100% accurate)
>>
>> {
>> "name": "aRecord",
>> "type": "record",
>> "namespace": "a",
>> "fields": [
>> {
>> "name": "a",
>> "type": {
>> "name": "bRecord",
>> "type":"record",
>> "fields": [
>> { "name": "b", "type": "string"}
>> ]
>> }
>> }
>>
>> ]
>> }
>>
>> In ConvertJsonToAvro processor, json missing field "b":
>>
>> {"a":{}}
>>
>> will be rejected, while in ValidateRecord it will be accepted as valid
>> (which is not valid according to schema). Is there anything I can do about
>> it? Is it bug?
>>
>> thanks,
>> Martin.
>>
>
<?xml version="1.0" ?>
<template encoding-version="1.1">
<description></description>
<groupId>fdd4f7bb-015f-1000-ffef-071ce070cc59</groupId>
<name>fail</name>
<snippet>
<!-- Connection carrying the "invalid" relationship from processor
     43196d5d-8337-3bc7 (ValidateRecord) to processor cb1aa26f-0116-3def
     ("Log JSON is invalid"). -->
<connections>
  <id>87fc9251-c4a0-3aa3-0000-000000000000</id>
  <parentGroupId>94076eff-d950-3ea7-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>94076eff-d950-3ea7-0000-000000000000</groupId>
    <id>cb1aa26f-0116-3def-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>invalid</selectedRelationships>
  <source>
    <groupId>94076eff-d950-3ea7-0000-000000000000</groupId>
    <id>43196d5d-8337-3bc7-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Connection carrying the "failure" relationship from processor
     43196d5d-8337-3bc7 (ValidateRecord) to processor 580f3309-656f-3450
     (the LogMessage processor that reports a failed parse). -->
<connections>
  <id>9f53c803-9bff-35a0-0000-000000000000</id>
  <parentGroupId>94076eff-d950-3ea7-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>94076eff-d950-3ea7-0000-000000000000</groupId>
    <id>580f3309-656f-3450-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>failure</selectedRelationships>
  <source>
    <groupId>94076eff-d950-3ea7-0000-000000000000</groupId>
    <id>43196d5d-8337-3bc7-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Connection carrying the "success" relationship from processor
     b3355507-4d1e-34d1 (GetFile) to processor 43196d5d-8337-3bc7
     (ValidateRecord). This is the entry edge of the flow. -->
<connections>
  <id>04756719-eca8-3bfb-0000-000000000000</id>
  <parentGroupId>94076eff-d950-3ea7-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>94076eff-d950-3ea7-0000-000000000000</groupId>
    <id>43196d5d-8337-3bc7-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>success</selectedRelationships>
  <source>
    <groupId>94076eff-d950-3ea7-0000-000000000000</groupId>
    <id>b3355507-4d1e-34d1-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Connection carrying the "valid" relationship from processor
     43196d5d-8337-3bc7 (ValidateRecord) to processor 9ba255d6-3adf-3fac
     ("Log message is valid"). -->
<connections>
  <id>5fa955a2-c99b-3f96-0000-000000000000</id>
  <parentGroupId>94076eff-d950-3ea7-0000-000000000000</parentGroupId>
  <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
  <backPressureObjectThreshold>10000</backPressureObjectThreshold>
  <destination>
    <groupId>94076eff-d950-3ea7-0000-000000000000</groupId>
    <id>9ba255d6-3adf-3fac-0000-000000000000</id>
    <type>PROCESSOR</type>
  </destination>
  <flowFileExpiration>0 sec</flowFileExpiration>
  <labelIndex>1</labelIndex>
  <name></name>
  <selectedRelationships>valid</selectedRelationships>
  <source>
    <groupId>94076eff-d950-3ea7-0000-000000000000</groupId>
    <id>43196d5d-8337-3bc7-0000-000000000000</id>
    <type>PROCESSOR</type>
  </source>
  <zIndex>0</zIndex>
</connections>
<!-- Controller service "AvroSchemaRegistry" (id 966b29cb-5a75-3dd2).
     It holds one property, ContactAvroSchema, whose value is the Avro
     schema text: record "aRecord" with a field "a" of nested record type
     "bRecord", which in turn declares a string field "b".
     The schema text is character data of the value element, so its lines
     are deliberately left unindented to keep the value unchanged. -->
<controllerServices>
  <id>966b29cb-5a75-3dd2-0000-000000000000</id>
  <parentGroupId>94076eff-d950-3ea7-0000-000000000000</parentGroupId>
  <bundle>
    <artifact>nifi-registry-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.4.0</version>
  </bundle>
  <comments></comments>
  <descriptors>
    <entry>
      <key>ContactAvroSchema</key>
      <value>
        <name>ContactAvroSchema</name>
      </value>
    </entry>
  </descriptors>
  <name>AvroSchemaRegistry</name>
  <persistsState>false</persistsState>
  <properties>
    <entry>
      <key>ContactAvroSchema</key>
      <value>{
"name": "aRecord",
"type": "record",
"namespace": "a",
"fields": [
{
"name": "a",
"type": {
"name": "bRecord",
"type":"record",
"fields": [
{ "name": "b", "type": "string"}
]
}
}
]
}</value>
    </entry>
  </properties>
  <state>ENABLED</state>
  <type>org.apache.nifi.schemaregistry.services.AvroSchemaRegistry</type>
</controllerServices>
<!-- Controller service "Contact Canonical JsonRecordSetWriter"
     (id 9b1539c7-a2bf-38d4). The ValidateRecord processor in this template
     points its record-writer property at this id.
     Its schema-registry property points at 966b29cb-5a75-3dd2 (the
     AvroSchemaRegistry service) and schema-name is ContactAvroSchema;
     schema-access-strategy is set to inherit-record-schema. -->
<controllerServices>
  <id>9b1539c7-a2bf-38d4-0000-000000000000</id>
  <parentGroupId>94076eff-d950-3ea7-0000-000000000000</parentGroupId>
  <bundle>
    <artifact>nifi-record-serialization-services-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.4.0</version>
  </bundle>
  <comments></comments>
  <descriptors>
    <entry>
      <key>Schema Write Strategy</key>
      <value>
        <name>Schema Write Strategy</name>
      </value>
    </entry>
    <entry>
      <key>schema-access-strategy</key>
      <value>
        <name>schema-access-strategy</name>
      </value>
    </entry>
    <entry>
      <key>schema-registry</key>
      <value>
        <identifiesControllerService>org.apache.nifi.schemaregistry.services.SchemaRegistry</identifiesControllerService>
        <name>schema-registry</name>
      </value>
    </entry>
    <entry>
      <key>schema-name</key>
      <value>
        <name>schema-name</name>
      </value>
    </entry>
    <entry>
      <key>schema-text</key>
      <value>
        <name>schema-text</name>
      </value>
    </entry>
    <entry>
      <key>Date Format</key>
      <value>
        <name>Date Format</name>
      </value>
    </entry>
    <entry>
      <key>Time Format</key>
      <value>
        <name>Time Format</name>
      </value>
    </entry>
    <entry>
      <key>Timestamp Format</key>
      <value>
        <name>Timestamp Format</name>
      </value>
    </entry>
    <entry>
      <key>Pretty Print JSON</key>
      <value>
        <name>Pretty Print JSON</name>
      </value>
    </entry>
  </descriptors>
  <name>Contact Canonical JsonRecordSetWriter</name>
  <persistsState>false</persistsState>
  <properties>
    <entry>
      <key>Schema Write Strategy</key>
      <value>schema-name</value>
    </entry>
    <entry>
      <key>schema-access-strategy</key>
      <value>inherit-record-schema</value>
    </entry>
    <entry>
      <key>schema-registry</key>
      <value>966b29cb-5a75-3dd2-0000-000000000000</value>
    </entry>
    <entry>
      <key>schema-name</key>
      <value>ContactAvroSchema</value>
    </entry>
    <entry>
      <key>schema-text</key>
      <value>${avro.schema}</value>
    </entry>
    <!-- The three format entries below carry a key but no value element. -->
    <entry>
      <key>Date Format</key>
    </entry>
    <entry>
      <key>Time Format</key>
    </entry>
    <entry>
      <key>Timestamp Format</key>
    </entry>
    <entry>
      <key>Pretty Print JSON</key>
      <value>false</value>
    </entry>
  </properties>
  <state>ENABLED</state>
  <type>org.apache.nifi.json.JsonRecordSetWriter</type>
</controllerServices>
<!-- Controller service "Contact Canonical JsonTreeReader"
     (id 0f43c7e4-c275-3ebd). The ValidateRecord processor in this template
     points its record-reader property at this id.
     schema-access-strategy is schema-name, schema-name is ContactAvroSchema,
     and schema-registry points at 966b29cb-5a75-3dd2 (the
     AvroSchemaRegistry service). -->
<controllerServices>
  <id>0f43c7e4-c275-3ebd-0000-000000000000</id>
  <parentGroupId>94076eff-d950-3ea7-0000-000000000000</parentGroupId>
  <bundle>
    <artifact>nifi-record-serialization-services-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.4.0</version>
  </bundle>
  <comments></comments>
  <descriptors>
    <entry>
      <key>schema-access-strategy</key>
      <value>
        <name>schema-access-strategy</name>
      </value>
    </entry>
    <entry>
      <key>schema-registry</key>
      <value>
        <identifiesControllerService>org.apache.nifi.schemaregistry.services.SchemaRegistry</identifiesControllerService>
        <name>schema-registry</name>
      </value>
    </entry>
    <entry>
      <key>schema-name</key>
      <value>
        <name>schema-name</name>
      </value>
    </entry>
    <entry>
      <key>schema-text</key>
      <value>
        <name>schema-text</name>
      </value>
    </entry>
    <entry>
      <key>Date Format</key>
      <value>
        <name>Date Format</name>
      </value>
    </entry>
    <entry>
      <key>Time Format</key>
      <value>
        <name>Time Format</name>
      </value>
    </entry>
    <entry>
      <key>Timestamp Format</key>
      <value>
        <name>Timestamp Format</name>
      </value>
    </entry>
  </descriptors>
  <name>Contact Canonical JsonTreeReader</name>
  <persistsState>false</persistsState>
  <properties>
    <entry>
      <key>schema-access-strategy</key>
      <value>schema-name</value>
    </entry>
    <entry>
      <key>schema-registry</key>
      <value>966b29cb-5a75-3dd2-0000-000000000000</value>
    </entry>
    <entry>
      <key>schema-name</key>
      <value>ContactAvroSchema</value>
    </entry>
    <entry>
      <key>schema-text</key>
      <value>${avro.schema}</value>
    </entry>
    <!-- The three format entries below carry a key but no value element. -->
    <entry>
      <key>Date Format</key>
    </entry>
    <entry>
      <key>Time Format</key>
    </entry>
    <entry>
      <key>Timestamp Format</key>
    </entry>
  </properties>
  <state>ENABLED</state>
  <type>org.apache.nifi.json.JsonTreeReader</type>
</controllerServices>
<!-- Processor "Log message is valid" (LogMessage, id 9ba255d6-3adf-3fac).
     It is the destination of the "valid" connection coming from
     ValidateRecord; its own "success" relationship is auto-terminated.
     log-level is set to error.
     NOTE(review): the log message references a mycontent attribute, but no
     processor visible in this template is configured to set it; presumably
     it resolves to an empty string. Confirm. -->
<processors>
  <id>9ba255d6-3adf-3fac-0000-000000000000</id>
  <parentGroupId>94076eff-d950-3ea7-0000-000000000000</parentGroupId>
  <position>
    <x>1174.4052408574717</x>
    <y>392.9707185930878</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.4.0</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>log-level</key>
        <value>
          <name>log-level</name>
        </value>
      </entry>
      <entry>
        <key>log-prefix</key>
        <value>
          <name>log-prefix</name>
        </value>
      </entry>
      <entry>
        <key>log-message</key>
        <value>
          <name>log-message</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>log-level</key>
        <value>error</value>
      </entry>
      <entry>
        <key>log-prefix</key>
      </entry>
      <entry>
        <key>log-message</key>
        <value>parsing of ${filename} was valid. Body: ${mycontent}</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>Log message is valid</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>success</name>
  </relationships>
  <state>RUNNING</state>
  <style></style>
  <type>org.apache.nifi.processors.standard.LogMessage</type>
</processors>
<!-- Processor "GetFile" (id b3355507-4d1e-34d1), the source of the flow.
     Input Directory is /tmp/input, subdirectories are recursed, hidden
     files are ignored, and Keep Source File is false.
     Its "success" relationship is not auto-terminated; the connection
     04756719-eca8-3bfb routes it to ValidateRecord. -->
<processors>
  <id>b3355507-4d1e-34d1-0000-000000000000</id>
  <parentGroupId>94076eff-d950-3ea7-0000-000000000000</parentGroupId>
  <position>
    <x>509.8235972073709</x>
    <y>0.0</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.4.0</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>Input Directory</key>
        <value>
          <name>Input Directory</name>
        </value>
      </entry>
      <entry>
        <key>File Filter</key>
        <value>
          <name>File Filter</name>
        </value>
      </entry>
      <entry>
        <key>Path Filter</key>
        <value>
          <name>Path Filter</name>
        </value>
      </entry>
      <entry>
        <key>Batch Size</key>
        <value>
          <name>Batch Size</name>
        </value>
      </entry>
      <entry>
        <key>Keep Source File</key>
        <value>
          <name>Keep Source File</name>
        </value>
      </entry>
      <entry>
        <key>Recurse Subdirectories</key>
        <value>
          <name>Recurse Subdirectories</name>
        </value>
      </entry>
      <entry>
        <key>Polling Interval</key>
        <value>
          <name>Polling Interval</name>
        </value>
      </entry>
      <entry>
        <key>Ignore Hidden Files</key>
        <value>
          <name>Ignore Hidden Files</name>
        </value>
      </entry>
      <entry>
        <key>Minimum File Age</key>
        <value>
          <name>Minimum File Age</name>
        </value>
      </entry>
      <entry>
        <key>Maximum File Age</key>
        <value>
          <name>Maximum File Age</name>
        </value>
      </entry>
      <entry>
        <key>Minimum File Size</key>
        <value>
          <name>Minimum File Size</name>
        </value>
      </entry>
      <entry>
        <key>Maximum File Size</key>
        <value>
          <name>Maximum File Size</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>Input Directory</key>
        <value>/tmp/input</value>
      </entry>
      <entry>
        <key>File Filter</key>
        <value>[^\.].*</value>
      </entry>
      <entry>
        <key>Path Filter</key>
      </entry>
      <entry>
        <key>Batch Size</key>
        <value>10</value>
      </entry>
      <entry>
        <key>Keep Source File</key>
        <value>false</value>
      </entry>
      <entry>
        <key>Recurse Subdirectories</key>
        <value>true</value>
      </entry>
      <entry>
        <key>Polling Interval</key>
        <value>0 sec</value>
      </entry>
      <entry>
        <key>Ignore Hidden Files</key>
        <value>true</value>
      </entry>
      <entry>
        <key>Minimum File Age</key>
        <value>0 sec</value>
      </entry>
      <entry>
        <key>Maximum File Age</key>
      </entry>
      <entry>
        <key>Minimum File Size</key>
        <value>0 B</value>
      </entry>
      <entry>
        <key>Maximum File Size</key>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>GetFile</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>success</name>
  </relationships>
  <state>RUNNING</state>
  <style></style>
  <type>org.apache.nifi.processors.standard.GetFile</type>
</processors>
<!-- Processor "Log JSON is invalid" (LogMessage, id cb1aa26f-0116-3def).
     It is the destination of the "invalid" connection coming from
     ValidateRecord; its own "success" relationship is auto-terminated.
     log-level is set to error.
     NOTE(review): the log message references a mycontent attribute, but no
     processor visible in this template is configured to set it; presumably
     it resolves to an empty string. Confirm. -->
<processors>
  <id>cb1aa26f-0116-3def-0000-000000000000</id>
  <parentGroupId>94076eff-d950-3ea7-0000-000000000000</parentGroupId>
  <position>
    <x>750.4311227798462</x>
    <y>808.7906746005999</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.4.0</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>log-level</key>
        <value>
          <name>log-level</name>
        </value>
      </entry>
      <entry>
        <key>log-prefix</key>
        <value>
          <name>log-prefix</name>
        </value>
      </entry>
      <entry>
        <key>log-message</key>
        <value>
          <name>log-message</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>log-level</key>
        <value>error</value>
      </entry>
      <entry>
        <key>log-prefix</key>
      </entry>
      <entry>
        <key>log-message</key>
        <value>parsing of ${filename} was invalid. Body: ${mycontent}</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>Log JSON is invalid</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>success</name>
  </relationships>
  <state>RUNNING</state>
  <style></style>
  <type>org.apache.nifi.processors.standard.LogMessage</type>
</processors>
<!-- Processor "ValidateRecord" (id 43196d5d-8337-3bc7), the processor under
     discussion in the e-mail above.
     record-reader points at 0f43c7e4-c275-3ebd (the JsonTreeReader service)
     and record-writer at 9b1539c7-a2bf-38d4 (the JsonRecordSetWriter
     service). schema-access-strategy is reader-schema, allow-extra-fields
     is false and strict-type-checking is true.
     None of its three relationships (failure, invalid, valid) is
     auto-terminated; each has its own connection to a LogMessage processor.
     NOTE(review): schema-name and schema-text are also populated here;
     presumably they are not consulted while the strategy is reader-schema.
     Confirm against the processor documentation. -->
<processors>
  <id>43196d5d-8337-3bc7-0000-000000000000</id>
  <parentGroupId>94076eff-d950-3ea7-0000-000000000000</parentGroupId>
  <position>
    <x>480.37048903805</x>
    <y>309.0747289815986</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.4.0</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>record-reader</key>
        <value>
          <identifiesControllerService>org.apache.nifi.serialization.RecordReaderFactory</identifiesControllerService>
          <name>record-reader</name>
        </value>
      </entry>
      <entry>
        <key>record-writer</key>
        <value>
          <identifiesControllerService>org.apache.nifi.serialization.RecordSetWriterFactory</identifiesControllerService>
          <name>record-writer</name>
        </value>
      </entry>
      <entry>
        <key>schema-access-strategy</key>
        <value>
          <name>schema-access-strategy</name>
        </value>
      </entry>
      <entry>
        <key>schema-registry</key>
        <value>
          <identifiesControllerService>org.apache.nifi.schemaregistry.services.SchemaRegistry</identifiesControllerService>
          <name>schema-registry</name>
        </value>
      </entry>
      <entry>
        <key>schema-name</key>
        <value>
          <name>schema-name</name>
        </value>
      </entry>
      <entry>
        <key>schema-text</key>
        <value>
          <name>schema-text</name>
        </value>
      </entry>
      <entry>
        <key>allow-extra-fields</key>
        <value>
          <name>allow-extra-fields</name>
        </value>
      </entry>
      <entry>
        <key>strict-type-checking</key>
        <value>
          <name>strict-type-checking</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>record-reader</key>
        <value>0f43c7e4-c275-3ebd-0000-000000000000</value>
      </entry>
      <entry>
        <key>record-writer</key>
        <value>9b1539c7-a2bf-38d4-0000-000000000000</value>
      </entry>
      <entry>
        <key>schema-access-strategy</key>
        <value>reader-schema</value>
      </entry>
      <entry>
        <key>schema-registry</key>
      </entry>
      <entry>
        <key>schema-name</key>
        <value>${schema.name}</value>
      </entry>
      <entry>
        <key>schema-text</key>
        <value>${avro.schema}</value>
      </entry>
      <entry>
        <key>allow-extra-fields</key>
        <value>false</value>
      </entry>
      <entry>
        <key>strict-type-checking</key>
        <value>true</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>ValidateRecord</name>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>failure</name>
  </relationships>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>invalid</name>
  </relationships>
  <relationships>
    <autoTerminate>false</autoTerminate>
    <name>valid</name>
  </relationships>
  <state>RUNNING</state>
  <style></style>
  <type>org.apache.nifi.processors.standard.ValidateRecord</type>
</processors>
<!-- Processor "Log parsing failed" (LogMessage, id 580f3309-656f-3450).
     It is the destination of the "failure" connection coming from
     ValidateRecord; its own "success" relationship is auto-terminated.
     log-level is set to error.
     The display name was changed from the default type name "LogMessage"
     so that it is distinguishable from the two sibling loggers
     ("Log message is valid", "Log JSON is invalid"). Connections reference
     this processor by id, so the rename does not affect wiring. -->
<processors>
  <id>580f3309-656f-3450-0000-000000000000</id>
  <parentGroupId>94076eff-d950-3ea7-0000-000000000000</parentGroupId>
  <position>
    <x>0.0</x>
    <y>752.3893074130999</y>
  </position>
  <bundle>
    <artifact>nifi-standard-nar</artifact>
    <group>org.apache.nifi</group>
    <version>1.4.0</version>
  </bundle>
  <config>
    <bulletinLevel>WARN</bulletinLevel>
    <comments></comments>
    <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
    <descriptors>
      <entry>
        <key>log-level</key>
        <value>
          <name>log-level</name>
        </value>
      </entry>
      <entry>
        <key>log-prefix</key>
        <value>
          <name>log-prefix</name>
        </value>
      </entry>
      <entry>
        <key>log-message</key>
        <value>
          <name>log-message</name>
        </value>
      </entry>
    </descriptors>
    <executionNode>ALL</executionNode>
    <lossTolerant>false</lossTolerant>
    <penaltyDuration>30 sec</penaltyDuration>
    <properties>
      <entry>
        <key>log-level</key>
        <value>error</value>
      </entry>
      <entry>
        <key>log-prefix</key>
      </entry>
      <entry>
        <key>log-message</key>
        <value>parsing of ${filename} was failed</value>
      </entry>
    </properties>
    <runDurationMillis>0</runDurationMillis>
    <schedulingPeriod>0 sec</schedulingPeriod>
    <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
    <yieldDuration>1 sec</yieldDuration>
  </config>
  <name>Log parsing failed</name>
  <relationships>
    <autoTerminate>true</autoTerminate>
    <name>success</name>
  </relationships>
  <state>RUNNING</state>
  <style></style>
  <type>org.apache.nifi.processors.standard.LogMessage</type>
</processors>
</snippet>
<timestamp>12/06/2017 11:19:59 CET</timestamp>
</template>