This is an automated email from the ASF dual-hosted git repository.
mattyb149 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi.git
The following commit(s) were added to refs/heads/main by this push:
new 2895bac NIFI-8512: When converting to/from Avro UNION type, we can be
more efficient when the UNION consists of a Null type and one other type by
determining the non-null type and just using that. Also eliminated a call to
List.stream() and related .collect() call by using an existing method that
performs the logic without the very expensive call to stream()
2895bac is described below
commit 2895bac2c088aa66c8b86ecba680f80e1323105a
Author: Mark Payne <[email protected]>
AuthorDate: Mon May 3 17:49:36 2021 -0400
NIFI-8512: When converting to/from Avro UNION type, we can be more
efficient when the UNION consists of a Null type and one other type by
determining the non-null type and just using that. Also eliminated a call to
List.stream() and related .collect() call by using an existing method that
performs the logic without the very expensive call to stream()
Signed-off-by: Matthew Burgess <[email protected]>
This closes #5051
---
.../src/main/java/org/apache/nifi/avro/AvroTypeUtil.java | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git
a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-avro-record-utils/src/main/java/org/apache/nifi/avro/AvroTypeUtil.java
b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-avro-record-utils/src/main/java/org/apache/nifi/avro/AvroTypeUtil.java
index 024f1b5..3eb26e3 100644
---
a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-avro-record-utils/src/main/java/org/apache/nifi/avro/AvroTypeUtil.java
+++
b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-avro-record-utils/src/main/java/org/apache/nifi/avro/AvroTypeUtil.java
@@ -76,7 +76,6 @@ import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
-import java.util.stream.Collectors;
public class AvroTypeUtil {
private static final Logger logger =
LoggerFactory.getLogger(AvroTypeUtil.class);
@@ -900,10 +899,19 @@ public class AvroTypeUtil {
private static Object convertUnionFieldValue(final Object originalValue,
final Schema fieldSchema, final Function<Schema, Object> conversion, final
String fieldName) {
boolean foundNonNull = false;
+ // It is an extremely common case to have a UNION type because a field
can be NULL or some other type. In this situation,
+ // we will have two possible types, and one of them will be null. When
this happens, we can be much more efficient by simply
+ // determining the non-null type and converting to that.
+ final List<Schema> schemaTypes = fieldSchema.getTypes();
+ if (schemaTypes.size() == 2 && (schemaTypes.get(0).getType() ==
Type.NULL || schemaTypes.get(1).getType() == Type.NULL)) {
+ final Schema nonNullType = schemaTypes.get(0).getType() ==
Type.NULL ? schemaTypes.get(1) : schemaTypes.get(0);
+ return conversion.apply(nonNullType);
+ }
+
Optional<Schema> mostSuitableType = DataTypeUtils.findMostSuitableType(
originalValue,
- fieldSchema.getTypes().stream().filter(schema ->
schema.getType() != Type.NULL).collect(Collectors.toList()),
- subSchema -> AvroTypeUtil.determineDataType(subSchema)
+ getNonNullSubSchemas(fieldSchema),
+ AvroTypeUtil::determineDataType
);
if (mostSuitableType.isPresent()) {
return conversion.apply(mostSuitableType.get());