gianm closed pull request #6654: faster flattening for non-existent paths URL: https://github.com/apache/incubator-druid/pull/6654
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/core/src/main/java/org/apache/druid/data/input/impl/FastJacksonJsonNodeJsonProvider.java b/core/src/main/java/org/apache/druid/data/input/impl/FastJacksonJsonNodeJsonProvider.java new file mode 100644 index 00000000000..a98999a1b72 --- /dev/null +++ b/core/src/main/java/org/apache/druid/data/input/impl/FastJacksonJsonNodeJsonProvider.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.data.input.impl; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.jayway.jsonpath.spi.json.JacksonJsonNodeJsonProvider; + +import java.util.Collection; +import java.util.Collections; + +// +// + +/** + * Custom json-path JsonProvider override to circumvent slow performance when encountering null paths as described in + * https://github.com/json-path/JsonPath/issues/396 + * + * Note that this only avoids errors for map properties, avoiding the exception on array paths is not possible without + * patching json-path itself + */ +public class FastJacksonJsonNodeJsonProvider extends JacksonJsonNodeJsonProvider +{ + @Override + public boolean isMap(Object obj) + { + return obj == null || super.isMap(obj); + } + + @Override + public Object getMapValue(Object obj, String key) + { + if (obj == null) { + return null; + } else { + ObjectNode jsonObject = (ObjectNode) obj; + Object o = jsonObject.get(key); + if (!jsonObject.has(key)) { + return null; + } else { + return unwrap(o); + } + } + } + + @Override + public Collection<String> getPropertyKeys(final Object o) + { + if (o == null) { + return Collections.emptySet(); + } + return super.getPropertyKeys(o); + } +} diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java index d16280a8831..8d58451f34b 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java +++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java @@ -24,10 +24,10 @@ import com.jayway.jsonpath.Configuration; import com.jayway.jsonpath.JsonPath; import com.jayway.jsonpath.Option; -import com.jayway.jsonpath.spi.json.JacksonJsonNodeJsonProvider; import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider; import net.thisptr.jackson.jq.JsonQuery; import net.thisptr.jackson.jq.exception.JsonQueryException; +import org.apache.druid.data.input.impl.FastJacksonJsonNodeJsonProvider; import org.apache.druid.java.util.common.StringUtils; import javax.annotation.Nullable; @@ -45,7 +45,7 @@ { private static final Configuration JSONPATH_CONFIGURATION = Configuration.builder() - .jsonProvider(new JacksonJsonNodeJsonProvider()) + .jsonProvider(new FastJacksonJsonNodeJsonProvider()) .mappingProvider(new JacksonMappingProvider()) .options(EnumSet.of(Option.SUPPRESS_EXCEPTIONS)) .build(); @@ -119,7 +119,9 @@ private Object valueConversionFunction(JsonNode val) if (val.isArray()) { List<Object> newList = new ArrayList<>(); for (JsonNode entry : val) { - newList.add(valueConversionFunction(entry)); + if (!entry.isNull()) { + newList.add(valueConversionFunction(entry)); + } } return newList; } diff --git a/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java b/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java index 7168076b265..4dd13b601a6 100644 --- a/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java +++ b/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java @@ -91,6 +91,7 @@ public void testParseRowWithConditional() true, ImmutableList.of( new JSONPathFieldSpec(JSONPathFieldType.PATH, "foo", "$.[?(@.maybe_object)].maybe_object.foo.test"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "baz", "$.maybe_object_2.foo.test"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "bar", "$.[?(@.something_else)].something_else.foo") ) ), @@ -99,6 +100,7 @@ public void testParseRowWithConditional() final Map<String, Object> expected = new HashMap<>(); expected.put("foo", new ArrayList()); + expected.put("baz", null); expected.put("bar", Collections.singletonList("test")); final Parser<String, Object> parser = parseSpec.makeParser(); diff --git a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java index facdaa043a7..0b55d762f00 100644 --- a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java +++ b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java @@ -34,6 +34,7 @@ import java.nio.ByteBuffer; import java.util.EnumSet; import java.util.List; +import java.util.Objects; import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; @@ -140,6 +141,9 @@ private Object transformValue(final Object field) if (field instanceof Utf8) { return field.toString(); } + if (field instanceof List) { + return ((List) field).stream().filter(Objects::nonNull).collect(Collectors.toList()); + } return field; } } diff --git a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/GenericAvroJsonProvider.java b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/GenericAvroJsonProvider.java index eb41632fee8..4beb7578dcd 100644 --- a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/GenericAvroJsonProvider.java +++ b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/GenericAvroJsonProvider.java @@ -27,6 +27,7 @@ import java.io.InputStream; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -98,7 +99,9 @@ public int length(final Object o) @Override public Collection<String> getPropertyKeys(final Object o) { - if (o instanceof Map) { + if (o == null) { + return Collections.emptySet(); + } else if (o instanceof Map) { return ((Map<Object, Object>) o).keySet().stream().map(String::valueOf).collect(Collectors.toSet()); } else if (o instanceof GenericRecord) { return ((GenericRecord) o).getSchema().getFields().stream().map(Schema.Field::name).collect(Collectors.toSet()); @@ -138,7 +141,9 @@ public void setArrayIndex(final Object o, final int i, final Object o1) @Override public Object getMapValue(final Object o, final String s) { - if (o instanceof GenericRecord) { + if (o == null) { + return null; + } else if (o instanceof GenericRecord) { return ((GenericRecord) o).get(s); } else if (o instanceof Map) { return ((Map) o).get(s); @@ -172,7 +177,7 @@ public void removeProperty(final Object o, final Object o1) @Override public boolean isMap(final Object o) { - return o instanceof Map || o instanceof GenericRecord; + return o == null || o instanceof Map || o instanceof GenericRecord; } @Override diff --git a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupFlattenerMaker.java b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupFlattenerMaker.java index cd2612ebd01..2ba939b162a 100644 --- a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupFlattenerMaker.java +++ b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupFlattenerMaker.java @@ -30,6 +30,7 @@ import javax.annotation.Nullable; import java.util.EnumSet; import java.util.List; +import java.util.Objects; import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; @@ -100,7 +101,7 @@ private Object finalizeConversion(Object o) if (ParquetGroupConverter.isWrappedListPrimitive(o)) { return converter.unwrapListPrimitive(o); } else if (o instanceof List) { - List<Object> asList = (List<Object>) o; + List<Object> asList = ((List<Object>) o).stream().filter(Objects::nonNull).collect(Collectors.toList()); if (asList.stream().allMatch(ParquetGroupConverter::isWrappedListPrimitive)) { return asList.stream().map(Group.class::cast).map(converter::unwrapListPrimitive).collect(Collectors.toList()); } diff --git a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java index 78e3f3355f7..b422e97d7ed 100644 --- a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java +++ b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java @@ -26,6 +26,7 @@ import java.io.InputStream; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -64,7 +65,7 @@ public boolean isArray(final Object o) @Override public boolean isMap(final Object o) { - return o instanceof Map || o instanceof Group; + return o == null || o instanceof Map || o instanceof Group; } @Override @@ -93,7 +94,9 @@ public int length(final Object o) @Override public Collection<String> getPropertyKeys(final Object o) { - if (o instanceof Map) { + if (o == null) { + return Collections.emptySet(); + } else if (o instanceof Map) { return ((Map<Object, Object>) o).keySet().stream().map(String::valueOf).collect(Collectors.toSet()); } else if (o instanceof Group) { return ((Group) o).getType().getFields().stream().map(f -> f.getName()).collect(Collectors.toSet()); @@ -105,7 +108,9 @@ public int length(final Object o) @Override public Object getMapValue(final Object o, final String s) { - if (o instanceof Map) { + if (o == null) { + return null; + } else if (o instanceof Map) { return ((Map) o).get(s); } else if (o instanceof Group) { Group g = (Group) o; ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
