This is an automated email from the ASF dual-hosted git repository.
gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-druid.git
The following commit(s) were added to refs/heads/master by this push:
new 8f8a569 faster flattening for non-existent paths (#6654)
8f8a569 is described below
commit 8f8a569aa2e8589f33f256b9d660a4a4e44403cf
Author: Clint Wylie <[email protected]>
AuthorDate: Tue Nov 27 14:14:11 2018 -0800
faster flattening for non-existent paths (#6654)
* faster flattening for non-existent properties to circumvent upstream
json-path issue
* fix json provider
* revert to using null instead of undefined
---
.../impl/FastJacksonJsonNodeJsonProvider.java | 70 ++++++++++++++++++++++
.../util/common/parsers/JSONFlattenerMaker.java | 8 ++-
.../druid/data/input/impl/JSONParseSpecTest.java | 2 +
.../druid/data/input/avro/AvroFlattenerMaker.java | 4 ++
.../data/input/avro/GenericAvroJsonProvider.java | 11 +++-
.../parquet/simple/ParquetGroupFlattenerMaker.java | 3 +-
.../parquet/simple/ParquetGroupJsonProvider.java | 11 +++-
7 files changed, 99 insertions(+), 10 deletions(-)
diff --git
a/core/src/main/java/org/apache/druid/data/input/impl/FastJacksonJsonNodeJsonProvider.java
b/core/src/main/java/org/apache/druid/data/input/impl/FastJacksonJsonNodeJsonProvider.java
new file mode 100644
index 0000000..a98999a
--- /dev/null
+++
b/core/src/main/java/org/apache/druid/data/input/impl/FastJacksonJsonNodeJsonProvider.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.data.input.impl;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import com.jayway.jsonpath.spi.json.JacksonJsonNodeJsonProvider;
+
+import java.util.Collection;
+import java.util.Collections;
+
+//
+//
+
+/**
+ * Custom json-path JsonProvider override to circumvent slow performance when
encountering null paths as described in
+ * https://github.com/json-path/JsonPath/issues/396
+ *
+ * Note that this only avoids errors for map properties, avoiding the
exception on array paths is not possible without
+ * patching json-path itself
+ */
+public class FastJacksonJsonNodeJsonProvider extends
JacksonJsonNodeJsonProvider
+{
+ @Override
+ public boolean isMap(Object obj)
+ {
+ return obj == null || super.isMap(obj);
+ }
+
+ @Override
+ public Object getMapValue(Object obj, String key)
+ {
+ if (obj == null) {
+ return null;
+ } else {
+ ObjectNode jsonObject = (ObjectNode) obj;
+ Object o = jsonObject.get(key);
+ if (!jsonObject.has(key)) {
+ return null;
+ } else {
+ return unwrap(o);
+ }
+ }
+ }
+
+ @Override
+ public Collection<String> getPropertyKeys(final Object o)
+ {
+ if (o == null) {
+ return Collections.emptySet();
+ }
+ return super.getPropertyKeys(o);
+ }
+}
diff --git
a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java
b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java
index d16280a..8d58451 100644
---
a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java
+++
b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java
@@ -24,10 +24,10 @@ import com.google.common.collect.FluentIterable;
import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.Option;
-import com.jayway.jsonpath.spi.json.JacksonJsonNodeJsonProvider;
import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
import net.thisptr.jackson.jq.JsonQuery;
import net.thisptr.jackson.jq.exception.JsonQueryException;
+import org.apache.druid.data.input.impl.FastJacksonJsonNodeJsonProvider;
import org.apache.druid.java.util.common.StringUtils;
import javax.annotation.Nullable;
@@ -45,7 +45,7 @@ public class JSONFlattenerMaker implements
ObjectFlatteners.FlattenerMaker<JsonN
{
private static final Configuration JSONPATH_CONFIGURATION =
Configuration.builder()
- .jsonProvider(new JacksonJsonNodeJsonProvider())
+ .jsonProvider(new FastJacksonJsonNodeJsonProvider())
.mappingProvider(new JacksonMappingProvider())
.options(EnumSet.of(Option.SUPPRESS_EXCEPTIONS))
.build();
@@ -119,7 +119,9 @@ public class JSONFlattenerMaker implements
ObjectFlatteners.FlattenerMaker<JsonN
if (val.isArray()) {
List<Object> newList = new ArrayList<>();
for (JsonNode entry : val) {
- newList.add(valueConversionFunction(entry));
+ if (!entry.isNull()) {
+ newList.add(valueConversionFunction(entry));
+ }
}
return newList;
}
diff --git
a/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java
b/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java
index 7168076..4dd13b6 100644
--- a/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java
+++ b/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java
@@ -91,6 +91,7 @@ public class JSONParseSpecTest
true,
ImmutableList.of(
new JSONPathFieldSpec(JSONPathFieldType.PATH, "foo",
"$.[?(@.maybe_object)].maybe_object.foo.test"),
+ new JSONPathFieldSpec(JSONPathFieldType.PATH, "baz",
"$.maybe_object_2.foo.test"),
new JSONPathFieldSpec(JSONPathFieldType.PATH, "bar",
"$.[?(@.something_else)].something_else.foo")
)
),
@@ -99,6 +100,7 @@ public class JSONParseSpecTest
final Map<String, Object> expected = new HashMap<>();
expected.put("foo", new ArrayList());
+ expected.put("baz", null);
expected.put("bar", Collections.singletonList("test"));
final Parser<String, Object> parser = parseSpec.makeParser();
diff --git
a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java
b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java
index facdaa0..0b55d76 100644
---
a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java
+++
b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java
@@ -34,6 +34,7 @@ import
org.apache.druid.java.util.common.parsers.ObjectFlatteners;
import java.nio.ByteBuffer;
import java.util.EnumSet;
import java.util.List;
+import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
@@ -140,6 +141,9 @@ public class AvroFlattenerMaker implements
ObjectFlatteners.FlattenerMaker<Gener
if (field instanceof Utf8) {
return field.toString();
}
+ if (field instanceof List) {
+ return ((List)
field).stream().filter(Objects::nonNull).collect(Collectors.toList());
+ }
return field;
}
}
diff --git
a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/GenericAvroJsonProvider.java
b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/GenericAvroJsonProvider.java
index eb41632..4beb757 100644
---
a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/GenericAvroJsonProvider.java
+++
b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/GenericAvroJsonProvider.java
@@ -27,6 +27,7 @@ import org.apache.avro.generic.GenericRecord;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -98,7 +99,9 @@ public class GenericAvroJsonProvider implements JsonProvider
@Override
public Collection<String> getPropertyKeys(final Object o)
{
- if (o instanceof Map) {
+ if (o == null) {
+ return Collections.emptySet();
+ } else if (o instanceof Map) {
return ((Map<Object, Object>)
o).keySet().stream().map(String::valueOf).collect(Collectors.toSet());
} else if (o instanceof GenericRecord) {
return ((GenericRecord)
o).getSchema().getFields().stream().map(Schema.Field::name).collect(Collectors.toSet());
@@ -138,7 +141,9 @@ public class GenericAvroJsonProvider implements JsonProvider
@Override
public Object getMapValue(final Object o, final String s)
{
- if (o instanceof GenericRecord) {
+ if (o == null) {
+ return null;
+ } else if (o instanceof GenericRecord) {
return ((GenericRecord) o).get(s);
} else if (o instanceof Map) {
return ((Map) o).get(s);
@@ -172,7 +177,7 @@ public class GenericAvroJsonProvider implements JsonProvider
@Override
public boolean isMap(final Object o)
{
- return o instanceof Map || o instanceof GenericRecord;
+ return o == null || o instanceof Map || o instanceof GenericRecord;
}
@Override
diff --git
a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupFlattenerMaker.java
b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupFlattenerMaker.java
index cd2612e..2ba939b 100644
---
a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupFlattenerMaker.java
+++
b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupFlattenerMaker.java
@@ -30,6 +30,7 @@ import org.apache.parquet.schema.Type;
import javax.annotation.Nullable;
import java.util.EnumSet;
import java.util.List;
+import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
@@ -100,7 +101,7 @@ public class ParquetGroupFlattenerMaker implements
ObjectFlatteners.FlattenerMak
if (ParquetGroupConverter.isWrappedListPrimitive(o)) {
return converter.unwrapListPrimitive(o);
} else if (o instanceof List) {
- List<Object> asList = (List<Object>) o;
+ List<Object> asList = ((List<Object>)
o).stream().filter(Objects::nonNull).collect(Collectors.toList());
if
(asList.stream().allMatch(ParquetGroupConverter::isWrappedListPrimitive)) {
return
asList.stream().map(Group.class::cast).map(converter::unwrapListPrimitive).collect(Collectors.toList());
}
diff --git
a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java
b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java
index 78e3f33..b422e97 100644
---
a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java
+++
b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java
@@ -26,6 +26,7 @@ import org.apache.parquet.example.data.Group;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -64,7 +65,7 @@ public class ParquetGroupJsonProvider implements JsonProvider
@Override
public boolean isMap(final Object o)
{
- return o instanceof Map || o instanceof Group;
+ return o == null || o instanceof Map || o instanceof Group;
}
@Override
@@ -93,7 +94,9 @@ public class ParquetGroupJsonProvider implements JsonProvider
@Override
public Collection<String> getPropertyKeys(final Object o)
{
- if (o instanceof Map) {
+ if (o == null) {
+ return Collections.emptySet();
+ } else if (o instanceof Map) {
return ((Map<Object, Object>)
o).keySet().stream().map(String::valueOf).collect(Collectors.toSet());
} else if (o instanceof Group) {
return ((Group) o).getType().getFields().stream().map(f ->
f.getName()).collect(Collectors.toSet());
@@ -105,7 +108,9 @@ public class ParquetGroupJsonProvider implements
JsonProvider
@Override
public Object getMapValue(final Object o, final String s)
{
- if (o instanceof Map) {
+ if (o == null) {
+ return null;
+ } else if (o instanceof Map) {
return ((Map) o).get(s);
} else if (o instanceof Group) {
Group g = (Group) o;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]