This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new c2c887a allow to extract values from array of objects with
jsonPathArray (#7208)
c2c887a is described below
commit c2c887afe2445d61f97130e82b6fcaa5019d5174
Author: Xiaobing <[email protected]>
AuthorDate: Wed Jul 28 12:33:58 2021 -0700
allow to extract values from array of objects with jsonPathArray (#7208)
Allow to extract values from array of objects with jsonPathArray.
And add jsonPathArrayDefaultEmpty to allow skip json records without target
path, instead of aborting the ingestion.
---
.../common/function/scalar/JsonFunctions.java | 48 ++++++++-
.../pinot/common/function/JsonFunctionsTest.java | 78 ++++++++++++++
.../scalar/ArrayAwareJacksonJsonProviderTest.java | 114 +++++++++++++++++++++
3 files changed, 236 insertions(+), 4 deletions(-)
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/JsonFunctions.java
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/JsonFunctions.java
index 37027ff..82536d6 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/JsonFunctions.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/JsonFunctions.java
@@ -19,6 +19,7 @@
package org.apache.pinot.common.function.scalar;
import com.fasterxml.jackson.core.JsonProcessingException;
+import com.google.common.annotations.VisibleForTesting;
import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.Option;
@@ -26,6 +27,7 @@ import com.jayway.jsonpath.spi.json.JacksonJsonProvider;
import com.jayway.jsonpath.spi.json.JsonProvider;
import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
import com.jayway.jsonpath.spi.mapper.MappingProvider;
+import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
@@ -48,7 +50,7 @@ import org.apache.pinot.spi.utils.JsonUtils;
public class JsonFunctions {
static {
Configuration.setDefaults(new Configuration.Defaults() {
- private final JsonProvider jsonProvider = new JacksonJsonProvider();
+ private final JsonProvider jsonProvider = new
ArrayAwareJacksonJsonProvider();
private final MappingProvider mappingProvider = new
JacksonMappingProvider();
@Override
@@ -109,12 +111,18 @@ public class JsonFunctions {
if (object instanceof String) {
return convertObjectToArray(JsonPath.read((String) object, jsonPath));
}
- if (object instanceof Object[]) {
- return
convertObjectToArray(JsonPath.read(JsonUtils.objectToString(object), jsonPath));
- }
return convertObjectToArray(JsonPath.read(object, jsonPath));
}
+ @ScalarFunction
+ public static Object[] jsonPathArrayDefaultEmpty(Object object, String
jsonPath) {
+ try {
+ return jsonPathArray(object, jsonPath);
+ } catch (Exception e) {
+ return new Object[0];
+ }
+ }
+
private static Object[] convertObjectToArray(Object arrayObject) {
if (arrayObject instanceof List) {
return ((List) arrayObject).toArray();
@@ -199,4 +207,36 @@ public class JsonFunctions {
return defaultValue;
}
}
+
+ @VisibleForTesting
+ static class ArrayAwareJacksonJsonProvider extends JacksonJsonProvider {
+ @Override
+ public boolean isArray(Object obj) {
+ return (obj instanceof List) || (obj instanceof Object[]);
+ }
+
+ @Override
+ public Object getArrayIndex(Object obj, int idx) {
+ if (obj instanceof Object[]) {
+ return ((Object[]) obj)[idx];
+ }
+ return super.getArrayIndex(obj, idx);
+ }
+
+ @Override
+ public int length(Object obj) {
+ if (obj instanceof Object[]) {
+ return ((Object[]) obj).length;
+ }
+ return super.length(obj);
+ }
+
+ @Override
+ public Iterable<?> toIterable(Object obj) {
+ if (obj instanceof Object[]) {
+ return Arrays.asList((Object[]) obj);
+ }
+ return super.toIterable(obj);
+ }
+ }
}
diff --git
a/pinot-common/src/test/java/org/apache/pinot/common/function/JsonFunctionsTest.java
b/pinot-common/src/test/java/org/apache/pinot/common/function/JsonFunctionsTest.java
index cbfe2e4..47f56d5 100644
---
a/pinot-common/src/test/java/org/apache/pinot/common/function/JsonFunctionsTest.java
+++
b/pinot-common/src/test/java/org/apache/pinot/common/function/JsonFunctionsTest.java
@@ -19,7 +19,9 @@
package org.apache.pinot.common.function;
import com.fasterxml.jackson.core.JsonProcessingException;
+import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
+import com.jayway.jsonpath.PathNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -28,6 +30,7 @@ import org.apache.pinot.common.function.scalar.JsonFunctions;
import org.testng.annotations.Test;
import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.fail;
public class JsonFunctionsTest {
@@ -95,6 +98,81 @@ public class JsonFunctionsTest {
}
@Test
+ public void testJsonFunctionExtractingArrayWithMissingField()
+ throws JsonProcessingException {
+ String jsonString = "{\"name\": \"Pete\", \"age\": 24}";
+
+ try {
+ assertEquals(JsonFunctions.jsonPathArray(jsonString,
"$.subjects[*].name"), new String[]{});
+ fail();
+ } catch (PathNotFoundException e) {
+ assertEquals(e.getMessage(), "Missing property in path $['subjects']");
+ }
+ assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString,
"$.subjects[*].name"), new String[]{});
+ assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString,
"$.subjects[*].grade"), new String[]{});
+ assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString,
"$.subjects[*].homework_grades"), new Object[]{});
+
+ // jsonPathArrayDefaultEmpty should work fine with existing fields.
+ jsonString = "{\n" +
+ " \"name\": \"Pete\",\n" +
+ " \"age\": 24,\n" +
+ " \"subjects\": [\n" +
+ " {\n" +
+ " \"name\": \"maths\",\n" +
+ " \"homework_grades\": [80, 85, 90, 95, 100],\n" +
+ " \"grade\": \"A\"\n" +
+ " },\n" +
+ " {\n" +
+ " \"name\": \"english\",\n" +
+ " \"homework_grades\": [60, 65, 70, 85, 90],\n" +
+ " \"grade\": \"B\"\n" +
+ " }\n" +
+ " ]\n" +
+ "}";
+ assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString,
"$.subjects[*].name"), new String[]{"maths", "english"});
+ assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString,
"$.subjects[*].grade"), new String[]{"A", "B"});
+ assertEquals(JsonFunctions.jsonPathArrayDefaultEmpty(jsonString,
"$.subjects[*].homework_grades"),
+ new Object[]{Arrays.asList(80, 85, 90, 95, 100), Arrays.asList(60, 65,
70, 85, 90)});
+ }
+
+ @Test
+ public void testJsonFunctionExtractingArrayWithObjectArray()
+ throws JsonProcessingException {
+ // ImmutableList works fine with JsonPath with default
JacksonJsonProvider. But on ingestion
+ // path, JSONRecordExtractor converts all Collections in parsed JSON
object to Object[].
+ // Object[] doesn't work with default JsonPath, where "$.commits[*].sha"
would return empty,
+ // and "$.commits[1].sha" led to exception `Filter: [1]['sha'] can only be
applied to arrays`.
+ // Those failure could be reproduced by using the default
JacksonJsonProvider for JsonPath.
+ Map<String, Object> rawData = ImmutableMap.of("commits",
+ ImmutableList.of(ImmutableMap.of("sha", 123, "name", "k"),
ImmutableMap.of("sha", 456, "name", "j")));
+ assertEquals(JsonFunctions.jsonPathArray(rawData, "$.commits[*].sha"), new
Integer[]{123, 456});
+ assertEquals(JsonFunctions.jsonPathArray(rawData, "$.commits[1].sha"), new
Integer[]{456});
+
+ // ArrayAwareJacksonJsonProvider should fix this issue.
+ rawData = ImmutableMap.of("commits",
+ new Object[]{ImmutableMap.of("sha", 123, "name", "k"),
ImmutableMap.of("sha", 456, "name", "j")});
+ assertEquals(JsonFunctions.jsonPathArray(rawData, "$.commits[*].sha"), new
Integer[]{123, 456});
+ assertEquals(JsonFunctions.jsonPathArray(rawData, "$.commits[1].sha"), new
Integer[]{456});
+ }
+
+ @Test
+ public void testJsonFunctionExtractingArrayWithTopLevelObjectArray()
+ throws JsonProcessingException {
+ // JSON formatted string works fine with JsonPath, and we used to
serialize Object[]
+ // to JSON formatted string for JsonPath to work.
+ String rawDataInStr = "[{\"sha\": 123, \"name\": \"k\"}, {\"sha\": 456,
\"name\": \"j\"}]";
+ assertEquals(JsonFunctions.jsonPathArray(rawDataInStr, "$.[*].sha"), new
Integer[]{123, 456});
+ assertEquals(JsonFunctions.jsonPathArray(rawDataInStr, "$.[1].sha"), new
Integer[]{456});
+
+ // ArrayAwareJacksonJsonProvider can work with Array directly, thus no
need to serialize
+ // Object[] any more.
+ Object[] rawDataInAry =
+ new Object[]{ImmutableMap.of("sha", 123, "name", "kk"),
ImmutableMap.of("sha", 456, "name", "jj")};
+ assertEquals(JsonFunctions.jsonPathArray(rawDataInAry, "$.[*].sha"), new
Integer[]{123, 456});
+ assertEquals(JsonFunctions.jsonPathArray(rawDataInAry, "$.[1].sha"), new
Integer[]{456});
+ }
+
+ @Test
public void testJsonFunctionOnJsonArray()
throws JsonProcessingException {
String jsonArrayString =
diff --git
a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/ArrayAwareJacksonJsonProviderTest.java
b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/ArrayAwareJacksonJsonProviderTest.java
new file mode 100644
index 0000000..3f4b71e
--- /dev/null
+++
b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/ArrayAwareJacksonJsonProviderTest.java
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.function.scalar;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.*;
+
+
+public class ArrayAwareJacksonJsonProviderTest {
+ @Test
+ public void testIsArray() {
+ JsonFunctions.ArrayAwareJacksonJsonProvider jp = new
JsonFunctions.ArrayAwareJacksonJsonProvider();
+
+ assertFalse(jp.isArray(null));
+ assertTrue(jp.isArray(new Object[0]));
+ assertTrue(jp.isArray(new ArrayList<>(0)));
+ assertFalse(jp.isArray("I'm a list"));
+ assertFalse(jp.isArray(new HashMap<String, String>()));
+ }
+
+ @Test
+ public void testArrayLength() {
+ JsonFunctions.ArrayAwareJacksonJsonProvider jp = new
JsonFunctions.ArrayAwareJacksonJsonProvider();
+
+ Object[] dataInAry = new Object[]{"123", "456"};
+ assertEquals(jp.length(dataInAry), 2);
+
+ List<Object> dataInList = Arrays.asList("abc", "efg", "hij");
+ assertEquals(jp.length(dataInList), 3);
+
+ try {
+ jp.length(null);
+ fail();
+ } catch (NullPointerException e) {
+ // It's supposed to get a JsonPathException, but JsonPath library
actually
+ // has a bug leading to NullPointerException while creating the
JsonPathException.
+ assertNull(e.getMessage());
+ }
+ }
+
+ @Test
+ public void testGetArrayIndex() {
+ JsonFunctions.ArrayAwareJacksonJsonProvider jp = new
JsonFunctions.ArrayAwareJacksonJsonProvider();
+
+ Object[] dataInAry = new Object[]{"123", "456"};
+ for (int i = 0; i < dataInAry.length; i++) {
+ assertEquals(jp.getArrayIndex(dataInAry, i), dataInAry[i]);
+ }
+ try {
+ jp.getArrayIndex(dataInAry, dataInAry.length);
+ fail();
+ } catch (IndexOutOfBoundsException e) {
+ assertEquals(e.getMessage(), "Index 2 out of bounds for length 2");
+ }
+
+ List<Object> dataInList = Arrays.asList("abc", "efg", "hij");
+ for (int i = 0; i < dataInList.size(); i++) {
+ assertEquals(jp.getArrayIndex(dataInList, i), dataInList.get(i));
+ }
+ try {
+ jp.getArrayIndex(dataInList, dataInList.size());
+ fail();
+ } catch (IndexOutOfBoundsException e) {
+ assertEquals(e.getMessage(), "Index 3 out of bounds for length 3");
+ }
+ }
+
+ @Test
+ public void testToIterable() {
+ JsonFunctions.ArrayAwareJacksonJsonProvider jp = new
JsonFunctions.ArrayAwareJacksonJsonProvider();
+
+ Object[] dataInAry = new Object[]{"123", "456"};
+ int idx = 0;
+ for (Object v : jp.toIterable(dataInAry)) {
+ assertEquals(v, dataInAry[idx++]);
+ }
+
+ List<Object> dataInList = Arrays.asList("abc", "efg", "hij");
+ idx = 0;
+ for (Object v : jp.toIterable(dataInList)) {
+ assertEquals(v, dataInList.get(idx++));
+ }
+
+ try {
+ jp.toIterable(null);
+ fail();
+ } catch (NullPointerException e) {
+ // It's supposed to get a JsonPathException, but JsonPath library
actually
+ // has a bug leading to NullPointerException while creating the
JsonPathException.
+ assertNull(e.getMessage());
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]