This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new e25ba00470 fix bug in ObjectFlatteners.toMap which caused null values
in avro-stream/avro-ocf/parquet/orc to be converted to {} instead of null in
web-console sampler UI (#12785)
e25ba00470 is described below
commit e25ba00470c17e6ff50c4d70d96b1c7ba0a1f200
Author: Clint Wylie <[email protected]>
AuthorDate: Thu Jul 14 16:52:01 2022 -0700
fix bug in ObjectFlatteners.toMap which caused null values in
avro-stream/avro-ocf/parquet/orc to be converted to {} instead of null in
web-console sampler UI (#12785)
* fix bug in ObjectFlatteners.toMap which caused null values in
avro-stream/avro-ocf/parquet/orc to be converted to {} instead of null
* fix parquet test that expected wrong behavior, my bad heh
---
.../apache/druid/data/input/impl/JsonReader.java | 2 +-
.../java/util/common/parsers/ObjectFlattener.java | 15 ++++++
.../java/util/common/parsers/ObjectFlatteners.java | 4 +-
.../util/common/parsers/ObjectFlattenersTest.java | 61 ++++++++++++++++++++++
.../input/parquet/CompatParquetReaderTest.java | 22 ++++----
5 files changed, 91 insertions(+), 13 deletions(-)
diff --git a/core/src/main/java/org/apache/druid/data/input/impl/JsonReader.java b/core/src/main/java/org/apache/druid/data/input/impl/JsonReader.java
index 8d0f6678e3..8dee12dc30 100644
--- a/core/src/main/java/org/apache/druid/data/input/impl/JsonReader.java
+++ b/core/src/main/java/org/apache/druid/data/input/impl/JsonReader.java
@@ -47,7 +47,7 @@ import java.util.List;
import java.util.Map;
/**
- * In constract to {@link JsonLineReader} which processes input text line by line independently,
+ * In contrast to {@link JsonLineReader} which processes input text line by line independently,
* this class tries to parse the input text as a whole to an array of objects.
*
* The input text can be:
diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlattener.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlattener.java
index 36e7ca34ac..19b702d83e 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlattener.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlattener.java
@@ -23,7 +23,22 @@ import java.util.Map;
public interface ObjectFlattener<T>
{
+ /**
+ * Transforms an input row object into a {@link Map}, likely based on the instructions in some {@link JSONPathSpec}.
+ *
+ * This method is used in normal ingestion to extract values into a map to translate into an
+ * {@link org.apache.druid.data.input.InputRow}
+ */
Map<String, Object> flatten(T obj);
+ /**
+ * Completely transforms an input row into a {@link Map}, including translating all nested structure into plain java
+ * objects such as {@link Map} and {@link java.util.List}. This method should translate everything as much as
+ * possible, ignoring any instructions in {@link JSONPathSpec} which might otherwise limit the amount of
+ * transformation done.
+ *
+ * This method is used by the ingestion "sampler" to provide a "raw" JSON form of the original input data, regardless
+ * of actual format, so that it can use "inline" JSON datasources and reduce sampling overhead.
+ */
Map<String, Object> toMap(T obj);
}
diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlatteners.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlatteners.java
index 78fd114871..77ae467e85 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlatteners.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlatteners.java
@@ -251,7 +251,9 @@ public class ObjectFlatteners
Map<String, Object> actualMap = new HashMap<>();
for (String key : jsonProvider.getPropertyKeys(o)) {
Object field = jsonProvider.getMapValue(o, key);
- if (jsonProvider.isMap(field) || jsonProvider.isArray(field)) {
+ if (field == null) {
+ actualMap.put(key, null);
+ } else if (jsonProvider.isMap(field) || jsonProvider.isArray(field)) {
actualMap.put(key, toMapHelper(finalizeConversionForMap(field)));
} else {
actualMap.put(key, finalizeConversionForMap(field));
diff --git a/core/src/test/java/org/apache/druid/java/util/common/parsers/ObjectFlattenersTest.java b/core/src/test/java/org/apache/druid/java/util/common/parsers/ObjectFlattenersTest.java
new file mode 100644
index 0000000000..e48c4dafe8
--- /dev/null
+++ b/core/src/test/java/org/apache/druid/java/util/common/parsers/ObjectFlattenersTest.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.java.util.common.parsers;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.ImmutableList;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Map;
+
+public class ObjectFlattenersTest
+{
+ private static final String SOME_JSON = "{\"foo\": null, \"bar\": 1}";
+ private static final ObjectFlattener FLATTENER = ObjectFlatteners.create(
+ new JSONPathSpec(
+ true,
+ ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.PATH, "extract", "$.bar"))
+ ),
+ new JSONFlattenerMaker(true)
+ );
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+ @Test
+ public void testFlatten() throws JsonProcessingException
+ {
+ JsonNode node = OBJECT_MAPPER.readTree(SOME_JSON);
+ Map<String, Object> flat = FLATTENER.flatten(node);
+ Assert.assertNull(flat.get("foo"));
+ Assert.assertEquals(1L, flat.get("bar"));
+ Assert.assertEquals(1L, flat.get("extract"));
+ }
+
+ @Test
+ public void testToMap() throws JsonProcessingException
+ {
+ JsonNode node = OBJECT_MAPPER.readTree(SOME_JSON);
+ Map<String, Object> flat = FLATTENER.toMap(node);
+ Assert.assertNull(flat.get("foo"));
+ Assert.assertEquals(1, flat.get("bar"));
+ }
+}
diff --git a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java
index 60173212b5..f94949d6bd 100644
--- a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java
+++ b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java
@@ -248,30 +248,30 @@ public class CompatParquetReaderTest extends BaseParquetReaderTest
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
final String expectedJson = "{\n"
+ " \"enumColumn\" : \"SPADES\",\n"
- + " \"maybeStringColumn\" : { },\n"
- + " \"maybeBinaryColumn\" : { },\n"
+ + " \"maybeStringColumn\" : null,\n"
+ + " \"maybeBinaryColumn\" : null,\n"
+ " \"shortColumn\" : 1,\n"
+ " \"byteColumn\" : 0,\n"
- + " \"maybeBoolColumn\" : { },\n"
+ + " \"maybeBoolColumn\" : null,\n"
+ " \"intColumn\" : 2,\n"
+ " \"doubleColumn\" : 0.2,\n"
- + " \"maybeByteColumn\" : { },\n"
+ + " \"maybeByteColumn\" : null,\n"
+ " \"intSetColumn\" : [ 0 ],\n"
+ " \"boolColumn\" : true,\n"
+ " \"binaryColumn\" : \"val_0\",\n"
- + " \"maybeIntColumn\" : { },\n"
+ + " \"maybeIntColumn\" : null,\n"
+ " \"intToStringColumn\" : {\n"
+ " \"0\" : \"val_0\",\n"
+ " \"1\" : \"val_1\",\n"
+ " \"2\" : \"val_2\"\n"
+ " },\n"
- + " \"maybeDoubleColumn\" : { },\n"
- + " \"maybeEnumColumn\" : { },\n"
- + " \"maybeLongColumn\" : { },\n"
+ + " \"maybeDoubleColumn\" : null,\n"
+ + " \"maybeEnumColumn\" : null,\n"
+ + " \"maybeLongColumn\" : null,\n"
+ " \"stringsColumn\" : [ \"arr_0\", \"arr_1\", \"arr_2\" ],\n"
+ " \"longColumn\" : 0,\n"
+ " \"stringColumn\" : \"val_0\",\n"
- + " \"maybeShortColumn\" : { },\n"
+ + " \"maybeShortColumn\" : null,\n"
+ " \"complexColumn\" : {\n"
+ " \"0\" : [ {\n"
+ " \"nestedStringColumn\" : \"val_0\",\n"
@@ -426,9 +426,9 @@ public class CompatParquetReaderTest extends BaseParquetReaderTest
);
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
final String expectedJson = "{\n"
- + " \"optionalMessage\" : { },\n"
+ + " \"optionalMessage\" : null,\n"
+ " \"requiredPrimitive\" : 9,\n"
- + " \"repeatedPrimitive\" : { },\n"
+ + " \"repeatedPrimitive\" : null,\n"
+ " \"repeatedMessage\" : [ 9, 10 ],\n"
+ " \"optionalPrimitive\" : 10,\n"
+ " \"requiredMessage\" : {\n"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]