This is an automated email from the ASF dual-hosted git repository.

cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new e25ba00470 fix bug in ObjectFlatteners.toMap which caused null values in avro-stream/avro-ocf/parquet/orc to be converted to {} instead of null in web-console sampler UI (#12785)
e25ba00470 is described below

commit e25ba00470c17e6ff50c4d70d96b1c7ba0a1f200
Author: Clint Wylie <[email protected]>
AuthorDate: Thu Jul 14 16:52:01 2022 -0700

    fix bug in ObjectFlatteners.toMap which caused null values in avro-stream/avro-ocf/parquet/orc to be converted to {} instead of null in web-console sampler UI (#12785)
    
    * fix bug in ObjectFlatteners.toMap which caused null values in avro-stream/avro-ocf/parquet/orc to be converted to {} instead of null
    * fix parquet test that expected wrong behavior, my bad heh
---
 .../apache/druid/data/input/impl/JsonReader.java   |  2 +-
 .../java/util/common/parsers/ObjectFlattener.java  | 15 ++++++
 .../java/util/common/parsers/ObjectFlatteners.java |  4 +-
 .../util/common/parsers/ObjectFlattenersTest.java  | 61 ++++++++++++++++++++++
 .../input/parquet/CompatParquetReaderTest.java     | 22 ++++----
 5 files changed, 91 insertions(+), 13 deletions(-)

diff --git a/core/src/main/java/org/apache/druid/data/input/impl/JsonReader.java b/core/src/main/java/org/apache/druid/data/input/impl/JsonReader.java
index 8d0f6678e3..8dee12dc30 100644
--- a/core/src/main/java/org/apache/druid/data/input/impl/JsonReader.java
+++ b/core/src/main/java/org/apache/druid/data/input/impl/JsonReader.java
@@ -47,7 +47,7 @@ import java.util.List;
 import java.util.Map;
 
 /**
- * In constract to {@link JsonLineReader} which processes input text line by line independently,
+ * In contrast to {@link JsonLineReader} which processes input text line by line independently,
  * this class tries to parse the input text as a whole to an array of objects.
  *
  * The input text can be:
diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlattener.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlattener.java
index 36e7ca34ac..19b702d83e 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlattener.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlattener.java
@@ -23,7 +23,22 @@ import java.util.Map;
 
 public interface ObjectFlattener<T>
 {
+  /**
+   * Transforms an input row object into a {@link Map}, likely based on the instructions in some {@link JSONPathSpec}.
+   *
+   * This method is used in normal ingestion to extract values into a map to translate into an
+   * {@link org.apache.druid.data.input.InputRow}
+   */
   Map<String, Object> flatten(T obj);
 
+  /**
+   * Completely transforms an input row into a {@link Map}, including translating all nested structure into plain java
+   * objects such as {@link Map} and {@link java.util.List}. This method should translate everything as much as
+   * possible, ignoring any instructions in {@link JSONPathSpec} which might otherwise limit the amount of
+   * transformation done.
+   *
+   * This method is used by the ingestion "sampler" to provide a "raw" JSON form of the original input data, regardless
+   * of actual format, so that it can use "inline" JSON datasources and reduce sampling overhead.
+   */
   Map<String, Object> toMap(T obj);
 }
diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlatteners.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlatteners.java
index 78fd114871..77ae467e85 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlatteners.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlatteners.java
@@ -251,7 +251,9 @@ public class ObjectFlatteners
         Map<String, Object> actualMap = new HashMap<>();
         for (String key : jsonProvider.getPropertyKeys(o)) {
           Object field = jsonProvider.getMapValue(o, key);
-          if (jsonProvider.isMap(field) || jsonProvider.isArray(field)) {
+          if (field == null) {
+            actualMap.put(key, null);
+          } else if (jsonProvider.isMap(field) || jsonProvider.isArray(field)) {
             actualMap.put(key, toMapHelper(finalizeConversionForMap(field)));
           } else {
             actualMap.put(key, finalizeConversionForMap(field));
diff --git a/core/src/test/java/org/apache/druid/java/util/common/parsers/ObjectFlattenersTest.java b/core/src/test/java/org/apache/druid/java/util/common/parsers/ObjectFlattenersTest.java
new file mode 100644
index 0000000000..e48c4dafe8
--- /dev/null
+++ b/core/src/test/java/org/apache/druid/java/util/common/parsers/ObjectFlattenersTest.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.java.util.common.parsers;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.ImmutableList;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Map;
+
+public class ObjectFlattenersTest
+{
+  private static final String SOME_JSON = "{\"foo\": null, \"bar\": 1}";
+  private static final ObjectFlattener FLATTENER = ObjectFlatteners.create(
+      new JSONPathSpec(
+          true,
+          ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.PATH, "extract", "$.bar"))
+      ),
+      new JSONFlattenerMaker(true)
+  );
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+  @Test
+  public void testFlatten() throws JsonProcessingException
+  {
+    JsonNode node = OBJECT_MAPPER.readTree(SOME_JSON);
+    Map<String, Object> flat = FLATTENER.flatten(node);
+    Assert.assertNull(flat.get("foo"));
+    Assert.assertEquals(1L, flat.get("bar"));
+    Assert.assertEquals(1L, flat.get("extract"));
+  }
+
+  @Test
+  public void testToMap() throws JsonProcessingException
+  {
+    JsonNode node = OBJECT_MAPPER.readTree(SOME_JSON);
+    Map<String, Object> flat = FLATTENER.toMap(node);
+    Assert.assertNull(flat.get("foo"));
+    Assert.assertEquals(1, flat.get("bar"));
+  }
+}
diff --git a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java
index 60173212b5..f94949d6bd 100644
--- a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java
+++ b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java
@@ -248,30 +248,30 @@ public class CompatParquetReaderTest extends BaseParquetReaderTest
     List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
     final String expectedJson = "{\n"
                                 + "  \"enumColumn\" : \"SPADES\",\n"
-                                + "  \"maybeStringColumn\" : { },\n"
-                                + "  \"maybeBinaryColumn\" : { },\n"
+                                + "  \"maybeStringColumn\" : null,\n"
+                                + "  \"maybeBinaryColumn\" : null,\n"
                                 + "  \"shortColumn\" : 1,\n"
                                 + "  \"byteColumn\" : 0,\n"
-                                + "  \"maybeBoolColumn\" : { },\n"
+                                + "  \"maybeBoolColumn\" : null,\n"
                                 + "  \"intColumn\" : 2,\n"
                                 + "  \"doubleColumn\" : 0.2,\n"
-                                + "  \"maybeByteColumn\" : { },\n"
+                                + "  \"maybeByteColumn\" : null,\n"
                                 + "  \"intSetColumn\" : [ 0 ],\n"
                                 + "  \"boolColumn\" : true,\n"
                                 + "  \"binaryColumn\" : \"val_0\",\n"
-                                + "  \"maybeIntColumn\" : { },\n"
+                                + "  \"maybeIntColumn\" : null,\n"
                                 + "  \"intToStringColumn\" : {\n"
                                 + "    \"0\" : \"val_0\",\n"
                                 + "    \"1\" : \"val_1\",\n"
                                 + "    \"2\" : \"val_2\"\n"
                                 + "  },\n"
-                                + "  \"maybeDoubleColumn\" : { },\n"
-                                + "  \"maybeEnumColumn\" : { },\n"
-                                + "  \"maybeLongColumn\" : { },\n"
+                                + "  \"maybeDoubleColumn\" : null,\n"
+                                + "  \"maybeEnumColumn\" : null,\n"
+                                + "  \"maybeLongColumn\" : null,\n"
                                 + "  \"stringsColumn\" : [ \"arr_0\", \"arr_1\", \"arr_2\" ],\n"
                                 + "  \"longColumn\" : 0,\n"
                                 + "  \"stringColumn\" : \"val_0\",\n"
-                                + "  \"maybeShortColumn\" : { },\n"
+                                + "  \"maybeShortColumn\" : null,\n"
                                 + "  \"complexColumn\" : {\n"
                                 + "    \"0\" : [ {\n"
                                 + "      \"nestedStringColumn\" : \"val_0\",\n"
@@ -426,9 +426,9 @@ public class CompatParquetReaderTest extends BaseParquetReaderTest
     );
     List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
     final String expectedJson = "{\n"
-                                + "  \"optionalMessage\" : { },\n"
+                                + "  \"optionalMessage\" : null,\n"
                                 + "  \"requiredPrimitive\" : 9,\n"
-                                + "  \"repeatedPrimitive\" : { },\n"
+                                + "  \"repeatedPrimitive\" : null,\n"
                                 + "  \"repeatedMessage\" : [ 9, 10 ],\n"
                                 + "  \"optionalPrimitive\" : 10,\n"
                                 + "  \"requiredMessage\" : {\n"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to