This is an automated email from the ASF dual-hosted git repository.

rohangarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 536415b948 Stop leaking Avro objects from parser (#12828)
536415b948 is described below

commit 536415b9487640cee2dee6e1b31a0bd5b062057b
Author: imply-cheddar <[email protected]>
AuthorDate: Wed Aug 17 14:46:20 2022 -0700

    Stop leaking Avro objects from parser (#12828)
    
    The Avro parsing code leaks some "object" representations.
    We need to convert them into Maps/Lists so that other code
    can understand and expect good things.  Previously, these
    objects were handled with .toString(), but that's not a
    good contract in terms of how to work with objects.
---
 .../druid/data/input/avro/AvroFlattenerMaker.java  | 18 ++++++++-
 .../data/input/AvroStreamInputRowParserTest.java   | 43 ++++++++++++++++++++-
 .../data/input/avro/AvroFlattenerMakerTest.java    | 45 +++++++++++++++++++---
 3 files changed, 98 insertions(+), 8 deletions(-)

diff --git 
a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java
 
b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java
index 8526a04c91..3a57d03334 100644
--- 
a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java
+++ 
b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java
@@ -36,7 +36,9 @@ import 
org.apache.druid.java.util.common.parsers.ObjectFlatteners;
 
 import java.nio.ByteBuffer;
 import java.util.EnumSet;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
 import java.util.function.Function;
@@ -164,7 +166,7 @@ public class AvroFlattenerMaker implements 
ObjectFlatteners.FlattenerMaker<Gener
     } else if (field instanceof Utf8) {
       return field.toString();
     } else if (field instanceof List) {
-      return ((List<?>) 
field).stream().filter(Objects::nonNull).collect(Collectors.toList());
+      return ((List<?>) 
field).stream().filter(Objects::nonNull).map(this::transformValue).collect(Collectors.toList());
     } else if (field instanceof GenericEnumSymbol) {
       return field.toString();
     } else if (field instanceof GenericFixed) {
@@ -173,6 +175,20 @@ public class AvroFlattenerMaker implements 
ObjectFlatteners.FlattenerMaker<Gener
       } else {
         return ((GenericFixed) field).bytes();
       }
+    } else if (field instanceof Map) {
+      LinkedHashMap<String, Object> retVal = new LinkedHashMap<>();
+      Map<?, ?> fieldMap = (Map<?, ?>) field;
+      for (Map.Entry<?, ?> entry : fieldMap.entrySet()) {
+        retVal.put(String.valueOf(entry.getKey()), 
transformValue(entry.getValue()));
+      }
+      return retVal;
+    } else if (field instanceof GenericRecord) {
+      LinkedHashMap<String, Object> retVal = new LinkedHashMap<>();
+      GenericRecord record = (GenericRecord) field;
+      for (Schema.Field key : record.getSchema().getFields()) {
+        retVal.put(key.name(), transformValue(record.get(key.pos())));
+      }
+      return retVal;
     }
     return field;
   }
diff --git 
a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/AvroStreamInputRowParserTest.java
 
b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/AvroStreamInputRowParserTest.java
index 6a37a0bbb5..b04bcca61b 100644
--- 
a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/AvroStreamInputRowParserTest.java
+++ 
b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/AvroStreamInputRowParserTest.java
@@ -24,6 +24,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.base.Function;
 import com.google.common.base.Splitter;
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import org.apache.avro.Schema;
@@ -64,6 +65,7 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.regex.Pattern;
@@ -325,7 +327,46 @@ public class AvroStreamInputRowParserTest
           inputRow.getDimension("someStringArray")
       );
 
+      final Object someRecordArrayObj = inputRow.getRaw("someRecordArray");
+      Assert.assertNotNull(someRecordArrayObj);
+      Assert.assertTrue(someRecordArrayObj instanceof List);
+      Assert.assertEquals(1, ((List) someRecordArrayObj).size());
+      final Object recordArrayElementObj = ((List) someRecordArrayObj).get(0);
+      Assert.assertNotNull(recordArrayElementObj);
+      Assert.assertTrue(recordArrayElementObj instanceof LinkedHashMap);
+      LinkedHashMap recordArrayElement = (LinkedHashMap) recordArrayElementObj;
+      Assert.assertEquals("string in record", 
recordArrayElement.get("nestedString"));
     }
+
+    final Object someIntValueMapObj = inputRow.getRaw("someIntValueMap");
+    Assert.assertNotNull(someIntValueMapObj);
+    Assert.assertTrue(someIntValueMapObj instanceof LinkedHashMap);
+    LinkedHashMap someIntValueMap = (LinkedHashMap) someIntValueMapObj;
+    Assert.assertEquals(4, someIntValueMap.size());
+    Assert.assertEquals(1, someIntValueMap.get("1"));
+    Assert.assertEquals(2, someIntValueMap.get("2"));
+    Assert.assertEquals(4, someIntValueMap.get("4"));
+    Assert.assertEquals(8, someIntValueMap.get("8"));
+
+
+    final Object someStringValueMapObj = inputRow.getRaw("someStringValueMap");
+    Assert.assertNotNull(someStringValueMapObj);
+    Assert.assertTrue(someStringValueMapObj instanceof LinkedHashMap);
+    LinkedHashMap someStringValueMap = (LinkedHashMap) someStringValueMapObj;
+    Assert.assertEquals(4, someStringValueMap.size());
+    Assert.assertEquals("1", someStringValueMap.get("1"));
+    Assert.assertEquals("2", someStringValueMap.get("2"));
+    Assert.assertEquals("4", someStringValueMap.get("4"));
+    Assert.assertEquals("8", someStringValueMap.get("8"));
+
+
+    final Object someRecordObj = inputRow.getRaw("someRecord");
+    Assert.assertNotNull(someRecordObj);
+    Assert.assertTrue(someRecordObj instanceof LinkedHashMap);
+    LinkedHashMap someRecord = (LinkedHashMap) someRecordObj;
+    Assert.assertEquals(4892, someRecord.get("subInt"));
+    Assert.assertEquals(1543698L, someRecord.get("subLong"));
+
     // towards Map avro field as druid dimension, need to convert its 
toString() back to HashMap to check equality
     Assert.assertEquals(1, inputRow.getDimension("someIntValueMap").size());
     Assert.assertEquals(
@@ -369,7 +410,7 @@ public class AvroStreamInputRowParserTest
     );
     
Assert.assertEquals(Collections.singletonList(String.valueOf(MyEnum.ENUM1)), 
inputRow.getDimension("someEnum"));
     Assert.assertEquals(
-        Collections.singletonList(String.valueOf(SOME_RECORD_VALUE)),
+        Collections.singletonList(ImmutableMap.of("subInt", 4892, "subLong", 
1543698L).toString()),
         inputRow.getDimension("someRecord")
     );
 
diff --git 
a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroFlattenerMakerTest.java
 
b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroFlattenerMakerTest.java
index e7d503ab97..1174d2684f 100644
--- 
a/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroFlattenerMakerTest.java
+++ 
b/extensions-core/avro-extensions/src/test/java/org/apache/druid/data/input/avro/AvroFlattenerMakerTest.java
@@ -19,6 +19,7 @@
 
 package org.apache.druid.data.input.avro;
 
+import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.util.Utf8;
 import org.apache.druid.data.input.AvroStreamInputRowParserTest;
 import org.apache.druid.data.input.SomeAvroDatum;
@@ -29,10 +30,12 @@ import org.junit.Assert;
 import org.junit.Test;
 
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 public class AvroFlattenerMakerTest
 {
@@ -214,8 +217,13 @@ public class AvroFlattenerMakerTest
         record.getSomeEnum().toString(),
         flattener.getRootField(record, "someEnum")
     );
+    Map<String, Object> map = new HashMap<>();
+    record.getSomeRecord()
+          .getSchema()
+          .getFields()
+          .forEach(field -> map.put(field.name(), 
record.getSomeRecord().get(field.name())));
     Assert.assertEquals(
-        record.getSomeRecord(),
+        map,
         flattener.getRootField(record, "someRecord")
     );
     Assert.assertEquals(
@@ -230,8 +238,17 @@ public class AvroFlattenerMakerTest
         record.getSomeFloat(),
         flattener.getRootField(record, "someFloat")
     );
-    Assert.assertEquals(
-        record.getSomeRecordArray(),
+    List<Map<String, Object>> list = new ArrayList<>();
+    for (GenericRecord genericRecord : record.getSomeRecordArray()) {
+      Map<String, Object> map1 = new HashMap<>();
+      genericRecord
+          .getSchema()
+          .getFields()
+          .forEach(field -> map1.put(field.name(), 
genericRecord.get(field.name())));
+      list.add(map1);
+    }
+    Assert.assertEquals(
+        list,
         flattener.getRootField(record, "someRecordArray")
     );
   }
@@ -328,8 +345,13 @@ public class AvroFlattenerMakerTest
         record.getSomeEnum().toString(),
         flattener.makeJsonPathExtractor("$.someEnum").apply(record)
     );
+    Map<String, Object> map = new HashMap<>();
+    record.getSomeRecord()
+          .getSchema()
+          .getFields()
+          .forEach(field -> map.put(field.name(), 
record.getSomeRecord().get(field.name())));
     Assert.assertEquals(
-        record.getSomeRecord(),
+        map,
         flattener.makeJsonPathExtractor("$.someRecord").apply(record)
     );
     Assert.assertEquals(
@@ -344,8 +366,19 @@ public class AvroFlattenerMakerTest
         record.getSomeFloat(),
         flattener.makeJsonPathExtractor("$.someFloat").apply(record)
     );
+
+    List<Map<String, Object>> list = new ArrayList<>();
+    for (GenericRecord genericRecord : record.getSomeRecordArray()) {
+      Map<String, Object> map1 = new HashMap<>();
+      genericRecord
+          .getSchema()
+          .getFields()
+          .forEach(field -> map1.put(field.name(), 
genericRecord.get(field.name())));
+      list.add(map1);
+    }
+
     Assert.assertEquals(
-        record.getSomeRecordArray(),
+        list,
         flattener.makeJsonPathExtractor("$.someRecordArray").apply(record)
     );
 
@@ -355,7 +388,7 @@ public class AvroFlattenerMakerTest
     );
 
     Assert.assertEquals(
-        record.getSomeRecordArray(),
+        list,
         
flattener.makeJsonPathExtractor("$.someRecordArray[?(@.nestedString)]").apply(record)
     );
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to