This is an automated email from the ASF dual-hosted git repository.
abhishekrb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 8f87169a1e0 Sort JSON keys in nested columns to fix comparator
inconsistency/stability (#18169)
8f87169a1e0 is described below
commit 8f87169a1e0ca79ffd315ec61a400e44526488ac
Author: aho135 <[email protected]>
AuthorDate: Tue Jul 1 12:05:47 2025 -0700
Sort JSON keys in nested columns to fix comparator inconsistency/stability
(#18169)
Sort keys in JSON objects to resolve a transitivity bug in the
StructuredData comparator. Left unfixed, this bug would cause ingestion tasks
and/or queries to fail intermittently.
---
.../java/org/apache/druid/segment/nested/StructuredData.java | 12 ++++++++++--
.../org/apache/druid/segment/nested/StructuredDataTest.java | 12 ++++++++++++
2 files changed, 22 insertions(+), 2 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java
b/processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java
index b7a627c389a..37fad571c33 100644
---
a/processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java
+++
b/processing/src/main/java/org/apache/druid/segment/nested/StructuredData.java
@@ -21,6 +21,8 @@ package org.apache.druid.segment.nested;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectWriter;
+import com.fasterxml.jackson.databind.SerializationFeature;
import com.google.common.primitives.Longs;
import net.jpountz.xxhash.XXHash64;
import net.jpountz.xxhash.XXHashFactory;
@@ -38,15 +40,21 @@ public class StructuredData implements
Comparable<StructuredData>
{
private static final XXHash64 HASH_FUNCTION =
XXHashFactory.fastestInstance().hash64();
- // seed from the example... but, it doesn't matter what it is as long as its
the same every time
+ // seed from the example... but, it doesn't matter what it is as long as
it's the same every time
private static int SEED = 0x9747b28c;
public static final Comparator<StructuredData> COMPARATOR =
Comparators.naturalNullsFirst();
+ /** SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS is required so that hash
computations for JSON objects that
+ * have different key orders but are otherwise equivalent will be
consistent. See
+ * {@link StructuredDataTest#testCompareToWithDifferentJSONOrder()} for an
example
+ */
+ private static final ObjectWriter WRITER =
ColumnSerializerUtils.SMILE_MAPPER.writer(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS);
+
private static long computeHash(StructuredData data)
{
try {
- final byte[] bytes =
ColumnSerializerUtils.SMILE_MAPPER.writeValueAsBytes(data.value);
+ final byte[] bytes = WRITER.writeValueAsBytes(data.value);
return HASH_FUNCTION.hash(bytes, 0, bytes.length, SEED);
}
catch (JsonProcessingException e) {
diff --git
a/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataTest.java
b/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataTest.java
index cb0acad74ff..1642107a6b1 100644
---
a/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataTest.java
+++
b/processing/src/test/java/org/apache/druid/segment/nested/StructuredDataTest.java
@@ -95,6 +95,18 @@ public class StructuredDataTest
}
+ @Test
+ public void testCompareToWithDifferentJSONOrder()
+ {
+ StructuredData sd0 = new StructuredData(ImmutableMap.of("D", 0.0, "E",
0.0, "F", 0.0, "A", 0.0, "B", 0.0, "C", 0.0));
+ StructuredData sd1 = new StructuredData(ImmutableMap.of("A", 0.0, "B",
0.0, "C", 0.0, "D", 0.0, "E", 0.0, "F", 0.0));
+ StructuredData sd2 = new StructuredData(ImmutableMap.of("A", 0.0, "B",
0.0, "C", 0.0, "D", 34304.0, "E", 34304.0, "F", 34304.0));
+
+ Assert.assertEquals(1, sd0.compareTo(sd2));
+ Assert.assertEquals(1, sd1.compareTo(sd2));
+ Assert.assertEquals(0, sd0.compareTo(sd1));
+ }
+
@Test
public void testEqualsAndHashcode()
{
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]