This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 063b6e4e9ef Handle mixed-type map key values by promoting to string 
(#17722)
063b6e4e9ef is described below

commit 063b6e4e9efc6593ac0145cf652b9169138d32e2
Author: Xiang Fu <[email protected]>
AuthorDate: Thu Feb 19 19:52:42 2026 -0800

    Handle mixed-type map key values by promoting to string (#17722)
---
 ...ieldTypeMixedValueIngestingIntegrationTest.java | 144 +++++++++++++++++++++
 .../BigDecimalColumnPreIndexStatsCollector.java    |   4 +
 .../stats/DoubleColumnPreIndexStatsCollector.java  |   4 +
 .../stats/FloatColumnPreIndexStatsCollector.java   |   4 +
 .../stats/IntColumnPreIndexStatsCollector.java     |   4 +
 .../stats/LongColumnPreIndexStatsCollector.java    |   4 +
 .../stats/MapColumnPreIndexStatsCollector.java     | 100 +++++++++++---
 .../stats/MapColumnPreIndexStatsCollectorTest.java | 126 ++++++++++++++++++
 8 files changed, 374 insertions(+), 16 deletions(-)

diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/MapFieldTypeMixedValueIngestingIntegrationTest.java
 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/MapFieldTypeMixedValueIngestingIntegrationTest.java
new file mode 100644
index 00000000000..65736c68eba
--- /dev/null
+++ 
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/MapFieldTypeMixedValueIngestingIntegrationTest.java
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests.custom;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.JsonNodeType;
+import java.io.File;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.pinot.spi.data.ComplexFieldSpec;
+import org.apache.pinot.spi.data.DimensionFieldSpec;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.testng.annotations.Test;
+
+import static org.apache.avro.Schema.create;
+import static org.testng.Assert.assertEquals;
+
+
+/**
+ * Realtime integration test for a MAP column in which one key ({@code traceId}) carries a numeric
+ * value in most records and a string value in the remaining ones.
+ *
+ * <p>The Avro input declares the map value as {@code union(long, string)}, while the Pinot schema
+ * declares the map value as STRING. The queries below expect every value to be returned as a string.
+ */
+@Test(suiteName = "CustomClusterIntegrationTest")
+public class MapFieldTypeMixedValueIngestingIntegrationTest extends CustomDataQueryClusterIntegrationTest {
+
+  private static final String DEFAULT_TABLE_NAME = "MapFieldTypeMixedValueIngestingIntegrationTest";
+  private static final String MAP_FIELD_NAME = "tracingContext";
+  private static final String TRACE_ID_KEY = "traceId";
+  private static final int NUM_RECORDS = 1000;
+  private static final int FLUSH_SIZE = 100;
+  // Cadence of the string-valued records: the last record of every STRING_TRACE_ID_INTERVAL records
+  // carries STRING_TRACE_ID, all others carry NUMERIC_TRACE_ID. Deliberately kept separate from
+  // FLUSH_SIZE so that tuning the flush size cannot silently change the expected query results.
+  private static final int STRING_TRACE_ID_INTERVAL = 100;
+  // Exact because NUM_RECORDS is a multiple of STRING_TRACE_ID_INTERVAL.
+  private static final int NUM_STRING_TRACE_ID_RECORDS = NUM_RECORDS / STRING_TRACE_ID_INTERVAL;
+  private static final long NUMERIC_TRACE_ID = 9876543210L;
+  private static final String STRING_TRACE_ID = "c69b6613-e174-49f1-ac47-4e9ab98e513f";
+
+  /** Returns whether the record at {@code recordIndex} is written with the string trace id. */
+  private static boolean hasStringTraceId(int recordIndex) {
+    return recordIndex % STRING_TRACE_ID_INTERVAL == STRING_TRACE_ID_INTERVAL - 1;
+  }
+
+  /** Returns the number of records written by {@link #createAvroFiles()}. */
+  @Override
+  protected long getCountStarResult() {
+    return NUM_RECORDS;
+  }
+
+  @Override
+  public String getTableName() {
+    return DEFAULT_TABLE_NAME;
+  }
+
+  /**
+   * Builds a schema with one MAP column (STRING keys, STRING values) and one TIMESTAMP column with
+   * millisecond granularity.
+   */
+  @Override
+  public Schema createSchema() {
+    ComplexFieldSpec tracingContextFieldSpec = new ComplexFieldSpec(MAP_FIELD_NAME, FieldSpec.DataType.MAP, true,
+        Map.of(
+            ComplexFieldSpec.KEY_FIELD,
+            new DimensionFieldSpec(ComplexFieldSpec.KEY_FIELD, FieldSpec.DataType.STRING, true),
+            ComplexFieldSpec.VALUE_FIELD,
+            new DimensionFieldSpec(ComplexFieldSpec.VALUE_FIELD, FieldSpec.DataType.STRING, true)
+        ));
+    return new Schema.SchemaBuilder().setSchemaName(getTableName())
+        .addField(tracingContextFieldSpec)
+        .addDateTimeField(TIMESTAMP_FIELD_NAME, FieldSpec.DataType.TIMESTAMP, "1:MILLISECONDS", "1:MILLISECONDS")
+        .build();
+  }
+
+  /**
+   * Writes {@code NUM_RECORDS} Avro records, all to the first writer. Each record holds a single-entry
+   * map under {@code MAP_FIELD_NAME} and a strictly increasing timestamp ({@code tsBase + i}), which
+   * the test later relies on to read the rows back in write order.
+   *
+   * @return the Avro files backing the writers
+   * @throws Exception if the Avro files cannot be created or written
+   */
+  public List<File> createAvroFiles()
+      throws Exception {
+    org.apache.avro.Schema avroSchema = org.apache.avro.Schema.createRecord("myRecord", null, null, false);
+    // The map value is a union so that the same key can hold either a long or a string.
+    org.apache.avro.Schema mapValueSchema =
+        org.apache.avro.Schema.createUnion(Arrays.asList(create(org.apache.avro.Schema.Type.LONG),
+            create(org.apache.avro.Schema.Type.STRING)));
+    org.apache.avro.Schema mapAvroSchema = org.apache.avro.Schema.createMap(mapValueSchema);
+    List<org.apache.avro.Schema.Field> fields =
+        Arrays.asList(
+            new org.apache.avro.Schema.Field(MAP_FIELD_NAME, mapAvroSchema, null, null),
+            new org.apache.avro.Schema.Field(TIMESTAMP_FIELD_NAME, create(org.apache.avro.Schema.Type.LONG), null, null)
+        );
+    avroSchema.setFields(fields);
+
+    try (AvroFilesAndWriters avroFilesAndWriters = createAvroFilesAndWriters(avroSchema)) {
+      long tsBase = System.currentTimeMillis();
+      List<DataFileWriter<GenericData.Record>> writers = avroFilesAndWriters.getWriters();
+      for (int i = 0; i < NUM_RECORDS; i++) {
+        Map<String, Object> mixedMapRecord = new HashMap<>();
+        if (hasStringTraceId(i)) {
+          mixedMapRecord.put(TRACE_ID_KEY, STRING_TRACE_ID);
+        } else {
+          mixedMapRecord.put(TRACE_ID_KEY, NUMERIC_TRACE_ID);
+        }
+        GenericData.Record mapRecord = new GenericData.Record(avroSchema);
+        mapRecord.put(MAP_FIELD_NAME, mixedMapRecord);
+        mapRecord.put(TIMESTAMP_FIELD_NAME, tsBase + i);
+        writers.get(0).append(mapRecord);
+      }
+      return avroFilesAndWriters.getAvroFiles();
+    }
+  }
+
+  @Override
+  protected int getRealtimeSegmentFlushSize() {
+    return FLUSH_SIZE;
+  }
+
+  /**
+   * Verifies that every {@code traceId} value is returned as a JSON string (numeric ids rendered via
+   * {@link Long#toString(long)}) and that an equality filter on the string trace id matches exactly
+   * the records that were written with it.
+   */
+  @Test(dataProvider = "useBothQueryEngines")
+  public void testNumericMixedMapKeyValuesAsString(boolean useMultiStageQueryEngine)
+      throws Exception {
+    setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+    // Ordering by timestamp makes row i of the result correspond to record i of createAvroFiles().
+    String query = "SELECT " + MAP_FIELD_NAME + "['" + TRACE_ID_KEY + "'] FROM " + getTableName()
+        + " ORDER BY " + TIMESTAMP_FIELD_NAME + " LIMIT " + NUM_RECORDS;
+    JsonNode pinotResponse = postQuery(query);
+    assertEquals(pinotResponse.get("exceptions").size(), 0);
+    JsonNode rows = pinotResponse.get("resultTable").get("rows");
+    assertEquals(rows.size(), NUM_RECORDS);
+    for (int i = 0; i < NUM_RECORDS; i++) {
+      assertEquals(rows.get(i).get(0).getNodeType(), JsonNodeType.STRING);
+      if (hasStringTraceId(i)) {
+        assertEquals(rows.get(i).get(0).textValue(), STRING_TRACE_ID);
+      } else {
+        assertEquals(rows.get(i).get(0).textValue(), Long.toString(NUMERIC_TRACE_ID));
+      }
+    }
+
+    query = "SELECT COUNT(*) FROM " + getTableName() + " WHERE " + MAP_FIELD_NAME + "['" + TRACE_ID_KEY
+        + "'] = '" + STRING_TRACE_ID + "'";
+    pinotResponse = postQuery(query);
+    assertEquals(pinotResponse.get("exceptions").size(), 0);
+    assertEquals(pinotResponse.get("resultTable").get("rows").get(0).get(0).intValue(),
+        NUM_STRING_TRACE_ID_RECORDS);
+  }
+
+  @Override
+  public boolean isRealtimeTable() {
+    return true;
+  }
+}
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/BigDecimalColumnPreIndexStatsCollector.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/BigDecimalColumnPreIndexStatsCollector.java
index bd4506e2cfc..d93102c43a5 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/BigDecimalColumnPreIndexStatsCollector.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/BigDecimalColumnPreIndexStatsCollector.java
@@ -106,6 +106,10 @@ public class BigDecimalColumnPreIndexStatsCollector 
extends AbstractColumnStatis
     return _sealed ? _sortedValues.length : _values.size();
   }
 
+  /**
+   * Returns a copy of the values gathered by this collector.
+   *
+   * <p>Follows the same sealed/unsealed split as the size accessor directly above: before sealing the
+   * values are read from {@code _values}, afterwards from {@code _sortedValues}, so the call stays
+   * usable once the collector is sealed.
+   * NOTE(review): presumably seal() releases {@code _values}; confirm against the seal() body.
+   *
+   * @return a newly allocated array; mutating it does not affect the collector
+   */
+  BigDecimal[] getValues() {
+    return _sealed ? _sortedValues.clone() : _values.toArray(new BigDecimal[0]);
+  }
+
   @Override
   public void seal() {
     if (!_sealed) {
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/DoubleColumnPreIndexStatsCollector.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/DoubleColumnPreIndexStatsCollector.java
index 77b34b0459f..ad22f2902cd 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/DoubleColumnPreIndexStatsCollector.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/DoubleColumnPreIndexStatsCollector.java
@@ -110,6 +110,10 @@ public class DoubleColumnPreIndexStatsCollector extends 
AbstractColumnStatistics
     return _sealed ? _sortedValues.length : _values.size();
   }
 
+  /**
+   * Returns a copy of the values gathered by this collector.
+   *
+   * <p>Follows the same sealed/unsealed split as the size accessor directly above: before sealing the
+   * values are read from {@code _values}, afterwards from {@code _sortedValues}, so the call stays
+   * usable once the collector is sealed.
+   * NOTE(review): presumably seal() releases {@code _values}; confirm against the seal() body.
+   *
+   * @return a newly allocated array; mutating it does not affect the collector
+   */
+  double[] getValues() {
+    return _sealed ? _sortedValues.clone() : _values.toDoubleArray();
+  }
+
   @Override
   public void seal() {
     if (!_sealed) {
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/FloatColumnPreIndexStatsCollector.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/FloatColumnPreIndexStatsCollector.java
index aa60f4d5bae..54419d867d3 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/FloatColumnPreIndexStatsCollector.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/FloatColumnPreIndexStatsCollector.java
@@ -110,6 +110,10 @@ public class FloatColumnPreIndexStatsCollector extends 
AbstractColumnStatisticsC
     return _sealed ? _sortedValues.length : _values.size();
   }
 
+  /**
+   * Returns a copy of the values gathered by this collector.
+   *
+   * <p>Follows the same sealed/unsealed split as the size accessor directly above: before sealing the
+   * values are read from {@code _values}, afterwards from {@code _sortedValues}, so the call stays
+   * usable once the collector is sealed.
+   * NOTE(review): presumably seal() releases {@code _values}; confirm against the seal() body.
+   *
+   * @return a newly allocated array; mutating it does not affect the collector
+   */
+  float[] getValues() {
+    return _sealed ? _sortedValues.clone() : _values.toFloatArray();
+  }
+
   @Override
   public void seal() {
     if (!_sealed) {
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/IntColumnPreIndexStatsCollector.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/IntColumnPreIndexStatsCollector.java
index 74882f25ff3..80622739d83 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/IntColumnPreIndexStatsCollector.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/IntColumnPreIndexStatsCollector.java
@@ -110,6 +110,10 @@ public class IntColumnPreIndexStatsCollector extends 
AbstractColumnStatisticsCol
     return _sealed ? _sortedValues.length : _values.size();
   }
 
+  /**
+   * Returns a copy of the values gathered by this collector.
+   *
+   * <p>Follows the same sealed/unsealed split as the size accessor directly above: before sealing the
+   * values are read from {@code _values}, afterwards from {@code _sortedValues}, so the call stays
+   * usable once the collector is sealed.
+   * NOTE(review): presumably seal() releases {@code _values}; confirm against the seal() body.
+   *
+   * @return a newly allocated array; mutating it does not affect the collector
+   */
+  int[] getValues() {
+    return _sealed ? _sortedValues.clone() : _values.toIntArray();
+  }
+
   @Override
   public void seal() {
     if (!_sealed) {
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/LongColumnPreIndexStatsCollector.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/LongColumnPreIndexStatsCollector.java
index c60efc5bd9a..a9e90b3f133 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/LongColumnPreIndexStatsCollector.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/LongColumnPreIndexStatsCollector.java
@@ -110,6 +110,10 @@ public class LongColumnPreIndexStatsCollector extends 
AbstractColumnStatisticsCo
     return _sealed ? _sortedValues.length : _values.size();
   }
 
+  /**
+   * Returns a copy of the values gathered by this collector.
+   *
+   * <p>Follows the same sealed/unsealed split as the size accessor directly above: before sealing the
+   * values are read from {@code _values}, afterwards from {@code _sortedValues}, so the call stays
+   * usable once the collector is sealed.
+   * NOTE(review): presumably seal() releases {@code _values}; confirm against the seal() body.
+   *
+   * @return a newly allocated array; mutating it does not affect the collector
+   */
+  long[] getValues() {
+    return _sealed ? _sortedValues.clone() : _values.toLongArray();
+  }
+
   @Override
   public void seal() {
     if (!_sealed) {
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollector.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollector.java
index 1d7701ca827..fab70fddd71 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollector.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollector.java
@@ -115,6 +115,10 @@ public class MapColumnPreIndexStatsCollector extends 
AbstractColumnStatisticsCol
           keyStats.collect(value);
           continue;
         }
+        if (keyStats instanceof BytesColumnPredIndexStatsCollector) {
+          keyStats.collect(value);
+          continue;
+        }
         if (keyStats instanceof StringColumnPreIndexStatsCollector) {
           if (value instanceof String || value instanceof Number || value 
instanceof Boolean) {
             keyStats.collect(String.valueOf(value));
@@ -131,8 +135,9 @@ public class MapColumnPreIndexStatsCollector extends 
AbstractColumnStatisticsCol
           try {
             keyStats.collect(new BigDecimal(value.toString()));
           } catch (NumberFormatException e) {
-            LOGGER.error("Failed to parse BigDecimal for key '{}', value '{}': 
{}", key, value, e.getMessage());
-            // Skip collecting this value for statistics
+            LOGGER.warn("Could not parse map key '{}' value '{}' as 
BIG_DECIMAL; promoting to STRING collector",
+                key, value);
+            keyStats = promoteNumericKeyStatsToStringCollector(key, keyStats, 
value);
           }
           continue;
         }
@@ -156,23 +161,44 @@ public class MapColumnPreIndexStatsCollector extends 
AbstractColumnStatisticsCol
           }
         }
         if (keyStats instanceof IntColumnPreIndexStatsCollector) {
-          keyStats.collect(Integer.parseInt(value.toString()));
+          try {
+            keyStats.collect(Integer.parseInt(value.toString()));
+          } catch (NumberFormatException e) {
+            LOGGER.warn("Could not parse map key '{}' value '{}' as INT; 
promoting to STRING collector", key, value);
+            keyStats = promoteNumericKeyStatsToStringCollector(key, keyStats, 
value);
+          }
           continue;
         }
         if (keyStats instanceof LongColumnPreIndexStatsCollector) {
-          keyStats.collect(Long.parseLong(value.toString()));
+          try {
+            keyStats.collect(Long.parseLong(value.toString()));
+          } catch (NumberFormatException e) {
+            LOGGER.warn("Could not parse map key '{}' value '{}' as LONG; 
promoting to STRING collector", key, value);
+            keyStats = promoteNumericKeyStatsToStringCollector(key, keyStats, 
value);
+          }
           continue;
         }
         if (keyStats instanceof FloatColumnPreIndexStatsCollector) {
-          keyStats.collect(Float.parseFloat(value.toString()));
+          try {
+            keyStats.collect(Float.parseFloat(value.toString()));
+          } catch (NumberFormatException e) {
+            LOGGER.warn("Could not parse map key '{}' value '{}' as FLOAT; 
promoting to STRING collector", key, value);
+            keyStats = promoteNumericKeyStatsToStringCollector(key, keyStats, 
value);
+          }
           continue;
         }
         if (keyStats instanceof DoubleColumnPreIndexStatsCollector) {
-          keyStats.collect(Double.parseDouble(value.toString()));
+          try {
+            keyStats.collect(Double.parseDouble(value.toString()));
+          } catch (NumberFormatException e) {
+            LOGGER.warn("Could not parse map key '{}' value '{}' as DOUBLE; 
promoting to STRING collector", key, value);
+            keyStats = promoteNumericKeyStatsToStringCollector(key, keyStats, 
value);
+          }
           continue;
         }
         // Catch all
-        keyStats.collect(value);
+        throw new IllegalArgumentException("Unsupported value type " + 
value.getClass() + " for key " + key
+            + " with collector " + keyStats.getClass().getSimpleName());
       }
       _totalNumberOfEntries++;
     } else {
@@ -254,16 +280,20 @@ public class MapColumnPreIndexStatsCollector extends 
AbstractColumnStatisticsCol
   private AbstractColumnStatisticsCollector createKeyStatsCollector(String 
key, Object value) {
     // Get the type of the value
     PinotDataType type = PinotDataType.getSingleValueType(value.getClass());
+    return createKeyStatsCollector(key, convertToDataType(type));
+  }
+
+  private AbstractColumnStatisticsCollector createKeyStatsCollector(String 
key, FieldSpec.DataType dataType) {
     TableConfig tableConfig = new 
TableConfigBuilder(TableType.OFFLINE).setTableName(key).build();
     Schema keySchema = new Schema.SchemaBuilder().setSchemaName(key)
-        .addField(new DimensionFieldSpec(key, convertToDataType(type), 
false)).build();
+        .addField(new DimensionFieldSpec(key, dataType, false)).build();
     StatsCollectorConfig config = new StatsCollectorConfig(tableConfig, 
keySchema, null);
 
     if (_createNoDictCollectorsForKeys) {
       return new NoDictColumnStatisticsCollector(key, config);
     }
-    switch (type) {
-      case INTEGER:
+    switch (dataType.getStoredType()) {
+      case INT:
         return new IntColumnPreIndexStatsCollector(key, config);
       case LONG:
         return new LongColumnPreIndexStatsCollector(key, config);
@@ -273,23 +303,60 @@ public class MapColumnPreIndexStatsCollector extends 
AbstractColumnStatisticsCol
         return new DoubleColumnPreIndexStatsCollector(key, config);
       case BIG_DECIMAL:
         return new BigDecimalColumnPreIndexStatsCollector(key, config);
-      case BOOLEAN:
+      case BYTES:
+        return new BytesColumnPredIndexStatsCollector(key, config);
       case STRING:
       case MAP:
-      case OBJECT:
         return new StringColumnPreIndexStatsCollector(key, config);
       default:
-        LOGGER.warn("Unknown data type {} for key {} and value type {}", type, 
key, value.getClass().getName());
+        LOGGER.warn("Unknown data type {} for key {}", dataType, key);
         return new StringColumnPreIndexStatsCollector(key, config);
+      }
+  }
+
+  /**
+   * Swaps the typed collector registered for {@code key} with a STRING collector and feeds it the
+   * value that triggered the promotion.
+   *
+   * <p>Steps: the values exposed by the old collector's {@code getValues()} are replayed into the new
+   * collector in their string form; the new collector is registered in {@code _keyStats}; its entry
+   * and multi-value counters are overwritten with the old collector's numbers (the replay would
+   * otherwise have counted the replayed values instead); finally {@code value} is collected as a
+   * string. If {@code keyStats} is none of the INT/LONG/FLOAT/DOUBLE/BIG_DECIMAL collectors, nothing
+   * is replayed.
+   *
+   * <p>NOTE(review): the replay order is whatever {@code getValues()} returns, not the original
+   * arrival order, so any order-dependent statistic of the new collector (e.g. sortedness) presumably
+   * reflects the replay rather than the ingested sequence; confirm whether callers depend on it.
+   *
+   * @param key map key whose collector is being replaced
+   * @param keyStats collector currently registered for {@code key}
+   * @param value value that could not be handled by {@code keyStats}
+   * @return the STRING collector now registered for {@code key}
+   */
+  private AbstractColumnStatisticsCollector promoteNumericKeyStatsToStringCollector(String key,
+      AbstractColumnStatisticsCollector keyStats, Object value) {
+    // Capture the counters up front; they are restored on the replacement after the replay.
+    int carriedTotalEntries = keyStats.getTotalNumberOfEntries();
+    int carriedMaxMultiValues = keyStats.getMaxNumberOfMultiValues();
+    AbstractColumnStatisticsCollector promoted = createKeyStatsCollector(key, FieldSpec.DataType.STRING);
+
+    if (keyStats instanceof IntColumnPreIndexStatsCollector) {
+      int[] ints = ((IntColumnPreIndexStatsCollector) keyStats).getValues();
+      for (int idx = 0; idx < ints.length; idx++) {
+        promoted.collect(Integer.toString(ints[idx]));
+      }
+    } else if (keyStats instanceof LongColumnPreIndexStatsCollector) {
+      long[] longs = ((LongColumnPreIndexStatsCollector) keyStats).getValues();
+      for (int idx = 0; idx < longs.length; idx++) {
+        promoted.collect(Long.toString(longs[idx]));
+      }
+    } else if (keyStats instanceof FloatColumnPreIndexStatsCollector) {
+      float[] floats = ((FloatColumnPreIndexStatsCollector) keyStats).getValues();
+      for (int idx = 0; idx < floats.length; idx++) {
+        promoted.collect(Float.toString(floats[idx]));
+      }
+    } else if (keyStats instanceof DoubleColumnPreIndexStatsCollector) {
+      double[] doubles = ((DoubleColumnPreIndexStatsCollector) keyStats).getValues();
+      for (int idx = 0; idx < doubles.length; idx++) {
+        promoted.collect(Double.toString(doubles[idx]));
+      }
+    } else if (keyStats instanceof BigDecimalColumnPreIndexStatsCollector) {
+      BigDecimal[] decimals = ((BigDecimalColumnPreIndexStatsCollector) keyStats).getValues();
+      for (int idx = 0; idx < decimals.length; idx++) {
+        promoted.collect(String.valueOf(decimals[idx]));
+      }
+    }
+
+    // Register the replacement, then reset the counters the replay has touched.
+    _keyStats.put(key, promoted);
+    promoted._totalNumberOfEntries = carriedTotalEntries;
+    promoted._maxNumberOfMultiValues = carriedMaxMultiValues;
+
+    // The offending value goes through the regular path so it is counted like any other entry.
+    promoted.collect(String.valueOf(value));
+    return promoted;
+  }
 
   /**
    * Convert Map value data type to stored field type.
    * Note that all unknown types are automatically converted to String type.
    */
-  private static FieldSpec.DataType convertToDataType(PinotDataType ty) {
-    switch (ty) {
+  private static FieldSpec.DataType convertToDataType(PinotDataType 
pinotDataType) {
+    switch (pinotDataType) {
+      case BOOLEAN:
+        return FieldSpec.DataType.BOOLEAN;
+      case BYTE:
+      case CHARACTER:
       case SHORT:
       case INTEGER:
         return FieldSpec.DataType.INT;
@@ -303,7 +370,8 @@ public class MapColumnPreIndexStatsCollector extends 
AbstractColumnStatisticsCol
         return FieldSpec.DataType.BIG_DECIMAL;
       case TIMESTAMP:
         return FieldSpec.DataType.TIMESTAMP;
-      case BOOLEAN:
+      case BYTES:
+        return FieldSpec.DataType.BYTES;
       case STRING:
       case OBJECT:
       case MAP:
diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollectorTest.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollectorTest.java
index 0e7ce979afb..c5e19f2ab47 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollectorTest.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollectorTest.java
@@ -34,6 +34,7 @@ import org.apache.pinot.spi.data.ComplexFieldSpec;
 import org.apache.pinot.spi.data.DimensionFieldSpec;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.utils.ByteArray;
 import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
 import org.testng.annotations.Test;
 
@@ -351,6 +352,131 @@ public class MapColumnPreIndexStatsCollectorTest {
     assertEquals(sObj.getMaxValue(), "{\"k1\":\"v1\",\"k2\":\"v2\"}");
   }
 
+  /**
+   * A long followed by a non-numeric string under the same key must end up in a STRING collector that
+   * holds both values and counts both entries.
+   */
+  @Test
+  public void testNumericKeyTypePromotedToStringForMixedValues() {
+    MapColumnPreIndexStatsCollector collector = new MapColumnPreIndexStatsCollector("col", newConfig(false));
+    // One single-entry map per ingested row, in ingestion order.
+    for (Object traceId : new Object[]{9876543210L, "c69b6613-e174-49f1-ac47-4e9ab98e513f"}) {
+      Map<String, Object> row = new HashMap<>();
+      row.put("traceId", traceId);
+      collector.collect(row);
+    }
+    collector.seal();
+
+    AbstractColumnStatisticsCollector traceIdStats = collector.getKeyStatistics("traceId");
+    assertNotNull(traceIdStats);
+    assertTrue(traceIdStats instanceof StringColumnPreIndexStatsCollector);
+    assertEquals(traceIdStats.getCardinality(), 2);
+    assertEquals(traceIdStats.getMinValue(), "9876543210");
+    assertEquals(traceIdStats.getMaxValue(), "c69b6613-e174-49f1-ac47-4e9ab98e513f");
+    assertEquals(traceIdStats.getTotalNumberOfEntries(), 2);
+  }
+
+  /**
+   * Two identical longs followed by a string: after promotion the collector must report two distinct
+   * values but three entries, i.e. the entry count seen before the promotion is carried over.
+   */
+  @Test
+  public void testNumericKeyTypePromotedToStringForMixedValuesPreservesCounters() {
+    MapColumnPreIndexStatsCollector collector = new MapColumnPreIndexStatsCollector("col", newConfig(false));
+    // One single-entry map per ingested row, in ingestion order.
+    for (Object traceId : new Object[]{9876543210L, 9876543210L, "2x"}) {
+      Map<String, Object> row = new HashMap<>();
+      row.put("traceId", traceId);
+      collector.collect(row);
+    }
+    collector.seal();
+
+    AbstractColumnStatisticsCollector traceIdStats = collector.getKeyStatistics("traceId");
+    assertNotNull(traceIdStats);
+    assertTrue(traceIdStats instanceof StringColumnPreIndexStatsCollector);
+    assertEquals(traceIdStats.getCardinality(), 2);
+    assertEquals(traceIdStats.getMinValue(), "2x");
+    assertEquals(traceIdStats.getMaxValue(), "9876543210");
+    assertEquals(traceIdStats.getTotalNumberOfEntries(), 3);
+    assertEquals(traceIdStats.getMaxNumberOfMultiValues(), 0);
+    assertFalse(traceIdStats.isSorted());
+  }
+
+  /**
+   * Integers 2 and 10 followed by the string "2a": after promotion min/max must follow string
+   * ordering ("10" is the minimum, "2a" the maximum) rather than numeric ordering.
+   */
+  @Test
+  public void testNumericKeyTypePromotedToStringForMixedValuesKeepsLexicographicOrdering() {
+    MapColumnPreIndexStatsCollector collector = new MapColumnPreIndexStatsCollector("col", newConfig(false));
+    // One single-entry map per ingested row, in ingestion order.
+    for (Object traceId : new Object[]{2, 10, "2a"}) {
+      Map<String, Object> row = new HashMap<>();
+      row.put("traceId", traceId);
+      collector.collect(row);
+    }
+    collector.seal();
+
+    AbstractColumnStatisticsCollector traceIdStats = collector.getKeyStatistics("traceId");
+    assertNotNull(traceIdStats);
+    assertTrue(traceIdStats instanceof StringColumnPreIndexStatsCollector);
+    assertEquals(traceIdStats.getCardinality(), 3);
+    assertEquals(traceIdStats.getMinValue(), "10");
+    assertEquals(traceIdStats.getMaxValue(), "2a");
+    assertEquals(traceIdStats.getTotalNumberOfEntries(), 3);
+    assertFalse(traceIdStats.isSorted());
+  }
+
+  /**
+   * Boolean values under a key must be tracked by a STRING collector, with "false" and "true" as the
+   * minimum and maximum.
+   */
+  @Test
+  public void testBooleanKeyTypeUsesStringCollector() {
+    MapColumnPreIndexStatsCollector collector = new MapColumnPreIndexStatsCollector("col", newConfig(false));
+    // One single-entry map per ingested row, in ingestion order.
+    for (Object active : new Object[]{true, false}) {
+      Map<String, Object> row = new HashMap<>();
+      row.put("active", active);
+      collector.collect(row);
+    }
+    collector.seal();
+
+    AbstractColumnStatisticsCollector activeStats = collector.getKeyStatistics("active");
+    assertNotNull(activeStats);
+    assertTrue(activeStats instanceof StringColumnPreIndexStatsCollector);
+    assertEquals(activeStats.getCardinality(), 2);
+    assertEquals(activeStats.getMinValue(), "false");
+    assertEquals(activeStats.getMaxValue(), "true");
+  }
+
+  /**
+   * {@code byte[]} values under a key must be tracked by a BYTES collector whose min/max are exposed
+   * as {@link ByteArray} instances.
+   */
+  @Test
+  public void testBytesKeyTypeUsesBytesCollector() {
+    MapColumnPreIndexStatsCollector collector = new MapColumnPreIndexStatsCollector("col", newConfig(false));
+    // One single-entry map per ingested row, in ingestion order.
+    for (byte[] blob : new byte[][]{{1, 2}, {1, 3}}) {
+      Map<String, Object> row = new HashMap<>();
+      row.put("blob", blob);
+      collector.collect(row);
+    }
+    collector.seal();
+
+    AbstractColumnStatisticsCollector blobStats = collector.getKeyStatistics("blob");
+    assertNotNull(blobStats);
+    assertTrue(blobStats instanceof BytesColumnPredIndexStatsCollector);
+    assertEquals(blobStats.getMinValue(), new ByteArray(new byte[]{1, 2}));
+    assertEquals(blobStats.getMaxValue(), new ByteArray(new byte[]{1, 3}));
+    assertEquals(blobStats.getCardinality(), 2);
+    assertEquals(blobStats.getTotalNumberOfEntries(), 2);
+  }
+
   @Test(expectedExceptions = UnsupportedOperationException.class)
   public void testUnsupportedEntryTypeThrows() {
     StatsCollectorConfig cfg = newConfig(false);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to