This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 063b6e4e9ef Handle mixed-type map key values by promoting to string
(#17722)
063b6e4e9ef is described below
commit 063b6e4e9efc6593ac0145cf652b9169138d32e2
Author: Xiang Fu <[email protected]>
AuthorDate: Thu Feb 19 19:52:42 2026 -0800
Handle mixed-type map key values by promoting to string (#17722)
---
...ieldTypeMixedValueIngestingIntegrationTest.java | 144 +++++++++++++++++++++
.../BigDecimalColumnPreIndexStatsCollector.java | 4 +
.../stats/DoubleColumnPreIndexStatsCollector.java | 4 +
.../stats/FloatColumnPreIndexStatsCollector.java | 4 +
.../stats/IntColumnPreIndexStatsCollector.java | 4 +
.../stats/LongColumnPreIndexStatsCollector.java | 4 +
.../stats/MapColumnPreIndexStatsCollector.java | 100 +++++++++++---
.../stats/MapColumnPreIndexStatsCollectorTest.java | 126 ++++++++++++++++++
8 files changed, 374 insertions(+), 16 deletions(-)
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/MapFieldTypeMixedValueIngestingIntegrationTest.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/MapFieldTypeMixedValueIngestingIntegrationTest.java
new file mode 100644
index 00000000000..65736c68eba
--- /dev/null
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/MapFieldTypeMixedValueIngestingIntegrationTest.java
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests.custom;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.JsonNodeType;
+import java.io.File;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.pinot.spi.data.ComplexFieldSpec;
+import org.apache.pinot.spi.data.DimensionFieldSpec;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.testng.annotations.Test;
+
+import static org.apache.avro.Schema.create;
+import static org.testng.Assert.assertEquals;
+
+
+/**
+ * Integration test for a realtime table whose MAP column is declared with STRING values while the ingested Avro
+ * records carry either a LONG or a STRING under the same map key. Verifies that every value is returned as a string
+ * and that filtering on the string value matches only the records written with it.
+ */
+@Test(suiteName = "CustomClusterIntegrationTest")
+public class MapFieldTypeMixedValueIngestingIntegrationTest extends CustomDataQueryClusterIntegrationTest {
+
+  private static final String DEFAULT_TABLE_NAME = "MapFieldTypeMixedValueIngestingIntegrationTest";
+  private static final String MAP_FIELD_NAME = "tracingContext";
+  private static final String TRACE_ID_KEY = "traceId";
+  private static final int NUM_RECORDS = 1000;
+  private static final int FLUSH_SIZE = 100;
+  // The last record of every STRING_TRACE_ID_INTERVAL consecutive records carries STRING_TRACE_ID; all others carry
+  // NUMERIC_TRACE_ID. Tied to FLUSH_SIZE because the COUNT assertion expects NUM_RECORDS / FLUSH_SIZE matches.
+  // NOTE(review): presumably chosen so the string value arrives after numeric values within a flush window - confirm.
+  private static final int STRING_TRACE_ID_INTERVAL = FLUSH_SIZE;
+  private static final long NUMERIC_TRACE_ID = 9876543210L;
+  private static final String STRING_TRACE_ID = "c69b6613-e174-49f1-ac47-4e9ab98e513f";
+
+  /**
+   * Returns whether the record written at {@code recordIndex} carries {@link #STRING_TRACE_ID} instead of
+   * {@link #NUMERIC_TRACE_ID}. Shared by the data generator and the assertions so they cannot drift apart.
+   */
+  private static boolean hasStringTraceId(int recordIndex) {
+    return recordIndex % STRING_TRACE_ID_INTERVAL == STRING_TRACE_ID_INTERVAL - 1;
+  }
+
+  @Override
+  protected long getCountStarResult() {
+    return NUM_RECORDS;
+  }
+
+  @Override
+  public String getTableName() {
+    return DEFAULT_TABLE_NAME;
+  }
+
+  /**
+   * Builds a schema with one MAP column (STRING keys, STRING values) and one TIMESTAMP date-time column.
+   */
+  @Override
+  public Schema createSchema() {
+    ComplexFieldSpec tracingContextFieldSpec = new ComplexFieldSpec(MAP_FIELD_NAME, FieldSpec.DataType.MAP, true,
+        Map.of(
+            ComplexFieldSpec.KEY_FIELD,
+            new DimensionFieldSpec(ComplexFieldSpec.KEY_FIELD, FieldSpec.DataType.STRING, true),
+            ComplexFieldSpec.VALUE_FIELD,
+            new DimensionFieldSpec(ComplexFieldSpec.VALUE_FIELD, FieldSpec.DataType.STRING, true)
+        ));
+    return new Schema.SchemaBuilder().setSchemaName(getTableName())
+        .addField(tracingContextFieldSpec)
+        .addDateTimeField(TIMESTAMP_FIELD_NAME, FieldSpec.DataType.TIMESTAMP, "1:MILLISECONDS", "1:MILLISECONDS")
+        .build();
+  }
+
+  /**
+   * Writes {@link #NUM_RECORDS} Avro records, each with a single-entry map under {@link #TRACE_ID_KEY} and an
+   * increasing timestamp. The map value is a LONG/STRING union; {@link #hasStringTraceId(int)} decides which one a
+   * given record receives.
+   */
+  public List<File> createAvroFiles()
+      throws Exception {
+    org.apache.avro.Schema avroSchema = org.apache.avro.Schema.createRecord("myRecord", null, null, false);
+    // Map values are a union of LONG and STRING so a single key can carry either type.
+    org.apache.avro.Schema mapValueSchema = org.apache.avro.Schema.createUnion(
+        Arrays.asList(create(org.apache.avro.Schema.Type.LONG), create(org.apache.avro.Schema.Type.STRING)));
+    org.apache.avro.Schema mapAvroSchema = org.apache.avro.Schema.createMap(mapValueSchema);
+    List<org.apache.avro.Schema.Field> fields =
+        Arrays.asList(
+            new org.apache.avro.Schema.Field(MAP_FIELD_NAME, mapAvroSchema, null, null),
+            new org.apache.avro.Schema.Field(TIMESTAMP_FIELD_NAME, create(org.apache.avro.Schema.Type.LONG), null, null)
+        );
+    avroSchema.setFields(fields);
+
+    try (AvroFilesAndWriters avroFilesAndWriters = createAvroFilesAndWriters(avroSchema)) {
+      long tsBase = System.currentTimeMillis();
+      List<DataFileWriter<GenericData.Record>> writers = avroFilesAndWriters.getWriters();
+      for (int i = 0; i < NUM_RECORDS; i++) {
+        Map<String, Object> mixedMapRecord = new HashMap<>();
+        if (hasStringTraceId(i)) {
+          mixedMapRecord.put(TRACE_ID_KEY, STRING_TRACE_ID);
+        } else {
+          mixedMapRecord.put(TRACE_ID_KEY, NUMERIC_TRACE_ID);
+        }
+        GenericData.Record mapRecord = new GenericData.Record(avroSchema);
+        mapRecord.put(MAP_FIELD_NAME, mixedMapRecord);
+        // Strictly increasing timestamps let the query below recover write order with ORDER BY.
+        mapRecord.put(TIMESTAMP_FIELD_NAME, tsBase + i);
+        writers.get(0).append(mapRecord);
+      }
+      return avroFilesAndWriters.getAvroFiles();
+    }
+  }
+
+  @Override
+  protected int getRealtimeSegmentFlushSize() {
+    return FLUSH_SIZE;
+  }
+
+  /**
+   * Selects the map value for every record in timestamp order and checks that each one is a JSON string with the
+   * expected text, then checks that an equality filter on the string trace id counts exactly the records written
+   * with it.
+   */
+  @Test(dataProvider = "useBothQueryEngines")
+  public void testNumericMixedMapKeyValuesAsString(boolean useMultiStageQueryEngine)
+      throws Exception {
+    setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+    String query = "SELECT " + MAP_FIELD_NAME + "['" + TRACE_ID_KEY + "'] FROM " + getTableName()
+        + " ORDER BY " + TIMESTAMP_FIELD_NAME + " LIMIT " + NUM_RECORDS;
+    JsonNode pinotResponse = postQuery(query);
+    assertEquals(pinotResponse.get("exceptions").size(), 0);
+    JsonNode rows = pinotResponse.get("resultTable").get("rows");
+    assertEquals(rows.size(), NUM_RECORDS);
+    for (int i = 0; i < NUM_RECORDS; i++) {
+      JsonNode traceIdCell = rows.get(i).get(0);
+      assertEquals(traceIdCell.getNodeType(), JsonNodeType.STRING);
+      if (hasStringTraceId(i)) {
+        assertEquals(traceIdCell.textValue(), STRING_TRACE_ID);
+      } else {
+        assertEquals(traceIdCell.textValue(), Long.toString(NUMERIC_TRACE_ID));
+      }
+    }
+
+    query = "SELECT COUNT(*) FROM " + getTableName() + " WHERE " + MAP_FIELD_NAME + "['" + TRACE_ID_KEY
+        + "'] = '" + STRING_TRACE_ID + "'";
+    pinotResponse = postQuery(query);
+    assertEquals(pinotResponse.get("exceptions").size(), 0);
+    // One matching record per interval; derived from the same constant the generator uses.
+    assertEquals(pinotResponse.get("resultTable").get("rows").get(0).get(0).intValue(),
+        NUM_RECORDS / STRING_TRACE_ID_INTERVAL);
+  }
+
+  @Override
+  public boolean isRealtimeTable() {
+    return true;
+  }
+}
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/BigDecimalColumnPreIndexStatsCollector.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/BigDecimalColumnPreIndexStatsCollector.java
index bd4506e2cfc..d93102c43a5 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/BigDecimalColumnPreIndexStatsCollector.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/BigDecimalColumnPreIndexStatsCollector.java
@@ -106,6 +106,10 @@ public class BigDecimalColumnPreIndexStatsCollector
extends AbstractColumnStatis
return _sealed ? _sortedValues.length : _values.size();
}
+  /**
+   * Returns the contents of {@code _values} as a {@code BigDecimal[]}.
+   * NOTE(review): reads {@code _values} without checking {@code _sealed}; presumably only called before
+   * {@code seal()} - confirm.
+   */
+  BigDecimal[] getValues() {
+    BigDecimal[] target = new BigDecimal[0];
+    return _values.toArray(target);
+  }
+
@Override
public void seal() {
if (!_sealed) {
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/DoubleColumnPreIndexStatsCollector.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/DoubleColumnPreIndexStatsCollector.java
index 77b34b0459f..ad22f2902cd 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/DoubleColumnPreIndexStatsCollector.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/DoubleColumnPreIndexStatsCollector.java
@@ -110,6 +110,10 @@ public class DoubleColumnPreIndexStatsCollector extends
AbstractColumnStatistics
return _sealed ? _sortedValues.length : _values.size();
}
+  /**
+   * Returns the contents of {@code _values} as a {@code double[]}.
+   * NOTE(review): reads {@code _values} without checking {@code _sealed}; presumably only called before
+   * {@code seal()} - confirm.
+   */
+  double[] getValues() {
+    double[] collected = _values.toDoubleArray();
+    return collected;
+  }
+
@Override
public void seal() {
if (!_sealed) {
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/FloatColumnPreIndexStatsCollector.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/FloatColumnPreIndexStatsCollector.java
index aa60f4d5bae..54419d867d3 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/FloatColumnPreIndexStatsCollector.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/FloatColumnPreIndexStatsCollector.java
@@ -110,6 +110,10 @@ public class FloatColumnPreIndexStatsCollector extends
AbstractColumnStatisticsC
return _sealed ? _sortedValues.length : _values.size();
}
+  /**
+   * Returns the contents of {@code _values} as a {@code float[]}.
+   * NOTE(review): reads {@code _values} without checking {@code _sealed}; presumably only called before
+   * {@code seal()} - confirm.
+   */
+  float[] getValues() {
+    float[] collected = _values.toFloatArray();
+    return collected;
+  }
+
@Override
public void seal() {
if (!_sealed) {
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/IntColumnPreIndexStatsCollector.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/IntColumnPreIndexStatsCollector.java
index 74882f25ff3..80622739d83 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/IntColumnPreIndexStatsCollector.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/IntColumnPreIndexStatsCollector.java
@@ -110,6 +110,10 @@ public class IntColumnPreIndexStatsCollector extends
AbstractColumnStatisticsCol
return _sealed ? _sortedValues.length : _values.size();
}
+  /**
+   * Returns the contents of {@code _values} as an {@code int[]}.
+   * NOTE(review): reads {@code _values} without checking {@code _sealed}; presumably only called before
+   * {@code seal()} - confirm.
+   */
+  int[] getValues() {
+    int[] collected = _values.toIntArray();
+    return collected;
+  }
+
@Override
public void seal() {
if (!_sealed) {
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/LongColumnPreIndexStatsCollector.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/LongColumnPreIndexStatsCollector.java
index c60efc5bd9a..a9e90b3f133 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/LongColumnPreIndexStatsCollector.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/LongColumnPreIndexStatsCollector.java
@@ -110,6 +110,10 @@ public class LongColumnPreIndexStatsCollector extends
AbstractColumnStatisticsCo
return _sealed ? _sortedValues.length : _values.size();
}
+  /**
+   * Returns the contents of {@code _values} as a {@code long[]}.
+   * NOTE(review): reads {@code _values} without checking {@code _sealed}; presumably only called before
+   * {@code seal()} - confirm.
+   */
+  long[] getValues() {
+    long[] collected = _values.toLongArray();
+    return collected;
+  }
+
@Override
public void seal() {
if (!_sealed) {
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollector.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollector.java
index 1d7701ca827..fab70fddd71 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollector.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollector.java
@@ -115,6 +115,10 @@ public class MapColumnPreIndexStatsCollector extends
AbstractColumnStatisticsCol
keyStats.collect(value);
continue;
}
+ if (keyStats instanceof BytesColumnPredIndexStatsCollector) {
+ keyStats.collect(value);
+ continue;
+ }
if (keyStats instanceof StringColumnPreIndexStatsCollector) {
if (value instanceof String || value instanceof Number || value
instanceof Boolean) {
keyStats.collect(String.valueOf(value));
@@ -131,8 +135,9 @@ public class MapColumnPreIndexStatsCollector extends
AbstractColumnStatisticsCol
try {
keyStats.collect(new BigDecimal(value.toString()));
} catch (NumberFormatException e) {
- LOGGER.error("Failed to parse BigDecimal for key '{}', value '{}':
{}", key, value, e.getMessage());
- // Skip collecting this value for statistics
+ LOGGER.warn("Could not parse map key '{}' value '{}' as
BIG_DECIMAL; promoting to STRING collector",
+ key, value);
+ keyStats = promoteNumericKeyStatsToStringCollector(key, keyStats,
value);
}
continue;
}
@@ -156,23 +161,44 @@ public class MapColumnPreIndexStatsCollector extends
AbstractColumnStatisticsCol
}
}
if (keyStats instanceof IntColumnPreIndexStatsCollector) {
- keyStats.collect(Integer.parseInt(value.toString()));
+ try {
+ keyStats.collect(Integer.parseInt(value.toString()));
+ } catch (NumberFormatException e) {
+ LOGGER.warn("Could not parse map key '{}' value '{}' as INT;
promoting to STRING collector", key, value);
+ keyStats = promoteNumericKeyStatsToStringCollector(key, keyStats,
value);
+ }
continue;
}
if (keyStats instanceof LongColumnPreIndexStatsCollector) {
- keyStats.collect(Long.parseLong(value.toString()));
+ try {
+ keyStats.collect(Long.parseLong(value.toString()));
+ } catch (NumberFormatException e) {
+ LOGGER.warn("Could not parse map key '{}' value '{}' as LONG;
promoting to STRING collector", key, value);
+ keyStats = promoteNumericKeyStatsToStringCollector(key, keyStats,
value);
+ }
continue;
}
if (keyStats instanceof FloatColumnPreIndexStatsCollector) {
- keyStats.collect(Float.parseFloat(value.toString()));
+ try {
+ keyStats.collect(Float.parseFloat(value.toString()));
+ } catch (NumberFormatException e) {
+ LOGGER.warn("Could not parse map key '{}' value '{}' as FLOAT;
promoting to STRING collector", key, value);
+ keyStats = promoteNumericKeyStatsToStringCollector(key, keyStats,
value);
+ }
continue;
}
if (keyStats instanceof DoubleColumnPreIndexStatsCollector) {
- keyStats.collect(Double.parseDouble(value.toString()));
+ try {
+ keyStats.collect(Double.parseDouble(value.toString()));
+ } catch (NumberFormatException e) {
+ LOGGER.warn("Could not parse map key '{}' value '{}' as DOUBLE;
promoting to STRING collector", key, value);
+ keyStats = promoteNumericKeyStatsToStringCollector(key, keyStats,
value);
+ }
continue;
}
// Catch all
- keyStats.collect(value);
+ throw new IllegalArgumentException("Unsupported value type " +
value.getClass() + " for key " + key
+ + " with collector " + keyStats.getClass().getSimpleName());
}
_totalNumberOfEntries++;
} else {
@@ -254,16 +280,20 @@ public class MapColumnPreIndexStatsCollector extends
AbstractColumnStatisticsCol
 private AbstractColumnStatisticsCollector createKeyStatsCollector(String
key, Object value) {
 // Get the type of the value
 PinotDataType type = PinotDataType.getSingleValueType(value.getClass());
+ // Map the value's PinotDataType to a FieldSpec.DataType and delegate to the DataType-based overload,
+ // which builds the collector for that type.
+ return createKeyStatsCollector(key, convertToDataType(type));
+ }
+
+ private AbstractColumnStatisticsCollector createKeyStatsCollector(String
key, FieldSpec.DataType dataType) {
TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(key).build();
Schema keySchema = new Schema.SchemaBuilder().setSchemaName(key)
- .addField(new DimensionFieldSpec(key, convertToDataType(type),
false)).build();
+ .addField(new DimensionFieldSpec(key, dataType, false)).build();
StatsCollectorConfig config = new StatsCollectorConfig(tableConfig,
keySchema, null);
if (_createNoDictCollectorsForKeys) {
return new NoDictColumnStatisticsCollector(key, config);
}
- switch (type) {
- case INTEGER:
+ switch (dataType.getStoredType()) {
+ case INT:
return new IntColumnPreIndexStatsCollector(key, config);
case LONG:
return new LongColumnPreIndexStatsCollector(key, config);
@@ -273,23 +303,60 @@ public class MapColumnPreIndexStatsCollector extends
AbstractColumnStatisticsCol
return new DoubleColumnPreIndexStatsCollector(key, config);
case BIG_DECIMAL:
return new BigDecimalColumnPreIndexStatsCollector(key, config);
- case BOOLEAN:
+ case BYTES:
+ return new BytesColumnPredIndexStatsCollector(key, config);
case STRING:
case MAP:
- case OBJECT:
return new StringColumnPreIndexStatsCollector(key, config);
default:
- LOGGER.warn("Unknown data type {} for key {} and value type {}", type,
key, value.getClass().getName());
+ LOGGER.warn("Unknown data type {} for key {}", dataType, key);
return new StringColumnPreIndexStatsCollector(key, config);
+ }
+ }
+
+ /**
+ * Replaces the collector registered for {@code key} with a STRING collector and feeds it the current value.
+ *
+ * Steps, in order: create a STRING collector for the key; replay every value returned by the old collector's
+ * {@code getValues()} as its {@code String.valueOf} form (only for Int/Long/Float/Double/BigDecimal collectors;
+ * any other collector type replays nothing); register the new collector in {@code _keyStats}; overwrite its
+ * max-multi-value and total-entry counters with the ones read from the old collector; collect
+ * {@code String.valueOf(value)}.
+ *
+ * @param key map key whose collector is being replaced
+ * @param keyStats the collector currently used for {@code key}
+ * @param value the value that triggered the promotion; collected into the new collector as a string
+ * @return the new STRING collector, already stored in {@code _keyStats}
+ */
+ private AbstractColumnStatisticsCollector
promoteNumericKeyStatsToStringCollector(String key,
+ AbstractColumnStatisticsCollector keyStats, Object value) {
+ AbstractColumnStatisticsCollector stringKeyStats =
createKeyStatsCollector(key, FieldSpec.DataType.STRING);
+ // Capture the old collector's counters before replaying, so they can be restored afterwards.
+ int previousTotalNumberOfEntries = keyStats.getTotalNumberOfEntries();
+ int previousMaxNumberOfMultiValues = keyStats.getMaxNumberOfMultiValues();
+ // Replay the previously collected values in string form, one branch per numeric collector type.
+ if (keyStats instanceof IntColumnPreIndexStatsCollector) {
+ for (int intValue : ((IntColumnPreIndexStatsCollector)
keyStats).getValues()) {
+ stringKeyStats.collect(String.valueOf(intValue));
+ }
+ } else if (keyStats instanceof LongColumnPreIndexStatsCollector) {
+ for (long longValue : ((LongColumnPreIndexStatsCollector)
keyStats).getValues()) {
+ stringKeyStats.collect(String.valueOf(longValue));
+ }
+ } else if (keyStats instanceof FloatColumnPreIndexStatsCollector) {
+ for (float floatValue : ((FloatColumnPreIndexStatsCollector)
keyStats).getValues()) {
+ stringKeyStats.collect(String.valueOf(floatValue));
+ }
+ } else if (keyStats instanceof DoubleColumnPreIndexStatsCollector) {
+ for (double doubleValue : ((DoubleColumnPreIndexStatsCollector)
keyStats).getValues()) {
+ stringKeyStats.collect(String.valueOf(doubleValue));
+ }
+ } else if (keyStats instanceof BigDecimalColumnPreIndexStatsCollector) {
+ for (BigDecimal decimalValue : ((BigDecimalColumnPreIndexStatsCollector)
keyStats).getValues()) {
+ stringKeyStats.collect(String.valueOf(decimalValue));
+ }
 }
+ _keyStats.put(key, stringKeyStats);
+ // Discard whatever the replay did to the counters and carry over the old collector's values instead.
+ // NOTE(review): presumably needed because getValues() does not repeat duplicate values - confirm.
+ stringKeyStats._maxNumberOfMultiValues = previousMaxNumberOfMultiValues;
+ stringKeyStats._totalNumberOfEntries = previousTotalNumberOfEntries;
+ // Must come after the counter restore so the triggering value is counted on top of the previous total.
+ stringKeyStats.collect(String.valueOf(value));
+ return stringKeyStats;
 }
/**
* Convert Map value data type to stored field type.
* Note that all unknown types are automatically converted to String type.
*/
- private static FieldSpec.DataType convertToDataType(PinotDataType ty) {
- switch (ty) {
+ private static FieldSpec.DataType convertToDataType(PinotDataType
pinotDataType) {
+ switch (pinotDataType) {
+ case BOOLEAN:
+ return FieldSpec.DataType.BOOLEAN;
+ case BYTE:
+ case CHARACTER:
case SHORT:
case INTEGER:
return FieldSpec.DataType.INT;
@@ -303,7 +370,8 @@ public class MapColumnPreIndexStatsCollector extends
AbstractColumnStatisticsCol
return FieldSpec.DataType.BIG_DECIMAL;
case TIMESTAMP:
return FieldSpec.DataType.TIMESTAMP;
- case BOOLEAN:
+ case BYTES:
+ return FieldSpec.DataType.BYTES;
case STRING:
case OBJECT:
case MAP:
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollectorTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollectorTest.java
index 0e7ce979afb..c5e19f2ab47 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollectorTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/creator/impl/stats/MapColumnPreIndexStatsCollectorTest.java
@@ -34,6 +34,7 @@ import org.apache.pinot.spi.data.ComplexFieldSpec;
import org.apache.pinot.spi.data.DimensionFieldSpec;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.utils.ByteArray;
import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
import org.testng.annotations.Test;
@@ -351,6 +352,131 @@ public class MapColumnPreIndexStatsCollectorTest {
assertEquals(sObj.getMaxValue(), "{\"k1\":\"v1\",\"k2\":\"v2\"}");
}
+  /**
+   * Collects a LONG and then a non-numeric string under the same key and expects the key's statistics to come from
+   * a STRING collector covering both values.
+   */
+  @Test
+  public void testNumericKeyTypePromotedToStringForMixedValues() {
+    Map<String, Object> numericRecord = new HashMap<>();
+    numericRecord.put("traceId", 9876543210L);
+
+    Map<String, Object> stringRecord = new HashMap<>();
+    stringRecord.put("traceId", "c69b6613-e174-49f1-ac47-4e9ab98e513f");
+
+    MapColumnPreIndexStatsCollector mapCollector = new MapColumnPreIndexStatsCollector("col", newConfig(false));
+    mapCollector.collect(numericRecord);
+    mapCollector.collect(stringRecord);
+    mapCollector.seal();
+
+    AbstractColumnStatisticsCollector traceIdStats = mapCollector.getKeyStatistics("traceId");
+    assertNotNull(traceIdStats);
+    assertTrue(traceIdStats instanceof StringColumnPreIndexStatsCollector);
+    assertEquals(traceIdStats.getCardinality(), 2);
+    assertEquals(traceIdStats.getMinValue(), "9876543210");
+    assertEquals(traceIdStats.getMaxValue(), "c69b6613-e174-49f1-ac47-4e9ab98e513f");
+    assertEquals(traceIdStats.getTotalNumberOfEntries(), 2);
+  }
+
+  /**
+   * Collects the same LONG twice and then a non-numeric string; expects two distinct values but three total
+   * entries after promotion, no multi-value count, and an unsorted column.
+   */
+  @Test
+  public void testNumericKeyTypePromotedToStringForMixedValuesPreservesCounters() {
+    Map<String, Object> firstNumericRecord = new HashMap<>();
+    firstNumericRecord.put("traceId", 9876543210L);
+
+    Map<String, Object> duplicateNumericRecord = new HashMap<>();
+    duplicateNumericRecord.put("traceId", 9876543210L);
+
+    Map<String, Object> stringRecord = new HashMap<>();
+    stringRecord.put("traceId", "2x");
+
+    MapColumnPreIndexStatsCollector mapCollector = new MapColumnPreIndexStatsCollector("col", newConfig(false));
+    mapCollector.collect(firstNumericRecord);
+    mapCollector.collect(duplicateNumericRecord);
+    mapCollector.collect(stringRecord);
+    mapCollector.seal();
+
+    AbstractColumnStatisticsCollector traceIdStats = mapCollector.getKeyStatistics("traceId");
+    assertNotNull(traceIdStats);
+    assertTrue(traceIdStats instanceof StringColumnPreIndexStatsCollector);
+    assertEquals(traceIdStats.getCardinality(), 2);
+    assertEquals(traceIdStats.getMinValue(), "2x");
+    assertEquals(traceIdStats.getMaxValue(), "9876543210");
+    assertEquals(traceIdStats.getTotalNumberOfEntries(), 3);
+    assertEquals(traceIdStats.getMaxNumberOfMultiValues(), 0);
+    assertFalse(traceIdStats.isSorted());
+  }
+
+  /**
+   * Collects the ints 2 and 10 and then the string "2a"; expects min/max to follow string ordering ("10" before
+   * "2a") once the key is promoted to a STRING collector.
+   */
+  @Test
+  public void testNumericKeyTypePromotedToStringForMixedValuesKeepsLexicographicOrdering() {
+    Map<String, Object> smallIntRecord = new HashMap<>();
+    smallIntRecord.put("traceId", 2);
+
+    Map<String, Object> largeIntRecord = new HashMap<>();
+    largeIntRecord.put("traceId", 10);
+
+    Map<String, Object> stringRecord = new HashMap<>();
+    stringRecord.put("traceId", "2a");
+
+    MapColumnPreIndexStatsCollector mapCollector = new MapColumnPreIndexStatsCollector("col", newConfig(false));
+    mapCollector.collect(smallIntRecord);
+    mapCollector.collect(largeIntRecord);
+    mapCollector.collect(stringRecord);
+    mapCollector.seal();
+
+    AbstractColumnStatisticsCollector traceIdStats = mapCollector.getKeyStatistics("traceId");
+    assertNotNull(traceIdStats);
+    assertTrue(traceIdStats instanceof StringColumnPreIndexStatsCollector);
+    assertEquals(traceIdStats.getCardinality(), 3);
+    assertEquals(traceIdStats.getMinValue(), "10");
+    assertEquals(traceIdStats.getMaxValue(), "2a");
+    assertEquals(traceIdStats.getTotalNumberOfEntries(), 3);
+    assertFalse(traceIdStats.isSorted());
+  }
+
+  /**
+   * Collects {@code true} and {@code false} under one key and expects a STRING collector whose min/max are the
+   * string forms "false" and "true".
+   */
+  @Test
+  public void testBooleanKeyTypeUsesStringCollector() {
+    Map<String, Object> activeRecord = new HashMap<>();
+    activeRecord.put("active", true);
+
+    Map<String, Object> inactiveRecord = new HashMap<>();
+    inactiveRecord.put("active", false);
+
+    MapColumnPreIndexStatsCollector mapCollector = new MapColumnPreIndexStatsCollector("col", newConfig(false));
+    mapCollector.collect(activeRecord);
+    mapCollector.collect(inactiveRecord);
+    mapCollector.seal();
+
+    AbstractColumnStatisticsCollector activeStats = mapCollector.getKeyStatistics("active");
+    assertNotNull(activeStats);
+    assertTrue(activeStats instanceof StringColumnPreIndexStatsCollector);
+    assertEquals(activeStats.getCardinality(), 2);
+    assertEquals(activeStats.getMinValue(), "false");
+    assertEquals(activeStats.getMaxValue(), "true");
+  }
+
+  /**
+   * Collects two distinct {@code byte[]} values under one key and expects a bytes collector reporting them as
+   * {@code ByteArray} min/max with cardinality and total entries of 2.
+   */
+  @Test
+  public void testBytesKeyTypeUsesBytesCollector() {
+    Map<String, Object> firstBlobRecord = new HashMap<>();
+    firstBlobRecord.put("blob", new byte[]{1, 2});
+
+    Map<String, Object> secondBlobRecord = new HashMap<>();
+    secondBlobRecord.put("blob", new byte[]{1, 3});
+
+    MapColumnPreIndexStatsCollector mapCollector = new MapColumnPreIndexStatsCollector("col", newConfig(false));
+    mapCollector.collect(firstBlobRecord);
+    mapCollector.collect(secondBlobRecord);
+    mapCollector.seal();
+
+    AbstractColumnStatisticsCollector blobStats = mapCollector.getKeyStatistics("blob");
+    assertNotNull(blobStats);
+    assertTrue(blobStats instanceof BytesColumnPredIndexStatsCollector);
+    assertEquals(blobStats.getMinValue(), new ByteArray(new byte[]{1, 2}));
+    assertEquals(blobStats.getMaxValue(), new ByteArray(new byte[]{1, 3}));
+    assertEquals(blobStats.getCardinality(), 2);
+    assertEquals(blobStats.getTotalNumberOfEntries(), 2);
+  }
+
@Test(expectedExceptions = UnsupportedOperationException.class)
public void testUnsupportedEntryTypeThrows() {
StatsCollectorConfig cfg = newConfig(false);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]