This is an automated email from the ASF dual-hosted git repository. snlee pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
commit 06e91c13a11c5a07939adf326de28e95e39f4596 Author: Aishik <[email protected]> AuthorDate: Thu Nov 23 17:56:03 2023 +0530 added code to remove NaN from multivalued columns and modified order of transformers. --- .../recordtransformer/CompositeTransformer.java | 7 ++++--- .../recordtransformer/SpecialValueTransformer.java | 18 ++++++++++------ .../recordtransformer/RecordTransformerTest.java | 24 +++++++++++++--------- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java index 50ca2a97c1..0ba394e9c9 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java @@ -70,15 +70,16 @@ public class CompositeTransformer implements RecordTransformer { * </li> * <li> * {@link SpecialValueTransformer} after {@link DataTypeTransformer} so that we already have the values complying - * with the schema before handling special values + * with the schema before handling special values and before {@link NullValueTransformer} so that it transforms + * all the null values properly * </li> * </ul> */ public static List<RecordTransformer> getDefaultTransformers(TableConfig tableConfig, Schema schema) { return Stream.of(new ExpressionTransformer(tableConfig, schema), new FilterTransformer(tableConfig), new SchemaConformingTransformer(tableConfig, schema), new DataTypeTransformer(tableConfig, schema), - new TimeValidationTransformer(tableConfig, schema), new NullValueTransformer(tableConfig, schema), - new SpecialValueTransformer(schema), new SanitizationTransformer(schema)).filter(t -> !t.isNoOp()) + new TimeValidationTransformer(tableConfig, schema), new SpecialValueTransformer(schema), + new NullValueTransformer(tableConfig, schema), new SanitizationTransformer(schema)).filter(t -> !t.isNoOp()) .collect(Collectors.toList()); } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java index 4cfbbc8ce9..26df94933a 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java @@ -18,7 +18,9 @@ */ package org.apache.pinot.segment.local.recordtransformer; +import java.util.ArrayList; import java.util.HashSet; +import java.util.List; import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.data.FieldSpec.DataType; import org.apache.pinot.spi.data.Schema; @@ -65,10 +67,10 @@ public class SpecialValueTransformer implements RecordTransformer { private Object transformNaN(Object value) { if ((value instanceof Float) && ((Float) value).isNaN()) { LOGGER.info("Float.NaN detected, converting to default null."); - value = FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT; + value = null; } else if ((value instanceof Double) && ((Double) value).isNaN()) { LOGGER.info("Double.NaN detected, converting to default null."); - value = FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE; + value = null; } return value; } @@ -86,12 +88,16 @@ public class SpecialValueTransformer implements RecordTransformer { // Multi-valued column. Object[] values = (Object[]) value; int numValues = values.length; - for (int i = 0; i < numValues; i++) { - if (values[i] != null) { - values[i] = transformNegativeZero(values[i]); - values[i] = transformNaN(values[i]); + List<Object> negativeZeroNanSanitizedValues = new ArrayList<>(numValues); + int numberOfElements = values.length; + for (Object o : values) { + Object zeroTransformedValue = transformNegativeZero(o); + Object nanTransformedValue = transformNaN(zeroTransformedValue); + if (nanTransformedValue != null) { + negativeZeroNanSanitizedValues.add(nanTransformedValue); } } + record.putValue(element,negativeZeroNanSanitizedValues.toArray()); } else { // Single-valued column. Object zeroTransformedValue = transformNegativeZero(value); diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java index 6361963785..2685f8b6da 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java @@ -19,6 +19,8 @@ package org.apache.pinot.segment.local.recordtransformer; import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.concurrent.TimeUnit; @@ -286,12 +288,12 @@ public class RecordTransformerTest { Double.doubleToRawLongBits(0.0d)); assertEquals(record.getValue("mvFloatNegativeZero"), new Float[]{0.0f, 1.0f, 0.0f, 3.0f}); assertEquals(record.getValue("mvDoubleNegativeZero"), new Double[]{0.0d, 1.0d, 0.0d, 3.0d}); - assertEquals(record.getValue("svFloatNaN"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT); - assertEquals(record.getValue("svDoubleNaN"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE); + assertNull(record.getValue("svFloatNaN")); + assertNull(record.getValue("svDoubleNaN")); assertEquals(record.getValue("mvFloatNaN"), - new Float[]{0.0f, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT, 2.0f}); + new Float[]{0.0f, 2.0f}); assertEquals(record.getValue("mvDoubleNaN"), - new Double[]{0.0d, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE, 2.0d}); + new Double[]{0.0d, 2.0d}); } } @@ -302,7 +304,8 @@ public class RecordTransformerTest { // Build Schema and ingestionConfig in such a way that all the transformers are loaded. Schema schema = new Schema.SchemaBuilder().addSingleValueDimension("svInt", DataType.INT) .addSingleValueDimension("svDouble", DataType.DOUBLE) - .addSingleValueDimension("expressionTestColumn", DataType.INT).addSingleValueDimension("svNaN", DataType.FLOAT) + .addSingleValueDimension("expressionTestColumn", DataType.INT) + .addSingleValueDimension("svNaN", DataType.FLOAT).addMultiValueDimension("mvNaN",DataType.FLOAT) .addSingleValueDimension("emptyDimensionForNullValueTransformer", DataType.FLOAT) .addSingleValueDimension("svStringWithNullCharacters", DataType.STRING) .addSingleValueDimension("indexableExtras", DataType.JSON) @@ -327,8 +330,8 @@ public class RecordTransformerTest { List<RecordTransformer> expectedListOfTransformers = List.of(new ExpressionTransformer(tableConfig, schema), new FilterTransformer(tableConfig), new SchemaConformingTransformer(tableConfig, schema), new DataTypeTransformer(tableConfig, schema), - new TimeValidationTransformer(tableConfig, schema), new NullValueTransformer(tableConfig, schema), - new SpecialValueTransformer(schema), new SanitizationTransformer(schema)); + new TimeValidationTransformer(tableConfig, schema), new SpecialValueTransformer(schema), + new NullValueTransformer(tableConfig, schema), new SanitizationTransformer(schema)); // Check that the number of current transformers match the expected number of transformers. assertEquals(currentListOfTransformers.size(), NUMBER_OF_TRANSFORMERS); @@ -349,6 +352,7 @@ public class RecordTransformerTest { // Data for SpecialValue Transformer. record.putValue("svNaN", Float.NaN); + record.putValue("mvNaN",new Float[]{1.0f,Float.NaN,2.0f}); // Data for sanitization transformer. record.putValue("svStringWithNullCharacters", "1\0002\0003"); @@ -645,10 +649,10 @@ public class RecordTransformerTest { assertEquals(record.getValue("svFloatNaN"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT); assertEquals(record.getValue("svDoubleNaN"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE); assertEquals(record.getValue("mvFloatNaN"), - new Float[]{0.0f, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT, 2.0f}); + new Float[]{0.0f, 2.0f}); assertEquals(record.getValue("mvDoubleNaN"), - new Double[]{0.0d, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE, 2.0d}); - assertTrue(record.getNullValueFields().isEmpty()); + new Double[]{0.0d, 2.0d}); + assertEquals(new ArrayList<>(record.getNullValueFields()), new ArrayList<>(Arrays.asList("svFloatNaN","svDoubleNaN"))); } // Test empty record --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
