This is an automated email from the ASF dual-hosted git repository. snlee pushed a commit to branch revert-12032-santizeNanNegativeZeroBehaviour in repository https://gitbox.apache.org/repos/asf/pinot.git
commit f51e685a4972666866785f41d6d1a088be58be34 Author: Seunghyun Lee <[email protected]> AuthorDate: Mon Nov 27 11:39:57 2023 -0800 Revert "added code to remove NaN from multivalued columns and modified order of transformers." This reverts commit 06e91c13a11c5a07939adf326de28e95e39f4596. --- .../recordtransformer/CompositeTransformer.java | 7 +++---- .../recordtransformer/SpecialValueTransformer.java | 18 ++++++---------- .../recordtransformer/RecordTransformerTest.java | 24 +++++++++------------- 3 files changed, 19 insertions(+), 30 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java index 0ba394e9c9..50ca2a97c1 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java @@ -70,16 +70,15 @@ public class CompositeTransformer implements RecordTransformer { * </li> * <li> * {@link SpecialValueTransformer} after {@link DataTypeTransformer} so that we already have the values complying - * with the schema before handling special values and before {@link NullValueTransformer} so that it transforms - * all the null values properly + * with the schema before handling special values * </li> * </ul> */ public static List<RecordTransformer> getDefaultTransformers(TableConfig tableConfig, Schema schema) { return Stream.of(new ExpressionTransformer(tableConfig, schema), new FilterTransformer(tableConfig), new SchemaConformingTransformer(tableConfig, schema), new DataTypeTransformer(tableConfig, schema), - new TimeValidationTransformer(tableConfig, schema), new SpecialValueTransformer(schema), - new NullValueTransformer(tableConfig, schema), new SanitizationTransformer(schema)).filter(t -> !t.isNoOp()) + new TimeValidationTransformer(tableConfig, schema), new NullValueTransformer(tableConfig, schema), + new SpecialValueTransformer(schema), new SanitizationTransformer(schema)).filter(t -> !t.isNoOp()) .collect(Collectors.toList()); } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java index 26df94933a..4cfbbc8ce9 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java @@ -18,9 +18,7 @@ */ package org.apache.pinot.segment.local.recordtransformer; -import java.util.ArrayList; import java.util.HashSet; -import java.util.List; import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.data.FieldSpec.DataType; import org.apache.pinot.spi.data.Schema; @@ -67,10 +65,10 @@ public class SpecialValueTransformer implements RecordTransformer { private Object transformNaN(Object value) { if ((value instanceof Float) && ((Float) value).isNaN()) { LOGGER.info("Float.NaN detected, converting to default null."); - value = null; + value = FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT; } else if ((value instanceof Double) && ((Double) value).isNaN()) { LOGGER.info("Double.NaN detected, converting to default null."); - value = null; + value = FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE; } return value; } @@ -88,16 +86,12 @@ public class SpecialValueTransformer implements RecordTransformer { // Multi-valued column. Object[] values = (Object[]) value; int numValues = values.length; - List<Object> negativeZeroNanSanitizedValues = new ArrayList<>(numValues); - int numberOfElements = values.length; - for (Object o : values) { - Object zeroTransformedValue = transformNegativeZero(o); - Object nanTransformedValue = transformNaN(zeroTransformedValue); - if (nanTransformedValue != null) { - negativeZeroNanSanitizedValues.add(nanTransformedValue); + for (int i = 0; i < numValues; i++) { + if (values[i] != null) { + values[i] = transformNegativeZero(values[i]); + values[i] = transformNaN(values[i]); } } - record.putValue(element,negativeZeroNanSanitizedValues.toArray()); } else { // Single-valued column. Object zeroTransformedValue = transformNegativeZero(value); diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java index 2685f8b6da..6361963785 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java @@ -19,8 +19,6 @@ package org.apache.pinot.segment.local.recordtransformer; import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.concurrent.TimeUnit; @@ -288,12 +286,12 @@ public class RecordTransformerTest { Double.doubleToRawLongBits(0.0d)); assertEquals(record.getValue("mvFloatNegativeZero"), new Float[]{0.0f, 1.0f, 0.0f, 3.0f}); assertEquals(record.getValue("mvDoubleNegativeZero"), new Double[]{0.0d, 1.0d, 0.0d, 3.0d}); - assertNull(record.getValue("svFloatNaN")); - assertNull(record.getValue("svDoubleNaN")); + assertEquals(record.getValue("svFloatNaN"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT); + assertEquals(record.getValue("svDoubleNaN"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE); assertEquals(record.getValue("mvFloatNaN"), - new Float[]{0.0f, 2.0f}); + new Float[]{0.0f, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT, 2.0f}); assertEquals(record.getValue("mvDoubleNaN"), - new Double[]{0.0d, 2.0d}); + new Double[]{0.0d, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE, 2.0d}); } } @@ -304,8 +302,7 @@ public class RecordTransformerTest { // Build Schema and ingestionConfig in such a way that all the transformers are loaded. Schema schema = new Schema.SchemaBuilder().addSingleValueDimension("svInt", DataType.INT) .addSingleValueDimension("svDouble", DataType.DOUBLE) - .addSingleValueDimension("expressionTestColumn", DataType.INT) - .addSingleValueDimension("svNaN", DataType.FLOAT).addMultiValueDimension("mvNaN",DataType.FLOAT) + .addSingleValueDimension("expressionTestColumn", DataType.INT).addSingleValueDimension("svNaN", DataType.FLOAT) .addSingleValueDimension("emptyDimensionForNullValueTransformer", DataType.FLOAT) .addSingleValueDimension("svStringWithNullCharacters", DataType.STRING) .addSingleValueDimension("indexableExtras", DataType.JSON) @@ -330,8 +327,8 @@ public class RecordTransformerTest { List<RecordTransformer> expectedListOfTransformers = List.of(new ExpressionTransformer(tableConfig, schema), new FilterTransformer(tableConfig), new SchemaConformingTransformer(tableConfig, schema), new DataTypeTransformer(tableConfig, schema), - new TimeValidationTransformer(tableConfig, schema), new SpecialValueTransformer(schema), - new NullValueTransformer(tableConfig, schema), new SanitizationTransformer(schema)); + new TimeValidationTransformer(tableConfig, schema), new NullValueTransformer(tableConfig, schema), + new SpecialValueTransformer(schema), new SanitizationTransformer(schema)); // Check that the number of current transformers match the expected number of transformers. assertEquals(currentListOfTransformers.size(), NUMBER_OF_TRANSFORMERS); @@ -352,7 +349,6 @@ public class RecordTransformerTest { // Data for SpecialValue Transformer. record.putValue("svNaN", Float.NaN); - record.putValue("mvNaN",new Float[]{1.0f,Float.NaN,2.0f}); // Data for sanitization transformer. record.putValue("svStringWithNullCharacters", "1\0002\0003"); @@ -649,10 +645,10 @@ public class RecordTransformerTest { assertEquals(record.getValue("svFloatNaN"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT); assertEquals(record.getValue("svDoubleNaN"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE); assertEquals(record.getValue("mvFloatNaN"), - new Float[]{0.0f, 2.0f}); + new Float[]{0.0f, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT, 2.0f}); assertEquals(record.getValue("mvDoubleNaN"), - new Double[]{0.0d, 2.0d}); - assertEquals(new ArrayList<>(record.getNullValueFields()), new ArrayList<>(Arrays.asList("svFloatNaN","svDoubleNaN"))); + new Double[]{0.0d, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE, 2.0d}); + assertTrue(record.getNullValueFields().isEmpty()); } // Test empty record --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
