This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 0a8d574801 Canonicalize BigDecimal values during ingestion (#14958)
0a8d574801 is described below

commit 0a8d574801e56f1a47cdc6fbf8d3c7533955f2e6
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Fri Jan 31 17:15:32 2025 -0800

    Canonicalize BigDecimal values during ingestion (#14958)
---
 .../function/BaseTransformFunctionTest.java        |   3 +-
 .../recordtransformer/SpecialValueTransformer.java | 135 ++++++++--------
 .../recordtransformer/RecordTransformerTest.java   |  90 ++++++-----
 .../apache/pinot/spi/data/DimensionFieldSpec.java  |   2 +-
 .../java/org/apache/pinot/spi/data/FieldSpec.java  | 176 ++++++++++++---------
 5 files changed, 225 insertions(+), 181 deletions(-)

diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java
index 17816ae6b3..6ce876765d 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java
@@ -153,7 +153,8 @@ public abstract class BaseTransformFunctionTest {
       _longSVValues[i] = RANDOM.nextLong();
       _floatSVValues[i] = _intSVValues[i] * RANDOM.nextFloat();
       _doubleSVValues[i] = _intSVValues[i] * RANDOM.nextDouble();
-      _bigDecimalSVValues[i] = 
BigDecimal.valueOf(RANDOM.nextDouble()).multiply(BigDecimal.valueOf(_intSVValues[i]));
+      _bigDecimalSVValues[i] =
+          
BigDecimal.valueOf(RANDOM.nextDouble()).multiply(BigDecimal.valueOf(_intSVValues[i])).stripTrailingZeros();
       _stringSVValues[i] = df.format(_intSVValues[i] * RANDOM.nextDouble());
       _jsonSVValues[i] = String.format(
           "{\"intVal\":%s, \"longVal\":%s, \"floatVal\":%s, \"doubleVal\":%s, 
\"bigDecimalVal\":%s, "
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java
index be3b3a3abe..93e15fab2e 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java
@@ -18,112 +18,123 @@
  */
 package org.apache.pinot.segment.local.recordtransformer;
 
-import com.google.common.annotations.VisibleForTesting;
+import java.math.BigDecimal;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
+import javax.annotation.Nullable;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.FieldSpec.DataType;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.data.readers.GenericRow;
 import org.apache.pinot.spi.recordtransformer.RecordTransformer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 
 /**
  * The {@code SpecialValueTransformer} class will transform special values 
according to the following rules:
  * <ul>
- *   <li>Negative zero (-0.0) should be converted to 0.0</li>
- *   <li>NaN should be converted to default null</li>
+ *   <li>
+ *     For FLOAT and DOUBLE:
+ *     <ul>
+ *       <li>Negative zero (-0.0) should be converted to 0.0</li>
+ *       <li>NaN should be converted to default null</li>
+ *     </ul>
+ *   </li>
+ *   <li>
+ *     For BIG_DECIMAL:
+ *     <ul>
+ *       <li>Strip trailing zeros</li>
+ *     </ul>
+ *   </li>
  * </ul>
+ * <p>This transformation is required to ensure that the value is equal to 
itself, and the ordering of the values is
+ * consistent with equals. This is required for certain data structures (e.g. 
sorted map) and algorithm (e.g. binary
+ * search) to work correctly. Read more about it in {@link Comparable}.
  * <p>NOTE: should put this after the {@link DataTypeTransformer} so that we 
already have the values complying
  * with the schema before handling special values and before {@link 
NullValueTransformer} so that it transforms
  * all the null values properly.
  */
 public class SpecialValueTransformer implements RecordTransformer {
+  private final static int NEGATIVE_ZERO_FLOAT_BITS = 
Float.floatToRawIntBits(-0.0f);
+  private final static long NEGATIVE_ZERO_DOUBLE_BITS = 
Double.doubleToLongBits(-0.0d);
 
-  private static final Logger LOGGER = 
LoggerFactory.getLogger(SpecialValueTransformer.class);
-  private final HashSet<String> _specialValuesKeySet = new HashSet<>();
-  private int _negativeZeroConversionCount = 0;
-  private int _nanConversionCount = 0;
+  private final Set<String> _columnsToCheck = new HashSet<>();
 
   public SpecialValueTransformer(Schema schema) {
     for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
-      if (!fieldSpec.isVirtualColumn() && (fieldSpec.getDataType() == 
DataType.FLOAT
-          || fieldSpec.getDataType() == DataType.DOUBLE)) {
-        _specialValuesKeySet.add(fieldSpec.getName());
+      if (!fieldSpec.isVirtualColumn()) {
+        DataType dataType = fieldSpec.getDataType();
+        if (dataType == DataType.FLOAT || dataType == DataType.DOUBLE
+            || (dataType == DataType.BIG_DECIMAL && 
!fieldSpec.isAllowTrailingZeros())) {
+          _columnsToCheck.add(fieldSpec.getName());
+        }
       }
     }
   }
 
-  private Object transformNegativeZero(Object value) {
-    if ((value instanceof Float) && (Float.floatToRawIntBits((float) value) == 
Float.floatToRawIntBits(-0.0f))) {
-      value = 0.0f;
-      _negativeZeroConversionCount++;
-    } else if ((value instanceof Double) && (Double.doubleToLongBits((double) 
value) == Double.doubleToLongBits(
-        -0.0d))) {
-      value = 0.0d;
-      _negativeZeroConversionCount++;
-    }
-    return value;
-  }
-
-  private Object transformNaN(Object value) {
-    if ((value instanceof Float) && ((Float) value).isNaN()) {
-      value = null;
-      _nanConversionCount++;
-    } else if ((value instanceof Double) && ((Double) value).isNaN()) {
-      _nanConversionCount++;
-      value = null;
-    }
-    return value;
-  }
-
   @Override
   public boolean isNoOp() {
-    return _specialValuesKeySet.isEmpty();
+    return _columnsToCheck.isEmpty();
   }
 
   @Override
   public GenericRow transform(GenericRow record) {
-    for (String element : _specialValuesKeySet) {
-      Object value = record.getValue(element);
+    for (String column : _columnsToCheck) {
+      Object value = record.getValue(column);
       if (value instanceof Object[]) {
         // Multi-valued column.
         Object[] values = (Object[]) value;
-        int numValues = values.length;
-        List<Object> negativeZeroNanSanitizedValues = new 
ArrayList<>(numValues);
-        for (Object o : values) {
-          Object zeroTransformedValue = transformNegativeZero(o);
-          Object nanTransformedValue = transformNaN(zeroTransformedValue);
-          if (nanTransformedValue != null) {
-            negativeZeroNanSanitizedValues.add(nanTransformedValue);
+        List<Object> transformedValues = new ArrayList<>(values.length);
+        boolean transformed = false;
+        for (Object v : values) {
+          Object transformedValue = transformValue(v);
+          if (transformedValue != v) {
+            transformed = true;
+          }
+          if (transformedValue != null) {
+            transformedValues.add(transformedValue);
+          }
+          if (transformed) {
+            record.putValue(column, !transformedValues.isEmpty() ? 
transformedValues.toArray() : null);
           }
         }
-        record.putValue(element, negativeZeroNanSanitizedValues.toArray());
-      } else {
+      } else if (value != null) {
         // Single-valued column.
-        Object zeroTransformedValue = transformNegativeZero(value);
-        Object nanTransformedValue = transformNaN(zeroTransformedValue);
-        if (nanTransformedValue != value) {
-          record.putValue(element, nanTransformedValue);
+        Object transformedValue = transformValue(value);
+        if (transformedValue != value) {
+          record.putValue(column, transformedValue);
         }
       }
     }
-    if (_negativeZeroConversionCount > 0 || _nanConversionCount > 0) {
-      LOGGER.debug("Converted {} -0.0s to 0.0 and {} NaNs to null", 
_negativeZeroConversionCount, _nanConversionCount);
-    }
     return record;
   }
 
-  @VisibleForTesting
-  int getNegativeZeroConversionCount() {
-    return _negativeZeroConversionCount;
-  }
-
-  @VisibleForTesting
-  int getNanConversionCount() {
-    return _nanConversionCount;
+  @Nullable
+  private Object transformValue(Object value) {
+    if (value instanceof Float) {
+      Float floatValue = (Float) value;
+      if (floatValue.isNaN()) {
+        return null;
+      }
+      if (Float.floatToRawIntBits(floatValue) == NEGATIVE_ZERO_FLOAT_BITS) {
+        return 0.0f;
+      }
+    } else if (value instanceof Double) {
+      Double doubleValue = (Double) value;
+      if (doubleValue.isNaN()) {
+        return null;
+      }
+      if (Double.doubleToRawLongBits(doubleValue) == 
NEGATIVE_ZERO_DOUBLE_BITS) {
+        return 0.0d;
+      }
+    } else if (value instanceof BigDecimal) {
+      BigDecimal bigDecimalValue = (BigDecimal) value;
+      BigDecimal stripped = bigDecimalValue.stripTrailingZeros();
+      if (!stripped.equals(bigDecimalValue)) {
+        return stripped;
+      }
+    }
+    return value;
   }
 }
diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
index fb2d604ce9..f220385dd4 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
@@ -18,6 +18,7 @@
  */
 package org.apache.pinot.segment.local.recordtransformer;
 
+import java.math.BigDecimal;
 import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -48,23 +49,35 @@ import static org.testng.Assert.*;
 public class RecordTransformerTest {
   private static final Schema SCHEMA = new Schema.SchemaBuilder()
       // For data type conversion
-      .addSingleValueDimension("svInt", 
DataType.INT).addSingleValueDimension("svLong", DataType.LONG)
-      .addSingleValueDimension("svFloat", 
DataType.FLOAT).addSingleValueDimension("svDouble", DataType.DOUBLE)
-      .addSingleValueDimension("svBoolean", 
DataType.BOOLEAN).addSingleValueDimension("svTimestamp", DataType.TIMESTAMP)
-      .addSingleValueDimension("svBytes", 
DataType.BYTES).addMultiValueDimension("mvInt", DataType.INT)
-      .addSingleValueDimension("svJson", 
DataType.JSON).addMultiValueDimension("mvLong", DataType.LONG)
-      .addMultiValueDimension("mvFloat", 
DataType.FLOAT).addMultiValueDimension("mvDouble", DataType.DOUBLE)
+      .addSingleValueDimension("svInt", DataType.INT)
+      .addSingleValueDimension("svLong", DataType.LONG)
+      .addSingleValueDimension("svFloat", DataType.FLOAT)
+      .addSingleValueDimension("svDouble", DataType.DOUBLE)
+      .addSingleValueDimension("svBoolean", DataType.BOOLEAN)
+      .addSingleValueDimension("svTimestamp", DataType.TIMESTAMP)
+      .addSingleValueDimension("svBytes", DataType.BYTES)
+      .addMultiValueDimension("mvInt", DataType.INT)
+      .addSingleValueDimension("svJson", DataType.JSON)
+      .addMultiValueDimension("mvLong", DataType.LONG)
+      .addMultiValueDimension("mvFloat", DataType.FLOAT)
+      .addMultiValueDimension("mvDouble", DataType.DOUBLE)
       // For sanitation
       .addSingleValueDimension("svStringWithNullCharacters", DataType.STRING)
       .addSingleValueDimension("svStringWithLengthLimit", DataType.STRING)
-      .addMultiValueDimension("mvString1", 
DataType.STRING).addMultiValueDimension("mvString2", DataType.STRING)
+      .addMultiValueDimension("mvString1", DataType.STRING)
+      .addMultiValueDimension("mvString2", DataType.STRING)
       // For negative zero and NaN conversions
       .addSingleValueDimension("svFloatNegativeZero", DataType.FLOAT)
       .addMultiValueDimension("mvFloatNegativeZero", DataType.FLOAT)
       .addSingleValueDimension("svDoubleNegativeZero", DataType.DOUBLE)
       .addMultiValueDimension("mvDoubleNegativeZero", DataType.DOUBLE)
-      .addSingleValueDimension("svFloatNaN", 
DataType.FLOAT).addMultiValueDimension("mvFloatNaN", DataType.FLOAT)
-      .addSingleValueDimension("svDoubleNaN", 
DataType.DOUBLE).addMultiValueDimension("mvDoubleNaN", DataType.DOUBLE)
+      .addSingleValueDimension("svFloatNaN", DataType.FLOAT)
+      .addMultiValueDimension("mvFloatNaN", DataType.FLOAT)
+      .addSingleValueDimension("svDoubleNaN", DataType.DOUBLE)
+      .addMultiValueDimension("mvDoubleNaN", DataType.DOUBLE)
+      .addMetric("bigDecimalZero", DataType.BIG_DECIMAL)
+      .addMetric("bigDecimalZeroWithPoint", DataType.BIG_DECIMAL)
+      .addMetric("bigDecimalZeroWithExponent", DataType.BIG_DECIMAL)
       .build();
   private static final TableConfig TABLE_CONFIG =
       new 
TableConfigBuilder(TableType.OFFLINE).setTableName("testTable").build();
@@ -105,6 +118,9 @@ public class RecordTransformerTest {
     record.putValue("svDoubleNaN", Double.NaN);
     record.putValue("mvFloatNaN", new Float[]{-0.0f, Float.NaN, 2.0f});
     record.putValue("mvDoubleNaN", new Double[]{-0.0d, Double.NaN, 2.0d});
+    record.putValue("bigDecimalZero", new BigDecimal("0"));
+    record.putValue("bigDecimalZeroWithPoint", new BigDecimal("0.0"));
+    record.putValue("bigDecimalZeroWithExponent", new BigDecimal("0E-18"));
     return record;
   }
 
@@ -187,7 +203,8 @@ public class RecordTransformerTest {
   @Test
   public void testDataTypeTransformerIncorrectDataTypes() {
     Schema schema = new 
Schema.SchemaBuilder().addSingleValueDimension("svInt", DataType.BYTES)
-        .addSingleValueDimension("svLong", DataType.LONG).build();
+        .addSingleValueDimension("svLong", DataType.LONG)
+        .build();
 
     RecordTransformer transformer = new DataTypeTransformer(TABLE_CONFIG, 
schema);
     GenericRow record = getRecord();
@@ -365,8 +382,7 @@ public class RecordTransformerTest {
     // scenario where json field exceeds max length and fieldSpec 
maxLengthExceedStrategy is to NO_ACTION
     schema = SCHEMA;
     schema.getFieldSpecFor("svJson").setMaxLength(10);
-    schema.getFieldSpecFor("svJson")
-        
.setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.NO_ACTION);
+    
schema.getFieldSpecFor("svJson").setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.NO_ACTION);
     transformer = new SanitizationTransformer(schema);
     record = getRecord();
     for (int i = 0; i < NUM_ROUNDS; i++) {
@@ -378,8 +394,7 @@ public class RecordTransformerTest {
     // scenario where json field exceeds max length and fieldSpec 
maxLengthExceedStrategy is to TRIM_LENGTH
     schema = SCHEMA;
     schema.getFieldSpecFor("svJson").setMaxLength(10);
-    schema.getFieldSpecFor("svJson")
-        
.setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.TRIM_LENGTH);
+    
schema.getFieldSpecFor("svJson").setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.TRIM_LENGTH);
     transformer = new SanitizationTransformer(schema);
     record = getRecord();
     for (int i = 0; i < NUM_ROUNDS; i++) {
@@ -417,17 +432,16 @@ public class RecordTransformerTest {
         record = transformer.transform(record);
       } catch (Exception e) {
         assertTrue(e instanceof IllegalStateException);
-        assertEquals(e.getMessage(), "Throwing exception as value: "
-            + "{\"first\": \"daffy\", \"last\": \"duck\"} for column "
-            + "svJson exceeds configured max length 10.");
+        assertEquals(e.getMessage(),
+            "Throwing exception as value: {\"first\": \"daffy\", \"last\": 
\"duck\"} for column svJson exceeds "
+                + "configured max length 10.");
       }
     }
 
     // scenario where bytes field exceeds max length and fieldSpec 
maxLengthExceedStrategy is to NO_ACTION
     schema = SCHEMA;
     schema.getFieldSpecFor("svBytes").setMaxLength(2);
-    schema.getFieldSpecFor("svBytes")
-        
.setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.NO_ACTION);
+    
schema.getFieldSpecFor("svBytes").setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.NO_ACTION);
     transformer = new SanitizationTransformer(schema);
     record = getRecord();
     for (int i = 0; i < NUM_ROUNDS; i++) {
@@ -439,8 +453,7 @@ public class RecordTransformerTest {
     // scenario where bytes field exceeds max length and fieldSpec 
maxLengthExceedStrategy is to TRIM_LENGTH
     schema = SCHEMA;
     schema.getFieldSpecFor("svBytes").setMaxLength(2);
-    schema.getFieldSpecFor("svBytes")
-        
.setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.TRIM_LENGTH);
+    
schema.getFieldSpecFor("svBytes").setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.TRIM_LENGTH);
     transformer = new SanitizationTransformer(schema);
     record = getRecord();
     for (int i = 0; i < NUM_ROUNDS; i++) {
@@ -468,8 +481,7 @@ public class RecordTransformerTest {
     // scenario where bytes field exceeds max length and fieldSpec 
maxLengthExceedStrategy is to ERROR
     schema = SCHEMA;
     schema.getFieldSpecFor("svBytes").setMaxLength(2);
-    schema.getFieldSpecFor("svBytes")
-        .setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.ERROR);
+    
schema.getFieldSpecFor("svBytes").setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.ERROR);
     transformer = new SanitizationTransformer(schema);
     record = getRecord();
     for (int i = 0; i < NUM_ROUNDS; i++) {
@@ -477,8 +489,8 @@ public class RecordTransformerTest {
         record = transformer.transform(record);
       } catch (Exception e) {
         assertTrue(e instanceof IllegalStateException);
-        assertEquals(e.getMessage(), "Throwing exception as value: 7b7b for 
column svBytes "
-            + "exceeds configured max length 2.");
+        assertEquals(e.getMessage(),
+            "Throwing exception as value: 7b7b for column svBytes exceeds 
configured max length 2.");
       }
     }
   }
@@ -500,8 +512,9 @@ public class RecordTransformerTest {
       assertNull(record.getValue("svDoubleNaN"));
       assertEquals(record.getValue("mvFloatNaN"), new Float[]{0.0f, 2.0f});
       assertEquals(record.getValue("mvDoubleNaN"), new Double[]{0.0d, 2.0d});
-      assertEquals(transformer.getNegativeZeroConversionCount(), 6);
-      assertEquals(transformer.getNanConversionCount(), 4);
+      assertEquals(record.getValue("bigDecimalZero"), BigDecimal.ZERO);
+      assertEquals(record.getValue("bigDecimalZeroWithPoint"), 
BigDecimal.ZERO);
+      assertEquals(record.getValue("bigDecimalZeroWithExponent"), 
BigDecimal.ZERO);
     }
   }
 
@@ -513,21 +526,24 @@ public class RecordTransformerTest {
     Schema schema = new 
Schema.SchemaBuilder().addSingleValueDimension("svInt", DataType.INT)
         .addSingleValueDimension("svDouble", DataType.DOUBLE)
         .addSingleValueDimension("expressionTestColumn", DataType.INT)
-        .addSingleValueDimension("svNaN", 
DataType.FLOAT).addMultiValueDimension("mvNaN", DataType.FLOAT)
+        .addSingleValueDimension("svNaN", DataType.FLOAT)
+        .addMultiValueDimension("mvNaN", DataType.FLOAT)
         .addSingleValueDimension("emptyDimensionForNullValueTransformer", 
DataType.FLOAT)
         .addSingleValueDimension("svStringNull", DataType.STRING)
         .addSingleValueDimension("indexableExtras", DataType.JSON)
-        .addDateTime("timeCol", DataType.TIMESTAMP, 
"1:MILLISECONDS:TIMESTAMP", "1:MILLISECONDS").build();
+        .addDateTime("timeCol", DataType.TIMESTAMP, 
"1:MILLISECONDS:TIMESTAMP", "1:MILLISECONDS")
+        .build();
 
     IngestionConfig ingestionConfig = new IngestionConfig();
-    TableConfig tableConfig =
-        new 
TableConfigBuilder(TableType.OFFLINE).setTableName("testTable").setIngestionConfig(ingestionConfig)
-            .setTimeColumnName("timeCol").build();
+    TableConfig tableConfig = new 
TableConfigBuilder(TableType.OFFLINE).setTableName("testTable")
+        .setIngestionConfig(ingestionConfig)
+        .setTimeColumnName("timeCol")
+        .build();
     ingestionConfig.setFilterConfig(new FilterConfig("svInt = 123 AND svDouble 
<= 200"));
     ingestionConfig.setTransformConfigs(List.of(new 
TransformConfig("expressionTestColumn", "plus(x,10)")));
     ingestionConfig.setSchemaConformingTransformerConfig(
-        new SchemaConformingTransformerConfig(null, "indexableExtras", false, 
null, null, null, null, null,
-            null, null, null, null, null, null, null, null, null, null, null, 
null, null, null));
+        new SchemaConformingTransformerConfig(null, "indexableExtras", false, 
null, null, null, null, null, null, null,
+            null, null, null, null, null, null, null, null, null, null, null, 
null));
     ingestionConfig.setRowTimeValueCheck(true);
     ingestionConfig.setContinueOnError(false);
 
@@ -858,10 +874,8 @@ public class RecordTransformerTest {
       assertEquals(record.getValue("mvDoubleNegativeZero"), new Double[]{0.0d, 
1.0d, 0.0d, 3.0d});
       assertEquals(record.getValue("svFloatNaN"), 
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT);
       assertEquals(record.getValue("svDoubleNaN"), 
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE);
-      assertEquals(record.getValue("mvFloatNaN"),
-          new Float[]{0.0f, 2.0f});
-      assertEquals(record.getValue("mvDoubleNaN"),
-          new Double[]{0.0d, 2.0d});
+      assertEquals(record.getValue("mvFloatNaN"), new Float[]{0.0f, 2.0f});
+      assertEquals(record.getValue("mvDoubleNaN"), new Double[]{0.0d, 2.0d});
       assertEquals(new ArrayList<>(record.getNullValueFields()),
           new ArrayList<>(Arrays.asList("svFloatNaN", "svDoubleNaN")));
     }
diff --git 
a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DimensionFieldSpec.java 
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DimensionFieldSpec.java
index 0637abd7fa..c98fcd5cf3 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DimensionFieldSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DimensionFieldSpec.java
@@ -71,7 +71,7 @@ public final class DimensionFieldSpec extends FieldSpec {
   @Override
   public String toString() {
     return "< field type: DIMENSION, field name: " + _name + ", data type: " + 
_dataType + ", is single-value field: "
-        + _isSingleValueField + ", default null value: " + _defaultNullValue + 
", max length exceed strategy: "
+        + _singleValueField + ", default null value: " + _defaultNullValue + 
", max length exceed strategy: "
         + _maxLengthExceedStrategy + " >";
   }
 }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java 
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
index b56ef1c829..5d29412f5e 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
@@ -31,6 +31,7 @@ import java.sql.Timestamp;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import javax.annotation.Nullable;
 import org.apache.pinot.spi.utils.BooleanUtils;
 import org.apache.pinot.spi.utils.ByteArray;
@@ -42,17 +43,18 @@ import org.apache.pinot.spi.utils.TimestampUtils;
 
 /**
  * The <code>FieldSpec</code> class contains all specs related to any field 
(column) in {@link Schema}.
- * <p>There are 3 types of <code>FieldSpec</code>:
- * {@link DimensionFieldSpec}, {@link MetricFieldSpec}, {@link TimeFieldSpec}
  * <p>Specs stored are as followings:
- * <p>- <code>Name</code>: name of the field.
- * <p>- <code>DataType</code>: type of the data stored (e.g. INTEGER, LONG, 
FLOAT, DOUBLE, STRING).
- * <p>- <code>IsSingleValueField</code>: single-value or multi-value field.
- * <p>- <code>DefaultNullValue</code>: when no value found for this field, use 
this value. Stored in string format.
- * <p>- <code>VirtualColumnProvider</code>: the virtual column provider to use 
for this field.
- * <p>- <code>NotNull</code>: whether the column accepts nulls or not. 
Defaults to false.
- * <p>- <code>MaxLength</code>: the maximum length of the string column. 
Defaults to 512.
- * <p>- <code>MaxLengthExceedStrategy</code>: the strategy to handle the case 
when the string column exceeds the max
+ * <ul>
+ *   <li>"name": name of the field.</li>
+ *   <li>"dataType": type of the data stored (e.g. INTEGER, LONG, FLOAT, 
DOUBLE, STRING).</li>
+ *   <li>"singleValueField": single-value or multi-value field.</li>
+ *   <li>"notNull": whether the column accepts nulls or not. Defaults to false 
(accepts nulls).</li>
+ *   <li>"maxLength": maximum length of the column. Defaults to 512.</li>
+ *   <li>"maxLengthExceedStrategy": the strategy to handle the case when the 
column exceeds the max length.</li>
+ *   <li>"allowTrailingZeros": whether to allow trailing zeros for a 
BIG_DECIMAL column.</li>
+ *   <li>"defaultNullValue": when no value found for this field, use this 
value.</li>
+ *   <li>"virtualColumnProvider": the virtual column provider to use for this 
field.</li>
+ * </ul>
  */
 @SuppressWarnings("unused")
 @JsonTypeInfo(
@@ -126,15 +128,19 @@ public abstract class FieldSpec implements 
Comparable<FieldSpec>, Serializable {
 
   protected String _name;
   protected DataType _dataType;
-  protected boolean _isSingleValueField = true;
-  protected boolean _notNull = false;
+  protected boolean _singleValueField = true;
+  protected boolean _notNull;
 
-  // NOTE: This only applies to STRING column, which is the max number of 
characters
-  private int _maxLength = DEFAULT_MAX_LENGTH;
-
-  // NOTE: This only applies to STRING column during {@link 
SanitizationTransformer}
+  // Max length applies to STRING, JSON, BYTES columns, and is enforced in 
{@link SanitizationTransformer}.
+  protected int _maxLength = DEFAULT_MAX_LENGTH;
   protected MaxLengthExceedStrategy _maxLengthExceedStrategy;
 
+  // Whether to allow trailing zeros for BIG_DECIMAL columns. Trailing zeros 
are stripped by default in
+  // {@link SpecialValueTransformer}. If this flag is set to true, trailing 
zeros will be preserved, and it is users'
+  // responsibility to ensure there are no big decimal values with same value 
but different trailing zeros. Read more
+  // about why trailing zeros need to be stripped in {@link 
SpecialValueTransformer}.
+  protected boolean _allowTrailingZeros;
+
   protected Object _defaultNullValue;
   private transient String _stringDefaultNullValue;
 
@@ -165,7 +171,7 @@ public abstract class FieldSpec implements 
Comparable<FieldSpec>, Serializable {
       @Nullable Object defaultNullValue, @Nullable MaxLengthExceedStrategy 
maxLengthExceedStrategy) {
     _name = name;
     _dataType = dataType;
-    _isSingleValueField = isSingleValueField;
+    _singleValueField = isSingleValueField;
     _maxLength = maxLength;
     setDefaultNullValue(defaultNullValue);
     _maxLengthExceedStrategy = maxLengthExceedStrategy;
@@ -193,12 +199,37 @@ public abstract class FieldSpec implements 
Comparable<FieldSpec>, Serializable {
   }
 
   public boolean isSingleValueField() {
-    return _isSingleValueField;
+    return _singleValueField;
   }
 
   // Required by JSON de-serializer. DO NOT REMOVE.
   public void setSingleValueField(boolean isSingleValueField) {
-    _isSingleValueField = isSingleValueField;
+    _singleValueField = isSingleValueField;
+  }
+
+  /**
+   * Returns whether the column is nullable or not.
+   */
+  @JsonIgnore
+  public boolean isNullable() {
+    return !_notNull;
+  }
+
+  /**
+   * @see #isNullable()
+   */
+  @JsonIgnore
+  public void setNullable(Boolean nullable) {
+    _notNull = !nullable;
+  }
+
+  public boolean isNotNull() {
+    return _notNull;
+  }
+
+  // Required by JSON de-serializer. DO NOT REMOVE.
+  public void setNotNull(boolean notNull) {
+    _notNull = notNull;
   }
 
   public int getMaxLength() {
@@ -220,22 +251,13 @@ public abstract class FieldSpec implements 
Comparable<FieldSpec>, Serializable {
     _maxLengthExceedStrategy = maxLengthExceedStrategy;
   }
 
-  public String getVirtualColumnProvider() {
-    return _virtualColumnProvider;
-  }
-
-  public void setVirtualColumnProvider(String virtualColumnProvider) {
-    _virtualColumnProvider = virtualColumnProvider;
+  public boolean isAllowTrailingZeros() {
+    return _allowTrailingZeros;
   }
 
-  /**
-   * Returns whether the column is virtual. Virtual columns are constructed 
while loading the segment, thus do not exist
-   * in the record, nor should be persisted to the disk.
-   * <p>Identify a column as virtual if the virtual column provider is 
configured.
-   */
-  @JsonIgnore
-  public boolean isVirtualColumn() {
-    return _virtualColumnProvider != null && !_virtualColumnProvider.isEmpty();
+  // Required by JSON de-serializer. DO NOT REMOVE.
+  public void setAllowTrailingZeros(boolean allowTrailingZeros) {
+    _allowTrailingZeros = allowTrailingZeros;
   }
 
   public Object getDefaultNullValue() {
@@ -248,7 +270,7 @@ public abstract class FieldSpec implements 
Comparable<FieldSpec>, Serializable {
 
   /**
    * Helper method to return the String value for the given object.
-   * This is required as not all data types have a toString() (eg byte[]).
+   * This is required as not all data types have a toString() (e.g. byte[]).
    *
    * @param value Value for which String value needs to be returned
    * @return String value for the object.
@@ -350,38 +372,32 @@ public abstract class FieldSpec implements 
Comparable<FieldSpec>, Serializable {
     return _transformFunction;
   }
 
-  // Required by JSON de-serializer. DO NOT REMOVE.
-
   /**
    * Deprecated. Use TableConfig -> IngestionConfig -> TransformConfigs
    */
+  // Required by JSON de-serializer. DO NOT REMOVE.
   @Deprecated
   public void setTransformFunction(@Nullable String transformFunction) {
     _transformFunction = transformFunction;
   }
 
-  /**
-   * Returns whether the column is nullable or not.
-   */
-  @JsonIgnore
-  public boolean isNullable() {
-    return !_notNull;
+  public String getVirtualColumnProvider() {
+    return _virtualColumnProvider;
+  }
+
+  // Required by JSON de-serializer. DO NOT REMOVE.
+  public void setVirtualColumnProvider(String virtualColumnProvider) {
+    _virtualColumnProvider = virtualColumnProvider;
   }
 
   /**
-   * @see #isNullable()
+   * Returns whether the column is virtual. Virtual columns are constructed 
while loading the segment, thus do not exist
+   * in the record, nor should be persisted to the disk.
+   * <p>Identify a column as virtual if the virtual column provider is 
configured.
    */
   @JsonIgnore
-  public void setNullable(Boolean nullable) {
-    _notNull = !nullable;
-  }
-
-  public boolean isNotNull() {
-    return _notNull;
-  }
-
-  public void setNotNull(boolean notNull) {
-    _notNull = notNull;
+  public boolean isVirtualColumn() {
+    return _virtualColumnProvider != null && !_virtualColumnProvider.isEmpty();
   }
 
   /**
@@ -394,15 +410,26 @@ public abstract class FieldSpec implements 
Comparable<FieldSpec>, Serializable {
     jsonObject.put("name", _name);
     jsonObject.put("dataType", _dataType.name());
     jsonObject.put("fieldType", getFieldType().toString());
-    if (!_isSingleValueField) {
+    if (!_singleValueField) {
       jsonObject.put("singleValueField", false);
     }
+    if (_notNull) {
+      jsonObject.put("notNull", true);
+    }
     if (_maxLength != DEFAULT_MAX_LENGTH) {
       jsonObject.put("maxLength", _maxLength);
     }
+    if (_maxLengthExceedStrategy != null) {
+      jsonObject.put("maxLengthExceedStrategy", 
_maxLengthExceedStrategy.name());
+    }
+    if (_allowTrailingZeros) {
+      jsonObject.put("allowTrailingZeros", true);
+    }
     appendDefaultNullValue(jsonObject);
     appendTransformFunction(jsonObject);
-    jsonObject.put("notNull", _notNull);
+    if (_virtualColumnProvider != null) {
+      jsonObject.put("virtualColumnProvider", _virtualColumnProvider);
+    }
     return jsonObject;
   }
 
@@ -457,39 +484,31 @@ public abstract class FieldSpec implements 
Comparable<FieldSpec>, Serializable {
     }
   }
 
-  @SuppressWarnings("EqualsWhichDoesntCheckParameterClass")
   @Override
   public boolean equals(Object o) {
-    if (EqualityUtils.isSameReference(this, o)) {
+    if (this == o) {
       return true;
     }
-
-    if (EqualityUtils.isNullOrNotSameClass(this, o)) {
+    if (o == null || getClass() != o.getClass()) {
       return false;
     }
-
     FieldSpec that = (FieldSpec) o;
-    return EqualityUtils.isEqual(_name, that._name) && 
EqualityUtils.isEqual(_dataType, that._dataType) && EqualityUtils
-        .isEqual(_isSingleValueField, that._isSingleValueField) && 
EqualityUtils
-        .isEqual(getStringValue(_defaultNullValue), 
getStringValue(that._defaultNullValue)) && EqualityUtils
-        .isEqual(_maxLength, that._maxLength) && 
EqualityUtils.isEqual(_transformFunction, that._transformFunction)
-        && EqualityUtils.isEqual(_maxLengthExceedStrategy, 
that._maxLengthExceedStrategy)
-        && EqualityUtils.isEqual(_virtualColumnProvider, 
that._virtualColumnProvider)
-        && EqualityUtils.isEqual(_notNull, that._notNull);
+    return _name.equals(that._name)
+        && _dataType == that._dataType
+        && _singleValueField == that._singleValueField
+        && _notNull == that._notNull
+        && _maxLength == that._maxLength
+        && _maxLengthExceedStrategy == that._maxLengthExceedStrategy
+        && _allowTrailingZeros == that._allowTrailingZeros
+        && 
getStringValue(_defaultNullValue).equals(getStringValue(that._defaultNullValue))
+        && Objects.equals(_transformFunction, that._transformFunction)
+        && Objects.equals(_virtualColumnProvider, that._virtualColumnProvider);
   }
 
   @Override
   public int hashCode() {
-    int result = EqualityUtils.hashCodeOf(_name);
-    result = EqualityUtils.hashCodeOf(result, _dataType);
-    result = EqualityUtils.hashCodeOf(result, _isSingleValueField);
-    result = EqualityUtils.hashCodeOf(result, 
getStringValue(_defaultNullValue));
-    result = EqualityUtils.hashCodeOf(result, _maxLength);
-    result = EqualityUtils.hashCodeOf(result, _maxLengthExceedStrategy);
-    result = EqualityUtils.hashCodeOf(result, _transformFunction);
-    result = EqualityUtils.hashCodeOf(result, _virtualColumnProvider);
-    result = EqualityUtils.hashCodeOf(result, _notNull);
-    return result;
+    return Objects.hash(_name, _dataType, _singleValueField, _notNull, 
_maxLength, _maxLengthExceedStrategy,
+        _allowTrailingZeros, getStringValue(_defaultNullValue), 
_transformFunction, _virtualColumnProvider);
   }
 
   /**
@@ -748,10 +767,9 @@ public abstract class FieldSpec implements 
Comparable<FieldSpec>, Serializable {
    * @return
    */
   public boolean isBackwardCompatibleWith(FieldSpec oldFieldSpec) {
-
     return EqualityUtils.isEqual(_name, oldFieldSpec._name)
         && EqualityUtils.isEqual(_dataType, oldFieldSpec._dataType)
-        && EqualityUtils.isEqual(_isSingleValueField, 
oldFieldSpec._isSingleValueField);
+        && EqualityUtils.isEqual(_singleValueField, 
oldFieldSpec._singleValueField);
   }
 
   public static class FieldSpecMetadata {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to