This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 0a8d574801 Canonicalize BigDecimal values during ingestion (#14958)
0a8d574801 is described below
commit 0a8d574801e56f1a47cdc6fbf8d3c7533955f2e6
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Fri Jan 31 17:15:32 2025 -0800
Canonicalize BigDecimal values during ingestion (#14958)
---
.../function/BaseTransformFunctionTest.java | 3 +-
.../recordtransformer/SpecialValueTransformer.java | 135 ++++++++--------
.../recordtransformer/RecordTransformerTest.java | 90 ++++++-----
.../apache/pinot/spi/data/DimensionFieldSpec.java | 2 +-
.../java/org/apache/pinot/spi/data/FieldSpec.java | 176 ++++++++++++---------
5 files changed, 225 insertions(+), 181 deletions(-)
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java
index 17816ae6b3..6ce876765d 100644
---
a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java
+++
b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java
@@ -153,7 +153,8 @@ public abstract class BaseTransformFunctionTest {
_longSVValues[i] = RANDOM.nextLong();
_floatSVValues[i] = _intSVValues[i] * RANDOM.nextFloat();
_doubleSVValues[i] = _intSVValues[i] * RANDOM.nextDouble();
- _bigDecimalSVValues[i] =
BigDecimal.valueOf(RANDOM.nextDouble()).multiply(BigDecimal.valueOf(_intSVValues[i]));
+ _bigDecimalSVValues[i] =
+
BigDecimal.valueOf(RANDOM.nextDouble()).multiply(BigDecimal.valueOf(_intSVValues[i])).stripTrailingZeros();
_stringSVValues[i] = df.format(_intSVValues[i] * RANDOM.nextDouble());
_jsonSVValues[i] = String.format(
"{\"intVal\":%s, \"longVal\":%s, \"floatVal\":%s, \"doubleVal\":%s,
\"bigDecimalVal\":%s, "
diff --git
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java
index be3b3a3abe..93e15fab2e 100644
---
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java
+++
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java
@@ -18,112 +18,123 @@
*/
package org.apache.pinot.segment.local.recordtransformer;
-import com.google.common.annotations.VisibleForTesting;
+import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
+import java.util.Set;
+import javax.annotation.Nullable;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.FieldSpec.DataType;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.data.readers.GenericRow;
import org.apache.pinot.spi.recordtransformer.RecordTransformer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
/**
* The {@code SpecialValueTransformer} class will transform special values
according to the following rules:
* <ul>
- * <li>Negative zero (-0.0) should be converted to 0.0</li>
- * <li>NaN should be converted to default null</li>
+ * <li>
+ * For FLOAT and DOUBLE:
+ * <ul>
+ * <li>Negative zero (-0.0) should be converted to 0.0</li>
+ * <li>NaN should be converted to default null</li>
+ * </ul>
+ * </li>
+ * <li>
+ * For BIG_DECIMAL:
+ * <ul>
+ * <li>Strip trailing zeros</li>
+ * </ul>
+ * </li>
* </ul>
+ * <p>This transformation is required to ensure that the value is equal to
itself, and the ordering of the values is
+ * consistent with equals. This is required for certain data structures (e.g.
sorted map) and algorithm (e.g. binary
+ * search) to work correctly. Read more about it in {@link Comparable}.
* <p>NOTE: should put this after the {@link DataTypeTransformer} so that we
already have the values complying
* with the schema before handling special values and before {@link
NullValueTransformer} so that it transforms
* all the null values properly.
*/
public class SpecialValueTransformer implements RecordTransformer {
+ private final static int NEGATIVE_ZERO_FLOAT_BITS =
Float.floatToRawIntBits(-0.0f);
+ private final static long NEGATIVE_ZERO_DOUBLE_BITS =
Double.doubleToLongBits(-0.0d);
- private static final Logger LOGGER =
LoggerFactory.getLogger(SpecialValueTransformer.class);
- private final HashSet<String> _specialValuesKeySet = new HashSet<>();
- private int _negativeZeroConversionCount = 0;
- private int _nanConversionCount = 0;
+ private final Set<String> _columnsToCheck = new HashSet<>();
public SpecialValueTransformer(Schema schema) {
for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
- if (!fieldSpec.isVirtualColumn() && (fieldSpec.getDataType() ==
DataType.FLOAT
- || fieldSpec.getDataType() == DataType.DOUBLE)) {
- _specialValuesKeySet.add(fieldSpec.getName());
+ if (!fieldSpec.isVirtualColumn()) {
+ DataType dataType = fieldSpec.getDataType();
+ if (dataType == DataType.FLOAT || dataType == DataType.DOUBLE
+ || (dataType == DataType.BIG_DECIMAL &&
!fieldSpec.isAllowTrailingZeros())) {
+ _columnsToCheck.add(fieldSpec.getName());
+ }
}
}
}
- private Object transformNegativeZero(Object value) {
- if ((value instanceof Float) && (Float.floatToRawIntBits((float) value) ==
Float.floatToRawIntBits(-0.0f))) {
- value = 0.0f;
- _negativeZeroConversionCount++;
- } else if ((value instanceof Double) && (Double.doubleToLongBits((double)
value) == Double.doubleToLongBits(
- -0.0d))) {
- value = 0.0d;
- _negativeZeroConversionCount++;
- }
- return value;
- }
-
- private Object transformNaN(Object value) {
- if ((value instanceof Float) && ((Float) value).isNaN()) {
- value = null;
- _nanConversionCount++;
- } else if ((value instanceof Double) && ((Double) value).isNaN()) {
- _nanConversionCount++;
- value = null;
- }
- return value;
- }
-
@Override
public boolean isNoOp() {
- return _specialValuesKeySet.isEmpty();
+ return _columnsToCheck.isEmpty();
}
@Override
public GenericRow transform(GenericRow record) {
- for (String element : _specialValuesKeySet) {
- Object value = record.getValue(element);
+ for (String column : _columnsToCheck) {
+ Object value = record.getValue(column);
if (value instanceof Object[]) {
// Multi-valued column.
Object[] values = (Object[]) value;
- int numValues = values.length;
- List<Object> negativeZeroNanSanitizedValues = new
ArrayList<>(numValues);
- for (Object o : values) {
- Object zeroTransformedValue = transformNegativeZero(o);
- Object nanTransformedValue = transformNaN(zeroTransformedValue);
- if (nanTransformedValue != null) {
- negativeZeroNanSanitizedValues.add(nanTransformedValue);
+ List<Object> transformedValues = new ArrayList<>(values.length);
+ boolean transformed = false;
+ for (Object v : values) {
+ Object transformedValue = transformValue(v);
+ if (transformedValue != v) {
+ transformed = true;
+ }
+ if (transformedValue != null) {
+ transformedValues.add(transformedValue);
+ }
+ if (transformed) {
+ record.putValue(column, !transformedValues.isEmpty() ?
transformedValues.toArray() : null);
}
}
- record.putValue(element, negativeZeroNanSanitizedValues.toArray());
- } else {
+ } else if (value != null) {
// Single-valued column.
- Object zeroTransformedValue = transformNegativeZero(value);
- Object nanTransformedValue = transformNaN(zeroTransformedValue);
- if (nanTransformedValue != value) {
- record.putValue(element, nanTransformedValue);
+ Object transformedValue = transformValue(value);
+ if (transformedValue != value) {
+ record.putValue(column, transformedValue);
}
}
}
- if (_negativeZeroConversionCount > 0 || _nanConversionCount > 0) {
- LOGGER.debug("Converted {} -0.0s to 0.0 and {} NaNs to null",
_negativeZeroConversionCount, _nanConversionCount);
- }
return record;
}
- @VisibleForTesting
- int getNegativeZeroConversionCount() {
- return _negativeZeroConversionCount;
- }
-
- @VisibleForTesting
- int getNanConversionCount() {
- return _nanConversionCount;
+ @Nullable
+ private Object transformValue(Object value) {
+ if (value instanceof Float) {
+ Float floatValue = (Float) value;
+ if (floatValue.isNaN()) {
+ return null;
+ }
+ if (Float.floatToRawIntBits(floatValue) == NEGATIVE_ZERO_FLOAT_BITS) {
+ return 0.0f;
+ }
+ } else if (value instanceof Double) {
+ Double doubleValue = (Double) value;
+ if (doubleValue.isNaN()) {
+ return null;
+ }
+ if (Double.doubleToRawLongBits(doubleValue) ==
NEGATIVE_ZERO_DOUBLE_BITS) {
+ return 0.0d;
+ }
+ } else if (value instanceof BigDecimal) {
+ BigDecimal bigDecimalValue = (BigDecimal) value;
+ BigDecimal stripped = bigDecimalValue.stripTrailingZeros();
+ if (!stripped.equals(bigDecimalValue)) {
+ return stripped;
+ }
+ }
+ return value;
}
}
diff --git
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
index fb2d604ce9..f220385dd4 100644
---
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
+++
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
@@ -18,6 +18,7 @@
*/
package org.apache.pinot.segment.local.recordtransformer;
+import java.math.BigDecimal;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
@@ -48,23 +49,35 @@ import static org.testng.Assert.*;
public class RecordTransformerTest {
private static final Schema SCHEMA = new Schema.SchemaBuilder()
// For data type conversion
- .addSingleValueDimension("svInt",
DataType.INT).addSingleValueDimension("svLong", DataType.LONG)
- .addSingleValueDimension("svFloat",
DataType.FLOAT).addSingleValueDimension("svDouble", DataType.DOUBLE)
- .addSingleValueDimension("svBoolean",
DataType.BOOLEAN).addSingleValueDimension("svTimestamp", DataType.TIMESTAMP)
- .addSingleValueDimension("svBytes",
DataType.BYTES).addMultiValueDimension("mvInt", DataType.INT)
- .addSingleValueDimension("svJson",
DataType.JSON).addMultiValueDimension("mvLong", DataType.LONG)
- .addMultiValueDimension("mvFloat",
DataType.FLOAT).addMultiValueDimension("mvDouble", DataType.DOUBLE)
+ .addSingleValueDimension("svInt", DataType.INT)
+ .addSingleValueDimension("svLong", DataType.LONG)
+ .addSingleValueDimension("svFloat", DataType.FLOAT)
+ .addSingleValueDimension("svDouble", DataType.DOUBLE)
+ .addSingleValueDimension("svBoolean", DataType.BOOLEAN)
+ .addSingleValueDimension("svTimestamp", DataType.TIMESTAMP)
+ .addSingleValueDimension("svBytes", DataType.BYTES)
+ .addMultiValueDimension("mvInt", DataType.INT)
+ .addSingleValueDimension("svJson", DataType.JSON)
+ .addMultiValueDimension("mvLong", DataType.LONG)
+ .addMultiValueDimension("mvFloat", DataType.FLOAT)
+ .addMultiValueDimension("mvDouble", DataType.DOUBLE)
// For sanitation
.addSingleValueDimension("svStringWithNullCharacters", DataType.STRING)
.addSingleValueDimension("svStringWithLengthLimit", DataType.STRING)
- .addMultiValueDimension("mvString1",
DataType.STRING).addMultiValueDimension("mvString2", DataType.STRING)
+ .addMultiValueDimension("mvString1", DataType.STRING)
+ .addMultiValueDimension("mvString2", DataType.STRING)
// For negative zero and NaN conversions
.addSingleValueDimension("svFloatNegativeZero", DataType.FLOAT)
.addMultiValueDimension("mvFloatNegativeZero", DataType.FLOAT)
.addSingleValueDimension("svDoubleNegativeZero", DataType.DOUBLE)
.addMultiValueDimension("mvDoubleNegativeZero", DataType.DOUBLE)
- .addSingleValueDimension("svFloatNaN",
DataType.FLOAT).addMultiValueDimension("mvFloatNaN", DataType.FLOAT)
- .addSingleValueDimension("svDoubleNaN",
DataType.DOUBLE).addMultiValueDimension("mvDoubleNaN", DataType.DOUBLE)
+ .addSingleValueDimension("svFloatNaN", DataType.FLOAT)
+ .addMultiValueDimension("mvFloatNaN", DataType.FLOAT)
+ .addSingleValueDimension("svDoubleNaN", DataType.DOUBLE)
+ .addMultiValueDimension("mvDoubleNaN", DataType.DOUBLE)
+ .addMetric("bigDecimalZero", DataType.BIG_DECIMAL)
+ .addMetric("bigDecimalZeroWithPoint", DataType.BIG_DECIMAL)
+ .addMetric("bigDecimalZeroWithExponent", DataType.BIG_DECIMAL)
.build();
private static final TableConfig TABLE_CONFIG =
new
TableConfigBuilder(TableType.OFFLINE).setTableName("testTable").build();
@@ -105,6 +118,9 @@ public class RecordTransformerTest {
record.putValue("svDoubleNaN", Double.NaN);
record.putValue("mvFloatNaN", new Float[]{-0.0f, Float.NaN, 2.0f});
record.putValue("mvDoubleNaN", new Double[]{-0.0d, Double.NaN, 2.0d});
+ record.putValue("bigDecimalZero", new BigDecimal("0"));
+ record.putValue("bigDecimalZeroWithPoint", new BigDecimal("0.0"));
+ record.putValue("bigDecimalZeroWithExponent", new BigDecimal("0E-18"));
return record;
}
@@ -187,7 +203,8 @@ public class RecordTransformerTest {
@Test
public void testDataTypeTransformerIncorrectDataTypes() {
Schema schema = new
Schema.SchemaBuilder().addSingleValueDimension("svInt", DataType.BYTES)
- .addSingleValueDimension("svLong", DataType.LONG).build();
+ .addSingleValueDimension("svLong", DataType.LONG)
+ .build();
RecordTransformer transformer = new DataTypeTransformer(TABLE_CONFIG,
schema);
GenericRow record = getRecord();
@@ -365,8 +382,7 @@ public class RecordTransformerTest {
// scenario where json field exceeds max length and fieldSpec
maxLengthExceedStrategy is to NO_ACTION
schema = SCHEMA;
schema.getFieldSpecFor("svJson").setMaxLength(10);
- schema.getFieldSpecFor("svJson")
-
.setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.NO_ACTION);
+
schema.getFieldSpecFor("svJson").setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.NO_ACTION);
transformer = new SanitizationTransformer(schema);
record = getRecord();
for (int i = 0; i < NUM_ROUNDS; i++) {
@@ -378,8 +394,7 @@ public class RecordTransformerTest {
// scenario where json field exceeds max length and fieldSpec
maxLengthExceedStrategy is to TRIM_LENGTH
schema = SCHEMA;
schema.getFieldSpecFor("svJson").setMaxLength(10);
- schema.getFieldSpecFor("svJson")
-
.setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.TRIM_LENGTH);
+
schema.getFieldSpecFor("svJson").setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.TRIM_LENGTH);
transformer = new SanitizationTransformer(schema);
record = getRecord();
for (int i = 0; i < NUM_ROUNDS; i++) {
@@ -417,17 +432,16 @@ public class RecordTransformerTest {
record = transformer.transform(record);
} catch (Exception e) {
assertTrue(e instanceof IllegalStateException);
- assertEquals(e.getMessage(), "Throwing exception as value: "
- + "{\"first\": \"daffy\", \"last\": \"duck\"} for column "
- + "svJson exceeds configured max length 10.");
+ assertEquals(e.getMessage(),
+ "Throwing exception as value: {\"first\": \"daffy\", \"last\":
\"duck\"} for column svJson exceeds "
+ + "configured max length 10.");
}
}
// scenario where bytes field exceeds max length and fieldSpec
maxLengthExceedStrategy is to NO_ACTION
schema = SCHEMA;
schema.getFieldSpecFor("svBytes").setMaxLength(2);
- schema.getFieldSpecFor("svBytes")
-
.setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.NO_ACTION);
+
schema.getFieldSpecFor("svBytes").setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.NO_ACTION);
transformer = new SanitizationTransformer(schema);
record = getRecord();
for (int i = 0; i < NUM_ROUNDS; i++) {
@@ -439,8 +453,7 @@ public class RecordTransformerTest {
// scenario where bytes field exceeds max length and fieldSpec
maxLengthExceedStrategy is to TRIM_LENGTH
schema = SCHEMA;
schema.getFieldSpecFor("svBytes").setMaxLength(2);
- schema.getFieldSpecFor("svBytes")
-
.setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.TRIM_LENGTH);
+
schema.getFieldSpecFor("svBytes").setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.TRIM_LENGTH);
transformer = new SanitizationTransformer(schema);
record = getRecord();
for (int i = 0; i < NUM_ROUNDS; i++) {
@@ -468,8 +481,7 @@ public class RecordTransformerTest {
// scenario where bytes field exceeds max length and fieldSpec
maxLengthExceedStrategy is to ERROR
schema = SCHEMA;
schema.getFieldSpecFor("svBytes").setMaxLength(2);
- schema.getFieldSpecFor("svBytes")
- .setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.ERROR);
+
schema.getFieldSpecFor("svBytes").setMaxLengthExceedStrategy(FieldSpec.MaxLengthExceedStrategy.ERROR);
transformer = new SanitizationTransformer(schema);
record = getRecord();
for (int i = 0; i < NUM_ROUNDS; i++) {
@@ -477,8 +489,8 @@ public class RecordTransformerTest {
record = transformer.transform(record);
} catch (Exception e) {
assertTrue(e instanceof IllegalStateException);
- assertEquals(e.getMessage(), "Throwing exception as value: 7b7b for
column svBytes "
- + "exceeds configured max length 2.");
+ assertEquals(e.getMessage(),
+ "Throwing exception as value: 7b7b for column svBytes exceeds
configured max length 2.");
}
}
}
@@ -500,8 +512,9 @@ public class RecordTransformerTest {
assertNull(record.getValue("svDoubleNaN"));
assertEquals(record.getValue("mvFloatNaN"), new Float[]{0.0f, 2.0f});
assertEquals(record.getValue("mvDoubleNaN"), new Double[]{0.0d, 2.0d});
- assertEquals(transformer.getNegativeZeroConversionCount(), 6);
- assertEquals(transformer.getNanConversionCount(), 4);
+ assertEquals(record.getValue("bigDecimalZero"), BigDecimal.ZERO);
+ assertEquals(record.getValue("bigDecimalZeroWithPoint"),
BigDecimal.ZERO);
+ assertEquals(record.getValue("bigDecimalZeroWithExponent"),
BigDecimal.ZERO);
}
}
@@ -513,21 +526,24 @@ public class RecordTransformerTest {
Schema schema = new
Schema.SchemaBuilder().addSingleValueDimension("svInt", DataType.INT)
.addSingleValueDimension("svDouble", DataType.DOUBLE)
.addSingleValueDimension("expressionTestColumn", DataType.INT)
- .addSingleValueDimension("svNaN",
DataType.FLOAT).addMultiValueDimension("mvNaN", DataType.FLOAT)
+ .addSingleValueDimension("svNaN", DataType.FLOAT)
+ .addMultiValueDimension("mvNaN", DataType.FLOAT)
.addSingleValueDimension("emptyDimensionForNullValueTransformer",
DataType.FLOAT)
.addSingleValueDimension("svStringNull", DataType.STRING)
.addSingleValueDimension("indexableExtras", DataType.JSON)
- .addDateTime("timeCol", DataType.TIMESTAMP,
"1:MILLISECONDS:TIMESTAMP", "1:MILLISECONDS").build();
+ .addDateTime("timeCol", DataType.TIMESTAMP,
"1:MILLISECONDS:TIMESTAMP", "1:MILLISECONDS")
+ .build();
IngestionConfig ingestionConfig = new IngestionConfig();
- TableConfig tableConfig =
- new
TableConfigBuilder(TableType.OFFLINE).setTableName("testTable").setIngestionConfig(ingestionConfig)
- .setTimeColumnName("timeCol").build();
+ TableConfig tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName("testTable")
+ .setIngestionConfig(ingestionConfig)
+ .setTimeColumnName("timeCol")
+ .build();
ingestionConfig.setFilterConfig(new FilterConfig("svInt = 123 AND svDouble
<= 200"));
ingestionConfig.setTransformConfigs(List.of(new
TransformConfig("expressionTestColumn", "plus(x,10)")));
ingestionConfig.setSchemaConformingTransformerConfig(
- new SchemaConformingTransformerConfig(null, "indexableExtras", false,
null, null, null, null, null,
- null, null, null, null, null, null, null, null, null, null, null,
null, null, null));
+ new SchemaConformingTransformerConfig(null, "indexableExtras", false,
null, null, null, null, null, null, null,
+ null, null, null, null, null, null, null, null, null, null, null,
null));
ingestionConfig.setRowTimeValueCheck(true);
ingestionConfig.setContinueOnError(false);
@@ -858,10 +874,8 @@ public class RecordTransformerTest {
assertEquals(record.getValue("mvDoubleNegativeZero"), new Double[]{0.0d,
1.0d, 0.0d, 3.0d});
assertEquals(record.getValue("svFloatNaN"),
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT);
assertEquals(record.getValue("svDoubleNaN"),
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE);
- assertEquals(record.getValue("mvFloatNaN"),
- new Float[]{0.0f, 2.0f});
- assertEquals(record.getValue("mvDoubleNaN"),
- new Double[]{0.0d, 2.0d});
+ assertEquals(record.getValue("mvFloatNaN"), new Float[]{0.0f, 2.0f});
+ assertEquals(record.getValue("mvDoubleNaN"), new Double[]{0.0d, 2.0d});
assertEquals(new ArrayList<>(record.getNullValueFields()),
new ArrayList<>(Arrays.asList("svFloatNaN", "svDoubleNaN")));
}
diff --git
a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DimensionFieldSpec.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DimensionFieldSpec.java
index 0637abd7fa..c98fcd5cf3 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/DimensionFieldSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/DimensionFieldSpec.java
@@ -71,7 +71,7 @@ public final class DimensionFieldSpec extends FieldSpec {
@Override
public String toString() {
return "< field type: DIMENSION, field name: " + _name + ", data type: " +
_dataType + ", is single-value field: "
- + _isSingleValueField + ", default null value: " + _defaultNullValue +
", max length exceed strategy: "
+ + _singleValueField + ", default null value: " + _defaultNullValue +
", max length exceed strategy: "
+ _maxLengthExceedStrategy + " >";
}
}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
index b56ef1c829..5d29412f5e 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
@@ -31,6 +31,7 @@ import java.sql.Timestamp;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Objects;
import javax.annotation.Nullable;
import org.apache.pinot.spi.utils.BooleanUtils;
import org.apache.pinot.spi.utils.ByteArray;
@@ -42,17 +43,18 @@ import org.apache.pinot.spi.utils.TimestampUtils;
/**
* The <code>FieldSpec</code> class contains all specs related to any field
(column) in {@link Schema}.
- * <p>There are 3 types of <code>FieldSpec</code>:
- * {@link DimensionFieldSpec}, {@link MetricFieldSpec}, {@link TimeFieldSpec}
* <p>Specs stored are as followings:
- * <p>- <code>Name</code>: name of the field.
- * <p>- <code>DataType</code>: type of the data stored (e.g. INTEGER, LONG,
FLOAT, DOUBLE, STRING).
- * <p>- <code>IsSingleValueField</code>: single-value or multi-value field.
- * <p>- <code>DefaultNullValue</code>: when no value found for this field, use
this value. Stored in string format.
- * <p>- <code>VirtualColumnProvider</code>: the virtual column provider to use
for this field.
- * <p>- <code>NotNull</code>: whether the column accepts nulls or not.
Defaults to false.
- * <p>- <code>MaxLength</code>: the maximum length of the string column.
Defaults to 512.
- * <p>- <code>MaxLengthExceedStrategy</code>: the strategy to handle the case
when the string column exceeds the max
+ * <ul>
+ * <li>"name": name of the field.</li>
+ * <li>"dataType": type of the data stored (e.g. INTEGER, LONG, FLOAT,
DOUBLE, STRING).</li>
+ * <li>"singleValueField": single-value or multi-value field.</li>
+ * <li>"notNull": whether the column accepts nulls or not. Defaults to false
(accepts nulls).</li>
+ * <li>"maxLength": maximum length of the column. Defaults to 512.</li>
+ * <li>"maxLengthExceedStrategy": the strategy to handle the case when the
column exceeds the max length.</li>
+ * <li>"allowTrailingZeros": whether to allow trailing zeros for a
BIG_DECIMAL column.</li>
+ * <li>"defaultNullValue": when no value found for this field, use this
value.</li>
+ * <li>"virtualColumnProvider": the virtual column provider to use for this
field.</li>
+ * </ul>
*/
@SuppressWarnings("unused")
@JsonTypeInfo(
@@ -126,15 +128,19 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
protected String _name;
protected DataType _dataType;
- protected boolean _isSingleValueField = true;
- protected boolean _notNull = false;
+ protected boolean _singleValueField = true;
+ protected boolean _notNull;
- // NOTE: This only applies to STRING column, which is the max number of
characters
- private int _maxLength = DEFAULT_MAX_LENGTH;
-
- // NOTE: This only applies to STRING column during {@link
SanitizationTransformer}
+ // Max length applies to STRING, JSON, BYTES columns, and is enforced in
{@link SanitizationTransformer}.
+ protected int _maxLength = DEFAULT_MAX_LENGTH;
protected MaxLengthExceedStrategy _maxLengthExceedStrategy;
+ // Whether to allow trailing zeros for BIG_DECIMAL columns. Trailing zeros
are stripped by default in
+ // {@link SpecialValueTransformer}. If this flag is set to true, trailing
zeros will be preserved, and it is users'
+ // responsibility to ensure there are no big decimal values with same value
but different trailing zeros. Read more
+ // about why trailing zeros need to be stripped in {@link
SpecialValueTransformer}.
+ protected boolean _allowTrailingZeros;
+
protected Object _defaultNullValue;
private transient String _stringDefaultNullValue;
@@ -165,7 +171,7 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
@Nullable Object defaultNullValue, @Nullable MaxLengthExceedStrategy
maxLengthExceedStrategy) {
_name = name;
_dataType = dataType;
- _isSingleValueField = isSingleValueField;
+ _singleValueField = isSingleValueField;
_maxLength = maxLength;
setDefaultNullValue(defaultNullValue);
_maxLengthExceedStrategy = maxLengthExceedStrategy;
@@ -193,12 +199,37 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
}
public boolean isSingleValueField() {
- return _isSingleValueField;
+ return _singleValueField;
}
// Required by JSON de-serializer. DO NOT REMOVE.
public void setSingleValueField(boolean isSingleValueField) {
- _isSingleValueField = isSingleValueField;
+ _singleValueField = isSingleValueField;
+ }
+
+ /**
+ * Returns whether the column is nullable or not.
+ */
+ @JsonIgnore
+ public boolean isNullable() {
+ return !_notNull;
+ }
+
+ /**
+ * @see #isNullable()
+ */
+ @JsonIgnore
+ public void setNullable(Boolean nullable) {
+ _notNull = !nullable;
+ }
+
+ public boolean isNotNull() {
+ return _notNull;
+ }
+
+ // Required by JSON de-serializer. DO NOT REMOVE.
+ public void setNotNull(boolean notNull) {
+ _notNull = notNull;
}
public int getMaxLength() {
@@ -220,22 +251,13 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
_maxLengthExceedStrategy = maxLengthExceedStrategy;
}
- public String getVirtualColumnProvider() {
- return _virtualColumnProvider;
- }
-
- public void setVirtualColumnProvider(String virtualColumnProvider) {
- _virtualColumnProvider = virtualColumnProvider;
+ public boolean isAllowTrailingZeros() {
+ return _allowTrailingZeros;
}
- /**
- * Returns whether the column is virtual. Virtual columns are constructed
while loading the segment, thus do not exist
- * in the record, nor should be persisted to the disk.
- * <p>Identify a column as virtual if the virtual column provider is
configured.
- */
- @JsonIgnore
- public boolean isVirtualColumn() {
- return _virtualColumnProvider != null && !_virtualColumnProvider.isEmpty();
+ // Required by JSON de-serializer. DO NOT REMOVE.
+ public void setAllowTrailingZeros(boolean allowTrailingZeros) {
+ _allowTrailingZeros = allowTrailingZeros;
}
public Object getDefaultNullValue() {
@@ -248,7 +270,7 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
/**
* Helper method to return the String value for the given object.
- * This is required as not all data types have a toString() (eg byte[]).
+ * This is required as not all data types have a toString() (e.g. byte[]).
*
* @param value Value for which String value needs to be returned
* @return String value for the object.
@@ -350,38 +372,32 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
return _transformFunction;
}
- // Required by JSON de-serializer. DO NOT REMOVE.
-
/**
* Deprecated. Use TableConfig -> IngestionConfig -> TransformConfigs
*/
+ // Required by JSON de-serializer. DO NOT REMOVE.
@Deprecated
public void setTransformFunction(@Nullable String transformFunction) {
_transformFunction = transformFunction;
}
- /**
- * Returns whether the column is nullable or not.
- */
- @JsonIgnore
- public boolean isNullable() {
- return !_notNull;
+ public String getVirtualColumnProvider() {
+ return _virtualColumnProvider;
+ }
+
+ // Required by JSON de-serializer. DO NOT REMOVE.
+ public void setVirtualColumnProvider(String virtualColumnProvider) {
+ _virtualColumnProvider = virtualColumnProvider;
}
/**
- * @see #isNullable()
+ * Returns whether the column is virtual. Virtual columns are constructed
while loading the segment, thus do not exist
+ * in the record, nor should be persisted to the disk.
+ * <p>Identify a column as virtual if the virtual column provider is
configured.
*/
@JsonIgnore
- public void setNullable(Boolean nullable) {
- _notNull = !nullable;
- }
-
- public boolean isNotNull() {
- return _notNull;
- }
-
- public void setNotNull(boolean notNull) {
- _notNull = notNull;
+ public boolean isVirtualColumn() {
+ return _virtualColumnProvider != null && !_virtualColumnProvider.isEmpty();
}
/**
@@ -394,15 +410,26 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
jsonObject.put("name", _name);
jsonObject.put("dataType", _dataType.name());
jsonObject.put("fieldType", getFieldType().toString());
- if (!_isSingleValueField) {
+ if (!_singleValueField) {
jsonObject.put("singleValueField", false);
}
+ if (_notNull) {
+ jsonObject.put("notNull", true);
+ }
if (_maxLength != DEFAULT_MAX_LENGTH) {
jsonObject.put("maxLength", _maxLength);
}
+ if (_maxLengthExceedStrategy != null) {
+ jsonObject.put("maxLengthExceedStrategy",
_maxLengthExceedStrategy.name());
+ }
+ if (_allowTrailingZeros) {
+ jsonObject.put("allowTrailingZeros", true);
+ }
appendDefaultNullValue(jsonObject);
appendTransformFunction(jsonObject);
- jsonObject.put("notNull", _notNull);
+ if (_virtualColumnProvider != null) {
+ jsonObject.put("virtualColumnProvider", _virtualColumnProvider);
+ }
return jsonObject;
}
@@ -457,39 +484,31 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
}
}
- @SuppressWarnings("EqualsWhichDoesntCheckParameterClass")
@Override
public boolean equals(Object o) {
- if (EqualityUtils.isSameReference(this, o)) {
+ if (this == o) {
return true;
}
-
- if (EqualityUtils.isNullOrNotSameClass(this, o)) {
+ if (o == null || getClass() != o.getClass()) {
return false;
}
-
FieldSpec that = (FieldSpec) o;
- return EqualityUtils.isEqual(_name, that._name) &&
EqualityUtils.isEqual(_dataType, that._dataType) && EqualityUtils
- .isEqual(_isSingleValueField, that._isSingleValueField) &&
EqualityUtils
- .isEqual(getStringValue(_defaultNullValue),
getStringValue(that._defaultNullValue)) && EqualityUtils
- .isEqual(_maxLength, that._maxLength) &&
EqualityUtils.isEqual(_transformFunction, that._transformFunction)
- && EqualityUtils.isEqual(_maxLengthExceedStrategy,
that._maxLengthExceedStrategy)
- && EqualityUtils.isEqual(_virtualColumnProvider,
that._virtualColumnProvider)
- && EqualityUtils.isEqual(_notNull, that._notNull);
+ return _name.equals(that._name)
+ && _dataType == that._dataType
+ && _singleValueField == that._singleValueField
+ && _notNull == that._notNull
+ && _maxLength == that._maxLength
+ && _maxLengthExceedStrategy == that._maxLengthExceedStrategy
+ && _allowTrailingZeros == that._allowTrailingZeros
+ &&
getStringValue(_defaultNullValue).equals(getStringValue(that._defaultNullValue))
+ && Objects.equals(_transformFunction, that._transformFunction)
+ && Objects.equals(_virtualColumnProvider, that._virtualColumnProvider);
}
@Override
public int hashCode() {
- int result = EqualityUtils.hashCodeOf(_name);
- result = EqualityUtils.hashCodeOf(result, _dataType);
- result = EqualityUtils.hashCodeOf(result, _isSingleValueField);
- result = EqualityUtils.hashCodeOf(result,
getStringValue(_defaultNullValue));
- result = EqualityUtils.hashCodeOf(result, _maxLength);
- result = EqualityUtils.hashCodeOf(result, _maxLengthExceedStrategy);
- result = EqualityUtils.hashCodeOf(result, _transformFunction);
- result = EqualityUtils.hashCodeOf(result, _virtualColumnProvider);
- result = EqualityUtils.hashCodeOf(result, _notNull);
- return result;
+ return Objects.hash(_name, _dataType, _singleValueField, _notNull,
_maxLength, _maxLengthExceedStrategy,
+ _allowTrailingZeros, getStringValue(_defaultNullValue),
_transformFunction, _virtualColumnProvider);
}
/**
@@ -748,10 +767,9 @@ public abstract class FieldSpec implements
Comparable<FieldSpec>, Serializable {
* @return
*/
public boolean isBackwardCompatibleWith(FieldSpec oldFieldSpec) {
-
return EqualityUtils.isEqual(_name, oldFieldSpec._name)
&& EqualityUtils.isEqual(_dataType, oldFieldSpec._dataType)
- && EqualityUtils.isEqual(_isSingleValueField,
oldFieldSpec._isSingleValueField);
+ && EqualityUtils.isEqual(_singleValueField,
oldFieldSpec._singleValueField);
}
public static class FieldSpecMetadata {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]