This is an automated email from the ASF dual-hosted git repository.
amashenkov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/ignite-3.git
The following commit(s) were added to refs/heads/main by this push:
new 83e8a2a0d5 IGNITE-22061 Use constant value when estimating decimal
size (#3837)
83e8a2a0d5 is described below
commit 83e8a2a0d582e71b6b7e857e742c367d06dcba7a
Author: Pavel Pereslegin <[email protected]>
AuthorDate: Tue Jun 4 11:28:29 2024 +0300
IGNITE-22061 Use constant value when estimating decimal size (#3837)
---
.../internal/binarytuple/BinaryTupleBuilder.java | 11 +---
.../internal/binarytuple/BinaryTupleCommon.java | 23 +++++++
.../marshaller/reflection/ObjectStatistics.java | 43 +++++++++---
.../ignite/internal/schema/row/RowAssembler.java | 14 +++-
.../schema/marshaller/TupleMarshallerImpl.java | 36 +++++++++-
.../marshaller/TupleMarshallerStatisticsTest.java | 76 ++++++++++++++++++++++
6 files changed, 179 insertions(+), 24 deletions(-)
diff --git
a/modules/binary-tuple/src/main/java/org/apache/ignite/internal/binarytuple/BinaryTupleBuilder.java
b/modules/binary-tuple/src/main/java/org/apache/ignite/internal/binarytuple/BinaryTupleBuilder.java
index 0bf09c2f60..d6c19d7ae0 100644
---
a/modules/binary-tuple/src/main/java/org/apache/ignite/internal/binarytuple/BinaryTupleBuilder.java
+++
b/modules/binary-tuple/src/main/java/org/apache/ignite/internal/binarytuple/BinaryTupleBuilder.java
@@ -19,7 +19,6 @@ package org.apache.ignite.internal.binarytuple;
import java.math.BigDecimal;
import java.math.BigInteger;
-import java.math.RoundingMode;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.CharBuffer;
@@ -315,15 +314,7 @@ public class BinaryTupleBuilder {
* @return {@code this} for chaining.
*/
public BinaryTupleBuilder appendDecimalNotNull(BigDecimal value, int
scale) {
- if (value.scale() > scale) {
- value = value.setScale(scale, RoundingMode.HALF_UP);
- }
-
- BigDecimal noZeros = value.stripTrailingZeros();
- if (noZeros.scale() <= Short.MAX_VALUE && noZeros.scale() >=
Short.MIN_VALUE) {
- // Use more compact representation if possible.
- value = noZeros;
- }
+ value = BinaryTupleCommon.shrinkDecimal(value, scale);
// See CatalogUtils.MAX_DECIMAL_SCALE = Short.MAX_VALUE
if (value.scale() > Short.MAX_VALUE) {
diff --git
a/modules/binary-tuple/src/main/java/org/apache/ignite/internal/binarytuple/BinaryTupleCommon.java
b/modules/binary-tuple/src/main/java/org/apache/ignite/internal/binarytuple/BinaryTupleCommon.java
index 45f71d8f24..9f97c8e16c 100644
---
a/modules/binary-tuple/src/main/java/org/apache/ignite/internal/binarytuple/BinaryTupleCommon.java
+++
b/modules/binary-tuple/src/main/java/org/apache/ignite/internal/binarytuple/BinaryTupleCommon.java
@@ -17,6 +17,8 @@
package org.apache.ignite.internal.binarytuple;
+import java.math.BigDecimal;
+import java.math.RoundingMode;
import org.apache.ignite.internal.lang.IgniteInternalException;
/**
@@ -100,4 +102,25 @@ public class BinaryTupleCommon {
throw new IgniteInternalException("Too big binary tuple size");
}
+
+ /**
+ * Converts specified {@link BigDecimal} value to a more compact form, if
possible.
+ *
+ * @param value Field value.
+ * @param scale Maximum scale.
+ * @return Decimal with a scale reduced to the specified scale and trimmed
trailing zeros.
+ */
+ public static BigDecimal shrinkDecimal(BigDecimal value, int scale) {
+ if (value.scale() > scale) {
+ value = value.setScale(scale, RoundingMode.HALF_UP);
+ }
+
+ BigDecimal noZeros = value.stripTrailingZeros();
+ if (noZeros.scale() <= Short.MAX_VALUE && noZeros.scale() >=
Short.MIN_VALUE) {
+ // Use more compact representation if possible.
+ return noZeros;
+ }
+
+ return value;
+ }
}
diff --git
a/modules/schema/src/main/java/org/apache/ignite/internal/schema/marshaller/reflection/ObjectStatistics.java
b/modules/schema/src/main/java/org/apache/ignite/internal/schema/marshaller/reflection/ObjectStatistics.java
index cc8f3c4aa6..6cb1f8c313 100644
---
a/modules/schema/src/main/java/org/apache/ignite/internal/schema/marshaller/reflection/ObjectStatistics.java
+++
b/modules/schema/src/main/java/org/apache/ignite/internal/schema/marshaller/reflection/ObjectStatistics.java
@@ -19,12 +19,15 @@ package
org.apache.ignite.internal.schema.marshaller.reflection;
import static
org.apache.ignite.internal.schema.marshaller.MarshallerUtil.getValueSize;
+import java.math.BigDecimal;
import java.util.List;
+import org.apache.ignite.internal.binarytuple.BinaryTupleCommon;
import org.apache.ignite.internal.marshaller.Marshaller;
import org.apache.ignite.internal.schema.Column;
import org.apache.ignite.internal.schema.SchemaDescriptor;
import org.apache.ignite.internal.schema.row.RowAssembler;
import org.apache.ignite.internal.type.NativeType;
+import org.apache.ignite.internal.type.NativeTypeSpec;
import org.jetbrains.annotations.Nullable;
/**
@@ -32,20 +35,35 @@ import org.jetbrains.annotations.Nullable;
*/
class ObjectStatistics {
/** Cached zero statistics. */
- private static final ObjectStatistics ZERO_STATISTICS = new
ObjectStatistics(-1);
+ private static final ObjectStatistics ZERO_STATISTICS = new
ObjectStatistics(-1, false);
+
+ /**
+ * The value is used to avoid unnecessary buffer reallocation when building
+ * a binary row. The exact size of the decimal value can be determined
after
+ * compaction (see {@link BinaryTupleCommon#shrinkDecimal(BigDecimal,
int)}.
+ */
+ private static final int DECIMAL_VALUE_ESTIMATED_SIZE = 26;
/** Estimated total size of the object. */
private final int estimatedValueSize;
+ /** flag indicating that the size is calculated exactly or approximately.
*/
+ private final boolean exactEstimation;
+
/** Constructor. */
- private ObjectStatistics(int estimatedValueSize) {
+ private ObjectStatistics(int estimatedValueSize, boolean exactEstimation) {
this.estimatedValueSize = estimatedValueSize;
+ this.exactEstimation = exactEstimation;
}
- private int getEstimatedValueSize() {
+ private int estimatedValueSize() {
return estimatedValueSize;
}
+ private boolean exactEstimation() {
+ return exactEstimation;
+ }
+
/**
* Reads object fields and gather statistic.
*/
@@ -55,6 +73,7 @@ class ObjectStatistics {
}
int estimatedValueSize = 0;
+ boolean exactEstimation = true;
for (int i = 0; i < cols.size(); i++) {
Object val = marsh.value(obj, i);
@@ -70,19 +89,23 @@ class ObjectStatistics {
if (colType.spec().fixedLength()) {
estimatedValueSize += colType.sizeInBytes();
} else {
- estimatedValueSize += getValueSize(val, colType);
+ int valueSize = colType.spec() == NativeTypeSpec.DECIMAL ?
DECIMAL_VALUE_ESTIMATED_SIZE : getValueSize(val, colType);
+
+ exactEstimation = false;
+
+ estimatedValueSize += valueSize;
}
}
- return new ObjectStatistics(estimatedValueSize);
+ return new ObjectStatistics(estimatedValueSize, exactEstimation);
}
static RowAssembler createAssembler(SchemaDescriptor schema, Marshaller
keyMarsh, Object key) {
ObjectStatistics keyStat = collectObjectStats(schema.keyColumns(),
keyMarsh, key);
- int totalValueSize = keyStat.getEstimatedValueSize();
+ int totalValueSize = keyStat.estimatedValueSize();
- return new RowAssembler(schema.version(), schema.keyColumns(),
totalValueSize);
+ return new RowAssembler(schema.version(), schema.keyColumns(),
totalValueSize, keyStat.exactEstimation());
}
static RowAssembler createAssembler(
@@ -91,12 +114,12 @@ class ObjectStatistics {
ObjectStatistics valStat = collectObjectStats(schema.valueColumns(),
valMarsh, val);
int totalValueSize;
- if (keyStat.getEstimatedValueSize() < 0 ||
valStat.getEstimatedValueSize() < 0) {
+ if (keyStat.estimatedValueSize() < 0 || valStat.estimatedValueSize() <
0) {
totalValueSize = -1;
} else {
- totalValueSize = keyStat.getEstimatedValueSize() +
valStat.getEstimatedValueSize();
+ totalValueSize = keyStat.estimatedValueSize() +
valStat.estimatedValueSize();
}
- return new RowAssembler(schema, totalValueSize);
+ return new RowAssembler(schema.version(), schema.columns(),
totalValueSize, keyStat.exactEstimation() && valStat.exactEstimation());
}
}
diff --git
a/modules/schema/src/main/java/org/apache/ignite/internal/schema/row/RowAssembler.java
b/modules/schema/src/main/java/org/apache/ignite/internal/schema/row/RowAssembler.java
index 1ce7eb305f..20d24463e3 100644
---
a/modules/schema/src/main/java/org/apache/ignite/internal/schema/row/RowAssembler.java
+++
b/modules/schema/src/main/java/org/apache/ignite/internal/schema/row/RowAssembler.java
@@ -80,10 +80,22 @@ public class RowAssembler {
* @param totalValueSize Total estimated length of non-NULL values, -1 if
not known.
*/
public RowAssembler(int schemaVersion, List<Column> columns, int
totalValueSize) {
+ this(schemaVersion, columns, totalValueSize, true);
+ }
+
+ /**
+ * Create a builder.
+ *
+ * @param schemaVersion Version of the schema.
+ * @param columns List of columns to serialize. Values must be appended in
the same order.
+ * @param totalValueSize Total estimated length of non-NULL values, -1 if
not known.
+ * @param exactEstimate Whether the total size is exact estimate or
approximate.
+ */
+ public RowAssembler(int schemaVersion, List<Column> columns, int
totalValueSize, boolean exactEstimate) {
this.schemaVersion = schemaVersion;
this.columns = columns;
- builder = new BinaryTupleBuilder(columns.size(), totalValueSize);
+ builder = new BinaryTupleBuilder(columns.size(), totalValueSize,
exactEstimate);
curCol = 0;
}
diff --git
a/modules/table/src/main/java/org/apache/ignite/internal/schema/marshaller/TupleMarshallerImpl.java
b/modules/table/src/main/java/org/apache/ignite/internal/schema/marshaller/TupleMarshallerImpl.java
index 36b9b25e74..bf6c4afd3d 100644
---
a/modules/table/src/main/java/org/apache/ignite/internal/schema/marshaller/TupleMarshallerImpl.java
+++
b/modules/table/src/main/java/org/apache/ignite/internal/schema/marshaller/TupleMarshallerImpl.java
@@ -19,11 +19,13 @@ package org.apache.ignite.internal.schema.marshaller;
import static
org.apache.ignite.internal.schema.marshaller.MarshallerUtil.getValueSize;
+import java.math.BigDecimal;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import org.apache.ignite.internal.binarytuple.BinaryTupleCommon;
import org.apache.ignite.internal.binarytuple.BinaryTupleContainer;
import org.apache.ignite.internal.binarytuple.BinaryTupleReader;
import org.apache.ignite.internal.schema.BinaryRowImpl;
@@ -34,9 +36,12 @@ import
org.apache.ignite.internal.schema.SchemaMismatchException;
import org.apache.ignite.internal.schema.SchemaVersionMismatchException;
import org.apache.ignite.internal.schema.row.Row;
import org.apache.ignite.internal.schema.row.RowAssembler;
+import org.apache.ignite.internal.type.DecimalNativeType;
import org.apache.ignite.internal.type.NativeType;
+import org.apache.ignite.internal.type.NativeTypeSpec;
import org.apache.ignite.table.Tuple;
import org.jetbrains.annotations.Nullable;
+import org.jetbrains.annotations.TestOnly;
/**
* Tuple marshaller implementation.
@@ -164,7 +169,7 @@ public class TupleMarshallerImpl implements TupleMarshaller
{
: Row.wrapBinaryRow(schema, rowBuilder.build());
}
- private void gatherStatistics(
+ void gatherStatistics(
List<Column> columns,
Tuple tuple,
ValuesWithStatistics targetTuple
@@ -187,21 +192,41 @@ public class TupleMarshallerImpl implements
TupleMarshaller {
}
col.validate(val);
- targetTuple.values.put(col.name(), val);
if (val != null) {
if (colType.spec().fixedLength()) {
estimatedValueSize += colType.sizeInBytes();
} else {
+ val = shrinkValue(val, col.type());
+
estimatedValueSize += getValueSize(val, colType);
}
}
+
+ targetTuple.values.put(col.name(), val);
}
targetTuple.estimatedValueSize += estimatedValueSize;
targetTuple.knownColumns += knownColumns;
}
+ /**
+ * Converts the passed value to a more compact form, if possible.
+ *
+ * @param value Field value.
+ * @param type Mapped type.
+ * @return Value in a more compact form, or the original value if it
cannot be compacted.
+ */
+ private static <T> T shrinkValue(T value, NativeType type) {
+ if (type.spec() == NativeTypeSpec.DECIMAL) {
+ assert type instanceof DecimalNativeType;
+
+ return (T) BinaryTupleCommon.shrinkDecimal((BigDecimal) value,
((DecimalNativeType) type).scale());
+ }
+
+ return value;
+ }
+
/**
* Extracts columns.
*
@@ -277,7 +302,7 @@ public class TupleMarshallerImpl implements TupleMarshaller
{
* Container to keep columns values and related statistics which help
* to build row with {@link RowAssembler}.
*/
- private static class ValuesWithStatistics {
+ static class ValuesWithStatistics {
private final Map<String, Object> values = new HashMap<>();
private int estimatedValueSize;
@@ -286,5 +311,10 @@ public class TupleMarshallerImpl implements
TupleMarshaller {
@Nullable Object value(String columnName) {
return values.get(columnName);
}
+
+ @TestOnly
+ int estimatedValueSize() {
+ return estimatedValueSize;
+ }
}
}
diff --git
a/modules/table/src/test/java/org/apache/ignite/internal/schema/marshaller/TupleMarshallerStatisticsTest.java
b/modules/table/src/test/java/org/apache/ignite/internal/schema/marshaller/TupleMarshallerStatisticsTest.java
new file mode 100644
index 0000000000..1c9224399c
--- /dev/null
+++
b/modules/table/src/test/java/org/apache/ignite/internal/schema/marshaller/TupleMarshallerStatisticsTest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.schema.marshaller;
+
+import static
org.apache.ignite.internal.schema.marshaller.MarshallerUtil.sizeInBytes;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.is;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.math.BigDecimal;
+import java.math.MathContext;
+import java.math.RoundingMode;
+import org.apache.ignite.internal.binarytuple.BinaryTupleBuilder;
+import org.apache.ignite.internal.binarytuple.BinaryTupleCommon;
+import org.apache.ignite.internal.catalog.commands.CatalogUtils;
+import org.apache.ignite.internal.schema.Column;
+import org.apache.ignite.internal.schema.SchemaDescriptor;
+import
org.apache.ignite.internal.schema.marshaller.TupleMarshallerImpl.ValuesWithStatistics;
+import org.apache.ignite.internal.table.impl.TestTupleBuilder;
+import org.apache.ignite.internal.type.NativeTypes;
+import org.apache.ignite.table.Tuple;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+/**
+ * Tests binary tuple size estimation in {@link TupleMarshallerImpl}.
+ */
+public class TupleMarshallerStatisticsTest {
+ private static final int PRECISION = 10;
+
+ private static final int HUGE_DECIMAL_SCALE = Short.MAX_VALUE * 10;
+
+ /** The test checks that when calculating the tuple size, the passed
decimal value is compacted. */
+ @ParameterizedTest(name = "scale = {0}")
+ @ValueSource(ints = {3, 1024, CatalogUtils.MAX_DECIMAL_SCALE - PRECISION})
+ public void testDecimalSizeEstimation(int columnScale) {
+ SchemaDescriptor schema = new SchemaDescriptor(1,
+ new Column[]{new Column("KEY",
NativeTypes.decimalOf(PRECISION, columnScale), false)},
+ new Column[]{new Column("UNUSED", NativeTypes.INT32, true)});
+
+ TupleMarshallerImpl marshaller = new TupleMarshallerImpl(schema);
+
+ BigDecimal hugeScaledDecimal = new BigDecimal(1, new
MathContext(PRECISION))
+ .setScale(HUGE_DECIMAL_SCALE, RoundingMode.HALF_UP);
+ Tuple tuple = new TestTupleBuilder().set("KEY", hugeScaledDecimal);
+
+ assertThat(sizeInBytes(hugeScaledDecimal), is(greaterThan(16384)));
+
+ ValuesWithStatistics statistics = new ValuesWithStatistics();
+ marshaller.gatherStatistics(schema.keyColumns(), tuple, statistics);
+ assertThat(statistics.estimatedValueSize(), is(3));
+ BigDecimal compactedValue = (BigDecimal) statistics.value("KEY");
+ assertNotNull(compactedValue);
+ assertThat(sizeInBytes(compactedValue), is(3));
+
+ BinaryTupleBuilder builder = new BinaryTupleBuilder(1,
statistics.estimatedValueSize());
+ builder.appendDecimal(compactedValue, columnScale);
+ assertThat(builder.build().capacity(),
is(statistics.estimatedValueSize() + BinaryTupleCommon.HEADER_SIZE + 1));
+ }
+}