This is an automated email from the ASF dual-hosted git repository.
gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new d359fb3d689 Cache value selectors in RowBasedColumnSelectorFactory.
(#15615)
d359fb3d689 is described below
commit d359fb3d689a9a32cfe6c3f21995d7885190ebbd
Author: Gian Merlino <[email protected]>
AuthorDate: Mon Jan 15 18:03:27 2024 -0800
Cache value selectors in RowBasedColumnSelectorFactory. (#15615)
* Cache value selectors in RowBasedColumnSelectorFactory.
There was already caching for dimension selectors. This patch adds caching
for value (object and number) selectors. It's helpful when the same field is
read multiple times during processing of a single row (for example, by being
an input to both MIN and MAX aggregations).
* Fix typing.
* Fix logic.
---
.../apache/druid/common/config/NullHandling.java | 5 +-
.../java/org/apache/druid/data/input/Rows.java | 74 +++++++++++++++++---
.../segment/RowBasedColumnSelectorFactory.java | 79 +++++++++++++++-------
3 files changed, 121 insertions(+), 37 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/common/config/NullHandling.java
b/processing/src/main/java/org/apache/druid/common/config/NullHandling.java
index fede12e68d5..7b4722052d4 100644
--- a/processing/src/main/java/org/apache/druid/common/config/NullHandling.java
+++ b/processing/src/main/java/org/apache/druid/common/config/NullHandling.java
@@ -203,10 +203,11 @@ public class NullHandling
* May be null or non-null based on the current SQL-compatible null handling
mode.
*/
@Nullable
- @SuppressWarnings("unchecked")
public static Object defaultValueForType(ValueType type)
{
- if (type == ValueType.FLOAT) {
+ if (sqlCompatible()) {
+ return null;
+ } else if (type == ValueType.FLOAT) {
return defaultFloatValue();
} else if (type == ValueType.DOUBLE) {
return defaultDoubleValue();
diff --git a/processing/src/main/java/org/apache/druid/data/input/Rows.java
b/processing/src/main/java/org/apache/druid/data/input/Rows.java
index add6781b81b..d2a7ded7612 100644
--- a/processing/src/main/java/org/apache/druid/data/input/Rows.java
+++ b/processing/src/main/java/org/apache/druid/data/input/Rows.java
@@ -23,8 +23,10 @@ import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.primitives.Longs;
import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.parsers.ParseException;
+import org.apache.druid.segment.column.ValueType;
import javax.annotation.Nullable;
import java.util.Arrays;
@@ -90,6 +92,7 @@ public final class Rows
*
* @param name field name of the object being converted (may
be used for exception messages)
* @param inputValue the actual object being converted
+ * @param outputType expected return type, or null if it should be
automatically detected
* @param throwParseExceptions whether this method should throw a {@link
ParseException} or use a default/null value
* when {@param inputValue} is not numeric
*
@@ -98,14 +101,19 @@ public final class Rows
* @throws ParseException if the input cannot be converted to a number and
{@code throwParseExceptions} is true
*/
@Nullable
- public static <T extends Number> Number objectToNumber(
+ public static Number objectToNumber(
final String name,
final Object inputValue,
+ @Nullable final ValueType outputType,
final boolean throwParseExceptions
)
{
+ if (outputType != null && !outputType.isNumeric()) {
+ throw new IAE("Output type[%s] must be numeric", outputType);
+ }
+
if (inputValue == null) {
- return NullHandling.defaultLongValue();
+ return (Number) NullHandling.defaultValueForType(outputType != null ?
outputType : ValueType.LONG);
} else if (inputValue instanceof Number) {
return (Number) inputValue;
} else if (inputValue instanceof String) {
@@ -113,30 +121,74 @@ public final class Rows
String metricValueString = StringUtils.removeChar(((String)
inputValue).trim(), ',');
// Longs.tryParse() doesn't support leading '+', so we need to trim it
ourselves
metricValueString = trimLeadingPlusOfLongString(metricValueString);
- Long v = Longs.tryParse(metricValueString);
- // Do NOT use ternary operator here, because it makes Java to convert
Long to Double
- if (v != null) {
- return v;
+
+ Number v = null;
+
+ // Try parsing as Long first, since it's significantly faster than
Double parsing, and also there are various
+ // integer numbers that can be represented as Long but cannot be
represented as Double.
+ if (outputType == null || outputType == ValueType.LONG) {
+ v = Longs.tryParse(metricValueString);
+ }
+
+ if (v == null && outputType != ValueType.LONG) {
+ v = Double.valueOf(metricValueString);
+ }
+
+ if (v == null) {
+ if (throwParseExceptions) {
+ throw new ParseException(
+ String.valueOf(inputValue),
+ "Unable to parse value[%s] for field[%s] as type[%s]",
+ inputValue.getClass(),
+ name,
+ outputType
+ );
+ } else {
+ return (Number) NullHandling.defaultValueForType(outputType);
+ }
} else {
- return Double.valueOf(metricValueString);
+ return outputType == ValueType.FLOAT ? Float.valueOf(v.floatValue())
: v;
}
}
catch (Exception e) {
if (throwParseExceptions) {
- throw new ParseException(String.valueOf(inputValue), e, "Unable to
parse value[%s] for field[%s]", inputValue, name);
+ throw new ParseException(
+ String.valueOf(inputValue),
+ e,
+ "Unable to parse value[%s] for field[%s]",
+ inputValue,
+ name
+ );
} else {
- return NullHandling.defaultLongValue();
+ return (Number) NullHandling.defaultValueForType(outputType != null
? outputType : ValueType.LONG);
}
}
} else {
if (throwParseExceptions) {
- throw new ParseException(String.valueOf(inputValue), "Unknown type[%s]
for field[%s]", inputValue.getClass(), name);
+ throw new ParseException(
+ String.valueOf(inputValue),
+ "Unknown type[%s] for field[%s]",
+ inputValue.getClass(),
+ name
+ );
} else {
- return NullHandling.defaultLongValue();
+ return (Number) NullHandling.defaultValueForType(outputType != null ?
outputType : ValueType.LONG);
}
}
}
+ /**
+ * Shorthand for {@link #objectToNumber(String, Object, ValueType, boolean)}
with null expectedType.
+ */
+ public static Number objectToNumber(
+ final String name,
+ final Object inputValue,
+ final boolean throwParseExceptions
+ )
+ {
+ return objectToNumber(name, inputValue, null, throwParseExceptions);
+ }
+
private static String trimLeadingPlusOfLongString(String metricValueString)
{
if (metricValueString.length() > 1 && metricValueString.charAt(0) == '+') {
diff --git
a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java
b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java
index 0c120ca8f29..17e6e5daa7d 100644
---
a/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java
+++
b/processing/src/main/java/org/apache/druid/segment/RowBasedColumnSelectorFactory.java
@@ -32,6 +32,7 @@ import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnType;
+import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.data.RangeIndexedInts;
import org.apache.druid.segment.nested.StructuredData;
@@ -443,44 +444,56 @@ public class RowBasedColumnSelectorFactory<T> implements
ColumnSelectorFactory
return new TimeLongColumnSelector();
} else {
final Function<T, Object> columnFunction =
adapter.columnFunction(columnName);
+ final ColumnCapabilities capabilities =
columnInspector.getColumnCapabilities(columnName);
+ final ValueType numberType =
+ capabilities != null && capabilities.getType().isNumeric() ?
capabilities.getType() : null;
return new ColumnValueSelector<Object>()
{
+ private long currentValueId = RowIdSupplier.INIT;
+ private long currentValueAsNumberId = RowIdSupplier.INIT;
+ @Nullable
+ private Object currentValue;
+ @Nullable
+ private Number currentValueAsNumber;
+
@Override
public boolean isNull()
{
- return !NullHandling.replaceWithDefault() &&
getCurrentValueAsNumber() == null;
+ updateCurrentValueAsNumber();
+ return !NullHandling.replaceWithDefault() && currentValueAsNumber ==
null;
}
@Override
public double getDouble()
{
- final Number n = getCurrentValueAsNumber();
- assert NullHandling.replaceWithDefault() || n != null;
- return n != null ? n.doubleValue() : 0d;
+ updateCurrentValueAsNumber();
+ assert NullHandling.replaceWithDefault() || currentValueAsNumber !=
null;
+ return currentValueAsNumber != null ?
currentValueAsNumber.doubleValue() : 0d;
}
@Override
public float getFloat()
{
- final Number n = getCurrentValueAsNumber();
- assert NullHandling.replaceWithDefault() || n != null;
- return n != null ? n.floatValue() : 0f;
+ updateCurrentValueAsNumber();
+ assert NullHandling.replaceWithDefault() || currentValueAsNumber !=
null;
+ return currentValueAsNumber != null ?
currentValueAsNumber.floatValue() : 0f;
}
@Override
public long getLong()
{
- final Number n = getCurrentValueAsNumber();
- assert NullHandling.replaceWithDefault() || n != null;
- return n != null ? n.longValue() : 0L;
+ updateCurrentValueAsNumber();
+ assert NullHandling.replaceWithDefault() || currentValueAsNumber !=
null;
+ return currentValueAsNumber != null ?
currentValueAsNumber.longValue() : 0L;
}
@Nullable
@Override
public Object getObject()
{
- return getCurrentValue();
+ updateCurrentValue();
+ return currentValue;
}
@Override
@@ -495,24 +508,42 @@ public class RowBasedColumnSelectorFactory<T> implements
ColumnSelectorFactory
inspector.visit("row", rowSupplier);
}
- @Nullable
- private Object getCurrentValue()
+ private void updateCurrentValue()
{
- return columnFunction.apply(rowSupplier.get());
+ if (rowIdSupplier == null || rowIdSupplier.getRowId() !=
currentValueId) {
+ try {
+ currentValue = columnFunction.apply(rowSupplier.get());
+ }
+ catch (Throwable e) {
+ currentValueId = RowIdSupplier.INIT;
+ throw e;
+ }
+
+ if (rowIdSupplier != null) {
+ currentValueId = rowIdSupplier.getRowId();
+ }
+ }
}
- @Nullable
- private Number getCurrentValueAsNumber()
+ private void updateCurrentValueAsNumber()
{
- final Object currentValue = getCurrentValue();
- if (currentValue instanceof StructuredData) {
- return Rows.objectToNumber(columnName, ((StructuredData)
currentValue).getValue(), throwParseExceptions);
+ updateCurrentValue();
+
+ if (rowIdSupplier == null || rowIdSupplier.getRowId() !=
currentValueAsNumberId) {
+ try {
+ final Object valueToUse =
+ currentValue instanceof StructuredData ? ((StructuredData)
currentValue).getValue() : currentValue;
+ currentValueAsNumber = Rows.objectToNumber(columnName,
valueToUse, numberType, throwParseExceptions);
+ }
+ catch (Throwable e) {
+ currentValueAsNumberId = RowIdSupplier.INIT;
+ throw e;
+ }
+
+ if (rowIdSupplier != null) {
+ currentValueAsNumberId = rowIdSupplier.getRowId();
+ }
}
- return Rows.objectToNumber(
- columnName,
- currentValue,
- throwParseExceptions
- );
}
};
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]