This is an automated email from the ASF dual-hosted git repository.
cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new dca31d466c9 minor adjustments for performance (#16714)
dca31d466c9 is described below
commit dca31d466c90f9bf2faa1fa0a0f6e7aab62c8fb5
Author: Clint Wylie <[email protected]>
AuthorDate: Thu Jul 11 16:57:15 2024 -0700
minor adjustments for performance (#16714)
changes:
* stop using some String.format calls
* switch some streams to classic loops
---
.../groupby/epinephelinae/GroupByQueryEngine.java | 35 +++++------
.../epinephelinae/vector/VectorGroupByEngine.java | 67 ++++++++++++----------
.../query/timeseries/TimeseriesQueryEngine.java | 3 +-
.../org/apache/druid/segment/VirtualColumns.java | 7 ++-
.../nested/CompressedNestedDataComplexColumn.java | 13 ++---
.../nested/NestedCommonFormatColumnSerializer.java | 2 +-
.../nested/NestedDataColumnSerializerV4.java | 10 +---
7 files changed, 73 insertions(+), 64 deletions(-)
diff --git
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java
index 085e6022aab..35f09c5446d 100644
---
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java
+++
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java
@@ -211,24 +211,25 @@ public class GroupByQueryEngine
final List<DimensionSpec> dimensions
)
{
- return dimensions
- .stream()
- .allMatch(
- dimension -> {
- if (dimension.mustDecorate()) {
- // DimensionSpecs that decorate may turn singly-valued columns
into multi-valued selectors.
- // To be safe, we must return false here.
- return false;
- }
+ for (DimensionSpec dimension : dimensions) {
+ if (dimension.mustDecorate()) {
+ // DimensionSpecs that decorate may turn singly-valued columns into
multi-valued selectors.
+ // To be safe, we must return false here.
+ return false;
+ }
+
+ // if dimension spec type is array, skip it since we can handle array or
multi-valued
+ if (dimension.getOutputType().isArray()) {
+ continue;
+ }
- // Now check column capabilities, which must be present and
explicitly not multi-valued and not arrays
- final ColumnCapabilities columnCapabilities =
inspector.getColumnCapabilities(dimension.getDimension());
- return dimension.getOutputType().isArray()
- || (columnCapabilities != null
- && columnCapabilities.hasMultipleValues().isFalse()
- && !columnCapabilities.isArray()
- );
- });
+ // Now check column capabilities, which must be present and explicitly
not multi-valued and not arrays
+ final ColumnCapabilities capabilities =
inspector.getColumnCapabilities(dimension.getDimension());
+ if (capabilities == null ||
capabilities.hasMultipleValues().isMaybeTrue() || capabilities.isArray()) {
+ return false;
+ }
+ }
+ return true;
}
private abstract static class GroupByEngineIterator<KeyType> implements
Iterator<ResultRow>, Closeable
diff --git
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
index 93633644a2e..75f4539e8c3 100644
---
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
+++
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
@@ -29,6 +29,7 @@ import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.common.parsers.CloseableIterator;
import org.apache.druid.query.DruidProcessingConfig;
import org.apache.druid.query.aggregation.AggregatorAdapters;
+import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.filter.Filter;
@@ -204,9 +205,7 @@ public class VectorGroupByEngine
return adapter.canVectorize(filter, query.getVirtualColumns(), false)
&& canVectorizeDimensions(inspector, query.getDimensions())
&& VirtualColumns.shouldVectorize(query, query.getVirtualColumns(),
adapter)
- && query.getAggregatorSpecs()
- .stream()
- .allMatch(aggregatorFactory ->
aggregatorFactory.canVectorize(inspector));
+ && canVectorizeAggregators(inspector, query.getAggregatorSpecs());
}
private static boolean canVectorizeDimensions(
@@ -214,35 +213,45 @@ public class VectorGroupByEngine
final List<DimensionSpec> dimensions
)
{
- return dimensions
- .stream()
- .allMatch(
- dimension -> {
- if (!dimension.canVectorize()) {
- return false;
- }
+ for (DimensionSpec dimension : dimensions) {
+ if (!dimension.canVectorize()) {
+ return false;
+ }
- if (dimension.mustDecorate()) {
- // group by on multi value dimensions are not currently
supported
- // DimensionSpecs that decorate may turn singly-valued columns
into multi-valued selectors.
- // To be safe, we must return false here.
- return false;
- }
+ if (dimension.mustDecorate()) {
+ // group by on multi value dimensions are not currently supported
+ // DimensionSpecs that decorate may turn singly-valued columns into
multi-valued selectors.
+ // To be safe, we must return false here.
+ return false;
+ }
- if (!dimension.getOutputType().isPrimitive()) {
- // group by on arrays and complex types is not currently
supported in the vector processing engine
- return false;
- }
+ if (!dimension.getOutputType().isPrimitive()) {
+ // group by on arrays and complex types is not currently supported in
the vector processing engine
+ return false;
+ }
- // Now check column capabilities.
- final ColumnCapabilities columnCapabilities =
inspector.getColumnCapabilities(dimension.getDimension());
- // null here currently means the column does not exist, nil
columns can be vectorized
- if (columnCapabilities == null) {
- return true;
- }
- // must be single valued
- return columnCapabilities.hasMultipleValues().isFalse();
- });
+ // Now check column capabilities.
+ final ColumnCapabilities columnCapabilities =
inspector.getColumnCapabilities(dimension.getDimension());
+ if (columnCapabilities != null &&
columnCapabilities.hasMultipleValues().isMaybeTrue()) {
+ // null here currently means the column does not exist, nil columns
can be vectorized
+ // multi-value columns implicit unnest is not currently supported in
the vector processing engine
+ return false;
+ }
+ }
+ return true;
+ }
+
+ public static boolean canVectorizeAggregators(
+ final ColumnInspector inspector,
+ final List<AggregatorFactory> aggregatorFactories
+ )
+ {
+ for (AggregatorFactory aggregatorFactory : aggregatorFactories) {
+ if (!aggregatorFactory.canVectorize(inspector)) {
+ return false;
+ }
+ }
+ return true;
}
@VisibleForTesting
diff --git
a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java
b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java
index c5e83b84e87..d8369c8c6da 100644
---
a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java
+++
b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java
@@ -37,6 +37,7 @@ import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.query.aggregation.AggregatorAdapters;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.filter.Filter;
+import org.apache.druid.query.groupby.epinephelinae.vector.VectorGroupByEngine;
import org.apache.druid.query.vector.VectorCursorGranularizer;
import org.apache.druid.segment.ColumnInspector;
import org.apache.druid.segment.SegmentMissingException;
@@ -103,7 +104,7 @@ public class TimeseriesQueryEngine
final boolean doVectorize = query.context().getVectorize().shouldVectorize(
adapter.canVectorize(filter, query.getVirtualColumns(), descending)
&& VirtualColumns.shouldVectorize(query, query.getVirtualColumns(),
adapter)
- && query.getAggregatorSpecs().stream().allMatch(aggregatorFactory ->
aggregatorFactory.canVectorize(inspector))
+ && VectorGroupByEngine.canVectorizeAggregators(inspector,
query.getAggregatorSpecs())
);
final Sequence<Result<TimeseriesResultValue>> result;
diff --git
a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
index 79354a4fa2a..a32a85d16c7 100644
--- a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
+++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
@@ -266,7 +266,12 @@ public class VirtualColumns implements Cacheable
public boolean canVectorize(ColumnInspector columnInspector)
{
final ColumnInspector inspector = wrapInspector(columnInspector);
- return virtualColumns.stream().allMatch(virtualColumn ->
virtualColumn.canVectorize(inspector));
+ for (VirtualColumn virtualColumn : virtualColumns) {
+ if (!virtualColumn.canVectorize(inspector)) {
+ return false;
+ }
+ }
+ return true;
}
/**
diff --git
a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java
b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java
index d3869bd9ef5..7faf837db1c 100644
---
a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java
+++
b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java
@@ -25,6 +25,7 @@ import com.google.common.collect.ImmutableList;
import com.google.common.collect.Sets;
import com.google.common.primitives.Doubles;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
+import org.apache.druid.error.DruidException;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.RE;
@@ -915,13 +916,11 @@ public abstract class
CompressedNestedDataComplexColumn<TStringDictionary extend
);
// we should check this someday soon, but for now just read it to push
the buffer position ahead
int flags = dataBuffer.getInt();
- Preconditions.checkState(
- flags == DictionaryEncodedColumnPartSerde.NO_FLAGS,
- StringUtils.format(
- "Unrecognized bits set in space reserved for future flags for
field column [%s]",
- field
- )
- );
+ if (flags != DictionaryEncodedColumnPartSerde.NO_FLAGS) {
+ throw DruidException.defensive(
+ "Unrecognized bits set in space reserved for future flags for
field column [%s]", field
+ );
+ }
final Supplier<FixedIndexed<Integer>> localDictionarySupplier =
FixedIndexed.read(
dataBuffer,
diff --git
a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java
b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java
index 59c7da7fd21..68e1da96756 100644
---
a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java
+++
b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java
@@ -123,6 +123,6 @@ public abstract class NestedCommonFormatColumnSerializer
implements GenericColum
*/
public static String getInternalFileName(String fileNameBase, String field)
{
- return StringUtils.format("%s.%s", fileNameBase, field);
+ return fileNameBase + "." + field;
}
}
diff --git
a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java
b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java
index efa31a13a3f..3d39b31bbbc 100644
---
a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java
+++
b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java
@@ -27,7 +27,6 @@ import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.RE;
-import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter;
@@ -182,7 +181,7 @@ public class NestedDataColumnSerializerV4 implements
GenericColumnSerializer<Str
doubleDictionaryWriter.open();
rawWriter = new CompressedVariableSizedBlobColumnSerializer(
- getInternalFileName(name, RAW_FILE_NAME),
+ NestedCommonFormatColumnSerializer.getInternalFileName(name,
RAW_FILE_NAME),
segmentWriteOutMedium,
indexSpec.getJsonCompression() != null ?
indexSpec.getJsonCompression() : CompressionStrategy.LZ4
);
@@ -390,14 +389,9 @@ public class NestedDataColumnSerializerV4 implements
GenericColumnSerializer<Str
private void writeInternal(FileSmoosher smoosher, Serializer serializer,
String fileName) throws IOException
{
- final String internalName = getInternalFileName(name, fileName);
+ final String internalName =
NestedCommonFormatColumnSerializer.getInternalFileName(name, fileName);
try (SmooshedWriter smooshChannel =
smoosher.addWithSmooshedWriter(internalName, serializer.getSerializedSize())) {
serializer.writeTo(smooshChannel, smoosher);
}
}
-
- public static String getInternalFileName(String fileNameBase, String field)
- {
- return StringUtils.format("%s.%s", fileNameBase, field);
- }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]