github-advanced-security[bot] commented on code in PR #19460: URL: https://github.com/apache/druid/pull/19460#discussion_r3236888356
########## processing/src/main/java/org/apache/druid/segment/projections/ClusteringColumnSelectorFactory.java: ########## @@ -0,0 +1,566 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.projections; + +import org.apache.druid.error.DruidException; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.ConstantExprEvalSelector; +import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.IdLookup; +import org.apache.druid.segment.RowIdSupplier; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.data.IndexedInts; + +import javax.annotation.Nullable; +import java.util.function.Supplier; + +/** + * {@link ColumnSelectorFactory} wrapper that intercepts requests for clustering columns and returns selectors + * carrying the group's constant value, while delegating all other column lookups to a wrapped factory. This is the + * mechanism by which a cluster group's clustering columns, which are NOT stored in the per-group column data since + * they're constant across the group, are made visible to query engines as if they were ordinary columns. + */ +public class ClusteringColumnSelectorFactory implements ColumnSelectorFactory +{ + private final RowSignature clusteringColumns; + private ColumnSelectorFactory delegate; + private Object[] clusteringValues; + // Bumped on every setDelegate(...) so per-call selector wrappers can detect group transitions and rebuild their + // cached inner state + private long generation; + + public ClusteringColumnSelectorFactory( + ColumnSelectorFactory delegate, + RowSignature clusteringColumns, + Object[] clusteringValues + ) + { + this.clusteringColumns = clusteringColumns; + setDelegate(delegate, clusteringValues); + } + + /** + * Update the underlying factory and the constant values for the current cluster group. Called by a multi-group + * concatenating cursor on each group transition. Selectors previously returned by this factory will, on their next + * invocation, observe the updated state; see the per-call indirection in the inner selector classes. + */ + public void setDelegate(ColumnSelectorFactory delegate, Object[] clusteringValues) + { + if (clusteringValues == null || clusteringValues.length != clusteringColumns.size()) { + throw DruidException.defensive( + "clusteringValues length [%s] must match clusteringColumns size [%s]", + clusteringValues == null ? "null" : clusteringValues.length, + clusteringColumns.size() + ); + } + this.delegate = delegate; + this.clusteringValues = clusteringValues; + this.generation++; + } + + ColumnSelectorFactory getDelegate() + { + return delegate; + } + + long getGeneration() + { + return generation; + } + + @Override + public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) + { + final int idx = clusteringColumns.indexOf(dimensionSpec.getDimension()); + if (idx < 0) { + return new DelegatingDimensionSelector(this, dimensionSpec); + } + return new ClusteringDimensionSelector(this, idx, dimensionSpec); + } + + @Override + public ColumnValueSelector makeColumnValueSelector(String columnName) + { + final int idx = clusteringColumns.indexOf(columnName); + if (idx < 0) { + return new DelegatingColumnValueSelector(this, columnName); + } + return new ClusteringColumnValueSelector(this, idx, clusteringColumns.getColumnType(idx).orElseThrow()); + } + + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(String column) + { + final int idx = clusteringColumns.indexOf(column); + if (idx < 0) { + return delegate.getColumnCapabilities(column); + } + final ColumnType type = clusteringColumns.getColumnType(idx).orElseThrow(); + if (type.is(ValueType.STRING)) { + return ColumnCapabilitiesImpl.createSimpleSingleValueStringColumnCapabilities(); + } + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type); + } + + @Nullable + @Override + public RowIdSupplier getRowIdSupplier() + { + return delegate.getRowIdSupplier(); + } + + Object currentValue(int idx) + { + return clusteringValues[idx]; + } + + /** + * Dimension selector for a clustering column. Delegates the value lookup back to the parent factory each call so + * that group transitions (which mutate the parent's clustering values) are observed immediately. Internally + * decorates a {@link DimensionSelector#constant(String)} re-built when the underlying value changes. + */ + private static final class ClusteringDimensionSelector implements DimensionSelector + { + private final ClusteringColumnSelectorFactory parent; + private final int idx; + private final DimensionSpec spec; + private DimensionSelector cachedSelector; + private long cachedGeneration = -1; + + private ClusteringDimensionSelector(ClusteringColumnSelectorFactory parent, int idx, DimensionSpec spec) + { + this.parent = parent; + this.idx = idx; + this.spec = spec; + } + + private DimensionSelector currentSelector() + { + final long currentGeneration = parent.getGeneration(); + if (cachedGeneration == currentGeneration) { + return cachedSelector; + } + final Object raw = parent.currentValue(idx); + final String stringValue = raw == null ? null : String.valueOf(raw); + cachedSelector = DimensionSelector.constant(stringValue, spec.getExtractionFn()); Review Comment: ## CodeQL / Deprecated method or constructor invocation Invoking [DimensionSpec.getExtractionFn](1) should be avoided because it has been deprecated. [Show more details](https://github.com/apache/druid/security/code-scanning/11217) ########## processing/src/main/java/org/apache/druid/segment/projections/ClusteredValueGroupsBaseTableSchema.java: ########## @@ -0,0 +1,410 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.projections; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.Lists; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.query.OrderBy; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.segment.AggregateProjectionMetadata; +import org.apache.druid.segment.Metadata; +import org.apache.druid.segment.VirtualColumn; +import org.apache.druid.segment.VirtualColumns; +import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.utils.CollectionUtils; + +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; + +/** + * Top-level summary for a clustered base table whose groups are identified by discrete clustering-value tuples. Each + * tuple group is internally stored as a separate table without storing the cluster columns, which are pulled into this + * metadata. This is optimizing for use cases which typically only need to read from a single group via filters present + * on a query. Cluster groups nest inside as {@link #getClusterGroups()}; their column data live in the V10 segment + * file under dictionary-id-tuple prefixes ({@code __base$<id0>_<id1>...<idK>/<col>}), where the ids index into + * {@link #getClusteringDictionaries()}. + */ +public class ClusteredValueGroupsBaseTableSchema implements BaseTableProjectionSchema +{ + public static final String TYPE_NAME = "clustered-value-groups-base-table"; + + private final VirtualColumns virtualColumns; + private final List<String> columnNames; + private final AggregatorFactory[] aggregators; + private final List<OrderBy> ordering; + private final RowSignature clusteringColumns; + private final List<String> sharedColumns; + private final ClusteringDictionaries clusteringDictionaries; + private final List<TableClusterGroupSpec> clusterGroups; + + // computed + private final int timeColumnPosition; + private final Granularity effectiveGranularity; + + @JsonCreator + public ClusteredValueGroupsBaseTableSchema( + @JsonProperty("virtualColumns") VirtualColumns virtualColumns, + @JsonProperty("columns") List<String> columns, + @JsonProperty("aggregators") @Nullable AggregatorFactory[] aggregators, + @JsonProperty("ordering") List<OrderBy> ordering, + @JsonProperty("clusteringColumns") RowSignature clusteringColumns, + @JsonProperty("sharedColumns") @Nullable List<String> sharedColumns, + @JsonProperty("clusteringDictionaries") @Nullable ClusteringDictionaries clusteringDictionaries, + @JsonProperty("clusterGroups") @Nullable List<TableClusterGroupSpec> clusterGroups + ) + { + if (CollectionUtils.isNullOrEmpty(columns)) { + throw DruidException.defensive("clustered base table schema columns must not be null or empty"); + } + if (ordering == null) { + throw DruidException.defensive("clustered base table schema ordering must not be null"); + } + if (clusteringColumns == null || clusteringColumns.size() == 0) { + throw DruidException.defensive( + "clustered base table schema clusteringColumns must not be null or empty" + ); + } + if (ordering.size() < clusteringColumns.size()) { + throw DruidException.defensive( + "ordering size [%s] must be at least clusteringColumns size [%s] (clustering columns must form a prefix" + + " of the segment ordering)", + ordering.size(), + clusteringColumns.size() + ); + } + for (int i = 0; i < clusteringColumns.size(); i++) { + final String clusteringColumn = clusteringColumns.getColumnName(i); + if (!columns.contains(clusteringColumn)) { + throw DruidException.defensive( + "clusteringColumn [%s] must appear in columns of the clustered base table summary", + clusteringColumn + ); + } + final ColumnType type = clusteringColumns.getColumnType(i).orElse(null); + if (!Projections.isAllowedClusteringType(type)) { + throw DruidException.defensive( + "clustering column [%s] has unsupported type [%s]; allowed types are STRING, LONG, DOUBLE, FLOAT", + clusteringColumn, + type + ); + } + // Per-group ordering is derived by dropping this prefix; pruning + cursor concatenation rely on it. + final String orderingColumn = ordering.get(i).getColumnName(); + if (!clusteringColumn.equals(orderingColumn)) { + throw DruidException.defensive( + "clustering column at position [%s] is [%s] but the segment ordering at the same position is [%s];" + + " clustering columns must form a prefix of the segment ordering", + i, + clusteringColumn, + orderingColumn + ); + } + } + final List<String> resolvedSharedColumns = sharedColumns == null ? List.of() : sharedColumns; + for (String shared : resolvedSharedColumns) { + if (!columns.contains(shared)) { + throw DruidException.defensive( + "sharedColumn [%s] must appear in columns of the clustered base table summary", + shared + ); + } + } + this.virtualColumns = virtualColumns == null ? VirtualColumns.EMPTY : virtualColumns; + this.columnNames = columns; + this.aggregators = aggregators == null ? new AggregatorFactory[0] : aggregators; + this.ordering = ordering; + this.clusteringColumns = clusteringColumns; + this.sharedColumns = resolvedSharedColumns; + this.clusterGroups = clusterGroups == null ? List.of() : List.copyOf(clusterGroups); + this.clusteringDictionaries = clusteringDictionaries == null + ? ClusteringDictionaries.EMPTY + : clusteringDictionaries; + + int foundTimePosition = -1; + Granularity granularity = null; + for (int i = 0; i < ordering.size(); i++) { + OrderBy orderBy = ordering.get(i); + if (orderBy.getColumnName().equals(ColumnHolder.TIME_COLUMN_NAME)) { + foundTimePosition = i; + final VirtualColumn vc = this.virtualColumns.getVirtualColumn(Granularities.GRANULARITY_VIRTUAL_COLUMN_NAME); + if (vc != null) { + granularity = Granularities.fromVirtualColumn(vc); + } else { + granularity = Granularities.NONE; + } + } + } + if (granularity == null) { + throw DruidException.defensive( + "clustered base table doesn't have a [%s] column?", + ColumnHolder.TIME_COLUMN_NAME + ); + } + this.timeColumnPosition = foundTimePosition; + this.effectiveGranularity = granularity; + + // Specs always start unwired: there's a chicken-and-egg between the summary and its specs, resolved by + // deferring all summary-dependent state on the spec to setSummary, which we invoke here once the summary's + // own state is populated. + for (TableClusterGroupSpec spec : this.clusterGroups) { + spec.setSummary(this); + } + } + + @JsonIgnore + @Override + public List<String> getColumnNames() + { + List<String> columns = new ArrayList<>(columnNames.size() + aggregators.length); Review Comment: ## CodeQL / User-controlled data in arithmetic expression This arithmetic expression depends on a [user-provided value](1), potentially causing an overflow. This arithmetic expression depends on a [user-provided value](2), potentially causing an overflow. This arithmetic expression depends on a [user-provided value](3), potentially causing an overflow. [Show more details](https://github.com/apache/druid/security/code-scanning/11216) ########## processing/src/main/java/org/apache/druid/segment/QueryableIndexCursorFactory.java: ########## @@ -110,9 +143,328 @@ }; } + private CursorHolder makeClusteredCursorHolder(CursorBuildSpec spec, ClusteredValueGroupsBaseTableSchema clusterSummary) Review Comment: ## CodeQL / Useless parameter The parameter 'clusterSummary' is never used. [Show more details](https://github.com/apache/druid/security/code-scanning/11220) ########## processing/src/main/java/org/apache/druid/segment/projections/ClusteringVectorColumnSelectorFactory.java: ########## @@ -0,0 +1,631 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.projections; + +import org.apache.druid.error.DruidException; +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.segment.IdLookup; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.vector.ConstantVectorSelectors; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.ReadableVectorInspector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; + +/** + * Vectorized counterpart of {@link ClusteringColumnSelectorFactory}. Wraps a delegate + * {@link VectorColumnSelectorFactory} and intercepts requests for clustering columns, returning constant-typed + * vector selectors (via {@link ConstantVectorSelectors}). Other column requests pass through to delegating wrappers. + * + * The factory is mutable via {@link #setDelegate(VectorColumnSelectorFactory, Object[])}: a multi-group + * {@code ConcatenatingVectorCursor} swaps the underlying delegate + clustering values on each group transition. + * Selectors previously returned by this factory observe the new state on their next call thanks to a generation + * counter cache invalidation. + * + * For single-group dispatch, the factory is constructed once and {@code setDelegate} is never called; selectors' + * caches fill on first access and never invalidate. + */ +public class ClusteringVectorColumnSelectorFactory implements VectorColumnSelectorFactory +{ + private final RowSignature clusteringColumns; + private final int maxVectorSize; + private VectorColumnSelectorFactory delegate; + private Object[] clusteringValues; + // Bumped on every setDelegate(...) so per-call selector wrappers can detect group transitions and rebuild their + // cached inner state. + private long generation; + + /** + * Convenience overload that derives {@code maxVectorSize} from the supplied delegate. Used by single-group + * dispatch where the delegate is the per-group {@link VectorColumnSelectorFactory} from the cursor itself. + */ + public ClusteringVectorColumnSelectorFactory( + VectorColumnSelectorFactory delegate, + RowSignature clusteringColumns, + Object[] clusteringValues + ) + { + this(delegate, clusteringColumns, clusteringValues, delegate.getMaxVectorSize()); + } + + public ClusteringVectorColumnSelectorFactory( + VectorColumnSelectorFactory delegate, + RowSignature clusteringColumns, + Object[] clusteringValues, + int maxVectorSize + ) + { + this.clusteringColumns = clusteringColumns; + this.maxVectorSize = maxVectorSize; + setDelegate(delegate, clusteringValues); + } + + /** + * Update the underlying delegate and the constant clustering values for the current cluster group. Called by a + * multi-group {@code ConcatenatingVectorCursor} on each group transition. + */ + public void setDelegate(VectorColumnSelectorFactory delegate, Object[] clusteringValues) + { + if (clusteringValues == null || clusteringValues.length != clusteringColumns.size()) { + throw DruidException.defensive( + "clusteringValues length [%s] must match clusteringColumns size [%s]", + clusteringValues == null ? "null" : clusteringValues.length, + clusteringColumns.size() + ); + } + this.delegate = delegate; + this.clusteringValues = clusteringValues; + this.generation++; + } + + VectorColumnSelectorFactory getDelegate() + { + return delegate; + } + + long getGeneration() + { + return generation; + } + + Object currentValue(int idx) + { + return clusteringValues[idx]; + } + + @Override + public ReadableVectorInspector getReadableVectorInspector() + { + return delegate.getReadableVectorInspector(); + } + + @Override + public int getMaxVectorSize() + { + return maxVectorSize; + } + + @Override + public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(DimensionSpec dimensionSpec) + { + final int idx = clusteringColumns.indexOf(dimensionSpec.getDimension()); + if (idx < 0) { + return new DelegatingSingleValueDimensionVectorSelector(this, dimensionSpec); + } + return new ClusteringSingleValueDimensionVectorSelector(this, idx, dimensionSpec); + } + + @Override + public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(DimensionSpec dimensionSpec) + { + final int idx = clusteringColumns.indexOf(dimensionSpec.getDimension()); + if (idx < 0) { + return new DelegatingMultiValueDimensionVectorSelector(this, dimensionSpec); + } + // Clustering values are single-typed primitives. Multi-value requests on a clustering column shouldn't happen + // in practice; throw to surface caller bugs rather than silently misbehave. + throw DruidException.defensive( + "multi-value vector selector not supported for clustering column [" + dimensionSpec.getDimension() + "]" + ); + } + + @Override + public VectorValueSelector makeValueSelector(String column) + { + final int idx = clusteringColumns.indexOf(column); + if (idx < 0) { + return new DelegatingVectorValueSelector(this, column); + } + return new ClusteringVectorValueSelector(this, idx); + } + + @Override + public VectorObjectSelector makeObjectSelector(String column) + { + final int idx = clusteringColumns.indexOf(column); + if (idx < 0) { + return new DelegatingVectorObjectSelector(this, column); + } + return new ClusteringVectorObjectSelector(this, idx); + } + + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(String column) + { + final int idx = clusteringColumns.indexOf(column); + if (idx < 0) { + return delegate.getColumnCapabilities(column); + } + final ColumnType type = clusteringColumns.getColumnType(idx).orElseThrow(); + if (type.is(ValueType.STRING)) { + return ColumnCapabilitiesImpl.createSimpleSingleValueStringColumnCapabilities(); + } + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type); + } + + private static final class ClusteringSingleValueDimensionVectorSelector implements SingleValueDimensionVectorSelector + { + private final ClusteringVectorColumnSelectorFactory parent; + private final int idx; + private final DimensionSpec spec; + private long cachedGeneration = -1; + private SingleValueDimensionVectorSelector cachedInner; + + private ClusteringSingleValueDimensionVectorSelector( + ClusteringVectorColumnSelectorFactory parent, + int idx, + DimensionSpec spec + ) + { + this.parent = parent; + this.idx = idx; + this.spec = spec; + } + + private SingleValueDimensionVectorSelector currentInner() + { + final long currentGeneration = parent.getGeneration(); + if (cachedGeneration == currentGeneration) { + return cachedInner; + } + final Object raw = parent.currentValue(idx); + final String stringValue = raw == null ? null : String.valueOf(raw); + final String afterExtraction = + spec.getExtractionFn() == null ? stringValue : spec.getExtractionFn().apply(stringValue); Review Comment: ## CodeQL / Deprecated method or constructor invocation Invoking [DimensionSpec.getExtractionFn](1) should be avoided because it has been deprecated. [Show more details](https://github.com/apache/druid/security/code-scanning/11218) ########## processing/src/main/java/org/apache/druid/segment/projections/ClusteringVectorColumnSelectorFactory.java: ########## @@ -0,0 +1,631 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.projections; + +import org.apache.druid.error.DruidException; +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.segment.IdLookup; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.data.IndexedInts; +import org.apache.druid.segment.vector.ConstantVectorSelectors; +import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; +import org.apache.druid.segment.vector.ReadableVectorInspector; +import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.segment.vector.VectorObjectSelector; +import org.apache.druid.segment.vector.VectorValueSelector; + +import javax.annotation.Nullable; + +/** + * Vectorized counterpart of {@link ClusteringColumnSelectorFactory}. Wraps a delegate + * {@link VectorColumnSelectorFactory} and intercepts requests for clustering columns, returning constant-typed + * vector selectors (via {@link ConstantVectorSelectors}). Other column requests pass through to delegating wrappers. + * + * The factory is mutable via {@link #setDelegate(VectorColumnSelectorFactory, Object[])}: a multi-group + * {@code ConcatenatingVectorCursor} swaps the underlying delegate + clustering values on each group transition. + * Selectors previously returned by this factory observe the new state on their next call thanks to a generation + * counter cache invalidation. + * + * For single-group dispatch, the factory is constructed once and {@code setDelegate} is never called; selectors' + * caches fill on first access and never invalidate. + */ +public class ClusteringVectorColumnSelectorFactory implements VectorColumnSelectorFactory +{ + private final RowSignature clusteringColumns; + private final int maxVectorSize; + private VectorColumnSelectorFactory delegate; + private Object[] clusteringValues; + // Bumped on every setDelegate(...) so per-call selector wrappers can detect group transitions and rebuild their + // cached inner state. + private long generation; + + /** + * Convenience overload that derives {@code maxVectorSize} from the supplied delegate. Used by single-group + * dispatch where the delegate is the per-group {@link VectorColumnSelectorFactory} from the cursor itself. + */ + public ClusteringVectorColumnSelectorFactory( + VectorColumnSelectorFactory delegate, + RowSignature clusteringColumns, + Object[] clusteringValues + ) + { + this(delegate, clusteringColumns, clusteringValues, delegate.getMaxVectorSize()); + } + + public ClusteringVectorColumnSelectorFactory( + VectorColumnSelectorFactory delegate, + RowSignature clusteringColumns, + Object[] clusteringValues, + int maxVectorSize + ) + { + this.clusteringColumns = clusteringColumns; + this.maxVectorSize = maxVectorSize; + setDelegate(delegate, clusteringValues); + } + + /** + * Update the underlying delegate and the constant clustering values for the current cluster group. Called by a + * multi-group {@code ConcatenatingVectorCursor} on each group transition. + */ + public void setDelegate(VectorColumnSelectorFactory delegate, Object[] clusteringValues) + { + if (clusteringValues == null || clusteringValues.length != clusteringColumns.size()) { + throw DruidException.defensive( + "clusteringValues length [%s] must match clusteringColumns size [%s]", + clusteringValues == null ? "null" : clusteringValues.length, + clusteringColumns.size() + ); + } + this.delegate = delegate; + this.clusteringValues = clusteringValues; + this.generation++; + } + + VectorColumnSelectorFactory getDelegate() + { + return delegate; + } + + long getGeneration() + { + return generation; + } + + Object currentValue(int idx) + { + return clusteringValues[idx]; + } + + @Override + public ReadableVectorInspector getReadableVectorInspector() + { + return delegate.getReadableVectorInspector(); + } + + @Override + public int getMaxVectorSize() + { + return maxVectorSize; + } + + @Override + public SingleValueDimensionVectorSelector makeSingleValueDimensionSelector(DimensionSpec dimensionSpec) + { + final int idx = clusteringColumns.indexOf(dimensionSpec.getDimension()); + if (idx < 0) { + return new DelegatingSingleValueDimensionVectorSelector(this, dimensionSpec); + } + return new ClusteringSingleValueDimensionVectorSelector(this, idx, dimensionSpec); + } + + @Override + public MultiValueDimensionVectorSelector makeMultiValueDimensionSelector(DimensionSpec dimensionSpec) + { + final int idx = clusteringColumns.indexOf(dimensionSpec.getDimension()); + if (idx < 0) { + return new DelegatingMultiValueDimensionVectorSelector(this, dimensionSpec); + } + // Clustering values are single-typed primitives. Multi-value requests on a clustering column shouldn't happen + // in practice; throw to surface caller bugs rather than silently misbehave. + throw DruidException.defensive( + "multi-value vector selector not supported for clustering column [" + dimensionSpec.getDimension() + "]" + ); + } + + @Override + public VectorValueSelector makeValueSelector(String column) + { + final int idx = clusteringColumns.indexOf(column); + if (idx < 0) { + return new DelegatingVectorValueSelector(this, column); + } + return new ClusteringVectorValueSelector(this, idx); + } + + @Override + public VectorObjectSelector makeObjectSelector(String column) + { + final int idx = clusteringColumns.indexOf(column); + if (idx < 0) { + return new DelegatingVectorObjectSelector(this, column); + } + return new ClusteringVectorObjectSelector(this, idx); + } + + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(String column) + { + final int idx = clusteringColumns.indexOf(column); + if (idx < 0) { + return delegate.getColumnCapabilities(column); + } + final ColumnType type = clusteringColumns.getColumnType(idx).orElseThrow(); + if (type.is(ValueType.STRING)) { + return ColumnCapabilitiesImpl.createSimpleSingleValueStringColumnCapabilities(); + } + return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type); + } + + private static final class ClusteringSingleValueDimensionVectorSelector implements SingleValueDimensionVectorSelector + { + private final ClusteringVectorColumnSelectorFactory parent; + private final int idx; + private final DimensionSpec spec; + private long cachedGeneration = -1; + private SingleValueDimensionVectorSelector cachedInner; + + private ClusteringSingleValueDimensionVectorSelector( + ClusteringVectorColumnSelectorFactory parent, + int idx, + DimensionSpec spec + ) + { + this.parent = parent; + this.idx = idx; + this.spec = spec; + } + + private SingleValueDimensionVectorSelector currentInner() + { + final long currentGeneration = parent.getGeneration(); + if (cachedGeneration == currentGeneration) { + return cachedInner; + } + final Object raw = parent.currentValue(idx); + final String stringValue = raw == null ? null : String.valueOf(raw); + final String afterExtraction = + spec.getExtractionFn() == null ? stringValue : spec.getExtractionFn().apply(stringValue); Review Comment: ## CodeQL / Deprecated method or constructor invocation Invoking [DimensionSpec.getExtractionFn](1) should be avoided because it has been deprecated. [Show more details](https://github.com/apache/druid/security/code-scanning/11219) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
