clintropolis commented on code in PR #19460: URL: https://github.com/apache/druid/pull/19460#discussion_r3382234754
########## processing/src/main/java/org/apache/druid/segment/projections/ClusteringColumnSelectorFactory.java: ########## @@ -0,0 +1,566 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.projections; + +import org.apache.druid.error.DruidException; +import org.apache.druid.math.expr.ExprEval; +import org.apache.druid.math.expr.ExpressionType; +import org.apache.druid.query.dimension.DimensionSpec; +import org.apache.druid.query.filter.DruidPredicateFactory; +import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.ConstantExprEvalSelector; +import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.IdLookup; +import org.apache.druid.segment.RowIdSupplier; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnCapabilitiesImpl; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.segment.column.ValueType; +import 
org.apache.druid.segment.data.IndexedInts; + +import javax.annotation.Nullable; +import java.util.function.Supplier; + +/** + * {@link ColumnSelectorFactory} wrapper that intercepts requests for clustering columns and returns selectors + * carrying the group's constant value, while delegating all other column lookups to a wrapped factory. This is the + * mechanism by which a cluster group's clustering columns, which are NOT stored in the per-group column data since + * they're constant across the group, are made visible to query engines as if they were ordinary columns. + */ +public class ClusteringColumnSelectorFactory implements ColumnSelectorFactory +{ + private final RowSignature clusteringColumns; + private ColumnSelectorFactory delegate; + private Object[] clusteringValues; + // Bumped on every setDelegate(...) so per-call selector wrappers can detect group transitions and rebuild their + // cached inner state + private long generation; + + public ClusteringColumnSelectorFactory( + ColumnSelectorFactory delegate, + RowSignature clusteringColumns, + Object[] clusteringValues + ) + { + this.clusteringColumns = clusteringColumns; + setDelegate(delegate, clusteringValues); + } + + /** + * Update the underlying factory and the constant values for the current cluster group. Called by a multi-group + * concatenating cursor on each group transition. Selectors previously returned by this factory will, on their next + * invocation, observe the updated state; see the per-call indirection in the inner selector classes. + */ + public void setDelegate(ColumnSelectorFactory delegate, Object[] clusteringValues) Review Comment: generation isn't currently an index, though; it is just a unique identifier for the current group, and it only ever increases. Its use is similar to how we use `ReadableVectorInspector.getId`. 
Currently, on reset, the concatenating cursor just creates fresh delegate cursors instead of individually resetting all of the delegate cursors, so there isn't really anything that could be cached, since the delegate selector factories are tied to the delegate cursors (the concat cursor doesn't currently save the delegate cursors anywhere either, which it would need to do if we were trying to reset them). Since this is all entirely new code that mostly doesn't impact any existing code paths, I am mainly going for correct behavior in this PR and plan to continue iterating on these implementations in follow-up PRs. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
