abhishekagarwal87 commented on a change in pull request #10518: URL: https://github.com/apache/druid/pull/10518#discussion_r533118333
########## File path: processing/src/main/java/org/apache/druid/query/aggregation/GroupingAggregatorFactory.java ########## @@ -0,0 +1,282 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.apache.druid.annotations.EverythingIsNonnullByDefault; +import org.apache.druid.query.aggregation.constant.LongConstantAggregator; +import org.apache.druid.query.aggregation.constant.LongConstantBufferAggregator; +import org.apache.druid.query.aggregation.constant.LongConstantVectorAggregator; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.column.ValueType; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; +import org.apache.druid.utils.CollectionUtils; + +import javax.annotation.Nullable; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import 
java.util.Objects; +import java.util.Set; + +@EverythingIsNonnullByDefault +public class GroupingAggregatorFactory extends AggregatorFactory +{ + private static final Comparator<Long> VALUE_COMPARATOR = Long::compare; + private final String name; + private final List<String> groupings; + private final long value; + @Nullable + private final Set<String> keyDimensions; + + @JsonCreator + public GroupingAggregatorFactory( + @JsonProperty("name") String name, + @JsonProperty("groupings") List<String> groupings + ) + { + this(name, groupings, null); + } + + @VisibleForTesting + GroupingAggregatorFactory( + String name, + List<String> groupings, + @Nullable Set<String> keyDimensions + ) + { + Preconditions.checkNotNull(name, "Must have a valid, non-null aggregator name"); + this.name = name; + this.groupings = groupings; + this.keyDimensions = keyDimensions; + value = groupingId(groupings, keyDimensions); + } + + @Override + public Aggregator factorize(ColumnSelectorFactory metricFactory) + { + return new LongConstantAggregator(value); + } + + @Override + public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) + { + return new LongConstantBufferAggregator(value); + } + + @Override + public VectorAggregator factorizeVector(VectorColumnSelectorFactory selectorFactory) + { + return new LongConstantVectorAggregator(value); + } + + @Override + public boolean canVectorize(ColumnInspector columnInspector) + { + return true; + } + + /** + * Replace the param {@code keyDimensions} with the new set of key dimensions + */ + public GroupingAggregatorFactory withKeyDimensions(Set<String> newKeyDimensions) + { + return new GroupingAggregatorFactory(name, groupings, newKeyDimensions); + } + + @Override + public Comparator getComparator() + { + return VALUE_COMPARATOR; + } + + @JsonProperty + public List<String> getGroupings() + { + return groupings; + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + public long getValue() + { + 
return value; + } + + @Nullable + @Override + public Object combine(@Nullable Object lhs, @Nullable Object rhs) + { + if (null == lhs) { + return rhs; + } + return lhs; + } + + @Override + public AggregatorFactory getCombiningFactory() + { + return new GroupingAggregatorFactory(name, groupings, keyDimensions); + } + + @Override + public List<AggregatorFactory> getRequiredColumns() + { + return Collections.singletonList(new GroupingAggregatorFactory(name, groupings, keyDimensions)); + } + + @Override + public Object deserialize(Object object) + { + return object; + } + + @Nullable + @Override + public Object finalizeComputation(@Nullable Object object) + { + return object; + } + + @Override + public List<String> requiredFields() + { + // The aggregator doesn't need to read any fields. + return Collections.emptyList(); + } + + @Override + public ValueType getType() + { + return ValueType.LONG; + } + + @Override + public ValueType getFinalizedType() + { + return ValueType.LONG; + } + + @Override + public int getMaxIntermediateSize() + { + return Long.BYTES; + } + + @Override + public byte[] getCacheKey() + { + CacheKeyBuilder keyBuilder = new CacheKeyBuilder(AggregatorUtil.GROUPING_CACHE_TYPE_ID) + .appendStrings(groupings); + if (null != keyDimensions) { + keyBuilder.appendStrings(keyDimensions); + } + return keyBuilder.build(); + } + + /** + * Given the list of grouping dimensions, returns a long value where each bit at position X in the returned value + * corresponds to the dimension in groupings at same position X. X is the position relative to the right end. If + * keyDimensions contain the grouping dimension at position X, the bit is set to 1 at position X, otherwise it is + * set to 0. An example adapted from Microsoft SQL documentation Review comment: Good catch @jihoonson. The doc is fixed in the most recent version. I will change our implementation as well.
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
