LakshSingla commented on code in PR #16068: URL: https://github.com/apache/druid/pull/16068#discussion_r1574136998
########## processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/DictionaryBuildingGroupByColumnSelectorStrategy.java: ########## @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.column; + +import it.unimi.dsi.fastutil.objects.Object2IntMap; +import org.apache.druid.error.DruidException; +import org.apache.druid.query.groupby.epinephelinae.DictionaryBuildingUtils; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.NullableTypeStrategy; + +import javax.annotation.concurrent.NotThreadSafe; +import java.util.List; + +/** + * Strategy for grouping dimensions which can have variable-width objects, and aren't backed by prebuilt dictionaries. It + * encapsulates the dictionary building logic, along with providing the implementations for dimension to dictionary id + * encoding-decoding. + * <p> + * This strategy can handle any dimension that can be addressed on a reverse-dictionary. Reverse dictionary uses + * a sorted map, rather than a hashmap. + * <p> + * This is the most expensive of all the strategies, and hence must be used only when other strategies aren't valid. 
+ */ +@NotThreadSafe +public class DictionaryBuildingGroupByColumnSelectorStrategy<DimensionType> + extends KeyMappingGroupByColumnSelectorStrategy<DimensionType> +{ + + /** + * Dictionary for mapping the dimension value to an index. i-th position in the dictionary holds the value represented + * by the dictionaryId "i". + * Therefore, if a value has a dictionary id "i", dictionary.get(i) = value + */ + private final List<DimensionType> dictionary; + + /** + * Reverse dictionary for faster lookup into the dictionary, and reusing pre-existing dictionary ids. + * <p> + * An entry of form (value, i) in the reverse dictionary represents that "value" is present at the i-th location in the + * {@link #dictionary}. + * Absence of mapping of a "value" (denoted by returning {@link GroupByColumnSelectorStrategy#GROUP_BY_MISSING_VALUE}) + * represents that the value is absent in the dictionary + */ + private final Object2IntMap<DimensionType> reverseDictionary; + + private DictionaryBuildingGroupByColumnSelectorStrategy( + DimensionToIdConverter<DimensionType> dimensionToIdConverter, + ColumnType columnType, + NullableTypeStrategy<DimensionType> nullableTypeStrategy, + DimensionType defaultValue, + IdToDimensionConverter<DimensionType> idToDimensionConverter, + List<DimensionType> dictionary, + Object2IntMap<DimensionType> reverseDictionary + ) + { + super(dimensionToIdConverter, columnType, nullableTypeStrategy, defaultValue, idToDimensionConverter); + this.dictionary = dictionary; + this.reverseDictionary = reverseDictionary; + } + + /** + * Creates an implementation of the strategy for the given type + */ + public static GroupByColumnSelectorStrategy forType(final ColumnType columnType) + { + if (columnType.equals(ColumnType.STRING)) { + // String types are handled specially because they can have multi-value dimensions + throw DruidException.defensive("Should use special variant which handles multi-value dimensions"); + } else if ( + // Defensive check, primitives should 
be using a faster fixed-width strategy + columnType.equals(ColumnType.DOUBLE) + || columnType.equals(ColumnType.FLOAT) + || columnType.equals(ColumnType.LONG)) { + throw DruidException.defensive("Could used a fixed width strategy"); + } + + // Catch-all for all other types, that can only have single-valued dimensions + return forArrayAndComplexTypes(columnType); + } + + /** + * Implemenatation of dictionary building strategy for types other than strings (since they can be multi-valued and need + * to be handled separately) and numeric primitives (since they can be handled by fixed-width strategy). + * This also means that we handle array and complex types here, which simplifies the generics a lot, as everything can be + * treated as Object in this class. + * <p> + * Also, there isn't any concept of multi-values here, therefore Dimension == DimensionHolderType == Object. We still + * homogenize rogue selectors which can return non-standard implementation of arrays (like Long[] for long arrays instead of + * Object[]) to what the callers would expect (i.e. Object[] in this case). + */ + private static GroupByColumnSelectorStrategy forArrayAndComplexTypes(final ColumnType columnType) + { + final List<Object> dictionary = DictionaryBuildingUtils.createDictionary(); + final Object2IntMap<Object> reverseDictionary = + DictionaryBuildingUtils.createReverseDictionary(columnType.getNullableStrategy()); Review Comment: Even though the generics aren't used while being called, they are still helping with static type checks in the implementing class. Also, the generics aren't used properly, since there isn't any mapping between the type strategy and the Java object that it requires. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@druid.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@druid.apache.org For additional commands, e-mail: commits-h...@druid.apache.org