LakshSingla commented on code in PR #16068: URL: https://github.com/apache/druid/pull/16068#discussion_r1533315580
########## processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/PrebuiltDictionaryStringGroupByColumnSelectorStrategy.java: ########## @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.groupby.epinephelinae.column; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.error.DruidException; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.DimensionSelector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.data.IndexedInts; + +import javax.annotation.Nullable; + +/** + * Implementation of {@link KeyMappingGroupByColumnSelectorStrategy} that relies on a prebuilt dictionary to map the + * dimension to the dictionaryId. It is more like a helper class, that handles the different ways that dictionaries can be + * provided for different types. Currently, it only handles String dimensions. Array dimensions are also backed by dictionaries, + * but not exposed via the ColumnValueSelector interface, hence this strategy cannot handle array dimensions. + */ +public class PrebuiltDictionaryStringGroupByColumnSelectorStrategy +{ + + /** + * Create the strategy for the provided column type + */ + public static GroupByColumnSelectorStrategy forType( + final ColumnType columnType, + final ColumnValueSelector columnValueSelector, + final ColumnCapabilities columnCapabilities + ) + { + if (columnType.equals(ColumnType.STRING)) { + return forString(columnValueSelector, columnCapabilities); + } else { + // This will change with array columns + throw DruidException.defensive("Only string columns expose prebuilt dictionaries"); + } + } + + private static GroupByColumnSelectorStrategy forString( + final ColumnValueSelector columnValueSelector, + final ColumnCapabilities columnCapabilities + ) + { + return new KeyMappingGroupByColumnSelectorStrategy<>( + new StringDimensionToIdConverter(), + ColumnType.STRING, + ColumnType.STRING.getNullableStrategy(), + NullHandling.defaultStringValue(), + new StringIdToDimensionConverter((DimensionSelector) columnValueSelector, columnCapabilities) + ); + } + + /** + * Dimension to id converter for string dimensions and {@link DimensionSelector}, where the dictionaries are prebuilt. + * The callers must ensure that's the case by checking that {@link DimensionSelector#getValueCardinality()} is known + * and {@link DimensionSelector#nameLookupPossibleInAdvance()} is true. + */ + private static class StringDimensionToIdConverter implements DimensionToIdConverter<IndexedInts> + { + @Override + public MemoryEstimate<IndexedInts> getMultiValueHolder( + final ColumnValueSelector selector, + final IndexedInts reusableValue + ) + { + return new MemoryEstimate<>(((DimensionSelector) selector).getRow(), 0); + } + + @Override + public int multiValueSize(IndexedInts multiValueHolder) + { + return multiValueHolder.size(); + } + + @Override + public MemoryEstimate<Integer> getIndividualValueDictId(IndexedInts multiValueHolder, int index) + { + // dictId is already encoded in the indexedInt supplied by the column value selector + return new MemoryEstimate<>(multiValueHolder.get(index), 0); + } + } + + /** + * ID to dimension converter for {@link DimensionSelector} with prebuilt dictionary + */ + private static class StringIdToDimensionConverter implements IdToDimensionConverter<String> + { + + final DimensionSelector dimensionSelector; + + @Nullable + final ColumnCapabilities columnCapabilities; + + public StringIdToDimensionConverter( + final DimensionSelector dimensionSelector, + @Nullable final ColumnCapabilities columnCapabilities + ) + { + this.dimensionSelector = dimensionSelector; + this.columnCapabilities = columnCapabilities; + } + + @Override + public String idToKey(int id) + { + // Converting back to the value is as simple as looking up the value in the prebuilt dictionary + return dimensionSelector.lookupName(id); + } + + @Override + public boolean canCompareIds() + { + return columnCapabilities != null + && columnCapabilities.hasBitmapIndexes() + && (columnCapabilities.areDictionaryValuesSorted() + .and(columnCapabilities.areDictionaryValuesUnique())).isTrue(); Review Comment: Thanks for catching this. I was also confused about the condition, but then I chose to go ahead with the pre-existing code: https://github.com/apache/druid/blob/master/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/column/StringGroupByColumnSelectorStrategy.java#L165. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@druid.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@druid.apache.org For additional commands, e-mail: commits-h...@druid.apache.org