jihoonson closed pull request #5965: [Backport] Fix topN lexicographic sort URL: https://github.com/apache/incubator-druid/pull/5965
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java b/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java index 14ce9694b78..dcb4b0519ab 100644 --- a/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/AggregateTopNMetricFirstAlgorithm.java @@ -26,8 +26,8 @@ import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.AggregatorUtil; import io.druid.query.aggregation.PostAggregator; -import io.druid.segment.Capabilities; import io.druid.segment.Cursor; +import io.druid.segment.StorageAdapter; import javax.annotation.Nullable; import java.nio.ByteBuffer; @@ -39,17 +39,17 @@ */ public class AggregateTopNMetricFirstAlgorithm implements TopNAlgorithm<int[], TopNParams> { - private final Capabilities capabilities; + private final StorageAdapter storageAdapter; private final TopNQuery query; private final NonBlockingPool<ByteBuffer> bufferPool; public AggregateTopNMetricFirstAlgorithm( - Capabilities capabilities, + StorageAdapter storageAdapter, TopNQuery query, NonBlockingPool<ByteBuffer> bufferPool ) { - this.capabilities = capabilities; + this.storageAdapter = storageAdapter; this.query = query; this.bufferPool = bufferPool; } @@ -91,7 +91,7 @@ public void run( .build(); final TopNResultBuilder singleMetricResultBuilder = BaseTopNAlgorithm.makeResultBuilder(params, singleMetricQuery); - PooledTopNAlgorithm singleMetricAlgo = new PooledTopNAlgorithm(capabilities, singleMetricQuery, bufferPool); + PooledTopNAlgorithm singleMetricAlgo = new PooledTopNAlgorithm(storageAdapter, singleMetricQuery, bufferPool); PooledTopNAlgorithm.PooledTopNParams singleMetricParam = null; int[] dimValSelector = null; try { @@ -110,7 +110,7 @@ public void run( singleMetricAlgo.cleanup(singleMetricParam); } - PooledTopNAlgorithm allMetricAlgo = new PooledTopNAlgorithm(capabilities, query, bufferPool); + PooledTopNAlgorithm allMetricAlgo = new PooledTopNAlgorithm(storageAdapter, query, bufferPool); PooledTopNAlgorithm.PooledTopNParams allMetricsParam = null; try { // Run topN for all metrics for top N dimension values diff --git a/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java index c6ec6b71e86..a058ddc1a7c 100644 --- a/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/BaseTopNAlgorithm.java @@ -19,6 +19,7 @@ package io.druid.query.topn; +import com.google.common.annotations.VisibleForTesting; import io.druid.java.util.common.IAE; import io.druid.java.util.common.Pair; import io.druid.query.aggregation.Aggregator; @@ -29,6 +30,7 @@ import io.druid.segment.Cursor; import io.druid.segment.DimensionSelector; import io.druid.segment.IdLookup; +import io.druid.segment.StorageAdapter; import javax.annotation.Nullable; import java.util.Arrays; @@ -62,11 +64,11 @@ return aggregators; } - protected final Capabilities capabilities; + protected final StorageAdapter storageAdapter; - protected BaseTopNAlgorithm(Capabilities capabilities) + protected BaseTopNAlgorithm(StorageAdapter storageAdapter) { - this.capabilities = capabilities; + this.storageAdapter = storageAdapter; } @Override @@ -134,7 +136,8 @@ private void runWithCardinalityKnown( * This function currently handles TopNs on long and float columns, which do not provide cardinality or an ID lookup. * When cardinality is unknown, process everything in one pass. * Existing implementations of makeDimValSelector() require cardinality as well, so the DimValSelector is not used. - * @param params TopN parameters from run() + * + * @param params TopN parameters from run() * @param resultBuilder Result builder from run() */ private void runWithCardinalityUnknown( @@ -162,9 +165,9 @@ private void runWithCardinalityUnknown( /** * Skip invalid value, calculate length to have enough valid value to process or hit the end. * - * @param dimValSelector the dim value selector which record value is valid or invalid. - * @param numProcessed the start position to process - * @param numToProcess the number of valid value to process + * @param dimValSelector the dim value selector which record value is valid or invalid. + * @param numProcessed the start position to process + * @param numToProcess the number of valid value to process * * @return the length between which have enough valid value to process or hit the end. */ @@ -206,9 +209,14 @@ protected abstract void closeAggregators( Aggregator[][] expansionAggs; int cardinality; - public AggregatorArrayProvider(DimensionSelector dimSelector, TopNQuery query, int cardinality, Capabilities capabilities) + public AggregatorArrayProvider( + DimensionSelector dimSelector, + TopNQuery query, + int cardinality, + StorageAdapter storageAdapter + ) { - super(dimSelector, query, capabilities); + super(dimSelector, query, storageAdapter); this.expansionAggs = new Aggregator[cardinality][]; this.cardinality = cardinality; @@ -236,17 +244,17 @@ public AggregatorArrayProvider(DimensionSelector dimSelector, TopNQuery query, i private final IdLookup idLookup; private final TopNQuery query; - private final Capabilities capabilities; + private final StorageAdapter storageAdapter; public BaseArrayProvider( DimensionSelector dimSelector, TopNQuery query, - Capabilities capabilities + StorageAdapter storageAdapter ) { this.idLookup = dimSelector.idLookup(); this.query = query; - this.capabilities = capabilities; + this.storageAdapter = storageAdapter; previousStop = null; ignoreAfterThreshold = false; @@ -261,6 +269,7 @@ public BaseArrayProvider( @Override public void skipTo(String previousStop) { + Capabilities capabilities = storageAdapter.getCapabilities(); if (capabilities.dimensionValuesSorted()) { this.previousStop = previousStop; } @@ -284,7 +293,8 @@ public void keepOnlyN(int n) keepOnlyN = n; } - protected Pair<Integer, Integer> computeStartEnd(int cardinality) + @VisibleForTesting + public Pair<Integer, Integer> computeStartEnd(int cardinality) { int startIndex = ignoreFirstN; @@ -305,7 +315,9 @@ public void keepOnlyN(int n) int endIndex = Math.min(ignoreFirstN + keepOnlyN, cardinality); - if (ignoreAfterThreshold && query.getDimensionsFilter() == null) { + if (ignoreAfterThreshold && + query.getDimensionsFilter() == null && + query.getIntervals().stream().anyMatch(interval -> interval.contains(storageAdapter.getInterval()))) { endIndex = Math.min(endIndex, startIndex + query.getThreshold()); } diff --git a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java index 06cc5df1b3e..47373721c18 100644 --- a/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/DimExtractionTopNAlgorithm.java @@ -23,24 +23,25 @@ import io.druid.query.ColumnSelectorPlus; import io.druid.query.aggregation.Aggregator; import io.druid.query.topn.types.TopNColumnSelectorStrategy; -import io.druid.segment.Capabilities; import io.druid.segment.Cursor; +import io.druid.segment.StorageAdapter; import java.util.Map; /** * This has to be its own strategy because the pooled topn algorithm assumes each index is unique, and cannot handle multiple index numerals referencing the same dimension value. */ -public class DimExtractionTopNAlgorithm extends BaseTopNAlgorithm<Aggregator[][], Map<Comparable, Aggregator[]>, TopNParams> +public class DimExtractionTopNAlgorithm + extends BaseTopNAlgorithm<Aggregator[][], Map<Comparable, Aggregator[]>, TopNParams> { private final TopNQuery query; public DimExtractionTopNAlgorithm( - Capabilities capabilities, + StorageAdapter storageAdapter, TopNQuery query ) { - super(capabilities); + super(storageAdapter); this.query = query; } @@ -65,7 +66,7 @@ public TopNParams makeInitParams( throw new UnsupportedOperationException("Cannot operate on a dimension with unknown cardinality"); } ColumnSelectorPlus<TopNColumnSelectorStrategy> selectorPlus = params.getSelectorPlus(); - return selectorPlus.getColumnSelectorStrategy().getDimExtractionRowSelector(query, params, capabilities); + return selectorPlus.getColumnSelectorStrategy().getDimExtractionRowSelector(query, params, storageAdapter); } @Override diff --git a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java index d1bbf954d95..c191f4a6d7a 100644 --- a/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/PooledTopNAlgorithm.java @@ -33,10 +33,10 @@ import io.druid.query.monomorphicprocessing.SpecializationService; import io.druid.query.monomorphicprocessing.SpecializationState; import io.druid.query.monomorphicprocessing.StringRuntimeShape; -import io.druid.segment.Capabilities; import io.druid.segment.Cursor; import io.druid.segment.DimensionSelector; import io.druid.segment.FilteredOffset; +import io.druid.segment.StorageAdapter; import io.druid.segment.column.ValueType; import io.druid.segment.data.IndexedInts; import io.druid.segment.data.Offset; @@ -64,7 +64,9 @@ private static boolean specializeHistoricalSingleValueDimSelector1SimpleDoubleAggPooledTopN = !Boolean.getBoolean("dontSpecializeHistoricalSingleValueDimSelector1SimpleDoubleAggPooledTopN"); - /** See TopNQueryRunnerTest */ + /** + * See TopNQueryRunnerTest + */ @VisibleForTesting static void setSpecializeGeneric1AggPooledTopN(boolean value) { @@ -116,6 +118,7 @@ long scanAndAggregate( } private static final List<ScanAndAggregate> specializedScanAndAggregateImplementations = new ArrayList<>(); + static { computeSpecializedScanAndAggregateImplementations(); } @@ -197,12 +200,12 @@ private static void computeSpecializedScanAndAggregateImplementations() private static final int AGG_UNROLL_COUNT = 8; // Must be able to fit loop below public PooledTopNAlgorithm( - Capabilities capabilities, + StorageAdapter storageAdapter, TopNQuery query, NonBlockingPool<ByteBuffer> bufferPool ) { - super(capabilities); + super(storageAdapter); this.query = query; this.bufferPool = bufferPool; } @@ -226,7 +229,7 @@ public PooledTopNParams makeInitParams( final TopNMetricSpecBuilder<int[]> arrayProvider = new BaseArrayProvider<int[]>( dimSelector, query, - capabilities + storageAdapter ) { private final int[] positions = new int[cardinality]; diff --git a/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java b/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java index 6b5bed59a72..791c6544bfc 100644 --- a/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java +++ b/processing/src/main/java/io/druid/query/topn/TimeExtractionTopNAlgorithm.java @@ -20,11 +20,11 @@ package io.druid.query.topn; import com.google.common.collect.Maps; -import io.druid.query.aggregation.Aggregator; import io.druid.query.ColumnSelectorPlus; -import io.druid.segment.Capabilities; +import io.druid.query.aggregation.Aggregator; import io.druid.segment.Cursor; import io.druid.segment.DimensionSelector; +import io.druid.segment.StorageAdapter; import java.util.Map; @@ -33,9 +33,9 @@ public static final int[] EMPTY_INTS = new int[]{}; private final TopNQuery query; - public TimeExtractionTopNAlgorithm(Capabilities capabilities, TopNQuery query) + public TimeExtractionTopNAlgorithm(StorageAdapter storageAdapter, TopNQuery query) { - super(capabilities); + super(storageAdapter); this.query = query; } diff --git a/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java b/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java index a43f6b5abe0..bc573b9dea9 100644 --- a/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java +++ b/processing/src/main/java/io/druid/query/topn/TopNQueryEngine.java @@ -30,7 +30,6 @@ import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.Filter; -import io.druid.segment.Capabilities; import io.druid.segment.Cursor; import io.druid.segment.SegmentMissingException; import io.druid.segment.StorageAdapter; @@ -109,7 +108,6 @@ private TopNMapFn getMapFn( final @Nullable TopNQueryMetrics queryMetrics ) { - final Capabilities capabilities = adapter.getCapabilities(); final String dimension = query.getDimensionSpec().getDimension(); final int cardinality = adapter.getDimensionCardinality(dimension); if (queryMetrics != null) { @@ -137,19 +135,19 @@ private TopNMapFn getMapFn( ) { // A special TimeExtractionTopNAlgorithm is required, since DimExtractionTopNAlgorithm // currently relies on the dimension cardinality to support lexicographic sorting - topNAlgorithm = new TimeExtractionTopNAlgorithm(capabilities, query); + topNAlgorithm = new TimeExtractionTopNAlgorithm(adapter, query); } else if (selector.isHasExtractionFn()) { - topNAlgorithm = new DimExtractionTopNAlgorithm(capabilities, query); + topNAlgorithm = new DimExtractionTopNAlgorithm(adapter, query); } else if (columnCapabilities != null && !(columnCapabilities.getType() == ValueType.STRING - && columnCapabilities.isDictionaryEncoded())) { + && columnCapabilities.isDictionaryEncoded())) { // Use DimExtraction for non-Strings and for non-dictionary-encoded Strings. - topNAlgorithm = new DimExtractionTopNAlgorithm(capabilities, query); + topNAlgorithm = new DimExtractionTopNAlgorithm(adapter, query); } else if (selector.isAggregateAllMetrics()) { - topNAlgorithm = new PooledTopNAlgorithm(capabilities, query, bufferPool); + topNAlgorithm = new PooledTopNAlgorithm(adapter, query, bufferPool); } else if (selector.isAggregateTopNMetricFirst() || query.getContextBoolean("doAggregateTopNMetricFirst", false)) { - topNAlgorithm = new AggregateTopNMetricFirstAlgorithm(capabilities, query, bufferPool); + topNAlgorithm = new AggregateTopNMetricFirstAlgorithm(adapter, query, bufferPool); } else { - topNAlgorithm = new PooledTopNAlgorithm(capabilities, query, bufferPool); + topNAlgorithm = new PooledTopNAlgorithm(adapter, query, bufferPool); } if (queryMetrics != null) { queryMetrics.algorithm(topNAlgorithm); @@ -162,7 +160,9 @@ public static boolean canApplyExtractionInPost(TopNQuery query) { return query.getDimensionSpec() != null && query.getDimensionSpec().getExtractionFn() != null - && ExtractionFn.ExtractionType.ONE_TO_ONE.equals(query.getDimensionSpec().getExtractionFn().getExtractionType()) + && ExtractionFn.ExtractionType.ONE_TO_ONE.equals(query.getDimensionSpec() + .getExtractionFn() + .getExtractionType()) && query.getTopNMetricSpec().canBeOptimizedUnordered(); } } diff --git a/processing/src/main/java/io/druid/query/topn/types/NumericTopNColumnSelectorStrategy.java b/processing/src/main/java/io/druid/query/topn/types/NumericTopNColumnSelectorStrategy.java index 6a8202dbfc6..24084c1e3be 100644 --- a/processing/src/main/java/io/druid/query/topn/types/NumericTopNColumnSelectorStrategy.java +++ b/processing/src/main/java/io/druid/query/topn/types/NumericTopNColumnSelectorStrategy.java @@ -28,8 +28,8 @@ import io.druid.segment.BaseDoubleColumnValueSelector; import io.druid.segment.BaseFloatColumnValueSelector; import io.druid.segment.BaseLongColumnValueSelector; -import io.druid.segment.Capabilities; import io.druid.segment.Cursor; +import io.druid.segment.StorageAdapter; import io.druid.segment.column.ValueType; import it.unimi.dsi.fastutil.ints.Int2ObjectMap; import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; @@ -51,7 +51,7 @@ public int getCardinality(ValueSelectorType selector) @Override public Aggregator[][] getDimExtractionRowSelector( - TopNQuery query, TopNParams params, Capabilities capabilities + TopNQuery query, TopNParams params, StorageAdapter storageAdapter ) { return null; diff --git a/processing/src/main/java/io/druid/query/topn/types/StringTopNColumnSelectorStrategy.java b/processing/src/main/java/io/druid/query/topn/types/StringTopNColumnSelectorStrategy.java index 26bcfc05dfd..724c4f09780 100644 --- a/processing/src/main/java/io/druid/query/topn/types/StringTopNColumnSelectorStrategy.java +++ b/processing/src/main/java/io/druid/query/topn/types/StringTopNColumnSelectorStrategy.java @@ -26,9 +26,9 @@ import io.druid.query.topn.TopNParams; import io.druid.query.topn.TopNQuery; import io.druid.query.topn.TopNResultBuilder; -import io.druid.segment.Capabilities; import io.druid.segment.Cursor; import io.druid.segment.DimensionSelector; +import io.druid.segment.StorageAdapter; import io.druid.segment.column.ValueType; import io.druid.segment.data.IndexedInts; @@ -50,7 +50,7 @@ public ValueType getValueType() } @Override - public Aggregator[][] getDimExtractionRowSelector(TopNQuery query, TopNParams params, Capabilities capabilities) + public Aggregator[][] getDimExtractionRowSelector(TopNQuery query, TopNParams params, StorageAdapter storageAdapter) { if (params.getCardinality() < 0) { throw new UnsupportedOperationException("Cannot operate on a dimension with unknown cardinality"); @@ -64,7 +64,7 @@ public ValueType getValueType() (DimensionSelector) params.getSelectorPlus().getSelector(), query, params.getCardinality(), - capabilities + storageAdapter ); return provider.build(); diff --git a/processing/src/main/java/io/druid/query/topn/types/TopNColumnSelectorStrategy.java b/processing/src/main/java/io/druid/query/topn/types/TopNColumnSelectorStrategy.java index d720a0269ec..a8f5d327339 100644 --- a/processing/src/main/java/io/druid/query/topn/types/TopNColumnSelectorStrategy.java +++ b/processing/src/main/java/io/druid/query/topn/types/TopNColumnSelectorStrategy.java @@ -25,8 +25,8 @@ import io.druid.query.topn.TopNParams; import io.druid.query.topn.TopNQuery; import io.druid.query.topn.TopNResultBuilder; -import io.druid.segment.Capabilities; import io.druid.segment.Cursor; +import io.druid.segment.StorageAdapter; import io.druid.segment.column.ValueType; import javax.annotation.Nullable; @@ -51,12 +51,14 @@ * * A dimension type that does not have integer values should return null. * - * @param query The TopN query being served - * @param params Parameters for the TopN query being served - * @param capabilities Object indicating if dimension values are sorted + * @param query The TopN query being served + * @param params Parameters for the TopN query being served + * @param storageAdapter Column storage adapter, to provide information about the column that can be used for + * query optimization, e.g. whether dimension values are sorted or not + * * @return an Aggregator[][] for integer-valued dimensions, null otherwise */ - Aggregator[][] getDimExtractionRowSelector(TopNQuery query, TopNParams params, Capabilities capabilities); + Aggregator[][] getDimExtractionRowSelector(TopNQuery query, TopNParams params, StorageAdapter storageAdapter); /** * Used by DimExtractionTopNAlgorithm. @@ -73,21 +75,22 @@ * * Iterate through the cursor, reading the current row from a dimension value selector, and for each row value: * 1. Retrieve the Aggregator[] for the row value from rowSelector (fast integer lookup) or from - * aggregatesStore (slower map). + * aggregatesStore (slower map). * * 2. If the rowSelector and/or aggregatesStore did not have an entry for a particular row value, - * this function should retrieve the current Aggregator[] using BaseTopNAlgorithm.makeAggregators() and the - * provided cursor and query, storing them in rowSelector and aggregatesStore + * this function should retrieve the current Aggregator[] using BaseTopNAlgorithm.makeAggregators() and the + * provided cursor and query, storing them in rowSelector and aggregatesStore * * 3. Call aggregate() on each of the aggregators. * * If a dimension type doesn't have integer values, it should ignore rowSelector and use the aggregatesStore map only. * - * @param query The TopN query being served. - * @param selector Dimension value selector - * @param cursor Cursor for the segment being queried - * @param rowSelector Integer lookup containing aggregators + * @param query The TopN query being served. + * @param selector Dimension value selector + * @param cursor Cursor for the segment being queried + * @param rowSelector Integer lookup containing aggregators * @param aggregatesStore Map containing aggregators + * * @return the number of processed rows (after postFilters are applied inside the cursor being processed) */ long dimExtractionScanAndAggregate( @@ -104,9 +107,9 @@ long dimExtractionScanAndAggregate( * Read entries from the aggregates store, adding the keys and associated values to the resultBuilder, applying the * valueTransformer to the keys if present * - * @param aggregatesStore Map created by makeDimExtractionAggregateStore() + * @param aggregatesStore Map created by makeDimExtractionAggregateStore() * @param valueTransformer Converts keys to different types, if null no conversion is needed - * @param resultBuilder TopN result builder + * @param resultBuilder TopN result builder */ void updateDimExtractionResults( DimExtractionAggregateStoreType aggregatesStore, diff --git a/processing/src/main/java/io/druid/segment/Capabilities.java b/processing/src/main/java/io/druid/segment/Capabilities.java index e2f981b7494..7c5bcbf371e 100644 --- a/processing/src/main/java/io/druid/segment/Capabilities.java +++ b/processing/src/main/java/io/druid/segment/Capabilities.java @@ -21,6 +21,7 @@ /** */ + public class Capabilities { private final boolean dimensionValuesSorted; @@ -37,6 +38,10 @@ private Capabilities( this.dimensionValuesSorted = dimensionValuesSorted; } + /** + * Is dimension value dictionary sorted? + * @return + */ public boolean dimensionValuesSorted() { return dimensionValuesSorted; diff --git a/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java index 58f175adbce..5c846b68c60 100644 --- a/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/io/druid/segment/DimensionHandlerUtils.java @@ -192,7 +192,7 @@ public static ColumnValueSelector getColumnValueSelectorFromDimensionSpec( } } - // When determining the capabilites of a column during query processing, this function + // When determining the capabilities of a column during query processing, this function // adjusts the capabilities for columns that cannot be handled as-is to manageable defaults // (e.g., treating missing columns as empty String columns) private static ColumnCapabilities getEffectiveCapabilities( diff --git a/processing/src/main/java/io/druid/segment/QueryableIndexColumnSelectorFactory.java b/processing/src/main/java/io/druid/segment/QueryableIndexColumnSelectorFactory.java index 2f0d3122c47..48400ca634e 100644 --- a/processing/src/main/java/io/druid/segment/QueryableIndexColumnSelectorFactory.java +++ b/processing/src/main/java/io/druid/segment/QueryableIndexColumnSelectorFactory.java @@ -146,6 +146,6 @@ public ColumnCapabilities getColumnCapabilities(String columnName) return virtualColumns.getColumnCapabilities(columnName); } - return QueryableIndexStorageAdapter.getColumnCapabilites(index, columnName); + return QueryableIndexStorageAdapter.getColumnCapabilities(index, columnName); } } diff --git a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java index 4845434e65b..ed11ee0c1a1 100644 --- a/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/QueryableIndexStorageAdapter.java @@ -165,7 +165,7 @@ public Capabilities getCapabilities() @Nullable public ColumnCapabilities getColumnCapabilities(String column) { - return getColumnCapabilites(index, column); + return getColumnCapabilities(index, column); } @Override @@ -314,7 +314,7 @@ public DateTime getMaxIngestedEventTime() } @Nullable - static ColumnCapabilities getColumnCapabilites(ColumnSelector index, String columnName) + static ColumnCapabilities getColumnCapabilities(ColumnSelector index, String columnName) { Column columnObj = index.getColumn(columnName); if (columnObj == null) { diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java index a752c5d4425..bf342315e94 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndex.java @@ -281,7 +281,8 @@ protected IncrementalIndex( for (DimensionSchema dimSchema : dimensionsSpec.getDimensions()) { ValueType type = TYPE_MAP.get(dimSchema.getValueType()); String dimName = dimSchema.getName(); - ColumnCapabilitiesImpl capabilities = makeCapabilitesFromValueType(type); + ColumnCapabilitiesImpl capabilities = makeCapabilitiesFromValueType(type); + if (dimSchema.getTypeName().equals(DimensionSchema.SPATIAL_TYPE_NAME)) { capabilities.setHasSpatialIndexes(true); } else { @@ -720,7 +721,7 @@ public Integer getDimensionIndex(String dimension) } } - private ColumnCapabilitiesImpl makeCapabilitesFromValueType(ValueType type) + private ColumnCapabilitiesImpl makeCapabilitiesFromValueType(ValueType type) { ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl(); capabilities.setDictionaryEncoded(type == ValueType.STRING); diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 325ff4f8902..223721da0c6 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -142,6 +142,7 @@ public Comparable getMaxValue(String column) return indexer.getMaxValue(); } + @Override public Capabilities getCapabilities() { diff --git a/processing/src/test/java/io/druid/query/topn/PooledTopNAlgorithmTest.java b/processing/src/test/java/io/druid/query/topn/PooledTopNAlgorithmTest.java index cc687f23f9d..afbe12d7d31 100644 --- a/processing/src/test/java/io/druid/query/topn/PooledTopNAlgorithmTest.java +++ b/processing/src/test/java/io/druid/query/topn/PooledTopNAlgorithmTest.java @@ -20,7 +20,7 @@ package io.druid.query.topn; import io.druid.collections.ResourceHolder; -import io.druid.segment.Capabilities; +import io.druid.segment.StorageAdapter; import org.easymock.EasyMock; import org.junit.Test; @@ -32,14 +32,14 @@ @Test public void testCleanupWithNullParams() { - PooledTopNAlgorithm pooledTopNAlgorithm = new PooledTopNAlgorithm(Capabilities.builder().build(), null, null); + PooledTopNAlgorithm pooledTopNAlgorithm = new PooledTopNAlgorithm(EasyMock.mock(StorageAdapter.class), null, null); pooledTopNAlgorithm.cleanup(null); } @Test public void cleanup() throws IOException { - PooledTopNAlgorithm pooledTopNAlgorithm = new PooledTopNAlgorithm(Capabilities.builder().build(), null, null); + PooledTopNAlgorithm pooledTopNAlgorithm = new PooledTopNAlgorithm(EasyMock.mock(StorageAdapter.class), null, null); PooledTopNAlgorithm.PooledTopNParams params = EasyMock.createMock(PooledTopNAlgorithm.PooledTopNParams.class); ResourceHolder<ByteBuffer> resourceHolder = EasyMock.createMock(ResourceHolder.class); EasyMock.expect(params.getResultsBufHolder()).andReturn(resourceHolder).times(1); diff --git a/processing/src/test/java/io/druid/query/topn/TopNMetricSpecOptimizationsTest.java b/processing/src/test/java/io/druid/query/topn/TopNMetricSpecOptimizationsTest.java new file mode 100644 index 00000000000..14305ea775a --- /dev/null +++ b/processing/src/test/java/io/druid/query/topn/TopNMetricSpecOptimizationsTest.java @@ -0,0 +1,480 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.topn; + +import com.google.common.base.Predicate; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import io.druid.java.util.common.DateTimes; +import io.druid.java.util.common.Intervals; +import io.druid.java.util.common.Pair; +import io.druid.java.util.common.granularity.Granularity; +import io.druid.java.util.common.guava.Sequence; +import io.druid.query.QueryMetrics; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.DoubleMaxAggregatorFactory; +import io.druid.query.aggregation.DoubleMinAggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.filter.Filter; +import io.druid.query.filter.ValueMatcher; +import io.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import io.druid.segment.Capabilities; +import io.druid.segment.Cursor; +import io.druid.segment.DimensionSelector; +import io.druid.segment.IdLookup; +import io.druid.segment.Metadata; +import io.druid.segment.StorageAdapter; +import io.druid.segment.VirtualColumns; +import io.druid.segment.column.ColumnCapabilities; +import io.druid.segment.data.Indexed; +import io.druid.segment.data.IndexedInts; +import org.joda.time.DateTime; +import org.joda.time.Interval; +import org.junit.Assert; +import org.junit.Test; + +import javax.annotation.Nullable; +import java.util.Arrays; + +import static io.druid.query.QueryRunnerTestHelper.addRowsIndexConstant; +import static io.druid.query.QueryRunnerTestHelper.allGran; +import static io.druid.query.QueryRunnerTestHelper.commonDoubleAggregators; +import static io.druid.query.QueryRunnerTestHelper.dataSource; +import static io.druid.query.QueryRunnerTestHelper.indexMetric; +import static io.druid.query.QueryRunnerTestHelper.marketDimension; +import static io.druid.query.QueryRunnerTestHelper.qualityDimension; + +public class TopNMetricSpecOptimizationsTest +{ + @Test + public void testShouldOptimizeLexicographic() + { + // query interval is greater than segment interval, no filters, can ignoreAfterThreshold + int cardinality = 1234; + int threshold = 4; + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .dimension(marketDimension) + .metric(indexMetric) + .threshold(threshold) + .intervals("2018-05-30T00:00:00Z/2018-05-31T00:00:00Z") + .aggregators( + Lists.<AggregatorFactory>newArrayList( + Iterables.concat( + commonDoubleAggregators, + Lists.newArrayList( + new DoubleMaxAggregatorFactory("maxIndex", "index"), + new DoubleMinAggregatorFactory("minIndex", "index") + ) + ) + ) + ) + .postAggregators(Arrays.<PostAggregator>asList(addRowsIndexConstant)) + .build(); + + StorageAdapter adapter = + makeFakeStorageAdapter("2018-05-30T00:00:00Z", "2018-05-30T01:00:00Z", cardinality); + DimensionSelector dimSelector = makeFakeDimSelector(cardinality); + + BaseTopNAlgorithm.AggregatorArrayProvider arrayProviderToTest = new BaseTopNAlgorithm.AggregatorArrayProvider( + dimSelector, + query, + cardinality, + adapter + ); + + arrayProviderToTest.ignoreAfterThreshold(); + Pair<Integer, Integer> thePair = arrayProviderToTest.computeStartEnd(cardinality); + Assert.assertEquals(new Integer(0), thePair.lhs); + Assert.assertEquals(new Integer(threshold), thePair.rhs); + } + + @Test + public void testAlsoShouldOptimizeLexicographic() + { + // query interval is same as segment interval, no filters, can ignoreAfterThreshold + int cardinality = 1234; + int threshold = 4; + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .dimension(marketDimension) + .metric(indexMetric) + .threshold(threshold) + .intervals("2018-05-30T00:00:00Z/2018-05-30T01:00:00Z") + .aggregators( + Lists.<AggregatorFactory>newArrayList( + Iterables.concat( + commonDoubleAggregators, + Lists.newArrayList( + new DoubleMaxAggregatorFactory("maxIndex", "index"), + new DoubleMinAggregatorFactory("minIndex", "index") + ) + ) + ) + ) + .postAggregators(Arrays.<PostAggregator>asList(addRowsIndexConstant)) + .build(); + + StorageAdapter adapter = + makeFakeStorageAdapter("2018-05-30T00:00:00Z", "2018-05-30T01:00:00Z", cardinality); + DimensionSelector dimSelector = makeFakeDimSelector(cardinality); + + + BaseTopNAlgorithm.AggregatorArrayProvider arrayProviderToTest = new BaseTopNAlgorithm.AggregatorArrayProvider( + dimSelector, + query, + cardinality, + adapter + ); + + arrayProviderToTest.ignoreAfterThreshold(); + Pair<Integer, Integer> thePair = arrayProviderToTest.computeStartEnd(cardinality); + Assert.assertEquals(new Integer(0), thePair.lhs); + Assert.assertEquals(new Integer(threshold), thePair.rhs); + } + + @Test + public void testShouldNotOptimizeLexicographic() + { + // query interval is smaller than segment interval, no filters, can ignoreAfterThreshold + int cardinality = 1234; + int threshold = 4; + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .dimension(marketDimension) + .metric(indexMetric) + .threshold(threshold) + .intervals("2018-05-30T00:00:00Z/2018-05-30T01:00:00Z") + .aggregators( + Lists.<AggregatorFactory>newArrayList( + Iterables.concat( + commonDoubleAggregators, + Lists.newArrayList( + new DoubleMaxAggregatorFactory("maxIndex", "index"), + new DoubleMinAggregatorFactory("minIndex", "index") + ) + ) + ) + ) + .postAggregators(Arrays.<PostAggregator>asList(addRowsIndexConstant)) + .build(); + + StorageAdapter adapter = + makeFakeStorageAdapter("2018-05-30T00:00:00Z", "2018-05-31T00:00:00Z", cardinality); + DimensionSelector dimSelector = makeFakeDimSelector(cardinality); + + + BaseTopNAlgorithm.AggregatorArrayProvider arrayProviderToTest = new BaseTopNAlgorithm.AggregatorArrayProvider( + dimSelector, + query, + cardinality, + adapter + ); + + arrayProviderToTest.ignoreAfterThreshold(); + Pair<Integer, Integer> thePair = arrayProviderToTest.computeStartEnd(cardinality); + Assert.assertEquals(new Integer(0), thePair.lhs); + Assert.assertEquals(new Integer(cardinality), thePair.rhs); + } + + @Test + public void testAlsoShouldNotOptimizeLexicographic() + { + // query interval is larger than segment interval, but has filters, can ignoreAfterThreshold + int cardinality = 1234; + int threshold = 4; + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .dimension(marketDimension) + .filters(qualityDimension, "entertainment") + .metric(indexMetric) + .threshold(threshold) + .intervals("2018-05-30T00:00:00Z/2018-05-31T00:00:00Z") + .aggregators( + Lists.<AggregatorFactory>newArrayList( + Iterables.concat( + commonDoubleAggregators, + Lists.newArrayList( + new DoubleMaxAggregatorFactory("maxIndex", "index"), + new DoubleMinAggregatorFactory("minIndex", "index") + ) + ) + ) + ) + .postAggregators(Arrays.<PostAggregator>asList(addRowsIndexConstant)) + .build(); + + StorageAdapter adapter = + makeFakeStorageAdapter("2018-05-30T00:00:00Z", "2018-05-30T01:00:00Z", cardinality); + DimensionSelector dimSelector = makeFakeDimSelector(cardinality); + + BaseTopNAlgorithm.AggregatorArrayProvider arrayProviderToTest = new BaseTopNAlgorithm.AggregatorArrayProvider( + dimSelector, + query, + cardinality, + adapter + ); + + arrayProviderToTest.ignoreAfterThreshold(); + Pair<Integer, Integer> thePair = arrayProviderToTest.computeStartEnd(cardinality); + Assert.assertEquals(new Integer(0), thePair.lhs); + Assert.assertEquals(new Integer(cardinality), thePair.rhs); + } + + @Test + public void testAgainShouldNotOptimizeLexicographic() + { + // query interval is larger than segment interval, no filters, can NOT ignoreAfterThreshold + int cardinality = 1234; + int threshold = 4; + TopNQuery query = new TopNQueryBuilder() + .dataSource(dataSource) + .granularity(allGran) + .dimension(marketDimension) + .metric(indexMetric) + .threshold(threshold) + .intervals("2018-05-30T00:00:00Z/2018-05-31T00:00:00Z") + .aggregators( + Lists.<AggregatorFactory>newArrayList( + Iterables.concat( + commonDoubleAggregators, + Lists.newArrayList( + new DoubleMaxAggregatorFactory("maxIndex", "index"), + new DoubleMinAggregatorFactory("minIndex", "index") + ) + ) + ) + ) + .postAggregators(Arrays.<PostAggregator>asList(addRowsIndexConstant)) + .build(); + + + StorageAdapter adapter = + makeFakeStorageAdapter("2018-05-30T00:00:00Z", "2018-05-30T01:00:00Z", cardinality); + + DimensionSelector dimSelector = makeFakeDimSelector(cardinality); + + BaseTopNAlgorithm.AggregatorArrayProvider arrayProviderToTest = new BaseTopNAlgorithm.AggregatorArrayProvider( + dimSelector, + query, + cardinality, + adapter + ); + + Pair<Integer, Integer> thePair = arrayProviderToTest.computeStartEnd(cardinality); + Assert.assertEquals(new Integer(0), thePair.lhs); + Assert.assertEquals(new Integer(cardinality), thePair.rhs); + } + + private StorageAdapter makeFakeStorageAdapter(String start, String end, int cardinality) + { + StorageAdapter adapter = new StorageAdapter() + { + @Override + public Interval getInterval() + { + return Intervals.of(start + "/" + end); + } + + @Override + public int getDimensionCardinality(String column) + { + return cardinality; + } + + @Override + public DateTime getMinTime() + { + return DateTimes.of(start); + } + + + @Override + public DateTime getMaxTime() + { + return DateTimes.of(end); + } + + // stubs below this line not important for tests + @Override + public String getSegmentIdentifier() + { + return null; + } + + + @Override + public Indexed<String> getAvailableDimensions() + { + return null; + } + + @Override + public Iterable<String> getAvailableMetrics() + { + return null; + } + + @Nullable + @Override + public Comparable getMinValue(String column) + { + return null; + } + + @Nullable + @Override + public Comparable getMaxValue(String column) + { + return null; + } + + @Override + public Capabilities getCapabilities() + { + return Capabilities.builder().dimensionValuesSorted(true).build(); + } + + @Nullable + @Override + public ColumnCapabilities getColumnCapabilities(String column) + { + return null; + } + + @Nullable + @Override + public String getColumnTypeName(String column) + { + return null; + } + + @Override + public int getNumRows() + { + return 0; + } + + @Override + public DateTime getMaxIngestedEventTime() + { + return null; + } + + @Override + public Metadata getMetadata() + { + return null; + } + + @Override + public Sequence<Cursor> makeCursors( + @Nullable Filter filter, + Interval interval, + VirtualColumns virtualColumns, + Granularity gran, + boolean descending, + @Nullable QueryMetrics<?> queryMetrics + ) + { + return null; + } + }; + + return adapter; + } + + private DimensionSelector makeFakeDimSelector(int cardinality) + { + + DimensionSelector dimSelector = new DimensionSelector() + { + @Override + public int getValueCardinality() + { + return cardinality; + } + + // stubs below this line not important for tests + @Override + public IndexedInts getRow() + { + return null; + } + + @Override + public ValueMatcher makeValueMatcher(@Nullable String value) + { + return null; + } + + @Override + public ValueMatcher makeValueMatcher(Predicate<String> predicate) + { + return null; + } + + @Nullable + @Override + public String lookupName(int id) + { + return null; + } + + @Override + public boolean nameLookupPossibleInAdvance() + { + return false; + } + + @Nullable + @Override + public IdLookup idLookup() + { + return null; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } + + @Nullable + @Override + public Object getObject() + { + return null; + } + + @Override + public Class classOfObject() + { + return null; + } + }; + + return dimSelector; + } +} ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@druid.apache.org For additional commands, e-mail: dev-h...@druid.apache.org