jasonk000 commented on code in PR #18885: URL: https://github.com/apache/druid/pull/18885#discussion_r2666346336
########## extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramCountPostAggregator.java: ########## @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.primitives.Longs; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnType; + +import java.util.Comparator; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +/** + * Post-aggregator that returns the total count of observations in a SpectatorHistogram. + * This is the sum of all bucket counts. 
+ */ +public class SpectatorHistogramCountPostAggregator implements PostAggregator +{ + private final String name; + private final PostAggregator field; + + public static final String TYPE_NAME = "countSpectatorHistogram"; + + @JsonCreator + public SpectatorHistogramCountPostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field + ) + { + this.name = Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @Override + public ColumnType getType(ColumnInspector signature) + { + return ColumnType.LONG; + } + + @JsonProperty + public PostAggregator getField() + { + return field; + } + + @Override + public Object compute(final Map<String, Object> combinedAggregators) + { + final SpectatorHistogram sketch = (SpectatorHistogram) field.compute(combinedAggregators); + if (sketch == null) { + return null; + } + return sketch.getSum(); + } + + @Override + public Comparator<Long> getComparator() + { + return Longs::compare; + } + + @Override + public Set<String> getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + "}"; + } + + @Override + public byte[] getCacheKey() + { + return new CacheKeyBuilder( + PostAggregatorIds.SPECTATOR_HISTOGRAM_SKETCH_COUNT_CACHE_TYPE_ID) + .appendCacheable(field) + .build(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + SpectatorHistogramCountPostAggregator that = (SpectatorHistogramCountPostAggregator) o; + return Objects.equals(name, that.name) && + Objects.equals(field, that.field); + } + + @Override + public int hashCode() + { + return Objects.hash(name, field); Review Comment: 
Nit: `Objects.hash(...)` allocates a varargs array on every call, which matters if this is used in a hot path. ########## docs/development/extensions-contrib/spectator-histogram.md: ########## @@ -272,6 +274,134 @@ array of percentiles. | field | A field reference pointing to the aggregated histogram. | yes | | percentiles | Non-empty array of decimal percentiles between 0.0 and 100.0 | yes | +#### Count Post-Aggregator + +This returns the total count of observations (data points) that were recorded in the histogram. +This is useful for understanding the population size without needing a separate count metric. + +```json +{ + "type": "countSpectatorHistogram", + "name": "<output name>", + "field": { + "type": "fieldAccess", + "fieldName": "<name of aggregated SpectatorHistogram>" + } +} +``` + +| Property | Description | Required? | +|----------|------------------------------------------------------------|-----------| +| type | This String should always be "countSpectatorHistogram" | yes | +| name | A String for the output (result) name of the calculation. | yes | +| field | A field reference pointing to the aggregated histogram. | yes | + +## SQL Functions + +In addition to the native query aggregators and post-aggregators, this extension provides SQL functions for easier use in Druid SQL queries. + +### SPECTATOR_COUNT + +Returns the total count of observations (data points) in a Spectator histogram. + +**Syntax:** +```sql +SPECTATOR_COUNT(expr) +``` + +**Arguments:** +- `expr`: A numeric column to aggregate into a histogram, or a pre-aggregated Spectator histogram column. + +**Returns:** BIGINT - the total number of observations. Review Comment: This may also return null if the histogram has zero samples (if I've read the later code correctly). The same applies to the other function docs. ########## docs/development/extensions-contrib/spectator-histogram.md: ########## @@ -272,6 +274,134 @@ array of percentiles. | field | A field reference pointing to the aggregated histogram. 
| yes | | percentiles | Non-empty array of decimal percentiles between 0.0 and 100.0 | yes | +#### Count Post-Aggregator + +This returns the total count of observations (data points) that were recorded in the histogram. +This is useful for understanding the population size without needing a separate count metric. Review Comment: Is an extra line-break intended here, or continuation of previous line? ########## extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/sql/SpectatorHistogramPercentileSqlAggregator.java: ########## @@ -0,0 +1,276 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.spectator.histogram.sql; + +import com.google.common.collect.ImmutableList; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperatorBinding; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.util.Optionality; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.spectator.histogram.SpectatorHistogramAggregatorFactory; +import org.apache.druid.spectator.histogram.SpectatorHistogramPercentilePostAggregator; +import org.apache.druid.spectator.histogram.SpectatorHistogramPercentilesPostAggregator; +import org.apache.druid.sql.calcite.aggregation.Aggregation; +import org.apache.druid.sql.calcite.aggregation.Aggregations; +import org.apache.druid.sql.calcite.aggregation.SqlAggregator; +import org.apache.druid.sql.calcite.expression.DruidExpression; +import org.apache.druid.sql.calcite.planner.PlannerContext; +import org.apache.druid.sql.calcite.rel.InputAccessor; +import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; + +import javax.annotation.Nullable; +import java.util.List; + +/** + * SQL aggregator for computing percentiles from SpectatorHistograms. + * <p> + * Supports two forms: + * - SPECTATOR_PERCENTILE(column, percentile) -> DOUBLE (single percentile) + * - SPECTATOR_PERCENTILE(column, ARRAY[p1, p2, ...]) -> DOUBLE ARRAY (multiple percentiles) + * <p> + * Percentile values should be in the range [0, 100] (e.g., 95 for p95). 
+ */ +public class SpectatorHistogramPercentileSqlAggregator implements SqlAggregator +{ + private static final SqlAggFunction FUNCTION_INSTANCE = new SpectatorHistogramPercentileSqlAggFunction(); + private static final String NAME = "SPECTATOR_PERCENTILE"; + + @Override + public SqlAggFunction calciteFunction() + { + return FUNCTION_INSTANCE; + } + + @Nullable + @Override + public Aggregation toDruidAggregation( + final PlannerContext plannerContext, + final VirtualColumnRegistry virtualColumnRegistry, + final String name, + final AggregateCall aggregateCall, + final InputAccessor inputAccessor, + final List<Aggregation> existingAggregations, + final boolean finalizeAggregations + ) + { + final DruidExpression input = Aggregations.toDruidExpressionForNumericAggregator( + plannerContext, + inputAccessor.getInputRowSignature(), + inputAccessor.getField(aggregateCall.getArgList().get(0)) + ); + if (input == null) { + return null; + } + + final RexNode percentileArg = inputAccessor.getField(aggregateCall.getArgList().get(1)); + + // Check if percentile argument is an array or a single value + if (percentileArg.isA(SqlKind.ARRAY_VALUE_CONSTRUCTOR)) { + return handleArrayPercentiles( + virtualColumnRegistry, + name, + input, + percentileArg, + existingAggregations + ); + } else if (percentileArg.isA(SqlKind.LITERAL)) { + return handleSinglePercentile( + virtualColumnRegistry, + name, + input, + percentileArg, + existingAggregations + ); + } + + // Cannot handle non-literal percentile arguments + return null; + } + + private Aggregation handleSinglePercentile( + final VirtualColumnRegistry virtualColumnRegistry, + final String name, + final DruidExpression input, + final RexNode percentileArg, + final List<Aggregation> existingAggregations + ) + { + final double percentile = ((Number) RexLiteral.value(percentileArg)).doubleValue(); + + final String histogramName = StringUtils.format("%s:agg", name); + + // Look for existing matching aggregatorFactory + final 
SpectatorHistogramAggregatorFactory existingFactory = + SpectatorHistogramSqlUtils.findMatchingAggregatorFactory( + virtualColumnRegistry, + input, + existingAggregations + ); + + if (existingFactory != null) { + return Aggregation.create( + ImmutableList.of(), + new SpectatorHistogramPercentilePostAggregator( + name, + new FieldAccessPostAggregator( + existingFactory.getName(), + existingFactory.getName() + ), + percentile + ) + ); + } + + // No existing match found. Create a new one. + final SpectatorHistogramAggregatorFactory aggregatorFactory = + SpectatorHistogramSqlUtils.createAggregatorFactory( + virtualColumnRegistry, + input, + histogramName + ); + + return Aggregation.create( + ImmutableList.of(aggregatorFactory), + new SpectatorHistogramPercentilePostAggregator( + name, + new FieldAccessPostAggregator(histogramName, histogramName), + percentile + ) + ); + } + + @Nullable + private Aggregation handleArrayPercentiles( + final VirtualColumnRegistry virtualColumnRegistry, + final String name, + final DruidExpression input, + final RexNode percentileArg, + final List<Aggregation> existingAggregations + ) + { + // Extract array elements + final List<RexNode> arrayElements = ((RexCall) percentileArg).getOperands(); + final double[] percentiles = new double[arrayElements.size()]; + + for (int i = 0; i < arrayElements.size(); i++) { + RexNode element = arrayElements.get(i); + if (!element.isA(SqlKind.LITERAL)) { + return null; // All array elements must be literals + } + percentiles[i] = ((Number) RexLiteral.value(element)).doubleValue(); + } + + final String histogramName = StringUtils.format("%s:agg", name); + + // Look for existing matching aggregatorFactory + final SpectatorHistogramAggregatorFactory existingFactory = + SpectatorHistogramSqlUtils.findMatchingAggregatorFactory( + virtualColumnRegistry, + input, + existingAggregations + ); + + if (existingFactory != null) { + return Aggregation.create( + ImmutableList.of(), + new 
SpectatorHistogramPercentilesPostAggregator( + name, + new FieldAccessPostAggregator( + existingFactory.getName(), + existingFactory.getName() + ), + percentiles + ) + ); + } + + // No existing match found. Create a new one. + final SpectatorHistogramAggregatorFactory aggregatorFactory = + SpectatorHistogramSqlUtils.createAggregatorFactory( + virtualColumnRegistry, + input, + histogramName + ); + + return Aggregation.create( + ImmutableList.of(aggregatorFactory), + new SpectatorHistogramPercentilesPostAggregator( + name, + new FieldAccessPostAggregator(histogramName, histogramName), + percentiles + ) + ); + } + + /** + * Return type inference that returns DOUBLE for single percentile value + * and DOUBLE ARRAY when an array of percentiles is provided. + */ + static class SpectatorHistogramPercentileReturnTypeInference implements SqlReturnTypeInference + { + @Override + public RelDataType inferReturnType(SqlOperatorBinding sqlOperatorBinding) + { + RelDataType secondArgType = sqlOperatorBinding.getOperandType(1); + if (secondArgType.getSqlTypeName() == SqlTypeName.ARRAY) { + // Return DOUBLE ARRAY when input is an array of percentiles + return sqlOperatorBinding.getTypeFactory().createArrayType( + sqlOperatorBinding.getTypeFactory().createSqlType(SqlTypeName.DOUBLE), + -1 + ); + } + // Return DOUBLE for single percentile value + return sqlOperatorBinding.getTypeFactory().createSqlType(SqlTypeName.DOUBLE); Review Comment: maybe needs a guard here that the input really is a double? ########## extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/sql/SpectatorHistogramPercentileSqlAggregator.java: ########## @@ -0,0 +1,276 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram.sql; + +import com.google.common.collect.ImmutableList; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperatorBinding; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.util.Optionality; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator; +import org.apache.druid.spectator.histogram.SpectatorHistogramAggregatorFactory; +import org.apache.druid.spectator.histogram.SpectatorHistogramPercentilePostAggregator; +import org.apache.druid.spectator.histogram.SpectatorHistogramPercentilesPostAggregator; +import org.apache.druid.sql.calcite.aggregation.Aggregation; +import org.apache.druid.sql.calcite.aggregation.Aggregations; +import org.apache.druid.sql.calcite.aggregation.SqlAggregator; +import org.apache.druid.sql.calcite.expression.DruidExpression; +import 
org.apache.druid.sql.calcite.planner.PlannerContext; +import org.apache.druid.sql.calcite.rel.InputAccessor; +import org.apache.druid.sql.calcite.rel.VirtualColumnRegistry; + +import javax.annotation.Nullable; +import java.util.List; + +/** + * SQL aggregator for computing percentiles from SpectatorHistograms. + * <p> + * Supports two forms: + * - SPECTATOR_PERCENTILE(column, percentile) -> DOUBLE (single percentile) + * - SPECTATOR_PERCENTILE(column, ARRAY[p1, p2, ...]) -> DOUBLE ARRAY (multiple percentiles) + * <p> + * Percentile values should be in the range [0, 100] (e.g., 95 for p95). + */ +public class SpectatorHistogramPercentileSqlAggregator implements SqlAggregator +{ + private static final SqlAggFunction FUNCTION_INSTANCE = new SpectatorHistogramPercentileSqlAggFunction(); + private static final String NAME = "SPECTATOR_PERCENTILE"; + + @Override + public SqlAggFunction calciteFunction() + { + return FUNCTION_INSTANCE; + } + + @Nullable + @Override + public Aggregation toDruidAggregation( + final PlannerContext plannerContext, + final VirtualColumnRegistry virtualColumnRegistry, + final String name, + final AggregateCall aggregateCall, + final InputAccessor inputAccessor, + final List<Aggregation> existingAggregations, + final boolean finalizeAggregations + ) + { + final DruidExpression input = Aggregations.toDruidExpressionForNumericAggregator( + plannerContext, + inputAccessor.getInputRowSignature(), + inputAccessor.getField(aggregateCall.getArgList().get(0)) + ); + if (input == null) { + return null; + } + + final RexNode percentileArg = inputAccessor.getField(aggregateCall.getArgList().get(1)); + + // Check if percentile argument is an array or a single value + if (percentileArg.isA(SqlKind.ARRAY_VALUE_CONSTRUCTOR)) { + return handleArrayPercentiles( + virtualColumnRegistry, + name, + input, + percentileArg, + existingAggregations + ); + } else if (percentileArg.isA(SqlKind.LITERAL)) { + return handleSinglePercentile( + virtualColumnRegistry, + name, 
+ input, + percentileArg, + existingAggregations + ); + } + + // Cannot handle non-literal percentile arguments + return null; + } + + private Aggregation handleSinglePercentile( + final VirtualColumnRegistry virtualColumnRegistry, + final String name, + final DruidExpression input, + final RexNode percentileArg, + final List<Aggregation> existingAggregations + ) + { + final double percentile = ((Number) RexLiteral.value(percentileArg)).doubleValue(); Review Comment: Generally, I'm not sure what happens if this is not a numeric literal (e.g. a string literal). I assume the worst case is that the query parse fails. I see `RexLiteral` has `numberValue()` and `valueMatchesType` that may be helpful. (The same applies to the array case.) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
