NicoK commented on a change in pull request #8887: [FLINK-12982][metrics] improve DescriptiveStatisticsHistogramStatistics performance URL: https://github.com/apache/flink/pull/8887#discussion_r314405857
########## File path: flink-runtime/src/main/java/org/apache/flink/runtime/metrics/DescriptiveStatisticsHistogramStatistics.java ########## @@ -19,53 +19,174 @@ import org.apache.flink.metrics.HistogramStatistics; +import org.apache.commons.math3.exception.MathIllegalArgumentException; import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; +import org.apache.commons.math3.stat.descriptive.UnivariateStatistic; +import org.apache.commons.math3.stat.descriptive.moment.SecondMoment; +import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation; +import org.apache.commons.math3.stat.descriptive.rank.Percentile; +import org.apache.commons.math3.stat.ranking.NaNStrategy; import java.util.Arrays; /** * DescriptiveStatistics histogram statistics implementation returned by {@link DescriptiveStatisticsHistogram}. - * The statistics class wraps a {@link DescriptiveStatistics} instance and forwards the method calls accordingly. + * + * <p>The statistics takes a point-in-time snapshot of a {@link DescriptiveStatistics} instance and + * allows optimised metrics retrieval from this. */ public class DescriptiveStatisticsHistogramStatistics extends HistogramStatistics { - private final DescriptiveStatistics descriptiveStatistics; + private final CommonMetricsSnapshot statisticsSummary = new CommonMetricsSnapshot(); public DescriptiveStatisticsHistogramStatistics(DescriptiveStatistics latencyHistogram) { - this.descriptiveStatistics = latencyHistogram; + latencyHistogram.apply(statisticsSummary); } @Override public double getQuantile(double quantile) { - return descriptiveStatistics.getPercentile(quantile * 100); + return statisticsSummary.getPercentile(quantile * 100); } @Override public long[] getValues() { - return Arrays.stream(descriptiveStatistics.getValues()).mapToLong(i -> (long) i).toArray(); + return Arrays.stream(statisticsSummary.getValues()).mapToLong(i -> (long) i).toArray(); } @Override public int size() { - return (int) descriptiveStatistics.getN(); + return (int) statisticsSummary.getCount(); } @Override public double getMean() { - return descriptiveStatistics.getMean(); + return statisticsSummary.getMean(); } @Override public double getStdDev() { - return descriptiveStatistics.getStandardDeviation(); + return statisticsSummary.getStandardDeviation(); } @Override public long getMax() { - return (long) descriptiveStatistics.getMax(); + return (long) statisticsSummary.getMax(); } @Override public long getMin() { - return (long) descriptiveStatistics.getMin(); + return (long) statisticsSummary.getMin(); + } + + /** + * Function to extract several commonly used metrics in an optimised way, i.e. with as few runs + * over the data / calculations as possible. + * + * <p>Note that calls to {@link #evaluate(double[])} or {@link #evaluate(double[], int, int)} + * will not return a value but instead populate this class so that further values can be + * retrieved from it. + */ + private static class CommonMetricsSnapshot implements UnivariateStatistic { + long count = 0; + double min = Double.NaN; + double max = Double.NaN; + double mean = Double.NaN; + double stddev = Double.NaN; + private Percentile percentilesImpl = new Percentile().withNaNStrategy(NaNStrategy.FIXED); Review comment: cannot be due to the `copy()` method ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services