NicoK commented on a change in pull request #8887: [FLINK-12982][metrics]
improve DescriptiveStatisticsHistogramStatistics performance
URL: https://github.com/apache/flink/pull/8887#discussion_r314405857
##########
File path:
flink-runtime/src/main/java/org/apache/flink/runtime/metrics/DescriptiveStatisticsHistogramStatistics.java
##########
@@ -19,53 +19,174 @@
import org.apache.flink.metrics.HistogramStatistics;
+import org.apache.commons.math3.exception.MathIllegalArgumentException;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+import org.apache.commons.math3.stat.descriptive.UnivariateStatistic;
+import org.apache.commons.math3.stat.descriptive.moment.SecondMoment;
+import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
+import org.apache.commons.math3.stat.descriptive.rank.Percentile;
+import org.apache.commons.math3.stat.ranking.NaNStrategy;
import java.util.Arrays;
/**
* DescriptiveStatistics histogram statistics implementation returned by
{@link DescriptiveStatisticsHistogram}.
- * The statistics class wraps a {@link DescriptiveStatistics} instance and
forwards the method calls accordingly.
+ *
+ * <p>The statistics takes a point-in-time snapshot of a {@link
DescriptiveStatistics} instance and
+ * allows optimised metrics retrieval from this.
*/
public class DescriptiveStatisticsHistogramStatistics extends
HistogramStatistics {
- private final DescriptiveStatistics descriptiveStatistics;
+ private final CommonMetricsSnapshot statisticsSummary = new
CommonMetricsSnapshot();
public DescriptiveStatisticsHistogramStatistics(DescriptiveStatistics
latencyHistogram) {
- this.descriptiveStatistics = latencyHistogram;
+ latencyHistogram.apply(statisticsSummary);
}
@Override
public double getQuantile(double quantile) {
- return descriptiveStatistics.getPercentile(quantile * 100);
+ return statisticsSummary.getPercentile(quantile * 100);
}
@Override
public long[] getValues() {
- return
Arrays.stream(descriptiveStatistics.getValues()).mapToLong(i -> (long)
i).toArray();
+ return Arrays.stream(statisticsSummary.getValues()).mapToLong(i
-> (long) i).toArray();
}
@Override
public int size() {
- return (int) descriptiveStatistics.getN();
+ return (int) statisticsSummary.getCount();
}
@Override
public double getMean() {
- return descriptiveStatistics.getMean();
+ return statisticsSummary.getMean();
}
@Override
public double getStdDev() {
- return descriptiveStatistics.getStandardDeviation();
+ return statisticsSummary.getStandardDeviation();
}
@Override
public long getMax() {
- return (long) descriptiveStatistics.getMax();
+ return (long) statisticsSummary.getMax();
}
@Override
public long getMin() {
- return (long) descriptiveStatistics.getMin();
+ return (long) statisticsSummary.getMin();
+ }
+
+ /**
+ * Function to extract several commonly used metrics in an optimised
way, i.e. with as few runs
+ * over the data / calculations as possible.
+ *
+ * <p>Note that calls to {@link #evaluate(double[])} or {@link
#evaluate(double[], int, int)}
+ * will not return a value but instead populate this class so that
further values can be
+ * retrieved from it.
+ */
+ private static class CommonMetricsSnapshot implements
UnivariateStatistic {
+ long count = 0;
+ double min = Double.NaN;
+ double max = Double.NaN;
+ double mean = Double.NaN;
+ double stddev = Double.NaN;
+ private Percentile percentilesImpl = new
Percentile().withNaNStrategy(NaNStrategy.FIXED);
Review comment:
cannot be due to the `copy()` method
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services