[GitHub] [flink] zentol commented on a change in pull request #8887: [FLINK-12982][metrics] improve DescriptiveStatisticsHistogramStatistics performance

2019-08-15 Thread GitBox
zentol commented on a change in pull request #8887: [FLINK-12982][metrics] 
improve DescriptiveStatisticsHistogramStatistics performance
URL: https://github.com/apache/flink/pull/8887#discussion_r314324982
 
 

 ##
 File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/metrics/DescriptiveStatisticsHistogramStatistics.java
 ##
 @@ -19,53 +19,174 @@
 
 import org.apache.flink.metrics.HistogramStatistics;
 
+import org.apache.commons.math3.exception.MathIllegalArgumentException;
 import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+import org.apache.commons.math3.stat.descriptive.UnivariateStatistic;
+import org.apache.commons.math3.stat.descriptive.moment.SecondMoment;
+import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
+import org.apache.commons.math3.stat.descriptive.rank.Percentile;
+import org.apache.commons.math3.stat.ranking.NaNStrategy;
 
 import java.util.Arrays;
 
 /**
  * DescriptiveStatistics histogram statistics implementation returned by 
{@link DescriptiveStatisticsHistogram}.
- * The statistics class wraps a {@link DescriptiveStatistics} instance and 
forwards the method calls accordingly.
+ *
+ * The statistics takes a point-in-time snapshot of a {@link 
DescriptiveStatistics} instance and
+ * allows optimised metrics retrieval from this.
  */
 public class DescriptiveStatisticsHistogramStatistics extends 
HistogramStatistics {
-   private final DescriptiveStatistics descriptiveStatistics;
+   private final CommonMetricsSnapshot statisticsSummary = new 
CommonMetricsSnapshot();
 
public DescriptiveStatisticsHistogramStatistics(DescriptiveStatistics 
latencyHistogram) {
-   this.descriptiveStatistics = latencyHistogram;
+   latencyHistogram.apply(statisticsSummary);
}
 
@Override
public double getQuantile(double quantile) {
-   return descriptiveStatistics.getPercentile(quantile * 100);
+   return statisticsSummary.getPercentile(quantile * 100);
}
 
@Override
public long[] getValues() {
-   return 
Arrays.stream(descriptiveStatistics.getValues()).mapToLong(i -> (long) 
i).toArray();
+   return Arrays.stream(statisticsSummary.getValues()).mapToLong(i 
-> (long) i).toArray();
}
 
@Override
public int size() {
-   return (int) descriptiveStatistics.getN();
+   return (int) statisticsSummary.getCount();
}
 
@Override
public double getMean() {
-   return descriptiveStatistics.getMean();
+   return statisticsSummary.getMean();
}
 
@Override
public double getStdDev() {
-   return descriptiveStatistics.getStandardDeviation();
+   return statisticsSummary.getStandardDeviation();
}
 
@Override
public long getMax() {
-   return (long) descriptiveStatistics.getMax();
+   return (long) statisticsSummary.getMax();
}
 
@Override
public long getMin() {
-   return (long) descriptiveStatistics.getMin();
+   return (long) statisticsSummary.getMin();
+   }
+
+   /**
+* Function to extract several commonly used metrics in an optimised 
way, i.e. with as few runs
+* over the data / calculations as possible.
+*
+* Note that calls to {@link #evaluate(double[])} or {@link 
#evaluate(double[], int, int)}
+* will not return a value but instead populate this class so that 
further values can be
+* retrieved from it.
+*/
+   private static class CommonMetricsSnapshot implements 
UnivariateStatistic {
+   long count = 0;
+   double min = Double.NaN;
+   double max = Double.NaN;
+   double mean = Double.NaN;
+   double stddev = Double.NaN;
+   private Percentile percentilesImpl = new 
Percentile().withNaNStrategy(NaNStrategy.FIXED);
 
 Review comment:
   can be final


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [flink] zentol commented on a change in pull request #8887: [FLINK-12982][metrics] improve DescriptiveStatisticsHistogramStatistics performance

2019-08-15 Thread GitBox
zentol commented on a change in pull request #8887: [FLINK-12982][metrics] 
improve DescriptiveStatisticsHistogramStatistics performance
URL: https://github.com/apache/flink/pull/8887#discussion_r314325761
 
 

 ##
 File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/metrics/DescriptiveStatisticsHistogramStatistics.java
 ##
 @@ -19,53 +19,174 @@
 
 import org.apache.flink.metrics.HistogramStatistics;
 
+import org.apache.commons.math3.exception.MathIllegalArgumentException;
 import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+import org.apache.commons.math3.stat.descriptive.UnivariateStatistic;
+import org.apache.commons.math3.stat.descriptive.moment.SecondMoment;
+import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
+import org.apache.commons.math3.stat.descriptive.rank.Percentile;
+import org.apache.commons.math3.stat.ranking.NaNStrategy;
 
 import java.util.Arrays;
 
 /**
  * DescriptiveStatistics histogram statistics implementation returned by 
{@link DescriptiveStatisticsHistogram}.
- * The statistics class wraps a {@link DescriptiveStatistics} instance and 
forwards the method calls accordingly.
+ *
+ * The statistics takes a point-in-time snapshot of a {@link 
DescriptiveStatistics} instance and
+ * allows optimised metrics retrieval from this.
  */
 public class DescriptiveStatisticsHistogramStatistics extends 
HistogramStatistics {
-   private final DescriptiveStatistics descriptiveStatistics;
+   private final CommonMetricsSnapshot statisticsSummary = new 
CommonMetricsSnapshot();
 
public DescriptiveStatisticsHistogramStatistics(DescriptiveStatistics 
latencyHistogram) {
-   this.descriptiveStatistics = latencyHistogram;
+   latencyHistogram.apply(statisticsSummary);
}
 
@Override
public double getQuantile(double quantile) {
-   return descriptiveStatistics.getPercentile(quantile * 100);
+   return statisticsSummary.getPercentile(quantile * 100);
}
 
@Override
public long[] getValues() {
-   return 
Arrays.stream(descriptiveStatistics.getValues()).mapToLong(i -> (long) 
i).toArray();
+   return Arrays.stream(statisticsSummary.getValues()).mapToLong(i 
-> (long) i).toArray();
}
 
@Override
public int size() {
-   return (int) descriptiveStatistics.getN();
+   return (int) statisticsSummary.getCount();
}
 
@Override
public double getMean() {
-   return descriptiveStatistics.getMean();
+   return statisticsSummary.getMean();
}
 
@Override
public double getStdDev() {
-   return descriptiveStatistics.getStandardDeviation();
+   return statisticsSummary.getStandardDeviation();
}
 
@Override
public long getMax() {
-   return (long) descriptiveStatistics.getMax();
+   return (long) statisticsSummary.getMax();
}
 
@Override
public long getMin() {
-   return (long) descriptiveStatistics.getMin();
+   return (long) statisticsSummary.getMin();
+   }
+
+   /**
+* Function to extract several commonly used metrics in an optimised 
way, i.e. with as few runs
+* over the data / calculations as possible.
+*
+* Note that calls to {@link #evaluate(double[])} or {@link 
#evaluate(double[], int, int)}
+* will not return a value but instead populate this class so that 
further values can be
+* retrieved from it.
+*/
+   private static class CommonMetricsSnapshot implements 
UnivariateStatistic {
+   long count = 0;
+   double min = Double.NaN;
+   double max = Double.NaN;
+   double mean = Double.NaN;
+   double stddev = Double.NaN;
+   private Percentile percentilesImpl = new 
Percentile().withNaNStrategy(NaNStrategy.FIXED);
+
+   @Override
+   public double evaluate(final double[] values) throws 
MathIllegalArgumentException {
+   return evaluate(values, 0, values.length);
+   }
+
+   @Override
+   public double evaluate(double[] values, int begin, int length)
+   throws MathIllegalArgumentException {
+   this.count = length;
+   percentilesImpl.setData(values, begin, length);
+
+   SimpleStats secondMoment = new SimpleStats();
+   secondMoment.evaluate(values, begin, length);
+   this.mean = secondMoment.getMean();
+   this.min = secondMoment.getMin();
+   this.max = secondMoment.getMax();
+
+   this.stddev = new 
StandardDeviation(secondMoment).getResult();
+
+  

[GitHub] [flink] zentol commented on a change in pull request #8887: [FLINK-12982][metrics] improve DescriptiveStatisticsHistogramStatistics performance

2019-08-15 Thread GitBox
zentol commented on a change in pull request #8887: [FLINK-12982][metrics] 
improve DescriptiveStatisticsHistogramStatistics performance
URL: https://github.com/apache/flink/pull/8887#discussion_r314324918
 
 

 ##
 File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/metrics/DescriptiveStatisticsHistogramStatistics.java
 ##
 @@ -19,53 +19,174 @@
 
 import org.apache.flink.metrics.HistogramStatistics;
 
+import org.apache.commons.math3.exception.MathIllegalArgumentException;
 import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+import org.apache.commons.math3.stat.descriptive.UnivariateStatistic;
+import org.apache.commons.math3.stat.descriptive.moment.SecondMoment;
+import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
+import org.apache.commons.math3.stat.descriptive.rank.Percentile;
+import org.apache.commons.math3.stat.ranking.NaNStrategy;
 
 import java.util.Arrays;
 
 /**
  * DescriptiveStatistics histogram statistics implementation returned by 
{@link DescriptiveStatisticsHistogram}.
- * The statistics class wraps a {@link DescriptiveStatistics} instance and 
forwards the method calls accordingly.
+ *
+ * The statistics takes a point-in-time snapshot of a {@link 
DescriptiveStatistics} instance and
+ * allows optimised metrics retrieval from this.
  */
 public class DescriptiveStatisticsHistogramStatistics extends 
HistogramStatistics {
-   private final DescriptiveStatistics descriptiveStatistics;
+   private final CommonMetricsSnapshot statisticsSummary = new 
CommonMetricsSnapshot();
 
public DescriptiveStatisticsHistogramStatistics(DescriptiveStatistics 
latencyHistogram) {
-   this.descriptiveStatistics = latencyHistogram;
+   latencyHistogram.apply(statisticsSummary);
}
 
@Override
public double getQuantile(double quantile) {
-   return descriptiveStatistics.getPercentile(quantile * 100);
+   return statisticsSummary.getPercentile(quantile * 100);
}
 
@Override
public long[] getValues() {
-   return 
Arrays.stream(descriptiveStatistics.getValues()).mapToLong(i -> (long) 
i).toArray();
+   return Arrays.stream(statisticsSummary.getValues()).mapToLong(i 
-> (long) i).toArray();
}
 
@Override
public int size() {
-   return (int) descriptiveStatistics.getN();
+   return (int) statisticsSummary.getCount();
}
 
@Override
public double getMean() {
-   return descriptiveStatistics.getMean();
+   return statisticsSummary.getMean();
}
 
@Override
public double getStdDev() {
-   return descriptiveStatistics.getStandardDeviation();
+   return statisticsSummary.getStandardDeviation();
}
 
@Override
public long getMax() {
-   return (long) descriptiveStatistics.getMax();
+   return (long) statisticsSummary.getMax();
}
 
@Override
public long getMin() {
-   return (long) descriptiveStatistics.getMin();
+   return (long) statisticsSummary.getMin();
+   }
+
+   /**
+* Function to extract several commonly used metrics in an optimised 
way, i.e. with as few runs
+* over the data / calculations as possible.
+*
+* Note that calls to {@link #evaluate(double[])} or {@link 
#evaluate(double[], int, int)}
+* will not return a value but instead populate this class so that 
further values can be
+* retrieved from it.
+*/
+   private static class CommonMetricsSnapshot implements 
UnivariateStatistic {
+   long count = 0;
 
 Review comment:
   these should all be private since we access them through a getter anyway


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [flink] zentol commented on a change in pull request #8887: [FLINK-12982][metrics] improve DescriptiveStatisticsHistogramStatistics performance

2019-08-15 Thread GitBox
zentol commented on a change in pull request #8887: [FLINK-12982][metrics] 
improve DescriptiveStatisticsHistogramStatistics performance
URL: https://github.com/apache/flink/pull/8887#discussion_r314327170
 
 

 ##
 File path: 
flink-runtime/src/main/java/org/apache/flink/runtime/metrics/DescriptiveStatisticsHistogramStatistics.java
 ##
 @@ -19,53 +19,174 @@
 
 import org.apache.flink.metrics.HistogramStatistics;
 
+import org.apache.commons.math3.exception.MathIllegalArgumentException;
 import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+import org.apache.commons.math3.stat.descriptive.UnivariateStatistic;
+import org.apache.commons.math3.stat.descriptive.moment.SecondMoment;
+import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
+import org.apache.commons.math3.stat.descriptive.rank.Percentile;
+import org.apache.commons.math3.stat.ranking.NaNStrategy;
 
 import java.util.Arrays;
 
 /**
  * DescriptiveStatistics histogram statistics implementation returned by 
{@link DescriptiveStatisticsHistogram}.
- * The statistics class wraps a {@link DescriptiveStatistics} instance and 
forwards the method calls accordingly.
+ *
+ * The statistics takes a point-in-time snapshot of a {@link 
DescriptiveStatistics} instance and
+ * allows optimised metrics retrieval from this.
  */
 public class DescriptiveStatisticsHistogramStatistics extends 
HistogramStatistics {
-   private final DescriptiveStatistics descriptiveStatistics;
+   private final CommonMetricsSnapshot statisticsSummary = new 
CommonMetricsSnapshot();
 
public DescriptiveStatisticsHistogramStatistics(DescriptiveStatistics 
latencyHistogram) {
-   this.descriptiveStatistics = latencyHistogram;
+   latencyHistogram.apply(statisticsSummary);
}
 
@Override
public double getQuantile(double quantile) {
-   return descriptiveStatistics.getPercentile(quantile * 100);
+   return statisticsSummary.getPercentile(quantile * 100);
}
 
@Override
public long[] getValues() {
-   return 
Arrays.stream(descriptiveStatistics.getValues()).mapToLong(i -> (long) 
i).toArray();
+   return Arrays.stream(statisticsSummary.getValues()).mapToLong(i 
-> (long) i).toArray();
}
 
@Override
public int size() {
-   return (int) descriptiveStatistics.getN();
+   return (int) statisticsSummary.getCount();
}
 
@Override
public double getMean() {
-   return descriptiveStatistics.getMean();
+   return statisticsSummary.getMean();
}
 
@Override
public double getStdDev() {
-   return descriptiveStatistics.getStandardDeviation();
+   return statisticsSummary.getStandardDeviation();
}
 
@Override
public long getMax() {
-   return (long) descriptiveStatistics.getMax();
+   return (long) statisticsSummary.getMax();
}
 
@Override
public long getMin() {
-   return (long) descriptiveStatistics.getMin();
+   return (long) statisticsSummary.getMin();
+   }
+
+   /**
+* Function to extract several commonly used metrics in an optimised 
way, i.e. with as few runs
+* over the data / calculations as possible.
+*
+* Note that calls to {@link #evaluate(double[])} or {@link 
#evaluate(double[], int, int)}
+* will not return a value but instead populate this class so that 
further values can be
+* retrieved from it.
+*/
+   private static class CommonMetricsSnapshot implements 
UnivariateStatistic {
+   long count = 0;
+   double min = Double.NaN;
+   double max = Double.NaN;
+   double mean = Double.NaN;
+   double stddev = Double.NaN;
+   private Percentile percentilesImpl = new 
Percentile().withNaNStrategy(NaNStrategy.FIXED);
+
+   @Override
+   public double evaluate(final double[] values) throws 
MathIllegalArgumentException {
+   return evaluate(values, 0, values.length);
+   }
+
+   @Override
+   public double evaluate(double[] values, int begin, int length)
+   throws MathIllegalArgumentException {
+   this.count = length;
+   percentilesImpl.setData(values, begin, length);
+
+   SimpleStats secondMoment = new SimpleStats();
+   secondMoment.evaluate(values, begin, length);
+   this.mean = secondMoment.getMean();
+   this.min = secondMoment.getMin();
+   this.max = secondMoment.getMax();
+
+   this.stddev = new 
StandardDeviation(secondMoment).getResult();
+
+