Author: tdunning
Date: Thu Mar 24 23:39:32 2011
New Revision: 1085197
URL: http://svn.apache.org/viewvc?rev=1085197&view=rev
Log:
MAHOUT-634 - Time embedded moving averages and rates
Added:
mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineExponentialAverage.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineExponentialAverageTest.java
Added:
mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineExponentialAverage.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineExponentialAverage.java?rev=1085197&view=auto
==============================================================================
---
mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineExponentialAverage.java
(added)
+++
mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineExponentialAverage.java
Thu Mar 24 23:39:32 2011
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.stats;
+
+/**
+ * Computes an online average that is exponentially weighted toward recent
time-embedded samples.
+ */
+public class OnlineExponentialAverage {
+ private double alpha;
+ private double lastT = 0;
+ private double s = 0;
+ private double w = 0;
+ private double T = 0;
+
+ /**
+ * Creates an averager that has a specified time constant for discounting
old data. The time
+ * constant, alpha, is the time at which an older sample is discounted to
1/e relative to current
+ * data. Roughly speaking, data that is more than 3*alpha old doesn't
matter any more and data
+ * that is more recent than alpha/3 is about as important as current data.
+ *
+ * See
http://tdunning.blogspot.com/2011/03/exponential-weighted-averages-with.html
for a
+ * derivation. See
http://tdunning.blogspot.com/2011/03/exponentially-weighted-averaging-for.html
+ * for the rate method.
+ *
+ * @param alpha The time constant for discounting old data and state.
+ */
+ public OnlineExponentialAverage(double alpha) {
+ this.alpha = alpha;
+ }
+
+ public void add(double t, double x) {
+ double pi = Math.exp(-(t - lastT) / alpha);
+ s = x + pi * s;
+ w = 1 + pi * w;
+ T = (t - lastT) + pi * T;
+ lastT = t;
+ }
+
+ public double mean() {
+ return s / w;
+ }
+
+ public double meanRate() {
+ return s / T;
+ }
+}
Added:
mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineExponentialAverageTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineExponentialAverageTest.java?rev=1085197&view=auto
==============================================================================
---
mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineExponentialAverageTest.java
(added)
+++
mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineExponentialAverageTest.java
Thu Mar 24 23:39:32 2011
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.stats;
+
+import junit.framework.TestCase;
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.jet.random.Poisson;
+import org.junit.Test;
+
+import java.util.Random;
+
+public class OnlineExponentialAverageTest extends TestCase {
+ @Test
+ public void testAverage() {
+ double[] t = {11.35718, 21.54637, 28.91061, 33.03586, 39.57767};
+ double[] x = {1.5992071, -1.3577032, -0.3405638, 0.7048632, 0.3020558};
+ double[] m = {1.5992071, -1.0168100, -0.4797436, 0.2836447, 0.2966159};
+
+ OnlineExponentialAverage averager = new OnlineExponentialAverage(5);
+
+ for (int i = 0; i < t.length; i++) {
+ averager.add(t[i], x[i]);
+ assertEquals("Step " + i, m[i], averager.mean(), 1e-6);
+ }
+ }
+
+ @Test
+ public void testRate() {
+ Random gen = RandomUtils.getRandom();
+
+ Poisson p = new Poisson(5, gen);
+ double lastT = 0;
+
+ double[] k = new double[1000];
+ double[] t = new double[1000];
+ for (int i = 1; i < 1000; i++) {
+ // we sample every 5-15 seconds
+ double dt = gen.nextDouble() * 10 + 5;
+ t[i] = lastT + dt;
+
+ // at those points, we get a Poisson count
+ k[i] = p.nextInt(dt * .2);
+ lastT = t[i];
+ }
+
+ OnlineExponentialAverage averager = new OnlineExponentialAverage(2000);
+
+ for (int i = 1; i < 1000; i++) {
+ averager.add(t[i], k[i]);
+ }
+
+ assertEquals("Expected rate", .2, averager.meanRate(), 0.01);
+ }
+}