Author: tdunning
Date: Thu Mar 24 23:39:32 2011
New Revision: 1085197

URL: http://svn.apache.org/viewvc?rev=1085197&view=rev
Log:
MAHOUT-634 - Time embedded moving averages and rates

Added:
    
mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineExponentialAverage.java
    
mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineExponentialAverageTest.java

Added: 
mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineExponentialAverage.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineExponentialAverage.java?rev=1085197&view=auto
==============================================================================
--- 
mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineExponentialAverage.java
 (added)
+++ 
mahout/trunk/math/src/main/java/org/apache/mahout/math/stats/OnlineExponentialAverage.java
 Thu Mar 24 23:39:32 2011
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.stats;
+
+/**
+ * Computes an online average that is exponentially weighted toward recent 
time-embedded samples.
+ */
+public class OnlineExponentialAverage {
+  private double alpha;
+  private double lastT = 0;
+  private double s = 0;
+  private double w = 0;
+  private double T = 0;
+
+  /**
+   * Creates an averager that has a specified time constant for discounting 
old data. The time
+   * constant, alpha, is the time at which an older sample is discounted to 
1/e relative to current
+   * data.  Roughly speaking, data that is more than 3*alpha old doesn't 
matter any more and data
+   * that is more recent than alpha/3 is about as important as current data.
+   *
+   * See 
http://tdunning.blogspot.com/2011/03/exponential-weighted-averages-with.html 
for a
+   * derivation.  See 
http://tdunning.blogspot.com/2011/03/exponentially-weighted-averaging-for.html
+   * for the rate method.
+   *
+   * @param alpha The time constant for discounting old data and state.
+   */
+  public OnlineExponentialAverage(double alpha) {
+    this.alpha = alpha;
+  }
+
+  public void add(double t, double x) {
+    double pi = Math.exp(-(t - lastT) / alpha);
+    s = x + pi * s;
+    w = 1 + pi * w;
+    T = (t - lastT) + pi * T;
+    lastT = t;
+  }
+
+  public double mean() {
+    return s / w;
+  }
+
+  public double meanRate() {
+    return s / T;
+  }
+}

Added: 
mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineExponentialAverageTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineExponentialAverageTest.java?rev=1085197&view=auto
==============================================================================
--- 
mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineExponentialAverageTest.java
 (added)
+++ 
mahout/trunk/math/src/test/java/org/apache/mahout/math/stats/OnlineExponentialAverageTest.java
 Thu Mar 24 23:39:32 2011
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.stats;
+
+import junit.framework.TestCase;
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.jet.random.Poisson;
+import org.junit.Test;
+
+import java.util.Random;
+
+public class OnlineExponentialAverageTest extends TestCase {
+  @Test
+  public void testAverage() {
+    double[] t = {11.35718, 21.54637, 28.91061, 33.03586, 39.57767};
+    double[] x = {1.5992071, -1.3577032, -0.3405638, 0.7048632, 0.3020558};
+    double[] m = {1.5992071, -1.0168100, -0.4797436, 0.2836447, 0.2966159};
+
+    OnlineExponentialAverage averager = new OnlineExponentialAverage(5);
+
+    for (int i = 0; i < t.length; i++) {
+      averager.add(t[i], x[i]);
+      assertEquals("Step " + i, m[i], averager.mean(), 1e-6);
+    }
+  }
+
+  @Test
+  public void testRate() {
+    Random gen = RandomUtils.getRandom();
+
+    Poisson p = new Poisson(5, gen);
+    double lastT = 0;
+
+    double[] k = new double[1000];
+    double[] t = new double[1000];
+    for (int i = 1; i < 1000; i++) {
+      // we sample every 5-15 seconds
+      double dt = gen.nextDouble() * 10 + 5;
+      t[i] = lastT + dt;
+
+      // at those points, we get a Poisson count
+      k[i] = p.nextInt(dt * .2);
+      lastT = t[i];
+    }
+
+    OnlineExponentialAverage averager = new OnlineExponentialAverage(2000);
+
+    for (int i = 1; i < 1000; i++) {
+      averager.add(t[i], k[i]);
+    }
+
+    assertEquals("Expected rate", .2, averager.meanRate(), 0.01);
+  }
+}


Reply via email to