Author: tdunning
Date: Wed Aug 18 17:26:42 2010
New Revision: 986799

URL: http://svn.apache.org/viewvc?rev=986799&view=rev
Log:
Added recorded-step evolutionary optimization to be used in on-line SGD 
modeling.

Added:
    mahout/trunk/core/src/main/java/org/apache/mahout/ep/
    
mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
    mahout/trunk/core/src/main/java/org/apache/mahout/ep/Mapping.java
    mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java
    mahout/trunk/core/src/main/java/org/apache/mahout/ep/package.html
    mahout/trunk/core/src/test/java/org/apache/mahout/ep/
    
mahout/trunk/core/src/test/java/org/apache/mahout/ep/ThreadedEvolutionaryProcessTest.java

Added: 
mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java?rev=986799&view=auto
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java 
(added)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java 
Wed Aug 18 17:26:42 2010
@@ -0,0 +1,8 @@
+package org.apache.mahout.ep;
+
+/**
+ * Created by IntelliJ IDEA. User: tdunning Date: Aug 17, 2010 Time: 12:04:41 
PM To change this
+ * template use File | Settings | File Templates.
+ */
+public class EvolutionaryProcess {
+}

Added: mahout/trunk/core/src/main/java/org/apache/mahout/ep/Mapping.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/Mapping.java?rev=986799&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/Mapping.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/Mapping.java Wed Aug 
18 17:26:42 2010
@@ -0,0 +1,95 @@
+package org.apache.mahout.ep;
+
+import org.apache.mahout.math.function.UnaryFunction;
+
+/**
+ * Provides coordinate tranformations so that evolution can proceed on the 
entire space of
+ * reals but have the output limited and squished in convenient ways.
+ */
+public abstract class Mapping implements UnaryFunction {
+  /**
+   * Maps input to the open interval (min, max) with 0 going to the mean of 
min and
+   * max.  When scale is large, a larger proportion of values are mapped to 
points
+   * near the boundaries.  When scale is small, a larger proportion of values 
are mapped to
+   * points well within the boundaries.
+   * @param min The largest lower bound on values to be returned.
+   * @param max The least upper bound on values to be returned.
+   * @param scale  Defines how sharp the boundaries are.
+   * @return A mapping that satisfies the desired constraint.
+   */
+  public static Mapping softLimit(final double min, final double max, final 
double scale) {
+    return new Mapping() {
+      @Override
+      public double apply(double v) {
+        return min + (max - min) * 1 / (1 + Math.exp(-v * scale));
+      }
+    };
+  }
+
+  /**
+   * Maps input to the open interval (min, max) with 0 going to the mean of 
min and
+   * max.  When scale is large, a larger proportion of values are mapped to 
points
+   * near the boundaries.
+   * @see #softLimit(double, double, double)
+   * @param min The largest lower bound on values to be returned.
+   * @param max The least upper bound on values to be returned.
+   * @return A mapping that satisfies the desired constraint.
+   */
+  public static Mapping softLimit(double min, double max) {
+    return softLimit(min, max, 1);
+  }
+
+  /**
+   * Maps input to positive values in the open interval (min, max) with
+   * 0 going to the geometric mean.  Near the geometric mean, values are
+   * distributed roughly geometrically.
+   * @param low   The largest lower bound for output results.  Must be >0.
+   * @param high  The least upper bound for output results.  Must be >0.
+   * @return A mapped value.
+   */
+  public static Mapping logLimit(final double low, final double high) {
+    return new Mapping() {
+      Mapping wrapped = softLimit(Math.log(low), Math.log(high));
+
+      @Override
+      public double apply(double v) {
+        return Math.exp(wrapped.apply(v));
+      }
+    };
+  }
+
+  /**
+   * Maps results to positive values.
+   * @return A positive value.
+   */
+  public static Mapping exponential() {
+    return exponential(1);
+  }
+
+  /**
+   * Maps results to positive values.
+   * @param scale  If large, then large values are more likely.
+   * @return A positive value.
+   */
+  public static Mapping exponential(final double scale) {
+    return new Mapping() {
+      @Override
+      public double apply(double v) {
+        return Math.exp(v * scale);
+      }
+    };
+  }
+
+  /**
+   * Maps results to themselves.
+   * @return The original value.
+   */
+  public static Mapping identity() {
+    return new Mapping() {
+      @Override
+      public double apply(double v) {
+        return v;
+      }
+    };
+  }
+}

Added: mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java?rev=986799&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java Wed Aug 18 
17:26:42 2010
@@ -0,0 +1,153 @@
+package org.apache.mahout.ep;
+
+import java.util.Arrays;
+import java.util.Random;
+
+/**
+ * Recorded step evolutionary optimization.  You provide the value, this class 
provides the
+ * mutation.
+ */
+public class State implements Comparable<State> {
+  // object count is kept to break ties in comparison.
+  static volatile int objectCount = 0;
+  private Random gen = new Random();
+
+  int id = objectCount++;
+
+  // current state
+  private double[] params;
+
+  // mappers to transform state
+  private Mapping[] maps;
+
+  // omni-directional mutation
+  private double omni;
+
+  // directional mutation
+  private double[] step;
+
+  // current fitness value
+  private double value;
+
+  /**
+   * Invent a new state with no momentum (yet).
+   */
+  public State(double[] x0, double omni) {
+    params = Arrays.copyOf(x0, x0.length);
+    this.omni = omni;
+    step = new double[params.length];
+    maps = new Mapping[params.length];
+  }
+
+  /**
+   * Deep clones a state, useful in mutation.
+   *
+   * @param params Current state
+   * @param omni   Current omni-directional mutation
+   * @param step   The step taken to get to this point
+   */
+  private State(double[] params, double omni, double[] step, Mapping[] maps) {
+    this.params = Arrays.copyOf(params, params.length);
+    this.omni = omni;
+    this.step = Arrays.copyOf(step, step.length);
+    this.maps = Arrays.copyOf(maps, maps.length);
+  }
+
+  /**
+   * Clone this state with a random change in position.
+   *
+   * @return A new state.
+   */
+  public State mutate() {
+    double sum = 0;
+    for (double v : step) {
+      sum += v * v;
+    }
+    sum = Math.sqrt(sum);
+    double lambda = 0.9 + gen.nextGaussian();
+    State r = new State(params, omni, step, maps);
+    r.omni = -Math.log(1 - gen.nextDouble()) * (0.9 * omni + sum / 10);
+    for (int i = 0; i < step.length; i++) {
+      r.step[i] = lambda * step[i] + r.omni * gen.nextGaussian();
+      r.params[i] += r.step[i];
+    }
+    return r;
+  }
+
+  /**
+   * Defines the transformation for a parameter.
+   * @param i Which mapping to define.
+   * @param m The mapping to use.
+   */
+  public void setMap(int i, Mapping m) {
+    maps[i] = m;
+  }
+
+  /**
+   * Returns a transformed parameter.
+   * @param i  The parameter to return.
+   * @return The value of the parameter.
+   */
+  public double get(int i) {
+    Mapping m = maps[i];
+    if (m == null) {
+      return params[i];
+    } else {
+      return m.apply(params[i]);
+    }
+  }
+
+  /**
+   * Returns all the parameters in mapped form.
+   * @return An array of parameters.
+   */
+  public double[] getMappedParams() {
+    double[] r = Arrays.copyOf(params, params.length);
+    for (int i = 0; i < params.length; i++) {
+      r[i] = get(i);
+    }
+    return r;
+  }
+
+  public double[] getParams() {
+    return params;
+  }
+
+  public double getOmni() {
+    return omni;
+  }
+
+  public double[] getStep() {
+    return step;
+  }
+
+
+  /**
+   * Natural order is to sort in descending order of score.  Creation order is 
used as a
+   * tie-breaker.
+   *
+   * @param other The state to compare with.
+   * @return -1, 0, 1 if the other state is better, identical or worse than 
this one.
+   */
+  @Override
+  public int compareTo(State other) {
+    int r = Double.compare(other.value, this.value);
+    if (r != 0) {
+      return r;
+    } else {
+      return this.id - other.id;
+    }
+  }
+
+  public void setRand(Random rand) {
+    this.gen = rand;
+  }
+
+  public void setOmni(double omni) {
+    this.omni = omni;
+  }
+
+  public void setValue(double v) {
+    value = v;
+  }
+}

Added: 
mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java?rev=986799&view=auto
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java
 (added)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java
 Wed Aug 18 17:26:42 2010
@@ -0,0 +1,139 @@
+package org.apache.mahout.ep;
+
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import com.sun.xml.internal.bind.v2.util.QNameMap;
+import org.apache.mahout.math.function.UnaryFunction;
+
+import java.util.Deque;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+/**
+ * Implements threaded evaluation of an objective function for evolutionary 
optimization.
+ */
+public class ThreadedEvolutionaryProcess {
+  private volatile int taskCount = 0;
+  private volatile int processCount = 0;
+
+  private volatile int maxTask = 0;
+
+  private Deque<State> pending = new LinkedList<State>();
+  private Set<Future<State>> working = Sets.newHashSet();
+  static PriorityQueue<State> resultPopulation = new PriorityQueue<State>();
+
+  private ExecutorService pool;
+  private ExecutorCompletionService<State> ecs;
+  private int threadCount;
+  private Map<Integer, Mapping> mappingTable = Maps.newHashMap();
+
+  public ThreadedEvolutionaryProcess(int threadCount) {
+    this.threadCount = threadCount;
+    pool = Executors.newFixedThreadPool(threadCount);
+    ecs = new ExecutorCompletionService<State>(pool);
+  }
+
+  public void setMap(int i, Mapping m) {
+    mappingTable.put(i, m);
+  }
+
+  public State optimize(Function f, int dim, long timeLimit, int parentDepth) 
throws InterruptedException, ExecutionException {
+    long t0 = System.currentTimeMillis();
+
+    // start with a few points near 0.  These will get transformed
+    State s0 = new State(new double[dim], 0.5);
+    for (Integer key : mappingTable.keySet()) {
+      s0.setMap(key, mappingTable.get(key));
+    }
+
+    pending.add(s0);
+    while (pending.size() < threadCount) {
+      pending.add(s0.mutate());
+    }
+
+    // then work until the clock runs out
+    do {
+      // launch new tasks until we fill the available slots
+      while (working.size() < threadCount && pending.size() > 0) {
+        State next = pending.removeFirst();
+        working.add(ecs.submit(new EvalTask(f, next)));
+        processCount++;
+      }
+
+      // wait for at least one result, then grab any additional results
+      Future<State> result = ecs.take();
+      while (result != null) {
+        State r = result.get();
+        resultPopulation.add(r);
+        working.remove(result);
+        result = ecs.poll();
+      }
+
+      // now spawn new pending tasks from the best in recent history
+      State[] parents = new State[parentDepth];
+      Iterator<State> j = resultPopulation.iterator();
+      for (int i = 0; i < parentDepth && j.hasNext(); i++) {
+        parents[i] = j.next();
+      }
+
+      int k = 0;
+      pending.clear();    // should already be empty (i like belt AND 
suspenders)
+      while (pending.size() + working.size() < threadCount) {
+        State tmp = parents[(k++) % parentDepth];
+        pending.add(tmp.mutate());
+      }
+    } while (System.currentTimeMillis() - t0 < timeLimit);
+
+    // wait for last results to dribble in
+    while (working.size() > 0) {
+      Future<State> result = ecs.take();
+      working.remove(result);
+      resultPopulation.add(result.get());
+    }
+    pool.shutdown();
+
+    // now do a final pass over the data to get scores
+    return resultPopulation.peek();
+  }
+
+  public class EvalTask implements Callable<State> {
+    private Function f;
+    private State what;
+
+    public EvalTask(Function f, State what) {
+      this.f = f;
+      this.what = what;
+    }
+
+    /**
+     * Computes a result, or throws an exception if unable to do so.
+     *
+     * @return computed result
+     * @throws Exception if unable to compute a result
+     */
+    @Override
+    public State call() throws Exception {
+      taskCount++;
+      maxTask = Math.max(taskCount, maxTask);
+      try {
+        what.setValue(f.apply(what.getMappedParams()));
+        return what;
+      } finally {
+        taskCount--;
+      }
+    }
+  }
+
+  public abstract static class Function {
+    public abstract double apply(double[] params);
+  }
+}

Added: mahout/trunk/core/src/main/java/org/apache/mahout/ep/package.html
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/package.html?rev=986799&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/package.html (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/package.html Wed Aug 
18 17:26:42 2010
@@ -0,0 +1,24 @@
+<html>
+<body>
+Provides basic evolutionary optimization using <a 
href="http://arxiv.org/abs/0803.3838";>recorded-step</a> mutation.
+<p>
+With this style of optimization, we can optimize a function f: R^n -> R by 
stochastic hill-climbing with some of
+the benefits of conjugate gradient style history encoded in the mutation 
function.  This mutation function will
+adapt to allow weakly directed search rather than using the somewhat more 
conventional symmetric Gaussian.
+<p>
+With recorded-step mutation, the meta-mutation parameters are all auto-encoded 
in the current state of each point.
+This avoids the classic problem of having more mutation rate parameters than 
are in the original state and then
+requiring even more parameters to describe the meta-mutation rate.  Instead, 
we store the previous point and one
+omni-directional mutation component.  Mutation is performed by first mutating 
along the line formed by the previous
+and current points and then adding a scaled symmetric Gaussian.  The magnitude 
of the omni-directional mutation is
+then mutated using itself as a scale.
+<p>
+Because it is convenient to not restrict the parameter space, this package 
also provides convenient parameter mapping
+methods.  These mapping methods map the set of reals to a finite open interval 
(a, b) in such a way that
+lim_{x->-\inf} f(x) = a and lim_{x->\inf} f(x) = b.  The linear mapping is 
defined so that f(0) = (a+b)/2 and the
+exponential mapping requires that a and b are both positive and has f(0) = 
sqrt(ab).  The linear mapping is useful
+for values that must stay roughly within a range but which are roughly uniform 
within the center of that range.  The exponential
+mapping is useful for values that must stay within a range but whose 
distribution is roughly exponential near
+geometric mean of the end-points.  An identity mapping is also supplied.
+</body>
+</html>
\ No newline at end of file

Added: 
mahout/trunk/core/src/test/java/org/apache/mahout/ep/ThreadedEvolutionaryProcessTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/ep/ThreadedEvolutionaryProcessTest.java?rev=986799&view=auto
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/ep/ThreadedEvolutionaryProcessTest.java
 (added)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/ep/ThreadedEvolutionaryProcessTest.java
 Wed Aug 18 17:26:42 2010
@@ -0,0 +1,44 @@
+package org.apache.mahout.ep;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Random;
+import java.util.concurrent.ExecutionException;
+
+public class ThreadedEvolutionaryProcessTest {
+  @Test
+  public void testOptimize() throws ExecutionException, InterruptedException {
+    ThreadedEvolutionaryProcess ep = new ThreadedEvolutionaryProcess(50);
+    State x = ep.optimize(new ThreadedEvolutionaryProcess.Function() {
+      /**
+       * Implements a skinny quadratic bowl.
+       */
+      @Override
+      public double apply(double[] params) {
+        Random rand = new Random();
+        double sum = 0;
+        int i = 0;
+        for (double x : params) {
+          x = (i + 1) * (x - i);
+          i++;
+          sum += x * x;
+        }
+        try {
+          // variable delays to emulate a tricky function
+          Thread.sleep((long) Math.floor(-2 * Math.log(1 - 
rand.nextDouble())));
+        } catch (InterruptedException e) {
+          // ignore interruptions
+        }
+
+        return -sum;
+      }
+    }, 5, 200, 2);
+
+    double[] r = x.getMappedParams();
+    int i = 0;
+    for (double v : r) {
+      Assert.assertEquals(i++, v, 1e-3);
+    }
+  }
+}


Reply via email to