Author: tdunning
Date: Wed Aug 18 17:26:42 2010
New Revision: 986799
URL: http://svn.apache.org/viewvc?rev=986799&view=rev
Log:
Added recorded-step evolutionary optimization to be used in on-line SGD
modeling.
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/ep/
mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
mahout/trunk/core/src/main/java/org/apache/mahout/ep/Mapping.java
mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java
mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java
mahout/trunk/core/src/main/java/org/apache/mahout/ep/package.html
mahout/trunk/core/src/test/java/org/apache/mahout/ep/
mahout/trunk/core/src/test/java/org/apache/mahout/ep/ThreadedEvolutionaryProcessTest.java
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java?rev=986799&view=auto
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
(added)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
Wed Aug 18 17:26:42 2010
@@ -0,0 +1,8 @@
+package org.apache.mahout.ep;
+
+/**
+ * Created by IntelliJ IDEA. User: tdunning Date: Aug 17, 2010 Time: 12:04:41
PM To change this
+ * template use File | Settings | File Templates.
+ */
+public class EvolutionaryProcess {
+}
Added: mahout/trunk/core/src/main/java/org/apache/mahout/ep/Mapping.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/Mapping.java?rev=986799&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/Mapping.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/Mapping.java Wed Aug
18 17:26:42 2010
@@ -0,0 +1,95 @@
+package org.apache.mahout.ep;
+
+import org.apache.mahout.math.function.UnaryFunction;
+
+/**
+ * Provides coordinate tranformations so that evolution can proceed on the
entire space of
+ * reals but have the output limited and squished in convenient ways.
+ */
+public abstract class Mapping implements UnaryFunction {
+ /**
+ * Maps input to the open interval (min, max) with 0 going to the mean of
min and
+ * max. When scale is large, a larger proportion of values are mapped to
points
+ * near the boundaries. When scale is small, a larger proportion of values
are mapped to
+ * points well within the boundaries.
+ * @param min The largest lower bound on values to be returned.
+ * @param max The least upper bound on values to be returned.
+ * @param scale Defines how sharp the boundaries are.
+ * @return A mapping that satisfies the desired constraint.
+ */
+ public static Mapping softLimit(final double min, final double max, final
double scale) {
+ return new Mapping() {
+ @Override
+ public double apply(double v) {
+ return min + (max - min) * 1 / (1 + Math.exp(-v * scale));
+ }
+ };
+ }
+
+ /**
+ * Maps input to the open interval (min, max) with 0 going to the mean of
min and
+ * max. When scale is large, a larger proportion of values are mapped to
points
+ * near the boundaries.
+ * @see #softLimit(double, double, double)
+ * @param min The largest lower bound on values to be returned.
+ * @param max The least upper bound on values to be returned.
+ * @return A mapping that satisfies the desired constraint.
+ */
+ public static Mapping softLimit(double min, double max) {
+ return softLimit(min, max, 1);
+ }
+
+ /**
+ * Maps input to positive values in the open interval (min, max) with
+ * 0 going to the geometric mean. Near the geometric mean, values are
+ * distributed roughly geometrically.
+ * @param low The largest lower bound for output results. Must be >0.
+ * @param high The least upper bound for output results. Must be >0.
+ * @return A mapped value.
+ */
+ public static Mapping logLimit(final double low, final double high) {
+ return new Mapping() {
+ Mapping wrapped = softLimit(Math.log(low), Math.log(high));
+
+ @Override
+ public double apply(double v) {
+ return Math.exp(wrapped.apply(v));
+ }
+ };
+ }
+
+ /**
+ * Maps results to positive values.
+ * @return A positive value.
+ */
+ public static Mapping exponential() {
+ return exponential(1);
+ }
+
+ /**
+ * Maps results to positive values.
+ * @param scale If large, then large values are more likely.
+ * @return A positive value.
+ */
+ public static Mapping exponential(final double scale) {
+ return new Mapping() {
+ @Override
+ public double apply(double v) {
+ return Math.exp(v * scale);
+ }
+ };
+ }
+
+ /**
+ * Maps results to themselves.
+ * @return The original value.
+ */
+ public static Mapping identity() {
+ return new Mapping() {
+ @Override
+ public double apply(double v) {
+ return v;
+ }
+ };
+ }
+}
Added: mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java?rev=986799&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/State.java Wed Aug 18
17:26:42 2010
@@ -0,0 +1,153 @@
+package org.apache.mahout.ep;
+
+import java.util.Arrays;
+import java.util.Random;
+
+/**
+ * Recorded step evolutionary optimization. You provide the value, this class
provides the
+ * mutation.
+ */
+public class State implements Comparable<State> {
+ // object count is kept to break ties in comparison.
+ static volatile int objectCount = 0;
+ private Random gen = new Random();
+
+ int id = objectCount++;
+
+ // current state
+ private double[] params;
+
+ // mappers to transform state
+ private Mapping[] maps;
+
+ // omni-directional mutation
+ private double omni;
+
+ // directional mutation
+ private double[] step;
+
+ // current fitness value
+ private double value;
+
+ /**
+ * Invent a new state with no momentum (yet).
+ */
+ public State(double[] x0, double omni) {
+ params = Arrays.copyOf(x0, x0.length);
+ this.omni = omni;
+ step = new double[params.length];
+ maps = new Mapping[params.length];
+ }
+
+ /**
+ * Deep clones a state, useful in mutation.
+ *
+ * @param params Current state
+ * @param omni Current omni-directional mutation
+ * @param step The step taken to get to this point
+ */
+ private State(double[] params, double omni, double[] step, Mapping[] maps) {
+ this.params = Arrays.copyOf(params, params.length);
+ this.omni = omni;
+ this.step = Arrays.copyOf(step, step.length);
+ this.maps = Arrays.copyOf(maps, maps.length);
+ }
+
+ /**
+ * Clone this state with a random change in position.
+ *
+ * @return A new state.
+ */
+ public State mutate() {
+ double sum = 0;
+ for (double v : step) {
+ sum += v * v;
+ }
+ sum = Math.sqrt(sum);
+ double lambda = 0.9 + gen.nextGaussian();
+ State r = new State(params, omni, step, maps);
+ r.omni = -Math.log(1 - gen.nextDouble()) * (0.9 * omni + sum / 10);
+ for (int i = 0; i < step.length; i++) {
+ r.step[i] = lambda * step[i] + r.omni * gen.nextGaussian();
+ r.params[i] += r.step[i];
+ }
+ return r;
+ }
+
+ /**
+ * Defines the transformation for a parameter.
+ * @param i Which mapping to define.
+ * @param m The mapping to use.
+ */
+ public void setMap(int i, Mapping m) {
+ maps[i] = m;
+ }
+
+ /**
+ * Returns a transformed parameter.
+ * @param i The parameter to return.
+ * @return The value of the parameter.
+ */
+ public double get(int i) {
+ Mapping m = maps[i];
+ if (m == null) {
+ return params[i];
+ } else {
+ return m.apply(params[i]);
+ }
+ }
+
+ /**
+ * Returns all the parameters in mapped form.
+ * @return An array of parameters.
+ */
+ public double[] getMappedParams() {
+ double[] r = Arrays.copyOf(params, params.length);
+ for (int i = 0; i < params.length; i++) {
+ r[i] = get(i);
+ }
+ return r;
+ }
+
+ public double[] getParams() {
+ return params;
+ }
+
+ public double getOmni() {
+ return omni;
+ }
+
+ public double[] getStep() {
+ return step;
+ }
+
+
+ /**
+ * Natural order is to sort in descending order of score. Creation order is
used as a
+ * tie-breaker.
+ *
+ * @param other The state to compare with.
+ * @return -1, 0, 1 if the other state is better, identical or worse than
this one.
+ */
+ @Override
+ public int compareTo(State other) {
+ int r = Double.compare(other.value, this.value);
+ if (r != 0) {
+ return r;
+ } else {
+ return this.id - other.id;
+ }
+ }
+
+ public void setRand(Random rand) {
+ this.gen = rand;
+ }
+
+ public void setOmni(double omni) {
+ this.omni = omni;
+ }
+
+ public void setValue(double v) {
+ value = v;
+ }
+}
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java?rev=986799&view=auto
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java
(added)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java
Wed Aug 18 17:26:42 2010
@@ -0,0 +1,139 @@
+package org.apache.mahout.ep;
+
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import com.sun.xml.internal.bind.v2.util.QNameMap;
+import org.apache.mahout.math.function.UnaryFunction;
+
+import java.util.Deque;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+/**
+ * Implements threaded evaluation of an objective function for evolutionary
optimization.
+ */
+public class ThreadedEvolutionaryProcess {
+ private volatile int taskCount = 0;
+ private volatile int processCount = 0;
+
+ private volatile int maxTask = 0;
+
+ private Deque<State> pending = new LinkedList<State>();
+ private Set<Future<State>> working = Sets.newHashSet();
+ static PriorityQueue<State> resultPopulation = new PriorityQueue<State>();
+
+ private ExecutorService pool;
+ private ExecutorCompletionService<State> ecs;
+ private int threadCount;
+ private Map<Integer, Mapping> mappingTable = Maps.newHashMap();
+
+ public ThreadedEvolutionaryProcess(int threadCount) {
+ this.threadCount = threadCount;
+ pool = Executors.newFixedThreadPool(threadCount);
+ ecs = new ExecutorCompletionService<State>(pool);
+ }
+
+ public void setMap(int i, Mapping m) {
+ mappingTable.put(i, m);
+ }
+
+ public State optimize(Function f, int dim, long timeLimit, int parentDepth)
throws InterruptedException, ExecutionException {
+ long t0 = System.currentTimeMillis();
+
+ // start with a few points near 0. These will get transformed
+ State s0 = new State(new double[dim], 0.5);
+ for (Integer key : mappingTable.keySet()) {
+ s0.setMap(key, mappingTable.get(key));
+ }
+
+ pending.add(s0);
+ while (pending.size() < threadCount) {
+ pending.add(s0.mutate());
+ }
+
+ // then work until the clock runs out
+ do {
+ // launch new tasks until we fill the available slots
+ while (working.size() < threadCount && pending.size() > 0) {
+ State next = pending.removeFirst();
+ working.add(ecs.submit(new EvalTask(f, next)));
+ processCount++;
+ }
+
+ // wait for at least one result, then grab any additional results
+ Future<State> result = ecs.take();
+ while (result != null) {
+ State r = result.get();
+ resultPopulation.add(r);
+ working.remove(result);
+ result = ecs.poll();
+ }
+
+ // now spawn new pending tasks from the best in recent history
+ State[] parents = new State[parentDepth];
+ Iterator<State> j = resultPopulation.iterator();
+ for (int i = 0; i < parentDepth && j.hasNext(); i++) {
+ parents[i] = j.next();
+ }
+
+ int k = 0;
+ pending.clear(); // should already be empty (i like belt AND
suspenders)
+ while (pending.size() + working.size() < threadCount) {
+ State tmp = parents[(k++) % parentDepth];
+ pending.add(tmp.mutate());
+ }
+ } while (System.currentTimeMillis() - t0 < timeLimit);
+
+ // wait for last results to dribble in
+ while (working.size() > 0) {
+ Future<State> result = ecs.take();
+ working.remove(result);
+ resultPopulation.add(result.get());
+ }
+ pool.shutdown();
+
+ // now do a final pass over the data to get scores
+ return resultPopulation.peek();
+ }
+
+ public class EvalTask implements Callable<State> {
+ private Function f;
+ private State what;
+
+ public EvalTask(Function f, State what) {
+ this.f = f;
+ this.what = what;
+ }
+
+ /**
+ * Computes a result, or throws an exception if unable to do so.
+ *
+ * @return computed result
+ * @throws Exception if unable to compute a result
+ */
+ @Override
+ public State call() throws Exception {
+ taskCount++;
+ maxTask = Math.max(taskCount, maxTask);
+ try {
+ what.setValue(f.apply(what.getMappedParams()));
+ return what;
+ } finally {
+ taskCount--;
+ }
+ }
+ }
+
+ public abstract static class Function {
+ public abstract double apply(double[] params);
+ }
+}
Added: mahout/trunk/core/src/main/java/org/apache/mahout/ep/package.html
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/package.html?rev=986799&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/package.html (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/package.html Wed Aug
18 17:26:42 2010
@@ -0,0 +1,24 @@
+<html>
+<body>
+Provides basic evolutionary optimization using <a
href="http://arxiv.org/abs/0803.3838">recorded-step</a> mutation.
+<p>
+With this style of optimization, we can optimize a function f: R^n -> R by
stochastic hill-climbing with some of
+the benefits of conjugate gradient style history encoded in the mutation
function. This mutation function will
+adapt to allow weakly directed search rather than using the somewhat more
conventional symmetric Gaussian.
+<p>
+With recorded-step mutation, the meta-mutation parameters are all auto-encoded
in the current state of each point.
+This avoids the classic problem of having more mutation rate parameters than
are in the original state and then
+requiring even more parameters to describe the meta-mutation rate. Instead,
we store the previous point and one
+omni-directional mutation component. Mutation is performed by first mutating
along the line formed by the previous
+and current points and then adding a scaled symmetric Gaussian. The magnitude
of the omni-directional mutation is
+then mutated using itself as a scale.
+<p>
+Because it is convenient to not restrict the parameter space, this package
also provides convenient parameter mapping
+methods. These mapping methods map the set of reals to a finite open interval
(a, b) in such a way that
+lim_{x->-\inf} f(x) = a and lim_{x->\inf} f(x) = b. The linear mapping is
defined so that f(0) = (a+b)/2 and the
+exponential mapping requires that a and b are both positive and has f(0) =
sqrt(ab). The linear mapping is useful
+for values that must stay roughly within a range but which are roughly uniform
within the center of that range. The exponential
+mapping is useful for values that must stay within a range but whose
distribution is roughly exponential near
+geometric mean of the end-points. An identity mapping is also supplied.
+</body>
+</html>
\ No newline at end of file
Added:
mahout/trunk/core/src/test/java/org/apache/mahout/ep/ThreadedEvolutionaryProcessTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/ep/ThreadedEvolutionaryProcessTest.java?rev=986799&view=auto
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/ep/ThreadedEvolutionaryProcessTest.java
(added)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/ep/ThreadedEvolutionaryProcessTest.java
Wed Aug 18 17:26:42 2010
@@ -0,0 +1,44 @@
+package org.apache.mahout.ep;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Random;
+import java.util.concurrent.ExecutionException;
+
+public class ThreadedEvolutionaryProcessTest {
+ @Test
+ public void testOptimize() throws ExecutionException, InterruptedException {
+ ThreadedEvolutionaryProcess ep = new ThreadedEvolutionaryProcess(50);
+ State x = ep.optimize(new ThreadedEvolutionaryProcess.Function() {
+ /**
+ * Implements a skinny quadratic bowl.
+ */
+ @Override
+ public double apply(double[] params) {
+ Random rand = new Random();
+ double sum = 0;
+ int i = 0;
+ for (double x : params) {
+ x = (i + 1) * (x - i);
+ i++;
+ sum += x * x;
+ }
+ try {
+ // variable delays to emulate a tricky function
+ Thread.sleep((long) Math.floor(-2 * Math.log(1 -
rand.nextDouble())));
+ } catch (InterruptedException e) {
+ // ignore interruptions
+ }
+
+ return -sum;
+ }
+ }, 5, 200, 2);
+
+ double[] r = x.getMappedParams();
+ int i = 0;
+ for (double v : r) {
+ Assert.assertEquals(i++, v, 1e-3);
+ }
+ }
+}