Author: dlyubimov
Date: Fri Apr 22 05:18:33 2011
New Revision: 1095866
URL: http://svn.apache.org/viewvc?rev=1095866&view=rev
Log:
MAHOUT-673:changing random projection to use uniform (-1;1]
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java?rev=1095866&r1=1095865&r2=1095866&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java
Fri Apr 22 05:18:33 2011
@@ -18,7 +18,6 @@
package org.apache.mahout.math.hadoop.stochasticsvd;
import java.util.Arrays;
-import java.util.Random;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.Vector.Element;
@@ -29,9 +28,9 @@ import org.apache.mahout.math.Vector.Ele
*/
public class Omega {
+ private static final double UNIFORM_DIVISOR = Math.pow(2d, 64); // 2^64; getQuick divides the signed 64-bit murmur hash by this to scale it to a small double
+
private final long seed;
- // TODO find way to make the test pass while using RandomUtils.getRandom()
- private final Random rnd = new Random();
private final int kp;
public Omega(long seed, int k, int p) {
@@ -40,10 +39,25 @@ public class Omega {
}
+ /**
+ * Get the omega element at (row, column): a pseudo-random value derived
+ * deterministically from the seed and the element coordinates, so the same
+ * coordinates always yield the same value.
+ * <P>
+ * The signed 64-bit hash is divided by UNIFORM_DIVISOR and the assertion below
+ * checks the documented [-1...1) bound. NOTE(review): with a divisor of 2^64
+ * the value actually stays within [-0.5...0.5) -- confirm which range is
+ * intended before changing the scale.
+ *
+ * @param row
+ * omega row
+ * @param column
+ * omega column
+ * @return the omega element for the given coordinates
+ */
+ public double getQuick(int row, int column) {
+ // Widen row to long BEFORE shifting: an int shifted by Integer.SIZE (32) is
+ // a no-op in Java (only the low 5 bits of the distance are used), which
+ // collapsed the key to (row | column) and made distinct cells such as
+ // (1,0), (0,1) and (1,1) share one hash. Mask column so that a negative
+ // value cannot sign-extend into the row half of the key.
+ long key = ((long) row << Integer.SIZE) | (column & 0xFFFFFFFFL);
+ long hash = murmur64(key, 8, seed);
+ double result = hash / UNIFORM_DIVISOR;
+ assert result >= -1d && result < 1d;
+ return result;
+ }
+
public void accumDots(int aIndex, double aElement, double[] yRow) { // adds aElement * omega(aIndex, i) into yRow[i]
- rnd.setSeed(getOmegaRowSeed(aIndex, seed, rnd));
for (int i = 0; i < kp; i++) { // i ranges over [0, kp); assumes yRow.length >= kp -- TODO confirm
- yRow[i] += rnd.nextGaussian() * aElement;
+ yRow[i] += getQuick(aIndex, i) * aElement; // omega element is recomputed from the hash on each call; no Random state is involved
}
}
@@ -60,6 +74,7 @@ public class Omega {
Arrays.fill(yRow, 0);
if (!aRow.isDense()) {
+ int j = 0;
for (Element el : aRow) {
accumDots(el.index(), el.get(), yRow);
}
@@ -73,12 +88,40 @@ public class Omega {
}
- public long getOmegaRowSeed(int omegaRow, long omegaSeed, Random rnd) {
- rnd.setSeed(omegaSeed);
- long rowSeed = rnd.nextLong();
- rnd.setSeed(rowSeed ^ omegaRow);
- return rowSeed ^ rnd.nextLong();
+ /**
+ * Shortened murmur hash for data of at most 8 bytes packed into the
+ * <code>len</code> lowest bytes of <code>val</code>.
+ * <P>
+ * Note that <code>len</code> only enters the computation through the initial
+ * hash state; all 64 bits of <code>val</code> are mixed in regardless of
+ * <code>len</code>, so callers are expected to leave the unused high bytes
+ * in a consistent state (presumably zero -- confirm against callers).
+ *
+ * @param val
+ * the value
+ * @param len
+ * the length of data packed into this many low bytes of
+ * <code>val</code>; must be in 1..8 (checked by assertion only)
+ * @param seed
+ * the seed to use
+ * @return murmur hash
+ */
+ public static long murmur64(long val, int len, long seed) {
+ assert len > 0 && len <= 8; // only enforced when assertions are enabled
+ long m = 0xc6a4a7935bd1e995L; // multiplier; looks like the MurmurHash2 64-bit constant -- confirm
+ int r = 47; // shift distance used in every xor-shift step below
+ long h = seed ^ (len * m); // initial state: seed combined with the length
+
+ long k = val; // the single 8-byte block being hashed
+
+ k *= m; // scramble the block: multiply, xor-shift, multiply
+ k ^= k >>> r;
+ k *= m;
+
+ h ^= k; // fold the scrambled block into the state
+ h *= m;
+
+ h ^= h >>> r; // final mixing of the state
+ h *= m;
+ h ^= h >>> r;
+ return h;
}
public static long murmur64(byte[] val, int offset, int len, long seed) {