Author: jmannix
Date: Sat Apr 30 04:35:18 2011
New Revision: 1098041
URL: http://svn.apache.org/viewvc?rev=1098041&view=rev
Log:
Fixes MAHOUT-639
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSequentialAccessSparseVector.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java?rev=1098041&r1=1098040&r2=1098041&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java
Sat Apr 30 04:35:18 2011
@@ -131,11 +131,12 @@ public class MatrixMultiplicationJob ext
Vector outFrag = firstIsOutFrag ? ((VectorWritable)v.get(0)).get() :
((VectorWritable)v.get(1)).get();
Vector multiplier = firstIsOutFrag ? ((VectorWritable)v.get(1)).get() :
((VectorWritable)v.get(0)).get();
+ VectorWritable outVector = new VectorWritable();
Iterator<Vector.Element> it = multiplier.iterateNonZero();
while (it.hasNext()) {
Vector.Element e = it.next();
row.set(e.index());
- VectorWritable outVector = new VectorWritable(outFrag.times(e.get()));
+ outVector.set(outFrag.times(e.get()));
out.collect(row, outVector);
}
}
Modified:
mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java?rev=1098041&r1=1098040&r2=1098041&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
(original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java
Sat Apr 30 04:35:18 2011
@@ -17,12 +17,12 @@
package org.apache.mahout.math;
-import java.util.Iterator;
-
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.function.DoubleDoubleFunction;
import org.apache.mahout.math.function.DoubleFunction;
+import java.util.Iterator;
+
/** Implementations of generic capabilities like sum of elements and dot
products */
public abstract class AbstractVector implements Vector {
@@ -417,13 +417,15 @@ public abstract class AbstractVector imp
@Override
public Vector times(double x) {
+ if (x == 0.0) {
+ return like();
+ }
+
Vector result = like().assign(this);
if (x == 1.0) {
return result;
}
- if (x == 0.0) {
- return like();
- }
+
Iterator<Element> iter = result.iterateNonZero();
while (iter.hasNext()) {
Element element = iter.next();
Modified:
mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java?rev=1098041&r1=1098040&r2=1098041&view=diff
==============================================================================
---
mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java
(original)
+++
mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java
Sat Apr 30 04:35:18 2011
@@ -38,7 +38,7 @@ final class OrderedIntDoubleMapping impl
numMappings = 0;
}
- private OrderedIntDoubleMapping(int[] indices, double[] values, int numMappings) {
+ OrderedIntDoubleMapping(int[] indices, double[] values, int numMappings) {
this.indices = indices;
this.values = values;
this.numMappings = numMappings;
Modified:
mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java?rev=1098041&r1=1098040&r2=1098041&view=diff
==============================================================================
---
mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
(original)
+++
mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
Sat Apr 30 04:35:18 2011
@@ -20,6 +20,7 @@ package org.apache.mahout.math;
import com.google.common.collect.AbstractIterator;
import org.apache.mahout.math.function.Functions;
+import java.util.Arrays;
import java.util.Iterator;
/**
@@ -63,11 +64,40 @@ public class SequentialAccessSparseVecto
public SequentialAccessSparseVector(Vector other) {
this(other.size(), other.getNumNondefaultElements());
+
+ if (!other.isSequentialAccess()) {
+ // If the incoming Vector to copy is random, then adding items
+ // from the Iterator can degrade performance dramatically if
+ // the number of elements is large as this Vector tries to stay
+ // in order as items are added, so it's better to sort the other
+ // Vector's elements by index and then add them to this
+ copySortedRandomAccessSparseVector(other);
+ } else {
+ Iterator<Element> it = other.iterateNonZero();
+ Element e;
+ while (it.hasNext() && (e = it.next()) != null) {
+ set(e.index(), e.get());
+ }
+ }
+ }
+
+ // Sorts a RandomAccessSparseVectors Elements before adding them to this
+ private int copySortedRandomAccessSparseVector(Vector other) {
+ int elementCount = other.getNumNondefaultElements();
+ OrderedElement[] sortableElements = new OrderedElement[elementCount];
Iterator<Element> it = other.iterateNonZero();
Element e;
+ int s=0;
while (it.hasNext() && (e = it.next()) != null) {
- set(e.index(), e.get());
+ sortableElements[s++] = new OrderedElement(e.index(), e.get());
+ }
+ Arrays.sort(sortableElements);
+ for (int i = 0; i < sortableElements.length; i++) {
+ values.getIndices()[i] = sortableElements[i].index;
+ values.getValues()[i] = sortableElements[i].value;
}
+ values = new OrderedIntDoubleMapping(values.getIndices(), values.getValues(), elementCount);
+ return elementCount;
}
public SequentialAccessSparseVector(SequentialAccessSparseVector other,
boolean shallowCopy) {
@@ -188,7 +218,7 @@ public class SequentialAccessSparseVecto
if (this == x) {
return dotSelf();
}
-
+
if (x instanceof SequentialAccessSparseVector) {
// For sparse SeqAccVectors. do dot product without lookup in a linear fashion
Iterator<Element> myIter = iterateNonZero();
@@ -220,7 +250,7 @@ public class SequentialAccessSparseVecto
}
return result;
} else { // seq.rand. seq.dense
- double result = 0.0;
+ double result = 0.0;
Iterator<Element> iter = iterateNonZero();
while (iter.hasNext()) {
Element element = iter.next();
@@ -305,7 +335,7 @@ public class SequentialAccessSparseVecto
@Override
public void set(double value) {
- lengthSquared = -1;
+ lengthSquared = -1;
values.getValues()[offset] = value;
}
}
@@ -341,7 +371,7 @@ public class SequentialAccessSparseVecto
@Override
public void set(double value) {
- lengthSquared = -1;
+ lengthSquared = -1;
if (index == values.getIndices()[nextOffset]) {
values.getValues()[nextOffset] = value;
} else {
@@ -350,5 +380,23 @@ public class SequentialAccessSparseVecto
}
}
}
+
+ // Comparable Element for sorting Elements by index
+ private static final class OrderedElement implements Comparable<OrderedElement> {
+ private final int index;
+ private final double value;
+
+ OrderedElement(int index, double value) {
+ this.index = index;
+ this.value = value;
+ }
+
+ @Override
+ public int compareTo(final OrderedElement that) {
+ // both indexes are positive, and neither can be Integer.MAX_VALUE (otherwise there would be
+ // an array somewhere with Integer.MAX_VALUE + 1 elements)
+ return this.index - that.index;
+ }
+ }
}
Modified:
mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSequentialAccessSparseVector.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSequentialAccessSparseVector.java?rev=1098041&r1=1098040&r2=1098041&view=diff
==============================================================================
---
mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSequentialAccessSparseVector.java
(original)
+++
mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSequentialAccessSparseVector.java
Sat Apr 30 04:35:18 2011
@@ -39,6 +39,4 @@ public final class TestSequentialAccessS
assertEquals("dot2", -0.666666667, v.dot(w), EPSILON);
}
-
-
}
\ No newline at end of file