Repository: mahout Updated Branches: refs/heads/master 6fbf3663b -> 45d88031c
MAHOUT-1580 Optimize getNumNonZeroElements() closes apache/mahout#17 Project: http://git-wip-us.apache.org/repos/asf/mahout/repo Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/45d88031 Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/45d88031 Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/45d88031 Branch: refs/heads/master Commit: 45d88031c8e782a624eaf6cefea9700600805647 Parents: 6fbf366 Author: ssc <[email protected]> Authored: Wed Jun 18 22:31:34 2014 +0200 Committer: ssc <[email protected]> Committed: Wed Jun 18 22:31:34 2014 +0200 ---------------------------------------------------------------------- CHANGELOG | 2 ++ .../org/apache/mahout/math/DenseVector.java | 11 +++++++ .../apache/mahout/math/PermutedVectorView.java | 13 +++++--- .../mahout/math/RandomAccessSparseVector.java | 14 +++++++++ .../math/SequentialAccessSparseVector.java | 13 ++++++++ .../java/org/apache/mahout/math/VectorTest.java | 32 ++++++++++++++++++++ 6 files changed, 80 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mahout/blob/45d88031/CHANGELOG ---------------------------------------------------------------------- diff --git a/CHANGELOG b/CHANGELOG index 1d5bd3d..47aacf6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,8 @@ Mahout Change Log Release 1.0 - unreleased + MAHOUT-1580 Optimize getNumNonZeroElements() (ssc) + MAHOUT-1464: Cooccurrence Analysis on Spark (pat) MAHOUT-1578: Optimizations in matrix serialization (ssc) http://git-wip-us.apache.org/repos/asf/mahout/blob/45d88031/math/src/main/java/org/apache/mahout/math/DenseVector.java ---------------------------------------------------------------------- diff --git a/math/src/main/java/org/apache/mahout/math/DenseVector.java b/math/src/main/java/org/apache/mahout/math/DenseVector.java index 3c3d6b4..5b3dea7 100644 --- a/math/src/main/java/org/apache/mahout/math/DenseVector.java +++ b/math/src/main/java/org/apache/mahout/math/DenseVector.java @@ -159,6 +159,17 @@ public class DenseVector extends AbstractVector { return values.length; } + @Override + public int getNumNonZeroElements() { + int numNonZeros = 0; + for (int index = 0; index < values.length; index++) { + if (values[index] != 0) { + numNonZeros++; + } + } + return numNonZeros; + } + public Vector assign(DenseVector vector) { // make sure the data field has the correct length if (vector.values.length != this.values.length) { http://git-wip-us.apache.org/repos/asf/mahout/blob/45d88031/math/src/main/java/org/apache/mahout/math/PermutedVectorView.java ---------------------------------------------------------------------- diff --git a/math/src/main/java/org/apache/mahout/math/PermutedVectorView.java b/math/src/main/java/org/apache/mahout/math/PermutedVectorView.java index e2d2261..f34f2b0 100644 --- a/math/src/main/java/org/apache/mahout/math/PermutedVectorView.java +++ b/math/src/main/java/org/apache/mahout/math/PermutedVectorView.java @@ -215,17 +215,20 @@ public class PermutedVectorView extends AbstractVector { vector.setQuick(pivot[index], value); } - /** - * Return the number of values in the recipient - * - * @return an int - */ + /** Return the number of values in the recipient */ @Override public int getNumNondefaultElements() { return vector.getNumNondefaultElements(); } @Override + public int getNumNonZeroElements() { + // Return the number of nonzeros in the recipient, + // so potentially don't have to go through our iterator + return vector.getNumNonZeroElements(); + } + + @Override public double getLookupCost() { return vector.getLookupCost(); } http://git-wip-us.apache.org/repos/asf/mahout/blob/45d88031/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java ---------------------------------------------------------------------- diff --git a/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java b/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java index 70523ff..dbe5d3a 100644 --- a/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java +++ b/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java @@ -20,6 +20,7 @@ package org.apache.mahout.math; import java.util.Iterator; import java.util.NoSuchElementException; +import org.apache.mahout.math.list.DoubleArrayList; import org.apache.mahout.math.map.OpenIntDoubleHashMap; import org.apache.mahout.math.map.OpenIntDoubleHashMap.MapElement; import org.apache.mahout.math.set.AbstractSet; @@ -146,6 +147,19 @@ public class RandomAccessSparseVector extends AbstractVector { } @Override + public int getNumNonZeroElements() { + DoubleArrayList elementValues = values.values(); + int numMappedElements = elementValues.size(); + int numNonZeros = 0; + for (int index = 0; index < numMappedElements; index++) { + if (elementValues.getQuick(index) != 0) { + numNonZeros++; + } + } + return numNonZeros; + } + + @Override public double getLookupCost() { return 1; } http://git-wip-us.apache.org/repos/asf/mahout/blob/45d88031/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java ---------------------------------------------------------------------- diff --git a/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java b/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java index f3067ef..331662c 100644 --- a/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java +++ b/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java @@ -185,6 +185,19 @@ public class SequentialAccessSparseVector extends AbstractVector { } @Override + public int getNumNonZeroElements() { + double[] elementValues = values.getValues(); + int numMappedElements = values.getNumMappings(); + int numNonZeros = 0; + for (int index = 0; index < numMappedElements; index++) { + if (elementValues[index] != 0) { + numNonZeros++; + } + } + return numNonZeros; + } + + @Override public double getLookupCost() { return Math.max(1, Math.round(Functions.LOG2.apply(getNumNondefaultElements()))); } http://git-wip-us.apache.org/repos/asf/mahout/blob/45d88031/math/src/test/java/org/apache/mahout/math/VectorTest.java ---------------------------------------------------------------------- diff --git a/math/src/test/java/org/apache/mahout/math/VectorTest.java b/math/src/test/java/org/apache/mahout/math/VectorTest.java index d15e7d9..67dc1e9 100644 --- a/math/src/test/java/org/apache/mahout/math/VectorTest.java +++ b/math/src/test/java/org/apache/mahout/math/VectorTest.java @@ -1021,6 +1021,38 @@ public final class VectorTest extends MahoutTestCase { } } + @Test + public void testNumNonZerosDense() { + DenseVector vector = new DenseVector(10); + vector.assign(1); + vector.setQuick(3, 0); + vector.set(5, 0); + + assertEquals(8, vector.getNumNonZeroElements()); + } + + @Test + public void testNumNonZerosRandomAccessSparse() { + RandomAccessSparseVector vector = new RandomAccessSparseVector(10); + vector.setQuick(3, 1); + vector.set(5, 1); + vector.setQuick(7, 0); + vector.set(9, 0); + + assertEquals(2, vector.getNumNonZeroElements()); + } + + @Test + public void testNumNonZerosSequentialAccessSparse() { + SequentialAccessSparseVector vector = new SequentialAccessSparseVector(10); + vector.setQuick(3, 1); + vector.set(5, 1); + vector.setQuick(7, 0); + vector.set(9, 0); + + assertEquals(2, vector.getNumNonZeroElements()); + } + // Test NonZeroIterator on an list with 1 elements private static void testSingleNonZeroIterator(Vector vector) { vector.set(1, 6);
