Author: dfilimon
Date: Fri May 3 16:26:50 2013
New Revision: 1478865
URL: http://svn.apache.org/r1478865
Log:
Part of MAHOUT-1202.
Extended OrderedIntDoubleMapping to support merging and zero values.
This is required for faster SequentialAccessSparseVector updates.
Modified:
mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/TestOrderedIntDoubleMapping.java
Modified:
mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java?rev=1478865&r1=1478864&r2=1478865&view=diff
==============================================================================
---
mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java
(original)
+++
mahout/trunk/math/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java
Fri May 3 16:26:50 2013
@@ -19,7 +19,7 @@ package org.apache.mahout.math;
import java.io.Serializable;
-final class OrderedIntDoubleMapping implements Serializable, Cloneable {
+public final class OrderedIntDoubleMapping implements Serializable, Cloneable {
static final double DEFAULT_VALUE = 0.0;
@@ -27,6 +27,15 @@ final class OrderedIntDoubleMapping impl
private double[] values;
private int numMappings;
+ // If true, doesn't allow DEFAULT_VALUEs in the mapping (adding a zero
discards it). Otherwise, a DEFAULT_VALUE is
+ // treated like any other value.
+ private boolean noDefault = true;
+
+ OrderedIntDoubleMapping(boolean noDefault) {
+ this();
+ this.noDefault = noDefault;
+ }
+
OrderedIntDoubleMapping() {
// no-arg constructor for deserializer
this(11);
@@ -44,28 +53,28 @@ final class OrderedIntDoubleMapping impl
this.numMappings = numMappings;
}
- int[] getIndices() {
+ public int[] getIndices() {
return indices;
}
- int indexAt(int offset) {
+ public int indexAt(int offset) {
return indices[offset];
}
- void setIndexAt(int offset, int index) {
+ public void setIndexAt(int offset, int index) {
indices[offset] = index;
}
- double[] getValues() {
+ public double[] getValues() {
return values;
}
- void setValueAt(int offset, double value) {
+ public void setValueAt(int offset, double value) {
values[offset] = value;
}
- int getNumMappings() {
+ public int getNumMappings() {
return numMappings;
}
@@ -103,14 +112,72 @@ final class OrderedIntDoubleMapping impl
}
public void set(int index, double value) {
- int offset = find(index);
- if (offset >= 0) {
- insertOrUpdateValueIfPresent(offset, value);
+ if (numMappings == 0 || index > indices[numMappings - 1]) {
+ if (!noDefault || value != DEFAULT_VALUE) {
+ if (numMappings >= indices.length) {
+ growTo(Math.max((int) (1.2 * numMappings), numMappings + 1));
+ }
+ indices[numMappings] = index;
+ values[numMappings] = value;
+ ++numMappings;
+ }
} else {
- insertValueIfNotDefault(index, offset, value);
+ int offset = find(index);
+ if (offset >= 0) {
+ insertOrUpdateValueIfPresent(offset, value);
+ } else {
+ insertValueIfNotDefault(index, offset, value);
+ }
}
}
+ /**
+ * Merges the updates in linear time by allocating new arrays and iterating
through the existing indices and values
+ * and the updates' indices and values at the same time while selecting the
minimum index to set at each step.
+ * @param updates another list of mappings to be merged in.
+ */
+ public void merge(OrderedIntDoubleMapping updates) {
+ int updateIndices[] = updates.getIndices();
+ double updateValues[] = updates.getValues();
+
+ int newNumMappings = numMappings + updates.getNumMappings();
+ int newCapacity = Math.max((int) (1.2 * newNumMappings), newNumMappings +
1);
+ int newIndices[] = new int[newCapacity];
+ double newValues[] = new double[newCapacity];
+
+ int k = 0;
+ int i = 0, j = 0;
+ for (; i < numMappings && j < updates.getNumMappings(); ++k) {
+ if (indices[i] < updateIndices[j]) {
+ newIndices[k] = indices[i];
+ newValues[k] = values[i];
+ ++i;
+ } else if (indices[i] > updateIndices[j]) {
+ newIndices[k] = updateIndices[j];
+ newValues[k] = updateValues[j];
+ ++j;
+ } else {
+ newIndices[k] = updateIndices[j];
+ newValues[k] = updateValues[j];
+ ++i;
+ ++j;
+ }
+ }
+
+ for (; i < numMappings; ++i, ++k) {
+ newIndices[k] = indices[i];
+ newValues[k] = values[i];
+ }
+ for (; j < updates.getNumMappings(); ++j, ++k) {
+ newIndices[k] = updateIndices[j];
+ newValues[k] = updateValues[j];
+ }
+
+ indices = newIndices;
+ values = newValues;
+ numMappings = k;
+ }
+
@Override
public int hashCode() {
int result = 0;
@@ -150,6 +217,7 @@ final class OrderedIntDoubleMapping impl
return result.toString();
}
+ @SuppressWarnings("CloneDoesntCallSuperClone")
@Override
public OrderedIntDoubleMapping clone() {
return new OrderedIntDoubleMapping(indices.clone(), values.clone(),
numMappings);
@@ -161,13 +229,12 @@ final class OrderedIntDoubleMapping impl
double newValue = values[offset] + increment;
insertOrUpdateValueIfPresent(offset, newValue);
} else {
- double newValue = increment;
- insertValueIfNotDefault(index, offset, newValue);
+ insertValueIfNotDefault(index, offset, increment);
}
}
private void insertValueIfNotDefault(int index, int offset, double value) {
- if (value != DEFAULT_VALUE) {
+ if (!noDefault || value != DEFAULT_VALUE) {
if (numMappings >= indices.length) {
growTo(Math.max((int) (1.2 * numMappings), numMappings + 1));
}
@@ -185,7 +252,7 @@ final class OrderedIntDoubleMapping impl
}
private void insertOrUpdateValueIfPresent(int offset, double newValue) {
- if (newValue == DEFAULT_VALUE) {
+ if (noDefault && newValue == DEFAULT_VALUE) {
for (int i = offset + 1, j = offset; i < numMappings; i++, j++) {
indices[j] = indices[i];
values[j] = values[i];
Modified:
mahout/trunk/math/src/test/java/org/apache/mahout/math/TestOrderedIntDoubleMapping.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/TestOrderedIntDoubleMapping.java?rev=1478865&r1=1478864&r2=1478865&view=diff
==============================================================================
---
mahout/trunk/math/src/test/java/org/apache/mahout/math/TestOrderedIntDoubleMapping.java
(original)
+++
mahout/trunk/math/src/test/java/org/apache/mahout/math/TestOrderedIntDoubleMapping.java
Fri May 3 16:26:50 2013
@@ -68,4 +68,37 @@ public final class TestOrderedIntDoubleM
assertEquals(0.0, clone.get(6), EPSILON);
}
+ @Test
+ public void testAddDefaultElements() {
+ OrderedIntDoubleMapping mapping = new OrderedIntDoubleMapping(false);
+ mapping.set(1, 1.1);
+ assertEquals(1, mapping.getNumMappings());
+ mapping.set(2, 0);
+ assertEquals(2, mapping.getNumMappings());
+ mapping.set(0, 0);
+ assertEquals(3, mapping.getNumMappings());
+ }
+
+ @Test
+ public void testMerge() {
+ OrderedIntDoubleMapping mappingOne = new OrderedIntDoubleMapping(false);
+ mappingOne.set(0, 0);
+ mappingOne.set(2, 2);
+ mappingOne.set(4, 4);
+ mappingOne.set(10, 10);
+
+ OrderedIntDoubleMapping mappingTwo = new OrderedIntDoubleMapping();
+ mappingTwo.set(1, 1);
+ mappingTwo.set(3, 3);
+ mappingTwo.set(5, 5);
+ mappingTwo.set(10, 20);
+
+ mappingOne.merge(mappingTwo);
+
+ assertEquals(7, mappingOne.getNumMappings());
+ for (int i = 0; i < 6; ++i) {
+ assertEquals(i, mappingOne.get(i), i);
+ }
+ assertEquals(20, mappingOne.get(10), 0);
+ }
}