Author: gsingers
Date: Tue Jun 16 20:44:12 2009
New Revision: 785386
URL: http://svn.apache.org/viewvc?rev=785386&view=rev
Log:
MAHOUT-65: Add ability to name a vector, also update what equals means for
Vectors
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/AbstractVector.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/AbstractVector.java
URL:
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/AbstractVector.java?rev=785386&r1=785385&r2=785386&view=diff
==============================================================================
---
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/AbstractVector.java
(original)
+++
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/AbstractVector.java
Tue Jun 16 20:44:12 2009
@@ -37,6 +37,15 @@
* transient so that it will not be serialized with each vector instance.
*/
private transient Map<String, Integer> bindings;
+ protected String name;
+
+
+ /** Creates a vector without a name; the {@code name} field is left {@code null}. */
+ protected AbstractVector() {
+ }
+
+ /**
+  * Creates a vector that carries the given name.
+  *
+  * @param name the name to store on this vector; subclasses pass {@code null} for "no name"
+  */
+ protected AbstractVector(String name) {
+ this.name = name;
+ }
/**
* Subclasses must override to return an appropriately sparse or dense result
@@ -316,8 +325,16 @@
Gson gson = builder.create();
return gson.fromJson(formattedString, vectorType);
}
-
- /* (non-Javadoc)
+
+ /**
+  * @return the name currently stored on this vector, or {@code null} if none has been set
+  */
+ public String getName() {
+ return name;
+
+ }
+
+ /**
+  * Replaces the name stored on this vector.
+  *
+  * @param name the new name; stored as given, without any validation
+  */
+ public void setName(String name) {
+ this.name = name;
+
+ }/* (non-Javadoc)
* @see org.apache.mahout.matrix.Vector#asFormatString()
*/
public String asFormatString(){
@@ -330,17 +347,23 @@
}
/**
- * Compare whether two Vector implementations are the same, regardless of the
- * implementation. Two Vectors are the same if they have the same cardinality
+ * Compare whether two Vector implementations have the same elements, regardless of the
+ * implementation and name. Two Vectors are equivalent if they have the same cardinality
* and all of their values are the same.
+ * <p/>
+ * Does not compare {@link Vector#getName()}.
*
*
* @param left The left hand Vector to compare
* @param right The right hand Vector
* @return true if the two Vectors have the same cardinality and the same
* values
+ *
+ * @see #strictEquivalence(Vector, Vector)
+ * @see Vector#equals(Object)
*/
public static boolean equivalent(Vector left, Vector right) {
+ if (left == right) return true;
boolean result = true;
int leftCardinality = left.cardinality();
if (leftCardinality == right.cardinality()) {
@@ -356,6 +379,44 @@
return result;
}
+ /**
+  * Compare whether two Vectors are the same in the strict sense: same
+  * implementation class, same name, same cardinality and same values.
+  *
+  * @param left The left hand Vector to compare
+  * @param right The right hand Vector
+  * @return true if both arguments are the same instance, or if they share the
+  *         same class, the same name (two null names count as the same), the
+  *         same cardinality and the same value at every index
+  */
+ public static boolean strictEquivalence(Vector left, Vector right) {
+   if (left == right) {
+     return true;
+   }
+   if (!left.getClass().equals(right.getClass())) {
+     return false;
+   }
+
+   // Null-safe name comparison: two null names match, exactly one null does not.
+   String leftName = left.getName();
+   String rightName = right.getName();
+   boolean sameName = leftName == null ? rightName == null : leftName.equals(rightName);
+   if (!sameName) {
+     return false;
+   }
+
+   int size = left.cardinality();
+   if (size != right.cardinality()) {
+     return false;
+   }
+   for (int i = 0; i < size; i++) {
+     if (left.getQuick(i) != right.getQuick(i)) {
+       return false;
+     }
+   }
+   return true;
+ }
+
+
/* (non-Javadoc)
* @see org.apache.mahout.matrix.Vector#get(java.lang.String)
*/
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java
URL:
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java?rev=785386&r1=785385&r2=785386&view=diff
==============================================================================
---
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java
(original)
+++
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java
Tue Jun 16 20:44:12 2009
@@ -35,6 +35,10 @@
public DenseVector() {
}
+ /**
+  * Construct a named instance. Only the name is set here; this constructor
+  * does not allocate the {@code values} array.
+  *
+  * @param name the name passed on to the superclass constructor
+  */
+ public DenseVector(String name){
+ super(name);
+ }
+
private double[] values;
/**
@@ -46,12 +50,22 @@
this.values = values.clone();
}
+ /**
+  * Construct a named instance backed by a copy of the given array.
+  *
+  * @param name the name passed on to the superclass constructor
+  * @param values the cell values; cloned, so the caller's array is not shared
+  */
+ public DenseVector(String name, double[] values) {
+ super(name);
+ this.values = values.clone();
+ }
+
/**
* Construct a new instance of the given cardinality
*
* @param cardinality
*/
public DenseVector(int cardinality) {
+ // No name supplied: delegate to the named constructor with a null name.
+ this(null, cardinality);
+ }
+
+ /**
+  * Construct a new named instance of the given cardinality.
+  *
+  * @param name the name passed on to the superclass constructor; null when
+  *             reached through {@link #DenseVector(int)}
+  * @param cardinality the length of the freshly allocated values array
+  */
+ public DenseVector(String name, int cardinality) {
+ super(name);
this.values = new double[cardinality];
}
@@ -183,13 +197,27 @@
this.values = values;
}
- @Override
+ /**
+ * Indicate whether the two objects are the same or not. Two {@link org.apache.mahout.matrix.Vector}s can be equal
+ * even if the underlying implementation is not equal.
+ * @param o The object to compare
+ * @return true if the objects have the same cell values and same name, false otherwise.
+ *
+ * @see AbstractVector#strictEquivalence(Vector, Vector)
+ * @see AbstractVector#equivalent(Vector, Vector)
+ */
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Vector)) return false;
Vector that = (Vector) o;
+ String thatName = that.getName();
if (this.cardinality() != that.cardinality()) return false;
+ if (name != null && thatName != null && !name.equals(thatName)){
+ return false;
+ } else if ((name != null && thatName == null) || (thatName != null && name
== null)){
+ return false;
+ }
if (that instanceof DenseVector) {
if (!Arrays.equals(values, ((DenseVector) that).values)) return false;
@@ -203,7 +231,8 @@
@Override
public int hashCode() {
int result = (values != null ? values.hashCode() : 0);
- result = 31 * result + values.length;
+ result = 31 * result + name.hashCode();
+
return result;
}
}
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java
URL:
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java?rev=785386&r1=785385&r2=785386&view=diff
==============================================================================
---
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java
(original)
+++
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java
Tue Jun 16 20:44:12 2009
@@ -35,6 +35,10 @@
public SparseVector() {
}
+ /**
+  * Construct a named instance. Only the name is set here; this constructor
+  * neither creates the values map nor assigns a cardinality.
+  *
+  * @param name the name passed on to the superclass constructor
+  */
+ public SparseVector(String name) {
+ super(name);
+ }
+
private Map<Integer, Double> values;
private int cardinality;
@@ -42,6 +46,11 @@
public static boolean optimizeTimes = true;
public SparseVector(int cardinality) {
+ // No name supplied: delegate to the named constructor with a null name.
+ this(null, cardinality);
+ }
+
+ /**
+  * Construct a new named instance of the given cardinality with no stored values.
+  *
+  * @param name the name passed on to the superclass constructor; null when
+  *             reached through {@link #SparseVector(int)}
+  * @param cardinality the cardinality recorded for this vector
+  */
+ public SparseVector(String name, int cardinality) {
+ super(name);
values = new HashMap<Integer, Double>();
this.cardinality = cardinality;
}
@@ -136,13 +145,28 @@
return new Iterator();
}
- @Override
+ /**
+ * Indicate whether the two objects are the same or not. Two {@link org.apache.mahout.matrix.Vector}s can be equal
+ * even if the underlying implementation is not equal.
+ *
+ * @param o The object to compare
+ * @return true if the objects have the same cell values and same name, false otherwise.
+ *
+ * @see AbstractVector#strictEquivalence(Vector, Vector)
+ * @see AbstractVector#equivalent(Vector, Vector)
+ */
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Vector)) return false;
Vector that = (Vector) o;
if (this.cardinality() != that.cardinality()) return false;
+ String thatName = that.getName();
+ if (name != null && thatName != null && !name.equals(thatName)){
+ return false;
+ } else if ((name != null && thatName == null) || (thatName != null && name
== null)){
+ return false;
+ }
if (that instanceof SparseVector) {
return (values == null ? ((SparseVector) that).values == null :
values.equals(((SparseVector) that).values));
@@ -158,6 +182,7 @@
public int hashCode() {
int result = (values != null ? values.hashCode() : 0);
result = 31 * result + cardinality;
+ // The name is optional (SparseVector(int) passes null), so guard against a
+ // NullPointerException; a missing name contributes 0.
+ result = 31 * result + (name != null ? name.hashCode() : 0);
return result;
}
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java
URL:
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java?rev=785386&r1=785385&r2=785386&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java
Tue Jun 16 20:44:12 2009
@@ -28,6 +28,19 @@
public interface Vector extends Iterable<Vector.Element>, Writable {
/**
+ * Vectors may have a name associated with them, which makes them easy to identify.
+ * @return The name, or null if one has not been set
+ */
+ String getName();
+
+ /**
+ * Set a name for this vector. Need not be unique in a set of Vectors, but probably is more useful if it is.
+ * In other words, Mahout does not check for uniqueness.
+ * @param name The name
+ */
+ void setName(String name);
+
+ /**
* Return a formatted WritableComparable<?> suitable for output
*
* @return formatted WritableComparable
Modified:
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java
URL:
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java?rev=785386&r1=785385&r2=785386&view=diff
==============================================================================
---
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java
(original)
+++
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java
Tue Jun 16 20:44:12 2009
@@ -42,8 +42,9 @@
}
public void testEquivalent() throws Exception {
- SparseVector left = new SparseVector(3);
- DenseVector right = new DenseVector(3);
+ //names are not used for equivalent
+ SparseVector left = new SparseVector("foo", 3);
+ DenseVector right = new DenseVector("foo", 3);
left.setQuick(0, 1);
left.setQuick(1, 2);
left.setQuick(2, 3);
@@ -52,6 +53,8 @@
right.setQuick(2, 3);
assertTrue("equivalent didn't work", AbstractVector.equivalent(left,
right));
assertTrue("equals didn't work", left.equals(right));
+ assertTrue("equivalent didn't work",
AbstractVector.strictEquivalence(left, right) == false);
+
right.setQuick(2, 4);
assertTrue("equivalent didn't work",
AbstractVector.equivalent(left, right) == false);
@@ -330,6 +333,22 @@
}
+
+ /**
+  * A DenseVector must survive an asFormatString/decodeVector round trip,
+  * both when it carries a name and when it does not.
+  */
+ public void testNameSerialization() throws Exception {
+   double[] cells = { 1.1, 2.2, 3.3 };
+
+   // Named vector.
+   Vector named = new DenseVector("foo", cells);
+   Vector namedRoundTrip = AbstractVector.decodeVector(named.asFormatString());
+   assertTrue("test and decode are not equal", named.equals(namedRoundTrip));
+
+   // Unnamed vector.
+   Vector unnamed = new DenseVector(cells);
+   Vector unnamedRoundTrip = AbstractVector.decodeVector(unnamed.asFormatString());
+   assertTrue("noName and decode are not equal", unnamed.equals(unnamedRoundTrip));
+ }
+
public void testLabelSerializationSparse() {
double[] values = { 1.1, 2.2, 3.3 };
Vector test = new SparseVector(3);