Author: srowen
Date: Fri Feb  4 17:48:46 2011
New Revision: 1067241

URL: http://svn.apache.org/viewvc?rev=1067241&view=rev
Log:
MAHOUT-571 add new distance measure implementations

Added:
    
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ChebyshevDistanceMeasure.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MinkowskiDistanceMeasure.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestChebyshevMeasure.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestMinkowskiMeasure.java

Added: 
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ChebyshevDistanceMeasure.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ChebyshevDistanceMeasure.java?rev=1067241&view=auto
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ChebyshevDistanceMeasure.java
 (added)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ChebyshevDistanceMeasure.java
 Fri Feb  4 17:48:46 2011
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.mahout.common.parameters.Parameter;
+import org.apache.mahout.math.CardinalityException;
+import org.apache.mahout.math.Vector;
+
+/**
+ * This class implements a "Chebyshev distance" metric by finding the maximum 
difference
+ * between each coordinate. Also 'chessboard distance' due to the moves a king 
can make.
+ */
+public class ChebyshevDistanceMeasure implements DistanceMeasure {
+  
+  @Override
+  public void configure(Configuration job) {
+    // nothing to do
+  }
+  
+  @Override
+  public Collection<Parameter<?>> getParameters() {
+    return Collections.emptyList();
+  }
+  
+  @Override
+  public void createParameters(String prefix, Configuration jobConf) {
+    // nothing to do
+  }
+  
+  @Override
+  public double distance(Vector v1, Vector v2) {
+    if (v1.size() != v2.size()) {
+      throw new CardinalityException(v1.size(), v2.size());
+    }
+    double result = 0.0;
+    Vector vector = v1.minus(v2);
+    Iterator<Vector.Element> iter = vector.iterateNonZero(); 
+    while (iter.hasNext()) {
+      Vector.Element e = iter.next();
+      double d = Math.abs(e.get());
+      if (d > result) {
+        result = d;
+      }
+    }
+    return result;
+  }
+  
+  @Override
+  public double distance(double centroidLengthSquare, Vector centroid, Vector 
v) {
+    return distance(centroid, v); // TODO
+  }
+  
+}

Added: 
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MinkowskiDistanceMeasure.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MinkowskiDistanceMeasure.java?rev=1067241&view=auto
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MinkowskiDistanceMeasure.java
 (added)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MinkowskiDistanceMeasure.java
 Fri Feb  4 17:48:46 2011
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.mahout.common.parameters.DoubleParameter;
+import org.apache.mahout.common.parameters.Parameter;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.Vector.Element;
+
+/** 
+ * Implement Minkowski distance, a real-valued generalization of the 
+ * integral L(n) distances: Manhattan = L1, Euclidean = L2. 
+ * For high numbers of dimensions, very high exponents give more useful 
distances. 
+ * 
+ * Note: Math.pow is clever about integer-valued doubles.
+ **/
+public class MinkowskiDistanceMeasure implements DistanceMeasure {
+
+  private static final double EXPONENT = 3.0;
+
+  private List<Parameter<?>> parameters;
+  private double exponent = EXPONENT;
+  
+  public MinkowskiDistanceMeasure() {
+  }
+  
+  public MinkowskiDistanceMeasure(double exponent) {
+    this.exponent = exponent;
+  }
+
+  @Override
+  public void createParameters(String prefix, Configuration conf) {
+    parameters = new ArrayList<Parameter<?>>();
+    Parameter<?> param =
+        new DoubleParameter(prefix, "exponent", conf, EXPONENT, "Exponent for 
Fractional Lagrange distance");
+    parameters.add(param);
+  }
+
+  @Override
+  public Collection<Parameter<?>> getParameters() {
+    return parameters;
+  }
+
+  @Override
+  public void configure(Configuration jobConf) {
+    if (parameters == null) {
+      ParameteredGeneralizations.configureParameters(this, jobConf);
+    }
+  }
+
+  public double getExponent() {
+    return exponent;
+  }
+
+  public void setExponent(double exponent) {
+    this.exponent = exponent;
+  }
+
+  /**
+   *  Math.pow is clever about integer-valued doubles
+   */
+  @Override
+  public double distance(Vector v1, Vector v2) {
+    Vector distVector = v1.minus(v2);
+    double sum = 0.0;
+    Iterator<Element> it = distVector.iterateNonZero();
+    while (it.hasNext()) {
+      Element e = it.next();
+      sum += Math.pow(Math.abs(e.get()), exponent);
+    }
+    return Math.pow(sum, 1.0 / exponent);
+  }
+
+  // TODO: how?
+  @Override
+  public double distance(double centroidLengthSquare, Vector centroid, Vector 
v) {
+    return distance(centroid, v); // TODO - can this use centroidLengthSquare 
somehow?
+  }
+
+}

Added: 
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestChebyshevMeasure.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestChebyshevMeasure.java?rev=1067241&view=auto
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestChebyshevMeasure.java
 (added)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestChebyshevMeasure.java
 Fri Feb  4 17:48:46 2011
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
+import org.junit.Test;
+
+public final class TestChebyshevMeasure extends MahoutTestCase {
+
+  @Test
+  public void testMeasure() {
+
+    DistanceMeasure chebyshevDistanceMeasure = new ChebyshevDistanceMeasure();
+
+    Vector[] vectors = {
+        new DenseVector(new double[]{1, 0, 0, 0, 0, 0}),
+        new DenseVector(new double[]{1, 1, 1, 0, 0, 0}),
+        new DenseVector(new double[]{1, 1, 1, 1, 1, 1})
+    };
+    double[][] distances = {{0.0, 1.0, 1.0}, {1.0, 0.0, 1.0}, {1.0, 1.0, 0.0}};
+    
+    double[][] chebyshevDistanceMatrix = new double[3][3];
+
+    for (int a = 0; a < 3; a++) {
+      for (int b = 0; b < 3; b++) {
+        chebyshevDistanceMatrix[a][b] = 
chebyshevDistanceMeasure.distance(vectors[a], vectors[b]);
+      }
+    }
+    for (int a = 0; a < 3; a++) {
+      for (int b = 0; b < 3; b++) {
+        assertEquals(distances[a][b], chebyshevDistanceMatrix[a][b], EPSILON);
+      }
+    }
+
+    assertEquals(0.0, chebyshevDistanceMatrix[0][0], EPSILON);
+  }
+
+}

Added: 
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestMinkowskiMeasure.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestMinkowskiMeasure.java?rev=1067241&view=auto
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestMinkowskiMeasure.java
 (added)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestMinkowskiMeasure.java
 Fri Feb  4 17:48:46 2011
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
+import org.junit.Test;
+
+public final class TestMinkowskiMeasure extends MahoutTestCase {
+
+  @Test
+  public void testMeasure() {
+
+    DistanceMeasure minkowskiDistanceMeasure = new 
MinkowskiDistanceMeasure(1.5);
+    DistanceMeasure manhattanDistanceMeasure = new ManhattanDistanceMeasure();
+    DistanceMeasure euclideanDistanceMeasure = new EuclideanDistanceMeasure();
+
+    Vector[] vectors = {
+        new DenseVector(new double[]{1, 0, 0, 0, 0, 0}),
+        new DenseVector(new double[]{1, 1, 1, 0, 0, 0}),
+        new DenseVector(new double[]{1, 1, 1, 1, 1, 1})
+    };
+
+    double[][] minkowskiDistanceMatrix = new double[3][3];
+    double[][] manhattanDistanceMatrix = new double[3][3];
+    double[][] euclideanDistanceMatrix = new double[3][3];
+
+    for (int a = 0; a < 3; a++) {
+      for (int b = 0; b < 3; b++) {
+        minkowskiDistanceMatrix[a][b] = 
minkowskiDistanceMeasure.distance(vectors[a], vectors[b]);
+        manhattanDistanceMatrix[a][b] = 
manhattanDistanceMeasure.distance(vectors[a], vectors[b]);
+        euclideanDistanceMatrix[a][b] = 
euclideanDistanceMeasure.distance(vectors[a], vectors[b]);
+      }
+    }
+
+    for (int a = 0; a < 3; a++) {
+      for (int b = 0; b < 3; b++) {
+        assertTrue(minkowskiDistanceMatrix[a][b] <= 
manhattanDistanceMatrix[a][b]);
+        assertTrue(minkowskiDistanceMatrix[a][b] >= 
euclideanDistanceMatrix[a][b]);
+      }
+    }
+
+    assertEquals(0.0, minkowskiDistanceMatrix[0][0], EPSILON);
+    assertTrue(minkowskiDistanceMatrix[0][0] < minkowskiDistanceMatrix[0][1]);
+    assertTrue(minkowskiDistanceMatrix[0][1] < minkowskiDistanceMatrix[0][2]);
+  }
+
+}


Reply via email to