Author: srowen
Date: Fri Feb 4 17:48:46 2011
New Revision: 1067241
URL: http://svn.apache.org/viewvc?rev=1067241&view=rev
Log:
MAHOUT-571 add new distance measure implementations
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ChebyshevDistanceMeasure.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MinkowskiDistanceMeasure.java
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestChebyshevMeasure.java
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestMinkowskiMeasure.java
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ChebyshevDistanceMeasure.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ChebyshevDistanceMeasure.java?rev=1067241&view=auto
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ChebyshevDistanceMeasure.java
(added)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ChebyshevDistanceMeasure.java
Fri Feb 4 17:48:46 2011
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.mahout.common.parameters.Parameter;
+import org.apache.mahout.math.CardinalityException;
+import org.apache.mahout.math.Vector;
+
+/**
+ * This class implements a "Chebyshev distance" metric by finding the maximum
difference
+ * between each coordinate. Also 'chessboard distance' due to the moves a king
can make.
+ */
+public class ChebyshevDistanceMeasure implements DistanceMeasure {
+
+ @Override
+ public void configure(Configuration job) {
+ // nothing to do
+ }
+
+ @Override
+ public Collection<Parameter<?>> getParameters() {
+ return Collections.emptyList();
+ }
+
+ @Override
+ public void createParameters(String prefix, Configuration jobConf) {
+ // nothing to do
+ }
+
+ @Override
+ public double distance(Vector v1, Vector v2) {
+ if (v1.size() != v2.size()) {
+ throw new CardinalityException(v1.size(), v2.size());
+ }
+ double result = 0.0;
+ Vector vector = v1.minus(v2);
+ Iterator<Vector.Element> iter = vector.iterateNonZero();
+ while (iter.hasNext()) {
+ Vector.Element e = iter.next();
+ double d = Math.abs(e.get());
+ if (d > result) {
+ result = d;
+ }
+ }
+ return result;
+ }
+
+ @Override
+ public double distance(double centroidLengthSquare, Vector centroid, Vector
v) {
+ return distance(centroid, v); // TODO
+ }
+
+}
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MinkowskiDistanceMeasure.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MinkowskiDistanceMeasure.java?rev=1067241&view=auto
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MinkowskiDistanceMeasure.java
(added)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MinkowskiDistanceMeasure.java
Fri Feb 4 17:48:46 2011
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.mahout.common.parameters.DoubleParameter;
+import org.apache.mahout.common.parameters.Parameter;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.Vector.Element;
+
+/**
+ * Implement Minkowski distance, a real-valued generalization of the
+ * integral L(n) distances: Manhattan = L1, Euclidean = L2.
+ * For high numbers of dimensions, very high exponents give more useful
distances.
+ *
+ * Note: Math.pow is clever about integer-valued doubles.
+ **/
+public class MinkowskiDistanceMeasure implements DistanceMeasure {
+
+ private static final double EXPONENT = 3.0;
+
+ private List<Parameter<?>> parameters;
+ private double exponent = EXPONENT;
+
+ public MinkowskiDistanceMeasure() {
+ }
+
+ public MinkowskiDistanceMeasure(double exponent) {
+ this.exponent = exponent;
+ }
+
+ @Override
+ public void createParameters(String prefix, Configuration conf) {
+ parameters = new ArrayList<Parameter<?>>();
+ Parameter<?> param =
+ new DoubleParameter(prefix, "exponent", conf, EXPONENT, "Exponent for
Fractional Lagrange distance");
+ parameters.add(param);
+ }
+
+ @Override
+ public Collection<Parameter<?>> getParameters() {
+ return parameters;
+ }
+
+ @Override
+ public void configure(Configuration jobConf) {
+ if (parameters == null) {
+ ParameteredGeneralizations.configureParameters(this, jobConf);
+ }
+ }
+
+ public double getExponent() {
+ return exponent;
+ }
+
+ public void setExponent(double exponent) {
+ this.exponent = exponent;
+ }
+
+ /**
+ * Math.pow is clever about integer-valued doubles
+ */
+ @Override
+ public double distance(Vector v1, Vector v2) {
+ Vector distVector = v1.minus(v2);
+ double sum = 0.0;
+ Iterator<Element> it = distVector.iterateNonZero();
+ while (it.hasNext()) {
+ Element e = it.next();
+ sum += Math.pow(Math.abs(e.get()), exponent);
+ }
+ return Math.pow(sum, 1.0 / exponent);
+ }
+
+ // TODO: how?
+ @Override
+ public double distance(double centroidLengthSquare, Vector centroid, Vector
v) {
+ return distance(centroid, v); // TODO - can this use centroidLengthSquare
somehow?
+ }
+
+}
Added:
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestChebyshevMeasure.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestChebyshevMeasure.java?rev=1067241&view=auto
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestChebyshevMeasure.java
(added)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestChebyshevMeasure.java
Fri Feb 4 17:48:46 2011
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
+import org.junit.Test;
+
+public final class TestChebyshevMeasure extends MahoutTestCase {
+
+ @Test
+ public void testMeasure() {
+
+ DistanceMeasure chebyshevDistanceMeasure = new ChebyshevDistanceMeasure();
+
+ Vector[] vectors = {
+ new DenseVector(new double[]{1, 0, 0, 0, 0, 0}),
+ new DenseVector(new double[]{1, 1, 1, 0, 0, 0}),
+ new DenseVector(new double[]{1, 1, 1, 1, 1, 1})
+ };
+ double[][] distances = {{0.0, 1.0, 1.0}, {1.0, 0.0, 1.0}, {1.0, 1.0, 0.0}};
+
+ double[][] chebyshevDistanceMatrix = new double[3][3];
+
+ for (int a = 0; a < 3; a++) {
+ for (int b = 0; b < 3; b++) {
+ chebyshevDistanceMatrix[a][b] =
chebyshevDistanceMeasure.distance(vectors[a], vectors[b]);
+ }
+ }
+ for (int a = 0; a < 3; a++) {
+ for (int b = 0; b < 3; b++) {
+ assertEquals(distances[a][b], chebyshevDistanceMatrix[a][b], EPSILON);
+ }
+ }
+
+ assertEquals(0.0, chebyshevDistanceMatrix[0][0], EPSILON);
+ }
+
+}
Added:
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestMinkowskiMeasure.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestMinkowskiMeasure.java?rev=1067241&view=auto
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestMinkowskiMeasure.java
(added)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/TestMinkowskiMeasure.java
Fri Feb 4 17:48:46 2011
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.common.distance;
+
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
+import org.junit.Test;
+
+public final class TestMinkowskiMeasure extends MahoutTestCase {
+
+ @Test
+ public void testMeasure() {
+
+ DistanceMeasure minkowskiDistanceMeasure = new
MinkowskiDistanceMeasure(1.5);
+ DistanceMeasure manhattanDistanceMeasure = new ManhattanDistanceMeasure();
+ DistanceMeasure euclideanDistanceMeasure = new EuclideanDistanceMeasure();
+
+ Vector[] vectors = {
+ new DenseVector(new double[]{1, 0, 0, 0, 0, 0}),
+ new DenseVector(new double[]{1, 1, 1, 0, 0, 0}),
+ new DenseVector(new double[]{1, 1, 1, 1, 1, 1})
+ };
+
+ double[][] minkowskiDistanceMatrix = new double[3][3];
+ double[][] manhattanDistanceMatrix = new double[3][3];
+ double[][] euclideanDistanceMatrix = new double[3][3];
+
+ for (int a = 0; a < 3; a++) {
+ for (int b = 0; b < 3; b++) {
+ minkowskiDistanceMatrix[a][b] =
minkowskiDistanceMeasure.distance(vectors[a], vectors[b]);
+ manhattanDistanceMatrix[a][b] =
manhattanDistanceMeasure.distance(vectors[a], vectors[b]);
+ euclideanDistanceMatrix[a][b] =
euclideanDistanceMeasure.distance(vectors[a], vectors[b]);
+ }
+ }
+
+ for (int a = 0; a < 3; a++) {
+ for (int b = 0; b < 3; b++) {
+ assertTrue(minkowskiDistanceMatrix[a][b] <=
manhattanDistanceMatrix[a][b]);
+ assertTrue(minkowskiDistanceMatrix[a][b] >=
euclideanDistanceMatrix[a][b]);
+ }
+ }
+
+ assertEquals(0.0, minkowskiDistanceMatrix[0][0], EPSILON);
+ assertTrue(minkowskiDistanceMatrix[0][0] < minkowskiDistanceMatrix[0][1]);
+ assertTrue(minkowskiDistanceMatrix[0][1] < minkowskiDistanceMatrix[0][2]);
+ }
+
+}