Author: tdunning
Date: Tue Sep 4 02:18:48 2012
New Revision: 1380429
URL: http://svn.apache.org/viewvc?rev=1380429&view=rev
Log:
MAHOUT-1059 - Stylistic cleanups
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/CentroidTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/FileBasedMatrixTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/WeightedVectorTest.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java?rev=1380429&r1=1380428&r2=1380429&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java Tue Sep 4 02:18:48 2012
@@ -1,70 +1,69 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.clustering.meanshift;
-
-import java.io.IOException;
-import java.util.Collection;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.mahout.clustering.iterator.ClusterWritable;
-
-import com.google.common.collect.Lists;
-
-public class MeanShiftCanopyMapper extends Mapper<WritableComparable<?>,ClusterWritable,Text,ClusterWritable> {
-
- private final Collection<MeanShiftCanopy> canopies = Lists.newArrayList();
-
- private MeanShiftCanopyClusterer clusterer;
-
-private Integer numReducers;
-
- @Override
- protected void setup(Context context) throws IOException, InterruptedException {
- super.setup(context);
- Configuration conf = context.getConfiguration();
- clusterer = new MeanShiftCanopyClusterer(conf);
- numReducers = Integer.valueOf(conf.get(MeanShiftCanopyDriver.MAPRED_REDUCE_TASKS, "1"));
- }
-
- @Override
- protected void map(WritableComparable<?> key, ClusterWritable clusterWritable, Context context)
- throws IOException, InterruptedException {
- MeanShiftCanopy canopy = (MeanShiftCanopy)clusterWritable.getValue();
- clusterer.mergeCanopy(canopy.shallowCopy(), canopies);
- }
-
- @Override
- protected void cleanup(Context context) throws IOException, InterruptedException {
- int reducer = 0;
- for (MeanShiftCanopy canopy : canopies) {
- clusterer.shiftToMean(canopy);
- ClusterWritable clusterWritable = new ClusterWritable();
- clusterWritable.setValue(canopy);
- context.write(new Text(String.valueOf(reducer)), clusterWritable);
- reducer++;
- if (reducer >= numReducers) {
- reducer=0;
- }
- }
- super.cleanup(context);
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.clustering.meanshift;
+
+import com.google.common.collect.Lists;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.mahout.clustering.iterator.ClusterWritable;
+
+import java.io.IOException;
+import java.util.Collection;
+
+public class MeanShiftCanopyMapper extends Mapper<WritableComparable<?>, ClusterWritable, Text, ClusterWritable> {
+
+ private final Collection<MeanShiftCanopy> canopies = Lists.newArrayList();
+
+ private MeanShiftCanopyClusterer clusterer;
+
+ private Integer numReducers;
+
+ @Override
+ protected void setup(Context context) throws IOException, InterruptedException {
+ super.setup(context);
+ Configuration conf = context.getConfiguration();
+ clusterer = new MeanShiftCanopyClusterer(conf);
+ numReducers = Integer.valueOf(conf.get(MeanShiftCanopyDriver.MAPRED_REDUCE_TASKS, "1"));
+ }
+
+ @Override
+ protected void map(WritableComparable<?> key, ClusterWritable clusterWritable, Context context)
+ throws IOException, InterruptedException {
+ MeanShiftCanopy canopy = (MeanShiftCanopy) clusterWritable.getValue();
+ clusterer.mergeCanopy(canopy.shallowCopy(), canopies);
+ }
+
+ @Override
+ protected void cleanup(Context context) throws IOException, InterruptedException {
+ int reducer = 0;
+ for (MeanShiftCanopy canopy : canopies) {
+ clusterer.shiftToMean(canopy);
+ ClusterWritable clusterWritable = new ClusterWritable();
+ clusterWritable.setValue(canopy);
+ context.write(new Text(String.valueOf(reducer)), clusterWritable);
+ reducer++;
+ if (reducer >= numReducers) {
+ reducer = 0;
+ }
+ }
+ super.cleanup(context);
+ }
+
+}
Modified:
mahout/trunk/math/src/test/java/org/apache/mahout/math/CentroidTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/CentroidTest.java?rev=1380429&r1=1380428&r2=1380429&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/CentroidTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/CentroidTest.java Tue Sep 4 02:18:48 2012
@@ -24,36 +24,36 @@ import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class CentroidTest {
- @Test
- public void testUpdate() {
- MultiNormal f = new MultiNormal(20);
+ @Test
+ public void testUpdate() {
+ MultiNormal f = new MultiNormal(20);
- Vector a = f.sample();
- Vector b = f.sample();
- Vector c = f.sample();
+ Vector a = f.sample();
+ Vector b = f.sample();
+ Vector c = f.sample();
- final DenseVector x = new DenseVector(a);
- Centroid x1 = new Centroid(1, x);
+ final DenseVector x = new DenseVector(a);
+ Centroid x1 = new Centroid(1, x);
- x1.update(new Centroid(2, new DenseVector(b)));
- Centroid x2 = new Centroid(x1);
+ x1.update(new Centroid(2, new DenseVector(b)));
+ Centroid x2 = new Centroid(x1);
- x1.update(c);
+ x1.update(c);
- // check for correct value
- final Vector mean = a.plus(b).plus(c).assign(Functions.div(3));
- assertEquals(0, x1.getVector().minus(mean).norm(1), 1e-8);
- assertEquals(3, x1.getWeight(), 0);
+ // check for correct value
+ final Vector mean = a.plus(b).plus(c).assign(Functions.div(3));
+ assertEquals(0, x1.getVector().minus(mean).norm(1), 1e-8);
+ assertEquals(3, x1.getWeight(), 0);
- assertEquals(0, x2.minus(a.plus(b).divide(2)).norm(1), 1e-8);
- assertEquals(2, x2.getWeight(), 0);
+ assertEquals(0, x2.minus(a.plus(b).divide(2)).norm(1), 1e-8);
+ assertEquals(2, x2.getWeight(), 0);
- assertEquals(0, new Centroid(x1.getIndex(), x1, x1.getWeight()).minus(x1).norm(1), 1e-8);
+ assertEquals(0, new Centroid(x1.getIndex(), x1, x1.getWeight()).minus(x1).norm(1), 1e-8);
- // and verify shared storage
- assertEquals(0, x.minus(x1).norm(1), 0);
+ // and verify shared storage
+ assertEquals(0, x.minus(x1).norm(1), 0);
- assertEquals(3, x1.getWeight(), 1e-8);
- assertEquals(1, x1.getIndex());
- }
+ assertEquals(3, x1.getWeight(), 1e-8);
+ assertEquals(1, x1.getIndex());
+ }
}
Modified:
mahout/trunk/math/src/test/java/org/apache/mahout/math/FileBasedMatrixTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/FileBasedMatrixTest.java?rev=1380429&r1=1380428&r2=1380429&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/FileBasedMatrixTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/FileBasedMatrixTest.java Tue Sep 4 02:18:48 2012
@@ -29,62 +29,62 @@ import static junit.framework.Assert.ass
import static org.junit.Assume.assumeNotNull;
public class FileBasedMatrixTest {
- // 10 million rows x 40 columns x 8 bytes = 3.2GB of data
- // we need >2GB to stress the file based matrix implementation
- private static final int ROWS = 10 * 1000 * 1000;
- private static final int COLUMNS = 40;
-
- @Test
- public void testBigMatrix() throws IOException {
- // only run this test if -DrunSlowTests is used. Also requires 4GB or more of heap.
- assumeNotNull(System.getProperty("runSlowTests"));
-
- Matrix m0 = new SparseRowMatrix(ROWS, COLUMNS);
- Random gen = new Random(1);
- for (int i = 0; i < 1000; i++) {
- m0.set(gen.nextInt(ROWS), gen.nextInt(COLUMNS), matrixValue(i));
- }
- final File f = File.createTempFile("foo", ".m");
- f.deleteOnExit();
- System.out.printf("Starting to write to %s\n", f.getAbsolutePath());
- FileBasedMatrix.writeMatrix(f, m0);
- System.out.printf("done\n");
- System.out.printf("File is %.1f MB\n", f.length() / 1e6);
-
- FileBasedMatrix m1 = new FileBasedMatrix(ROWS, COLUMNS);
- System.out.printf("Starting read\n");
- m1.setData(f, false);
- gen = new Random(1);
- for (int i = 0; i < 1000; i++) {
- assertEquals(matrixValue(i), m1.get(gen.nextInt(ROWS), gen.nextInt(COLUMNS)), 0.0);
- }
- System.out.printf("done\n");
+ // 10 million rows x 40 columns x 8 bytes = 3.2GB of data
+ // we need >2GB to stress the file based matrix implementation
+ private static final int ROWS = 10 * 1000 * 1000;
+ private static final int COLUMNS = 40;
+
+ @Test
+ public void testBigMatrix() throws IOException {
+ // only run this test if -DrunSlowTests is used. Also requires 4GB or more of heap.
+ assumeNotNull(System.getProperty("runSlowTests"));
+
+ Matrix m0 = new SparseRowMatrix(ROWS, COLUMNS);
+ Random gen = new Random(1);
+ for (int i = 0; i < 1000; i++) {
+ m0.set(gen.nextInt(ROWS), gen.nextInt(COLUMNS), matrixValue(i));
}
+ final File f = File.createTempFile("foo", ".m");
+ f.deleteOnExit();
+ System.out.printf("Starting to write to %s\n", f.getAbsolutePath());
+ FileBasedMatrix.writeMatrix(f, m0);
+ System.out.printf("done\n");
+ System.out.printf("File is %.1f MB\n", f.length() / 1e6);
+
+ FileBasedMatrix m1 = new FileBasedMatrix(ROWS, COLUMNS);
+ System.out.printf("Starting read\n");
+ m1.setData(f, false);
+ gen = new Random(1);
+ for (int i = 0; i < 1000; i++) {
+ assertEquals(matrixValue(i), m1.get(gen.nextInt(ROWS), gen.nextInt(COLUMNS)), 0.0);
+ }
+ System.out.printf("done\n");
+ }
- private int matrixValue(int i) {
- return (i * 88513) % 10000;
+ private int matrixValue(int i) {
+ return (i * 88513) % 10000;
+ }
+
+ @Test
+ public void testSetData() throws IOException {
+ File f = File.createTempFile("matrix", ".m");
+ f.deleteOnExit();
+
+ Matrix m0 = new DenseMatrix(100000, 30);
+ MultiNormal gen = new MultiNormal(30);
+ for (MatrixSlice row : m0) {
+ row.vector().assign(gen.sample());
}
+ FileBasedMatrix.writeMatrix(f, m0);
+
+ FileBasedMatrix m = new FileBasedMatrix(100000, 30);
+ m.setData(f, true);
+
+ assertEquals(0, m0.minus(m).aggregate(Functions.MAX, Functions.ABS), 1e-8);
- @Test
- public void testSetData() throws IOException {
- File f = File.createTempFile("matrix", ".m");
- f.deleteOnExit();
-
- Matrix m0 = new DenseMatrix(100000, 30);
- MultiNormal gen = new MultiNormal(30);
- for (MatrixSlice row : m0) {
- row.vector().assign(gen.sample());
- }
- FileBasedMatrix.writeMatrix(f, m0);
-
- FileBasedMatrix m = new FileBasedMatrix(100000, 30);
- m.setData(f, true);
-
- assertEquals(0, m0.minus(m).aggregate(Functions.MAX, Functions.ABS), 1e-8);
-
- int i = 0;
- for (MatrixSlice row : m) {
- assertEquals(0, row.vector().minus(m0.viewRow(i++)).norm(1), 1e-8);
- }
+ int i = 0;
+ for (MatrixSlice row : m) {
+ assertEquals(0, row.vector().minus(m0.viewRow(i++)).norm(1), 1e-8);
}
+ }
}
Modified:
mahout/trunk/math/src/test/java/org/apache/mahout/math/WeightedVectorTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/WeightedVectorTest.java?rev=1380429&r1=1380428&r2=1380429&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/WeightedVectorTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/WeightedVectorTest.java Tue Sep 4 02:18:48 2012
@@ -23,14 +23,14 @@ import static org.junit.Assert.assertEqu
public class WeightedVectorTest {
- @Test
- public void testLength() {
- Vector v = new DenseVector(new double[]{0.9921337470551008, 1.0031004325833064, 0.9963963182745947});
- Centroid c = new Centroid(3, new DenseVector(v), 2);
- assertEquals(c.getVector().getLengthSquared(), c.getLengthSquared(), 1e-17);
- // previously, this wouldn't clear the cached squared length value correctly which would cause bad distances
- c.set(0, -1);
- System.out.printf("c = %.9f\nv = %.9f\n", c.getLengthSquared(), c.getVector().getLengthSquared());
- assertEquals(c.getVector().getLengthSquared(), c.getLengthSquared(), 1e-17);
- }
+ @Test
+ public void testLength() {
+ Vector v = new DenseVector(new double[]{0.9921337470551008, 1.0031004325833064, 0.9963963182745947});
+ Centroid c = new Centroid(3, new DenseVector(v), 2);
+ assertEquals(c.getVector().getLengthSquared(), c.getLengthSquared(), 1e-17);
+ // previously, this wouldn't clear the cached squared length value correctly which would cause bad distances
+ c.set(0, -1);
+ System.out.printf("c = %.9f\nv = %.9f\n", c.getLengthSquared(), c.getVector().getLengthSquared());
+ assertEquals(c.getVector().getLengthSquared(), c.getLengthSquared(), 1e-17);
+ }
}