Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java (original) +++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java Sat Sep 25 09:51:42 2010 @@ -26,6 +26,7 @@ import org.apache.lucene.analysis.TokenS import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.util.Version; +import org.apache.mahout.common.RandomUtils; import org.apache.mahout.ep.State; import org.apache.mahout.math.Matrix; import org.apache.mahout.math.RandomAccessSparseVector; @@ -45,6 +46,7 @@ import java.io.Reader; import java.io.StringReader; import java.text.SimpleDateFormat; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.Date; import java.util.List; @@ -97,17 +99,18 @@ import java.util.Set; * <tr><td colspan=4><hr></td></tr> * </table> */ -public class TrainNewsGroups { +public final class TrainNewsGroups { + private static final int FEATURES = 10000; // 1997-01-15 00:01:00 GMT private static final long DATE_REFERENCE = 853286460; private static final long MONTH = 30 * 24 * 3600; private static final long WEEK = 7 * 24 * 3600; - private static final Random rand = new Random(); + private static final Random rand = RandomUtils.getRandom(); private static final String[] leakLabels = {"none", "month-year", "day-month-year"}; - private static final SimpleDateFormat[] df = new SimpleDateFormat[]{ + private static final SimpleDateFormat[] df = { new SimpleDateFormat(""), new SimpleDateFormat("MMM-yyyy"), new 
SimpleDateFormat("dd-MMM-yyyy HH:mm:ss") @@ -117,6 +120,9 @@ public class TrainNewsGroups { private static final FeatureVectorEncoder encoder = new StaticWordValueEncoder("body"); private static final FeatureVectorEncoder bias = new ConstantValueEncoder("Intercept"); + private TrainNewsGroups() { + } + public static void main(String[] args) throws IOException { File base = new File(args[0]); @@ -145,7 +151,7 @@ public class TrainNewsGroups { int k = 0; double step = 0; - int[] bumps = new int[]{1, 2, 5}; + int[] bumps = {1, 2, 5}; for (File file : files.subList(0, 10000)) { String ng = file.getParentFile().getName(); int actual = newsGroups.intern(ng); @@ -242,25 +248,28 @@ public class TrainNewsGroups { Multiset<String> words = ConcurrentHashMultiset.create(); BufferedReader reader = new BufferedReader(new FileReader(file)); - String line = reader.readLine(); - Reader dateString = new StringReader(df[leakType % 3].format(new Date(date))); - countWords(analyzer, words, dateString); - while (line != null && line.length() > 0) { - boolean countHeader = ( - line.startsWith("From:") || line.startsWith("Subject:") || - line.startsWith("Keywords:") || line.startsWith("Summary:")) && (leakType < 6); - do { - StringReader in = new StringReader(line); - if (countHeader) { - countWords(analyzer, words, in); - } - line = reader.readLine(); - } while (line.startsWith(" ")); - } - if (leakType < 3) { - countWords(analyzer, words, reader); + try { + String line = reader.readLine(); + Reader dateString = new StringReader(df[leakType % 3].format(new Date(date))); + countWords(analyzer, words, dateString); + while (line != null && line.length() > 0) { + boolean countHeader = ( + line.startsWith("From:") || line.startsWith("Subject:") || + line.startsWith("Keywords:") || line.startsWith("Summary:")) && (leakType < 6); + do { + Reader in = new StringReader(line); + if (countHeader) { + countWords(analyzer, words, in); + } + line = reader.readLine(); + } while (line.startsWith(" ")); 
+ } + if (leakType < 3) { + countWords(analyzer, words, reader); + } + } finally { + reader.close(); } - reader.close(); Vector v = new RandomAccessSparseVector(FEATURES); bias.addToVector("", 1, v); @@ -271,7 +280,7 @@ public class TrainNewsGroups { return v; } - private static void countWords(Analyzer analyzer, Multiset<String> words, Reader in) throws IOException { + private static void countWords(Analyzer analyzer, Collection<String> words, Reader in) throws IOException { TokenStream ts = analyzer.tokenStream("text", in); ts.addAttribute(TermAttribute.class); while (ts.incrementToken()) { @@ -280,7 +289,7 @@ public class TrainNewsGroups { } } - private static List<File> permute(List<File> files, Random rand) { + private static List<File> permute(Iterable<File> files, Random rand) { List<File> r = Lists.newArrayList(); for (File file : files) { int i = rand.nextInt(r.size() + 1);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java (original) +++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplit.java Sat Sep 25 09:51:42 2010 @@ -34,7 +34,7 @@ import org.apache.mahout.common.StringUt /** * Separate the input data into a training and testing set. */ -public class DatasetSplit { +public final class DatasetSplit { private static final String SEED = "traintest.seed"; @@ -119,7 +119,7 @@ public class DatasetSplit { } /** - * a {@link org.apache.hadoop.mapred.LineRecordReader LineRecordReader} that skips some lines from the + * a {@link RecordReader} that skips some lines from the * input. Uses a Random number generator with a specific seed to decide if a line will be skipped or not. 
*/ public static class RndLineRecordReader extends RecordReader<LongWritable, Text> { Modified: mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java (original) +++ mahout/trunk/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java Sat Sep 25 09:51:42 2010 @@ -22,7 +22,6 @@ import com.google.common.base.Charsets; import com.google.common.base.Splitter; import com.google.common.collect.Iterables; import com.google.common.collect.Sets; -import com.google.common.io.Files; import com.google.common.io.Resources; import org.apache.mahout.classifier.AbstractVectorClassifier; import org.apache.mahout.examples.MahoutTestCase; @@ -30,18 +29,20 @@ import org.apache.mahout.math.DenseVecto import org.apache.mahout.math.Vector; import org.junit.Test; -import java.io.File; import java.io.IOException; import java.util.List; import java.util.Set; public class TrainLogisticTest extends MahoutTestCase { - Splitter onWhiteSpace = Splitter.on(CharMatcher.BREAKING_WHITESPACE).trimResults().omitEmptyStrings(); + + private static final Splitter ON_WHITE_SPACE = + Splitter.on(CharMatcher.BREAKING_WHITESPACE).trimResults().omitEmptyStrings(); + @Test public void testMain() throws IOException { String outputFile = "./model"; String inputFile = "donut.csv"; - String[] args = Iterables.toArray(onWhiteSpace.split( + String[] args = Iterables.toArray(ON_WHITE_SPACE.split( "--input " + inputFile + " --output " + @@ -50,9 +51,9 @@ public class TrainLogisticTest extends M "--predictors x y --types numeric --features 20 --passes 100 --rate 50 "), String.class); TrainLogistic.main(args); 
LogisticModelParameters lmp = TrainLogistic.getParameters(); - assertEquals(1e-4, lmp.getLambda(), 1e-9); + assertEquals(1.0e-4, lmp.getLambda(), 1.0e-9); assertEquals(20, lmp.getNumFeatures()); - assertEquals(true, lmp.useBias()); + assertTrue(lmp.useBias()); assertEquals("color", lmp.getTargetVariable()); CsvRecordFactory csv = lmp.getCsvRecordFactory(); assertEquals("[1, 2]", Sets.newTreeSet(csv.getTargetCategories()).toString()); Modified: mahout/trunk/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java (original) +++ mahout/trunk/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java Sat Sep 25 09:51:42 2010 @@ -25,6 +25,6 @@ package org.apache.mahout.examples; public abstract class MahoutTestCase extends org.apache.mahout.common.MahoutTestCase { /** "Close enough" value for floating-point comparisons. 
*/ - public static final double EPSILON = 0.0000001; + public static final double EPSILON = 0.000001; } Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java (original) +++ mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractVector.java Sat Sep 25 09:51:42 2010 @@ -573,15 +573,15 @@ public abstract class AbstractVector imp } public double get() { - return getQuick(index); - } + return getQuick(index); + } - public int index() { - return index; - } + public int index() { + return index; + } - public void set(double value) { - setQuick(index, value); - } + public void set(double value) { + setQuick(index, value); + } } } Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java (original) +++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java Sat Sep 25 09:51:42 2010 @@ -144,7 +144,7 @@ import java.util.Date; @version 1.0, 09/24/99 @see java.util.Random */ -public class MersenneTwister extends RandomEngine { +public final class MersenneTwister extends RandomEngine { /* Period parameters */ private static final int N = 624; @@ -206,7 +206,7 @@ public class MersenneTwister extends Ran } /** Generates N words at one time */ - protected void nextBlock() { + void nextBlock() { int y; int kk; @@ 
-248,7 +248,7 @@ public class MersenneTwister extends Ran /** Sets the receiver's seed. This method resets the receiver's entire internal state. * @param seed An integer that is used to reset the internal state of the generator */ - protected void setSeed(int seed) { + void setSeed(int seed) { mt[0] = seed; for (int i = 1; i < N; i++) { mt[i] = 1812433253 * (mt[i - 1] ^ (mt[i - 1] >> 30)) + i; @@ -275,7 +275,7 @@ public class MersenneTwister extends Ran * done in the 1999 reference implementation. Should only be used for testing, not * actual coding. */ - protected void setReferenceSeed(int seed) { + void setReferenceSeed(int seed) { for (int i = 0; i < N; i++) { mt[i] = seed & 0xffff0000; seed = 69069 * seed + 1; Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java (original) +++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java Sat Sep 25 09:51:42 2010 @@ -14,7 +14,7 @@ import java.util.Random; /** @deprecated until unit tests are in place. Until this time, this class/interface is unsupported. 
*/ @Deprecated -public class RandomSamplingAssistant extends PersistentObject { +public final class RandomSamplingAssistant extends PersistentObject { private static final int MAX_BUFFER_SIZE = 200; Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java (original) +++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java Sat Sep 25 09:51:42 2010 @@ -30,7 +30,7 @@ import java.util.Random; /** @deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */ @Deprecated -public class WeightedRandomSampler extends PersistentObject { +public final class WeightedRandomSampler extends PersistentObject { //public class BlockedRandomSampler extends Object implements java.io.Serializable { private int skip; Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix2D.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix2D.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix2D.java (original) +++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix2D.java Sat Sep 25 09:51:42 2010 @@ -672,9 +672,10 @@ public abstract class DoubleMatrix2D ext * @param columnIndexes The columns of the cells that shall be visible in the new view. To indicate that <i>all</i> * columns shall be visible, simply set this parameter to <tt>null</tt>. 
* @return the new view. - * @throws IndexOutOfBoundsException if <tt>!(0 <= rowIndexes[i] < rows())</tt> for any <tt>i=0..rowIndexes.length()-1</tt>. - * @throws IndexOutOfBoundsException if <tt>!(0 <= columnIndexes[i] < columns())</tt> for any - * <tt>i=0..columnIndexes.length()-1</tt>. + * @throws IndexOutOfBoundsException + * if <tt>!(0 <= rowIndexes[i] < rows())</tt> for any <tt>i=0..rowIndexes.length()-1</tt>. + * @throws IndexOutOfBoundsException + * if <tt>!(0 <= columnIndexes[i] < columns())</tt> for any <tt>i=0..columnIndexes.length()-1</tt>. */ public DoubleMatrix2D viewSelection(int[] rowIndexes, int[] columnIndexes) { // check for "all" @@ -792,13 +793,15 @@ public abstract class DoubleMatrix2D ext * * // 8 neighbors org.apache.mahout.math.function.Double9Function f = new Double9Function() { * public final double apply( double a00, double a01, double - * a02, double a10, double a11, double a12, double + * a02, double a10, double a11, double a12, + * double * a20, double a21, double a22) { return beta*a11 + * alpha*(a00+a01+a02 + a10+a12 + a20+a21+a22); } }; A.zAssign8Neighbors(B,f); * * // 4 neighbors org.apache.mahout.math.function.Double9Function g = new Double9Function() { * public final double apply( double a00, double a01, double - * a02, double a10, double a11, double a12, double + * a02, double a10, double a11, double a12, + * double * a20, double a21, double a22) { return beta*a11 + alpha*(a01+a10+a12+a21); * } C.zAssign8Neighbors(B,g); // fast, even though it doesn't look like it }; </pre> * @@ -848,9 +851,12 @@ public abstract class DoubleMatrix2D ext } } - /** Linear algebraic matrix-vector multiplication; <tt>z = A * y</tt>; Equivalent to <tt>return A.zMult(y,z,1,0);</tt> */ + /** + * Linear algebraic matrix-vector multiplication; <tt>z = A * y</tt>; + * Equivalent to <tt>return A.zMult(y,z,1,0);</tt> + */ public DoubleMatrix1D zMult(DoubleMatrix1D y, DoubleMatrix1D z) { - return zMult(y, z, 1, (z == null ? 
1 : 0), false); + return zMult(y, z, 1, z == null ? 1 : 0, false); } /** @@ -891,7 +897,7 @@ public abstract class DoubleMatrix2D ext * <tt>A.zMult(B,C,1,0,false,false)</tt>. */ public DoubleMatrix2D zMult(DoubleMatrix2D B, DoubleMatrix2D C) { - return zMult(B, C, 1, (C == null ? 1 : 0), false, false); + return zMult(B, C, 1, C == null ? 1 : 0, false, false); } /** Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java (original) +++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix1D.java Sat Sep 25 09:51:42 2010 @@ -9,8 +9,10 @@ It is provided "as is" without expressed package org.apache.mahout.math.matrix.impl; /** - Abstract base class for 1-d matrices (aka <i>vectors</i>) holding objects or primitive data types such as <code>int</code>, <code>double</code>, etc. - First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture. + Abstract base class for 1-d matrices (aka <i>vectors</i>) holding objects or primitive data types such as + <code>int</code>, <code>double</code>, etc. + First see the <a href="package-summary.html">package summary</a> and javadoc + <a href="package-tree.html">tree view</a> to get the broad picture. <p> <b>Note that this implementation is not synchronized.</b> @@ -53,7 +55,7 @@ public abstract class AbstractMatrix1D e * @param rank the relative rank of the element. * @return the absolute rank of the element. 
*/ - protected int _rank(int rank) { + protected int rank(int rank) { return zero + rank * stride; //return zero + ((rank+flipMask)^flipMask); //return zero + rank*flip; // slower @@ -98,22 +100,22 @@ public abstract class AbstractMatrix1D e /** * Sanity check for operations requiring two matrices with the same size. * - * @throws IllegalArgumentException if <tt>size() != B.size()</tt>. + * @throws IllegalArgumentException if <tt>size() != b.size()</tt>. */ - protected void checkSize(double[] B) { - if (size != B.length) { - throw new IllegalArgumentException("Incompatible sizes: " + size + " and " + B.length); + protected void checkSize(double[] b) { + if (size != b.length) { + throw new IllegalArgumentException("Incompatible sizes: " + size + " and " + b.length); } } /** * Sanity check for operations requiring two matrices with the same size. * - * @throws IllegalArgumentException if <tt>size() != B.size()</tt>. + * @throws IllegalArgumentException if <tt>size() != b.size()</tt>. */ - public void checkSize(AbstractMatrix1D B) { - if (size != B.size) { - throw new IllegalArgumentException("Incompatible sizes: " + size + " and " + B.size); + public void checkSize(AbstractMatrix1D b) { + if (size != b.size) { + throw new IllegalArgumentException("Incompatible sizes: " + size + " and " + b.size); } } @@ -124,7 +126,7 @@ public abstract class AbstractMatrix1D e * @param rank the rank of the element. 
*/ protected int index(int rank) { - return offset(_rank(rank)); + return offset(rank(rank)); } /** Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java (original) +++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java Sat Sep 25 09:51:42 2010 @@ -16,7 +16,7 @@ import org.apache.mahout.math.matrix.Dou /** @deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */ @Deprecated -public class Algebra extends PersistentObject { +public final class Algebra extends PersistentObject { /** * A default Algebra object; has {...@link Property#DEFAULT} attached for tolerance. Allows ommiting to construct an @@ -85,7 +85,7 @@ public class Algebra extends PersistentO } /** Returns sqrt(a^2 + b^2) without under/overflow. */ - protected static double hypot(double a, double b) { + static double hypot(double a, double b) { double r; if (Math.abs(a) > Math.abs(b)) { r = b / a; @@ -280,7 +280,7 @@ public class Algebra extends PersistentO * @return X; a new independent matrix; solution if A is square, least squares solution otherwise. */ public static DoubleMatrix2D solve(DoubleMatrix2D A, DoubleMatrix2D B) { - return (A.rows() == A.columns() ? (lu(A).solve(B)) : (qr(A).solve(B))); + return A.rows() == A.columns() ? (lu(A).solve(B)) : (qr(A).solve(B)); } /** @@ -288,7 +288,7 @@ public class Algebra extends PersistentO * * @return <tt>A</tt> (for convenience only). 
*/ - protected static DoubleMatrix2D trapezoidalLower(DoubleMatrix2D A) { + static DoubleMatrix2D trapezoidalLower(DoubleMatrix2D A) { int rows = A.rows(); int columns = A.columns(); for (int r = rows; --r >= 0;) { Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java (original) +++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java Sat Sep 25 09:51:42 2010 @@ -14,9 +14,11 @@ import org.apache.mahout.math.list.IntAr import org.apache.mahout.math.matrix.DoubleMatrix1D; import org.apache.mahout.math.matrix.DoubleMatrix2D; +import java.io.Serializable; + /** @deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */ @Deprecated -public class LUDecompositionQuick implements java.io.Serializable { +public class LUDecompositionQuick implements Serializable { /** Array for internal storage of decomposition. */ private DoubleMatrix2D lu; Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java (original) +++ mahout/trunk/math/src/test/java/org/apache/mahout/math/MahoutTestCase.java Sat Sep 25 09:51:42 2010 @@ -32,7 +32,7 @@ import org.junit.Before; public abstract class MahoutTestCase extends Assert { /** "Close enough" value for floating-point comparisons. 
*/ - public static final double EPSILON = 0.0000001; + public static final double EPSILON = 0.000001; private File testTempDir; Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java (original) +++ mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java Sat Sep 25 09:51:42 2010 @@ -36,7 +36,7 @@ public final class TestSingularValueDeco { -360.0 / 625.0, 192.0 / 625.0, 1756.0 / 625.0 }, }; - private static final double normTolerance = 10.0e-14; + private static final double NORM_TOLERANCE = 10.0e-14; @Test public void testMoreRows() { @@ -158,7 +158,7 @@ public final class TestSingularValueDeco } double norm = Algebra.getNorm(u.times(s).times(v.transpose()).minus(matrix)); - assertEquals(0, norm, normTolerance); + assertEquals(0, norm, NORM_TOLERANCE); } @@ -184,7 +184,7 @@ public final class TestSingularValueDeco for (int i = 0; i < mTm.numRows(); i++) { id.set(i, i, 1); } - assertEquals(0, Algebra.getNorm(mTm.minus(id)), normTolerance); + assertEquals(0, Algebra.getNorm(mTm.minus(id)), NORM_TOLERANCE); } /** test matrices values */ @@ -208,11 +208,11 @@ public final class TestSingularValueDeco // check values against known references Matrix u = svd.getU(); - assertEquals(0, Algebra.getNorm(u.minus(uRef)), normTolerance); + assertEquals(0, Algebra.getNorm(u.minus(uRef)), NORM_TOLERANCE); Matrix s = svd.getS(); - assertEquals(0, Algebra.getNorm(s.minus(sRef)), normTolerance); + assertEquals(0, Algebra.getNorm(s.minus(sRef)), NORM_TOLERANCE); Matrix v = svd.getV(); - assertEquals(0, Algebra.getNorm(v.minus(vRef)), normTolerance); + assertEquals(0, 
Algebra.getNorm(v.minus(vRef)), NORM_TOLERANCE); } Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java (original) +++ mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java Sat Sep 25 09:51:42 2010 @@ -143,7 +143,7 @@ public final class TestHebbianSolver ext _eigensVectorFactory = new DenseMapVectorFactory(); int desiredRank = 200; long time = timeSolver(TMP_EIGEN_DIR, - 0.00001, + 0.00001, 5, desiredRank, new TrainingState()); Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java (original) +++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java Sat Sep 25 09:51:42 2010 @@ -90,37 +90,37 @@ public final class SequenceFileDumper { } else { writer = new OutputStreamWriter(System.out); } - writer.append("Input Path: ").append(String.valueOf(path)).append('\n'); - - int sub = Integer.MAX_VALUE; - if (cmdLine.hasOption(substringOpt)) { - sub = Integer.parseInt(cmdLine.getValue(substringOpt).toString()); - } - boolean countOnly = cmdLine.hasOption(countOpt); - Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance(); - Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance(); - writer.append("Key class: 
").append(String.valueOf(reader.getKeyClass())); - writer.append(" Value Class: ").append(String.valueOf(value.getClass())).append('\n'); - writer.flush(); - long count = 0; - if (countOnly) { - while (reader.next(key, value)) { - count++; + try { + writer.append("Input Path: ").append(String.valueOf(path)).append('\n'); + + int sub = Integer.MAX_VALUE; + if (cmdLine.hasOption(substringOpt)) { + sub = Integer.parseInt(cmdLine.getValue(substringOpt).toString()); } - writer.append("Count: ").append(String.valueOf(count)).append('\n'); - } else { - while (reader.next(key, value)) { - writer.append("Key: ").append(String.valueOf(key)); - String str = value.toString(); - writer.append(": Value: ").append(str.length() > sub ? str.substring(0, sub) : str); - writer.write('\n'); - writer.flush(); - count++; + boolean countOnly = cmdLine.hasOption(countOpt); + Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance(); + Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance(); + writer.append("Key class: ").append(String.valueOf(reader.getKeyClass())); + writer.append(" Value Class: ").append(String.valueOf(value.getClass())).append('\n'); + writer.flush(); + long count = 0; + if (countOnly) { + while (reader.next(key, value)) { + count++; + } + writer.append("Count: ").append(String.valueOf(count)).append('\n'); + } else { + while (reader.next(key, value)) { + writer.append("Key: ").append(String.valueOf(key)); + String str = value.toString(); + writer.append(": Value: ").append(str.length() > sub ? 
str.substring(0, sub) : str); + writer.write('\n'); + writer.flush(); + count++; + } + writer.append("Count: ").append(String.valueOf(count)).append('\n'); } - writer.append("Count: ").append(String.valueOf(count)).append('\n'); - } - writer.flush(); - if (cmdLine.hasOption(outputOpt)) { + } finally { writer.close(); } } Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original) +++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Sat Sep 25 09:51:42 2010 @@ -73,13 +73,13 @@ public final class ClusterDumper extends private Path seqFileDir; - private Path pointsDir = null; + private Path pointsDir; - private String termDictionary = null; + private String termDictionary; - private String dictionaryFormat = null; + private String dictionaryFormat; - private String outputFile = null; + private String outputFile; private int subString = Integer.MAX_VALUE; @@ -109,9 +109,11 @@ public final class ClusterDumper extends addOption(OUTPUT_OPTION, "o", "Optional output directory. Default is to output to the console."); addOption(SUBSTRING_OPTION, "b", "The number of chars of the asFormatString() to print"); addOption(NUM_WORDS_OPTION, "n", "The number of top terms to print"); - addOption(JSON_OPTION, "j", "Output the centroid as JSON. Otherwise it substitues in the terms for vector cell entries"); - addOption(POINTS_DIR_OPTION, "p", "The directory containing points sequence files mapping input vectors to their cluster. 
" - + "If specified, then the program will output the points associated with a cluster"); + addOption(JSON_OPTION, "j", + "Output the centroid as JSON. Otherwise it substitues in the terms for vector cell entries"); + addOption(POINTS_DIR_OPTION, "p", + "The directory containing points sequence files mapping input vectors to their cluster. " + + "If specified, then the program will output the points associated with a cluster"); addOption(DICTIONARY_OPTION, "d", "The dictionary file"); addOption(DICTIONARY_TYPE_OPTION, "dt", "The dictionary file type (text|sequencefile)", "text"); if (parseArguments(args) == null) { @@ -256,11 +258,8 @@ public final class ClusterDumper extends return this.numTopFeatures; } - private void setUseJSON(boolean json) { - this.useJSON = json; - } - - private static Map<Integer, List<WeightedVectorWritable>> readPoints(Path pointsPathDir, Configuration conf) throws IOException { + private static Map<Integer, List<WeightedVectorWritable>> readPoints(Path pointsPathDir, + Configuration conf) throws IOException { Map<Integer, List<WeightedVectorWritable>> result = new TreeMap<Integer, List<WeightedVectorWritable>>(); FileSystem fs = pointsPathDir.getFileSystem(conf); Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java (original) +++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java Sat Sep 25 09:51:42 2010 @@ -29,8 +29,7 @@ import org.apache.mahout.math.Varint; import org.apache.mahout.utils.nlp.collocations.llr.Gram.Type; /** A GramKey, based on the identity fields of Gram (type, string) plus a byte[] used for 
secondary ordering */ -public class GramKey extends BinaryComparable implements - WritableComparable<BinaryComparable> { +public final class GramKey extends BinaryComparable implements WritableComparable<BinaryComparable> { private int primaryLength; private int length; Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java (original) +++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/common/PartialVectorMerger.java Sat Sep 25 09:51:42 2010 @@ -18,7 +18,6 @@ package org.apache.mahout.utils.vectors.common; import java.io.IOException; -import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -71,13 +70,10 @@ public final class PartialVectorMerger { * @param dimension * @param sequentialAccess * output vectors should be optimized for sequential access - * @param namedVectors + * @param namedVector * output vectors should be named, retaining key (doc id) as a label * @param numReducers * The number of reducers to spawn - * @throws IOException - * @throws ClassNotFoundException - * @throws InterruptedException */ public static void mergePartialVectors(Iterable<Path> partialVectorPaths, Path output, @@ -122,7 +118,7 @@ public final class PartialVectorMerger { } private static String getCommaSeparatedPaths(Iterable<Path> paths) { - StringBuilder commaSeparatedPaths = new StringBuilder(); + StringBuilder commaSeparatedPaths = new StringBuilder(100); String sep = ""; for (Path path : paths) { commaSeparatedPaths.append(sep).append(path.toString()); Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java (original) +++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java Sat Sep 25 09:51:42 2010 @@ -29,7 +29,6 @@ import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Set; import org.apache.commons.cli2.CommandLine; import org.apache.commons.cli2.Group; @@ -180,8 +179,8 @@ public class ClusterLabels { /** * Get the list of labels, sorted by best score. */ - protected List<TermInfoClusterInOut> getClusterLabels(Integer integer, Collection<WeightedVectorWritable> wvws) - throws IOException { + protected List<TermInfoClusterInOut> getClusterLabels(Integer integer, + Collection<WeightedVectorWritable> wvws) throws IOException { if (wvws.size() < minNumIds) { log.info("Skipping small cluster {} with size: {}", integer, wvws.size()); @@ -267,8 +266,9 @@ public class ClusterLabels { return clusteredTermInfo.subList(0, Math.min(clusteredTermInfo.size(), maxLabels)); } - private static OpenBitSet getClusterDocBitset(IndexReader reader, Collection<String> idSet, String idField) - throws IOException { + private static OpenBitSet getClusterDocBitset(IndexReader reader, + Collection<String> idSet, + String idField) throws IOException { int numDocs = reader.numDocs(); OpenBitSet bitset = new OpenBitSet(numDocs); Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java (original) +++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java Sat Sep 25 09:51:42 2010 @@ -141,8 +141,8 @@ public final class Driver { if (cmdLine.hasOption(inputOpt)) { // Lucene case File file = new File(cmdLine.getValue(inputOpt).toString()); if (!file.isDirectory()) { - throw new IllegalArgumentException("Lucene directory: " + file.getAbsolutePath() + - " does not exist or is not a directory"); + throw new IllegalArgumentException("Lucene directory: " + file.getAbsolutePath() + + " does not exist or is not a directory"); } long maxDocs = Long.MAX_VALUE; Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/utils/MahoutTestCase.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/MahoutTestCase.java?rev=1001180&r1=1001179&r2=1001180&view=diff ============================================================================== --- mahout/trunk/utils/src/test/java/org/apache/mahout/utils/MahoutTestCase.java (original) +++ mahout/trunk/utils/src/test/java/org/apache/mahout/utils/MahoutTestCase.java Sat Sep 25 09:51:42 2010 @@ -25,6 +25,6 @@ package org.apache.mahout.utils; public abstract class MahoutTestCase extends org.apache.mahout.common.MahoutTestCase { /** "Close enough" value for floating-point comparisons. */ - public static final double EPSILON = 0.0000001; + public static final double EPSILON = 0.000001; }
