Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/GramTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/GramTest.java?rev=909861&r1=909860&r2=909861&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/GramTest.java (original) +++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/GramTest.java Sat Feb 13 17:55:56 2010 @@ -22,9 +22,8 @@ import java.util.HashMap; -import junit.framework.TestCase; +import junit.framework.Assert; -import org.apache.mahout.utils.nlp.collocations.llr.Gram; import org.junit.Test; public class GramTest { @@ -34,36 +33,36 @@ Gram one = new Gram("foo", 2, HEAD); Gram two = new Gram("foo", 3, HEAD); - TestCase.assertTrue(one.equals(two)); - TestCase.assertTrue(two.equals(one)); + Assert.assertTrue(one.equals(two)); + Assert.assertTrue(two.equals(one)); Gram three = new Gram("foo", 4, TAIL); Gram four = new Gram("foo"); - TestCase.assertTrue(!three.equals(two)); - TestCase.assertTrue(four.equals(one)); - TestCase.assertTrue(one.equals(four)); + Assert.assertTrue(!three.equals(two)); + Assert.assertTrue(four.equals(one)); + Assert.assertTrue(one.equals(four)); Gram five = new Gram("foobar", 4, TAIL); - TestCase.assertTrue(!five.equals(four)); - TestCase.assertTrue(!five.equals(three)); - TestCase.assertTrue(!five.equals(two)); - TestCase.assertTrue(!five.equals(one)); + Assert.assertTrue(!five.equals(four)); + Assert.assertTrue(!five.equals(three)); + Assert.assertTrue(!five.equals(two)); + Assert.assertTrue(!five.equals(one)); } @Test public void testHashing() { - Gram[] input = + Gram[] input = { - new Gram("foo", 2, HEAD), - new Gram("foo", 3, HEAD), - new Gram("foo", 4, TAIL), - new Gram("foo", 5, TAIL), - new Gram("bar", 6, HEAD), - new Gram("bar", 7, TAIL), - new Gram("bar", 8), - new Gram("bar") + new Gram("foo", 2, HEAD), + new Gram("foo", 3, HEAD), + new Gram("foo", 4, TAIL), + new Gram("foo", 5, TAIL), + new Gram("bar", 6, HEAD), + new Gram("bar", 7, TAIL), + new Gram("bar", 8), + new Gram("bar") }; HashMap<Gram,Gram> map = new HashMap<Gram,Gram>(); @@ -78,32 +77,32 @@ // frequencies of the items in the map. int[] freq = { - 5, - 3, - 9, - 5, - 15, - 7, - 8, - 1 + 5, + 3, + 9, + 5, + 15, + 7, + 8, + 1 }; // true if the index should be the item in the map boolean[] memb = { - true, - false, - true, - false, - true, - true, - false, - false + true, + false, + true, + false, + true, + true, + false, + false }; for (int i = 0; i < input.length; i++) { System.err.println(i); - TestCase.assertEquals(freq[i], input[i].getFrequency()); - TestCase.assertEquals(memb[i], input[i] == map.get(input[i])); + Assert.assertEquals(freq[i], input[i].getFrequency()); + Assert.assertEquals(memb[i], input[i] == map.get(input[i])); } } }
Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducerTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducerTest.java?rev=909861&r1=909860&r2=909861&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducerTest.java (original) +++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/LLRReducerTest.java Sat Feb 13 17:55:56 2010 @@ -37,15 +37,15 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -/** Test the LLRReducer +/** Test the LLRReducer * FIXME: Add negative test cases. */ @SuppressWarnings("deprecation") public class LLRReducerTest { - - private static final Logger log = + + private static final Logger log = LoggerFactory.getLogger(LLRReducerTest.class); - + Reporter reporter; LLCallback ll; LLCallback cl; @@ -54,11 +54,11 @@ OutputCollector<Text,DoubleWritable> collector = new OutputCollector<Text,DoubleWritable>() { @Override public void collect(Text key, DoubleWritable value) throws IOException { - log.info(key.toString() + " " + value.toString()); + LLRReducerTest.log.info(key.toString() + " " + value.toString()); } }; - - + + @Before public void setUp() { reporter = EasyMock.createMock(Reporter.class); @@ -66,7 +66,7 @@ cl = new LLCallback() { @Override public double logLikelihoodRatio(int k11, int k12, int k21, int k22) { - log.info("k11:" + k11 + " k12:" + k12 + " k21:" + k21 + " k22:" + k22); + LLRReducerTest.log.info("k11:" + k11 + " k12:" + k12 + " k21:" + k21 + " k22:" + k22); try { return LogLikelihood.logLikelihoodRatio(k11, k12, k21, k22); } @@ -75,48 +75,48 @@ return -1; } } - + }; } - + @Test public void testReduce() throws Exception { LLRReducer reducer = new LLRReducer(ll); - - // test input, input[*][0] is the key, + + // test input, input[*][0] is the key, // input[*][1..n] are the values passed in via // the iterator. Gram[][] input = { - {new Gram("the best", 1), new Gram("the", 2, HEAD), new Gram("best", 1, TAIL) }, - {new Gram("best of", 1), new Gram("best", 1, HEAD), new Gram("of", 2, TAIL) }, - {new Gram("of times", 2), new Gram("of", 2, HEAD), new Gram("times", 2, TAIL) }, - {new Gram("times the", 1), new Gram("times", 1, HEAD), new Gram("the", 1, TAIL) }, - {new Gram("the worst", 1), new Gram("the", 2, HEAD), new Gram("worst", 1, TAIL) }, - {new Gram("worst of", 1), new Gram("worst", 1, HEAD), new Gram("of", 2, TAIL) } + {new Gram("the best", 1), new Gram("the", 2, HEAD), new Gram("best", 1, TAIL) }, + {new Gram("best of", 1), new Gram("best", 1, HEAD), new Gram("of", 2, TAIL) }, + {new Gram("of times", 2), new Gram("of", 2, HEAD), new Gram("times", 2, TAIL) }, + {new Gram("times the", 1), new Gram("times", 1, HEAD), new Gram("the", 1, TAIL) }, + {new Gram("the worst", 1), new Gram("the", 2, HEAD), new Gram("worst", 1, TAIL) }, + {new Gram("worst of", 1), new Gram("worst", 1, HEAD), new Gram("of", 2, TAIL) } }; - + int[][] expectations = { - // A+B, A+!B, !A+B, !A+!B - {1, 1, 0, 5}, // the best - {1, 0, 1, 5}, // best of - {2, 0, 0, 5}, // of times - {1, 0, 0, 6}, // times the - {1, 1, 0, 5}, // the worst - {1, 0, 1, 5} // worst of + // A+B, A+!B, !A+B, !A+!B + {1, 1, 0, 5}, // the best + {1, 0, 1, 5}, // best of + {2, 0, 0, 5}, // of times + {1, 0, 0, 6}, // times the + {1, 1, 0, 5}, // the worst + {1, 0, 1, 5} // worst of }; - + for (int[] ee: expectations) { EasyMock.expect(ll.logLikelihoodRatio(ee[0], ee[1], ee[2], ee[3])).andDelegateTo(cl); } - + EasyMock.replay(ll); - + JobConf config = new JobConf(CollocDriver.class); config.set(LLRReducer.NGRAM_TOTAL, "7"); reducer.configure(config); - + for (Gram[] ii: input) { List<Gram> vv = new LinkedList<Gram>(); for (int i = 1; i < ii.length; i++) { @@ -124,7 +124,7 @@ } reducer.reduce(ii[0], vv.iterator(), collector, reporter); } - + EasyMock.verify(ll); } } Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/NGramCollectorTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/NGramCollectorTest.java?rev=909861&r1=909860&r2=909861&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/NGramCollectorTest.java (original) +++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/NGramCollectorTest.java Sat Feb 13 17:55:56 2010 @@ -42,41 +42,41 @@ */ @SuppressWarnings("deprecation") public class NGramCollectorTest { - + OutputCollector<Gram,Gram> collector; Reporter reporter; - + @Before @SuppressWarnings("unchecked") public void setUp() { collector = EasyMock.createMock(OutputCollector.class); reporter = EasyMock.createMock(Reporter.class); } - + @Test public void testCollectNgrams() throws Exception { - + String input = "the best of times the worst of times"; - - String[][] values = + + String[][] values = new String[][]{ - {"h_the", "the best"}, - {"t_best", "the best"}, - {"h_best", "best of"}, - {"t_of", "best of"}, - {"h_of", "of times"}, - {"t_times", "of times"}, - {"h_times", "times the"}, - {"t_the", "times the"}, - {"h_the", "the worst"}, - {"t_worst", "the worst"}, - {"h_worst", "worst of"}, - {"t_of", "worst of"}, - {"h_of", "of times"}, - {"t_times", "of times"} + {"h_the", "the best"}, + {"t_best", "the best"}, + {"h_best", "best of"}, + {"t_of", "best of"}, + {"h_of", "of times"}, + {"t_times", "of times"}, + {"h_times", "times the"}, + {"t_the", "times the"}, + {"h_the", "the worst"}, + {"t_worst", "the worst"}, + {"h_worst", "worst of"}, + {"t_of", "worst of"}, + {"h_of", "of times"}, + {"t_times", "of times"} }; // set up expectations for mocks. ngram max size = 2 - + // setup expectations for (String[] v: values) { Type p = v[0].startsWith("h") ? HEAD : TAIL; @@ -84,24 +84,24 @@ Gram ngram = new Gram(v[1]); collector.collect(subgram, ngram); } - + reporter.incrCounter(NGRAM_TOTAL, 7); EasyMock.replay(reporter, collector); Reader r = new StringReader(input); - + JobConf conf = new JobConf(); conf.set(NGramCollector.MAX_SHINGLE_SIZE, "2"); conf.set(NGramCollector.ANALYZER_CLASS, TestAnalyzer.class.getName()); - + NGramCollector c = new NGramCollector(); c.configure(conf); c.collectNgrams(r, collector, reporter); - + EasyMock.verify(reporter, collector); } - + /** A lucene 2.9 standard analyzer with no stopwords. */ public static class TestAnalyzer extends Analyzer { final Analyzer a; Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java?rev=909861&r1=909860&r2=909861&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java (original) +++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java Sat Feb 13 17:55:56 2010 @@ -17,40 +17,40 @@ package org.apache.mahout.utils.vectors; +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.Random; + import org.apache.mahout.common.RandomUtils; import org.apache.mahout.math.DenseVector; import org.apache.mahout.math.RandomAccessSparseVector; -import org.apache.mahout.math.function.UnaryFunction; import org.apache.mahout.math.Vector; - -import java.util.Iterator; -import java.util.NoSuchElementException; -import java.util.Random; +import org.apache.mahout.math.function.UnaryFunction; public class RandomVectorIterable implements Iterable<Vector>{ - + private int numItems = 100; public enum VectorType {DENSE, SPARSE} - + private VectorType type = VectorType.SPARSE; - + public RandomVectorIterable() { } - + public RandomVectorIterable(int numItems) { this.numItems = numItems; } - + public RandomVectorIterable(int numItems, VectorType type) { this.numItems = numItems; this.type = type; } - + @Override public Iterator<Vector> iterator() { return new VectIterator(); } - + private class VectIterator implements Iterator<Vector>{ private int count = 0; private final Random random = RandomUtils.getRandom(); @@ -58,7 +58,7 @@ public boolean hasNext() { return count < numItems; } - + @Override public Vector next() { if (!hasNext()) { @@ -74,7 +74,7 @@ count++; return result; } - + @Override public void remove() { throw new UnsupportedOperationException(); Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterableTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterableTest.java?rev=909861&r1=909860&r2=909861&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterableTest.java (original) +++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/SequenceFileVectorIterableTest.java Sat Feb 13 17:55:56 2010 @@ -17,6 +17,10 @@ package org.apache.mahout.utils.vectors; +import java.io.File; + +import junit.framework.Assert; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -27,13 +31,11 @@ import org.apache.mahout.math.VectorWritable; import org.apache.mahout.utils.vectors.io.SequenceFileVectorWriter; -import java.io.File; - public class SequenceFileVectorIterableTest extends MahoutTestCase { - + private File tmpLoc; private File tmpFile; - + @Override public void setUp() throws Exception { super.setUp(); @@ -44,14 +46,14 @@ tmpFile = File.createTempFile("sfvit", ".dat", tmpLoc); tmpFile.deleteOnExit(); } - + @Override public void tearDown() throws Exception { tmpFile.delete(); tmpLoc.delete(); super.tearDown(); } - + public void testIterable() throws Exception { Path path = new Path(tmpFile.getAbsolutePath()); Configuration conf = new Configuration(); @@ -61,7 +63,7 @@ RandomVectorIterable iter = new RandomVectorIterable(50); writer.write(iter); writer.close(); - + SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, path, conf); SequenceFileVectorIterable sfvi = new SequenceFileVectorIterable(seqReader); int count = 0; @@ -70,6 +72,6 @@ count++; } seqReader.close(); - assertEquals(50, count); + Assert.assertEquals(50, count); } } Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java?rev=909861&r1=909860&r2=909861&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java (original) +++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterableTest.java Sat Feb 13 17:55:56 2010 @@ -17,253 +17,255 @@ package org.apache.mahout.utils.vectors.arff; +import java.text.DateFormat; +import java.util.Iterator; +import java.util.Map; + +import junit.framework.Assert; + import org.apache.mahout.common.MahoutTestCase; import org.apache.mahout.math.DenseVector; import org.apache.mahout.math.RandomAccessSparseVector; import org.apache.mahout.math.Vector; -import java.text.DateFormat; -import java.util.Iterator; -import java.util.Map; - public class ARFFVectorIterableTest extends MahoutTestCase { - + public void testValues() throws Exception { StringBuilder builder = new StringBuilder(); builder.append("%comments").append('\n').append("@RELATION Mahout").append('\n') - .append("@ATTRIBUTE foo numeric").append('\n') - .append("@ATTRIBUTE bar numeric").append('\n') - .append("@ATTRIBUTE timestamp DATE \"yyyy-MM-dd HH:mm:ss\"").append('\n') - .append("@ATTRIBUTE junk string").append('\n') - .append("@ATTRIBUTE theNominal {c,b,a}").append('\n') - .append("@DATA").append('\n') - .append("1,2, \"2009-01-01 5:55:55\", foo, c").append('\n') - .append("2,3").append('\n') - .append("{0 5,1 23}").append('\n'); + .append("@ATTRIBUTE foo numeric").append('\n') + .append("@ATTRIBUTE bar numeric").append('\n') + .append("@ATTRIBUTE timestamp DATE \"yyyy-MM-dd HH:mm:ss\"").append('\n') + .append("@ATTRIBUTE junk string").append('\n') + .append("@ATTRIBUTE theNominal {c,b,a}").append('\n') + .append("@DATA").append('\n') + .append("1,2, \"2009-01-01 5:55:55\", foo, c").append('\n') + .append("2,3").append('\n') + .append("{0 5,1 23}").append('\n'); ARFFModel model = new MapBackedARFFModel(); ARFFVectorIterable iterable = new ARFFVectorIterable(builder.toString(), model); - assertEquals("Mahout", iterable.getModel().getRelation()); + Assert.assertEquals("Mahout", iterable.getModel().getRelation()); Map<String, Integer> bindings = iterable.getModel().getLabelBindings(); - assertNotNull(bindings); - assertEquals(5, bindings.size()); + Assert.assertNotNull(bindings); + Assert.assertEquals(5, bindings.size()); Iterator<Vector> iter = iterable.iterator(); - assertTrue(iter.hasNext()); + Assert.assertTrue(iter.hasNext()); Vector next = iter.next(); - assertNotNull(next); - assertTrue("Wrong instanceof", next instanceof DenseVector); - assertEquals(1.0, next.get(0)); - assertEquals(2.0, next.get(1)); - assertTrue(iter.hasNext()); + Assert.assertNotNull(next); + Assert.assertTrue("Wrong instanceof", next instanceof DenseVector); + Assert.assertEquals(1.0, next.get(0)); + Assert.assertEquals(2.0, next.get(1)); + Assert.assertTrue(iter.hasNext()); next = iter.next(); - assertNotNull(next); - assertTrue("Wrong instanceof", next instanceof DenseVector); - assertEquals(2.0, next.get(0)); - assertEquals(3.0, next.get(1)); - - assertTrue(iter.hasNext()); + Assert.assertNotNull(next); + Assert.assertTrue("Wrong instanceof", next instanceof DenseVector); + Assert.assertEquals(2.0, next.get(0)); + Assert.assertEquals(3.0, next.get(1)); + + Assert.assertTrue(iter.hasNext()); next = iter.next(); - assertNotNull(next); - assertTrue("Wrong instanceof", next instanceof RandomAccessSparseVector); - assertEquals(5.0, next.get(0)); - assertEquals(23.0, next.get(1)); - - assertFalse(iter.hasNext()); + Assert.assertNotNull(next); + Assert.assertTrue("Wrong instanceof", next instanceof RandomAccessSparseVector); + Assert.assertEquals(5.0, next.get(0)); + Assert.assertEquals(23.0, next.get(1)); + + Assert.assertFalse(iter.hasNext()); } - + public void testDense() throws Exception { ARFFModel model = new MapBackedARFFModel(); - ARFFVectorIterable iterable = new ARFFVectorIterable(SAMPLE_DENSE_ARFF, model); + ARFFVectorIterable iterable = new ARFFVectorIterable(ARFFVectorIterableTest.SAMPLE_DENSE_ARFF, model); int count = 0; for (Vector vector : iterable) { - assertTrue("Vector is not dense", vector instanceof DenseVector); + Assert.assertTrue("Vector is not dense", vector instanceof DenseVector); count++; } - assertEquals(10, count); + Assert.assertEquals(10, count); } - + public void testSparse() throws Exception { ARFFModel model = new MapBackedARFFModel(); - ARFFVectorIterable iterable = new ARFFVectorIterable(SAMPLE_SPARSE_ARFF, model); + ARFFVectorIterable iterable = new ARFFVectorIterable(ARFFVectorIterableTest.SAMPLE_SPARSE_ARFF, model); int count = 0; for (Vector vector : iterable) { - assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector); + Assert.assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector); count++; } - assertEquals(10, count); + Assert.assertEquals(10, count); } - + public void testNonNumeric() throws Exception { - + MapBackedARFFModel model = new MapBackedARFFModel(); - ARFFVectorIterable iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF, model); + ARFFVectorIterable iterable = new ARFFVectorIterable(ARFFVectorIterableTest.NON_NUMERIC_ARFF, model); int count = 0; for (Vector vector : iterable) { - assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector); + Assert.assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector); count++; } - assertEquals(10, count); + Assert.assertEquals(10, count); Map<String, Map<String, Integer>> nominalMap = iterable.getModel().getNominalMap(); - assertNotNull(nominalMap); - assertEquals(1, nominalMap.size()); + Assert.assertNotNull(nominalMap); + Assert.assertEquals(1, nominalMap.size()); Map<String, Integer> noms = nominalMap.get("bar"); - assertNotNull("nominals for bar are null", noms); - assertEquals(2, noms.size()); + Assert.assertNotNull("nominals for bar are null", noms); + Assert.assertEquals(2, noms.size()); Map<Integer, ARFFType> integerARFFTypeMap = model.getTypeMap(); - assertNotNull("Type map null", integerARFFTypeMap); - assertEquals(5, integerARFFTypeMap.size()); + Assert.assertNotNull("Type map null", integerARFFTypeMap); + Assert.assertEquals(5, integerARFFTypeMap.size()); Map<String, Long> words = model.getWords(); - assertNotNull("words null", words); - assertEquals(10, words.size()); + Assert.assertNotNull("words null", words); + Assert.assertEquals(10, words.size()); //System.out.println("Words: " + words); Map<Integer, DateFormat> integerDateFormatMap = model.getDateMap(); - assertNotNull("date format null", integerDateFormatMap); - assertEquals(1, integerDateFormatMap.size()); - + Assert.assertNotNull("date format null", integerDateFormatMap); + Assert.assertEquals(1, integerDateFormatMap.size()); + } - + public void testMultipleNoms() throws Exception { MapBackedARFFModel model = new MapBackedARFFModel(); - ARFFVectorIterable iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF, model); + ARFFVectorIterable iterable = new ARFFVectorIterable(ARFFVectorIterableTest.NON_NUMERIC_ARFF, model); int count = 0; for (Vector vector : iterable) { - assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector); + Assert.assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector); count++; } - assertEquals(10, count); + Assert.assertEquals(10, count); Map<String, Map<String, Integer>> nominalMap = iterable.getModel().getNominalMap(); - assertNotNull(nominalMap); - assertEquals(1, nominalMap.size()); + Assert.assertNotNull(nominalMap); + Assert.assertEquals(1, nominalMap.size()); Map<String, Integer> noms = nominalMap.get("bar"); - assertNotNull("nominals for bar are null", noms); - assertEquals(2, noms.size()); + Assert.assertNotNull("nominals for bar are null", noms); + Assert.assertEquals(2, noms.size()); Map<Integer, ARFFType> integerARFFTypeMap = model.getTypeMap(); - assertNotNull("Type map null", integerARFFTypeMap); - assertEquals(5, integerARFFTypeMap.size()); + Assert.assertNotNull("Type map null", integerARFFTypeMap); + Assert.assertEquals(5, integerARFFTypeMap.size()); Map<String, Long> words = model.getWords(); - assertNotNull("words null", words); - assertEquals(10, words.size()); + Assert.assertNotNull("words null", words); + Assert.assertEquals(10, words.size()); //System.out.println("Words: " + words); Map<Integer, DateFormat> integerDateFormatMap = model.getDateMap(); - assertNotNull("date format null", integerDateFormatMap); - assertEquals(1, integerDateFormatMap.size()); + Assert.assertNotNull("date format null", integerDateFormatMap); + Assert.assertEquals(1, integerDateFormatMap.size()); model = new MapBackedARFFModel(model.getWords(), model.getWordCount(), - model.getNominalMap()); - iterable = new ARFFVectorIterable(NON_NUMERIC_ARFF2, model); + model.getNominalMap()); + iterable = new ARFFVectorIterable(ARFFVectorIterableTest.NON_NUMERIC_ARFF2, model); count = 0; for (Vector vector : iterable) { - assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector); + Assert.assertTrue("Vector is not dense", vector instanceof RandomAccessSparseVector); count++; } nominalMap = model.getNominalMap(); - assertNotNull(nominalMap); - assertEquals(2, nominalMap.size()); + Assert.assertNotNull(nominalMap); + Assert.assertEquals(2, nominalMap.size()); noms = nominalMap.get("test"); - assertNotNull("nominals for bar are null", noms); - assertEquals(2, noms.size()); + Assert.assertNotNull("nominals for bar are null", noms); + Assert.assertEquals(2, noms.size()); } - - + + private static final String SAMPLE_DENSE_ARFF = " % Comments\n" + - " % \n" + - " % Comments go here" + - " % \n" + - " @RELATION Mahout\n" + - '\n' + - " @ATTRIBUTE foo NUMERIC\n" + - " @ATTRIBUTE bar NUMERIC\n" + - " @ATTRIBUTE hockey NUMERIC\n" + - " @ATTRIBUTE football NUMERIC\n" + - " \n" + - '\n' + - '\n' + - " @DATA\n" + - " 23.1,3.23,1.2,0.2\n" + - " 2.9,3.0,1.2,0.2\n" + - " 2.7,3.2,1.3,0.2\n" + - " 2.6,3.1,1.23,0.2\n" + - " 23.0,3.6,1.2,0.2\n" + - " 23.2,3.9,1.7,0.2\n" + - " 2.6,3.2,1.2,0.3\n" + - " 23.0,3.2,1.23,0.2\n" + - " 2.2,2.9,1.2,0.2\n" + - " 2.9,3.1,1.23,0.1\n"; - - + " % \n" + + " % Comments go here" + + " % \n" + + " @RELATION Mahout\n" + + '\n' + + " @ATTRIBUTE foo NUMERIC\n" + + " @ATTRIBUTE bar NUMERIC\n" + + " @ATTRIBUTE hockey NUMERIC\n" + + " @ATTRIBUTE football NUMERIC\n" + + " \n" + + '\n' + + '\n' + + " @DATA\n" + + " 23.1,3.23,1.2,0.2\n" + + " 2.9,3.0,1.2,0.2\n" + + " 2.7,3.2,1.3,0.2\n" + + " 2.6,3.1,1.23,0.2\n" + + " 23.0,3.6,1.2,0.2\n" + + " 23.2,3.9,1.7,0.2\n" + + " 2.6,3.2,1.2,0.3\n" + + " 23.0,3.2,1.23,0.2\n" + + " 2.2,2.9,1.2,0.2\n" + + " 2.9,3.1,1.23,0.1\n"; + + private static final String SAMPLE_SPARSE_ARFF = " % Comments\n" + - " % \n" + - " % Comments go here" + - " % \n" + - " @RELATION Mahout\n" + - '\n' + - " @ATTRIBUTE foo NUMERIC\n" + - " @ATTRIBUTE bar NUMERIC\n" + - " @ATTRIBUTE hockey NUMERIC\n" + - " @ATTRIBUTE football NUMERIC\n" + - " @ATTRIBUTE tennis NUMERIC\n" + - " \n" + - '\n' + - '\n' + - " @DATA\n" + - " {1 23.1,2 3.23,3 1.2,4 0.2}\n" + - " {0 2.9}\n" + - " {0 2.7,2 3.2,3 1.3,4 0.2}\n" + - " {1 2.6,2 3.1,3 1.23,4 0.2}\n" + - " {1 23.0,2 3.6,3 1.2,4 0.2}\n" + - " {0 23.2,1 3.9,3 1.7,4 0.2}\n" + - " {0 2.6,1 3.2,2 1.2,4 0.3}\n" + - " {1 23.0,2 3.2,3 1.23}\n" + - " {1 2.2,2 2.94 0.2}\n" + - " {1 2.9,2 3.1}\n"; - + " % \n" + + " % Comments go here" + + " % \n" + + " @RELATION Mahout\n" + + '\n' + + " @ATTRIBUTE foo NUMERIC\n" + + " @ATTRIBUTE bar NUMERIC\n" + + " @ATTRIBUTE hockey NUMERIC\n" + + " @ATTRIBUTE football NUMERIC\n" + + " @ATTRIBUTE tennis NUMERIC\n" + + " \n" + + '\n' + + '\n' + + " @DATA\n" + + " {1 23.1,2 3.23,3 1.2,4 0.2}\n" + + " {0 2.9}\n" + + " {0 2.7,2 3.2,3 1.3,4 0.2}\n" + + " {1 2.6,2 3.1,3 1.23,4 0.2}\n" + + " {1 23.0,2 3.6,3 1.2,4 0.2}\n" + + " {0 23.2,1 3.9,3 1.7,4 0.2}\n" + + " {0 2.6,1 3.2,2 1.2,4 0.3}\n" + + " {1 23.0,2 3.2,3 1.23}\n" + + " {1 2.2,2 2.94 0.2}\n" + + " {1 2.9,2 3.1}\n"; + private static final String NON_NUMERIC_ARFF = " % Comments\n" + - " % \n" + - " % Comments go here" + - " % \n" + - " @RELATION Mahout\n" + - '\n' + - " @ATTRIBUTE junk NUMERIC\n" + - " @ATTRIBUTE foo NUMERIC\n" + - " @ATTRIBUTE bar {c,d}\n" + - " @ATTRIBUTE hockey string\n" + - " @ATTRIBUTE football date \"yyyy-MM-dd\"\n" + - " \n" + - '\n' + - '\n' + - " @DATA\n" + - " {2 c,3 gretzky,4 1973-10-23}\n" + - " {1 2.9,2 d,3 orr,4 1973-11-23}\n" + - " {2 c,3 bossy,4 1981-10-23}\n" + - " {1 2.6,2 c,3 lefleur,4 1989-10-23}\n" + - " {3 esposito,4 1973-04-23}\n" + - " {1 23.2,2 d,3 chelios,4 1999-2-23}\n" + - " {3 richard,4 1973-10-12}\n" + - " {3 howe,4 1983-06-23}\n" + - " {0 2.2,2 d,3 messier,4 2008-11-23}\n" + - " {2 c,3 roy,4 1973-10-13}\n"; - + " % \n" + + " % Comments go here" + + " % \n" + + " @RELATION Mahout\n" + + '\n' + + " @ATTRIBUTE junk NUMERIC\n" + + " @ATTRIBUTE foo NUMERIC\n" + + " @ATTRIBUTE bar {c,d}\n" + + " @ATTRIBUTE hockey string\n" + + " @ATTRIBUTE football date \"yyyy-MM-dd\"\n" + + " \n" + + '\n' + + '\n' + + " @DATA\n" + + " {2 c,3 gretzky,4 1973-10-23}\n" + + " {1 2.9,2 d,3 orr,4 1973-11-23}\n" + + " {2 c,3 bossy,4 1981-10-23}\n" + + " {1 2.6,2 c,3 lefleur,4 1989-10-23}\n" + + " {3 esposito,4 1973-04-23}\n" + + " {1 23.2,2 d,3 chelios,4 1999-2-23}\n" + + " {3 richard,4 1973-10-12}\n" + + " {3 howe,4 1983-06-23}\n" + + " {0 2.2,2 d,3 messier,4 2008-11-23}\n" + + " {2 c,3 roy,4 1973-10-13}\n"; + private static final String NON_NUMERIC_ARFF2 = " % Comments\n" + - " % \n" + - " % Comments go here" + - " % \n" + - " @RELATION Mahout\n" + - '\n' + - " @ATTRIBUTE junk NUMERIC\n" + - " @ATTRIBUTE foo NUMERIC\n" + - " @ATTRIBUTE test {f,z}\n" + - " @ATTRIBUTE hockey string\n" + - " @ATTRIBUTE football date \"yyyy-MM-dd\"\n" + - " \n" + - '\n' + - '\n' + - " @DATA\n" + - " {2 f,3 gretzky,4 1973-10-23}\n" + - " {1 2.9,2 z,3 orr,4 1973-11-23}\n" + - " {2 f,3 bossy,4 1981-10-23}\n" + - " {1 2.6,2 f,3 lefleur,4 1989-10-23}\n" + - " {3 esposito,4 1973-04-23}\n" + - " {1 23.2,2 z,3 chelios,4 1999-2-23}\n" + - " {3 richard,4 1973-10-12}\n" + - " {3 howe,4 1983-06-23}\n" + - " {0 2.2,2 f,3 messier,4 2008-11-23}\n" + - " {2 f,3 roy,4 1973-10-13}\n"; + " % \n" + + " % Comments go here" + + " % \n" + + " @RELATION Mahout\n" + + '\n' + + " @ATTRIBUTE junk NUMERIC\n" + + " @ATTRIBUTE foo NUMERIC\n" + + " @ATTRIBUTE test {f,z}\n" + + " @ATTRIBUTE hockey string\n" + + " @ATTRIBUTE football date \"yyyy-MM-dd\"\n" + + " \n" + + '\n' + + '\n' + + " @DATA\n" + + " {2 f,3 gretzky,4 1973-10-23}\n" + + " {1 2.9,2 z,3 orr,4 1973-11-23}\n" + + " {2 f,3 bossy,4 1981-10-23}\n" + + " {1 2.6,2 f,3 lefleur,4 1989-10-23}\n" + + " {3 esposito,4 1973-04-23}\n" + + " {1 23.2,2 z,3 chelios,4 1999-2-23}\n" + + " {3 richard,4 1973-10-12}\n" + + " {3 howe,4 1983-06-23}\n" + + " {0 2.2,2 f,3 messier,4 2008-11-23}\n" + + " {2 f,3 roy,4 1973-10-13}\n"; } Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java?rev=909861&r1=909860&r2=909861&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java (original) +++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/VectorWriterTest.java Sat Feb 13 17:55:56 2010 @@ -17,27 +17,29 @@ package org.apache.mahout.utils.vectors.io; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.FileSystem; +import java.io.File; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.List; + +import junit.framework.Assert; + import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.SequenceFile; import org.apache.mahout.common.MahoutTestCase; import org.apache.mahout.math.DenseVector; import org.apache.mahout.math.Vector; import org.apache.mahout.math.VectorWritable; import org.apache.mahout.utils.vectors.RandomVectorIterable; -import java.io.File; -import java.io.StringWriter; -import java.util.List; -import java.util.ArrayList; - public class VectorWriterTest extends MahoutTestCase { - + private File tmpLoc; private File tmpFile; - + @Override public void setUp() throws Exception { super.setUp(); @@ -48,14 +50,14 @@ tmpFile = File.createTempFile("sfvwt", ".dat", tmpLoc); tmpFile.deleteOnExit(); } - + @Override public void tearDown() throws Exception { tmpFile.delete(); tmpLoc.delete(); super.tearDown(); } - + public void testSFVW() throws Exception { Path path = new Path(tmpFile.getAbsolutePath()); Configuration conf = new Configuration(); @@ -65,7 +67,7 @@ RandomVectorIterable iter = new RandomVectorIterable(50); writer.write(iter); writer.close(); - + SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, path, conf); LongWritable key = new LongWritable(); VectorWritable value = new VectorWritable(); @@ -73,9 +75,9 @@ while (seqReader.next(key, value)){ count++; } - assertEquals(count + " does not equal: " + 50, 50, count); + Assert.assertEquals(count + " does not equal: " + 50, 50, count); } - + public void test() throws Exception { StringWriter strWriter = new StringWriter(); VectorWriter writer = new JWriterVectorWriter(strWriter); @@ -85,8 +87,8 @@ writer.write(vectors); writer.close(); StringBuffer buffer = strWriter.getBuffer(); - assertNotNull(buffer); - assertTrue(buffer.length() > 0); - + Assert.assertNotNull(buffer); + Assert.assertTrue(buffer.length() > 0); + } } Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java?rev=909861&r1=909860&r2=909861&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java (original) +++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java Sat Feb 13 17:55:56 2010 @@ -17,63 +17,65 @@ package org.apache.mahout.utils.vectors.lucene; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexReader; +import junit.framework.Assert; + import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Field; import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; import org.apache.mahout.common.MahoutTestCase; import org.apache.mahout.math.RandomAccessSparseVector; -import org.apache.mahout.utils.vectors.Weight; +import org.apache.mahout.math.Vector; import org.apache.mahout.utils.vectors.TFIDF; import org.apache.mahout.utils.vectors.TermInfo; -import org.apache.mahout.math.Vector; +import org.apache.mahout.utils.vectors.Weight; public class LuceneIterableTest extends MahoutTestCase { private RAMDirectory directory; - + private static final String [] DOCS = { - "The quick red fox jumped over the lazy brown dogs.", - "Mary had a little lamb whose fleece was white as snow.", - "Moby Dick is a story of a whale and a man obsessed.", - "The robber wore a black fleece jacket and a baseball cap.", - "The English Springer Spaniel is the best of all dogs." - }; - - + "The quick red fox jumped over the lazy brown dogs.", + "Mary had a little lamb whose fleece was white as snow.", + "Moby Dick is a story of a whale and a man obsessed.", + "The robber wore a black fleece jacket and a baseball cap.", + "The English Springer Spaniel is the best of all dogs." + }; + + @Override protected void setUp() throws Exception { super.setUp(); directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED); - for (int i = 0; i < DOCS.length; i++){ + for (int i = 0; i < LuceneIterableTest.DOCS.length; i++){ Document doc = new Document(); Field id = new Field("id", "doc_" + i, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add(id); //Store both position and offset information - Field text = new Field("content", DOCS[i], Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES); + Field text = new Field("content", LuceneIterableTest.DOCS[i], Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES); doc.add(text); writer.addDocument(doc); } writer.close(); } - + public void testIterable() throws Exception { IndexReader reader = IndexReader.open(directory, true); Weight weight = new TFIDF(); TermInfo termInfo = new CachedTermInfo(reader, "content", 1, 100); VectorMapper mapper = new TFDFMapper(reader, weight, termInfo); LuceneIterable iterable = new LuceneIterable(reader, "id", "content", mapper); - + //TODO: do something more meaningful here for (Vector vector : iterable) { - assertNotNull(vector); - assertTrue("vector is not an instanceof " + RandomAccessSparseVector.class, vector instanceof RandomAccessSparseVector); - assertTrue("vector Size: " + vector.size() + " is not greater than: " + 0, vector.size() > 0); + Assert.assertNotNull(vector); + Assert.assertTrue("vector is not an instanceof " + RandomAccessSparseVector.class, vector instanceof RandomAccessSparseVector); + Assert.assertTrue("vector Size: " + vector.size() + " is not greater than: " + 0, vector.size() > 0); } } - - + + } Modified: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java?rev=909861&r1=909860&r2=909861&view=diff ============================================================================== --- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java (original) +++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/text/DictionaryVectorizerTest.java Sat Feb 13 17:55:56 2010 @@ -52,46 +52,48 @@ public static final String DELIM = " .,?;:!\t\n\r"; public static final String ERRORSET = "`1234567890" - + "-...@#$%^&*()_+[]{}'\"/<>|\\"; + + "-...@#$%^&*()_+[]{}'\"/<>|\\"; private static final Random random = RandomUtils.getRandom(); private FileSystem fs; private static char getRandomDelimiter() { - return DELIM.charAt(random.nextInt(DELIM.length())); + return DictionaryVectorizerTest.DELIM.charAt(DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.DELIM.length())); } public static String getRandomDocument() { - int length = (AVG_DOCUMENT_LENGTH >> 1) - + random.nextInt(AVG_DOCUMENT_LENGTH); - StringBuilder sb = new StringBuilder(length * AVG_SENTENCE_LENGTH - * AVG_WORD_LENGTH); + int length = (DictionaryVectorizerTest.AVG_DOCUMENT_LENGTH >> 1) + + DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.AVG_DOCUMENT_LENGTH); + StringBuilder sb = new StringBuilder(length * DictionaryVectorizerTest.AVG_SENTENCE_LENGTH + * DictionaryVectorizerTest.AVG_WORD_LENGTH); for (int i = 0; i < length; i++) { - sb.append(getRandomSentence()); + sb.append(DictionaryVectorizerTest.getRandomSentence()); } return sb.toString(); } public static String getRandomSentence() { - int length = (AVG_SENTENCE_LENGTH >> 1) - + random.nextInt(AVG_SENTENCE_LENGTH); - StringBuilder sb = new StringBuilder(length * AVG_WORD_LENGTH); + int length = (DictionaryVectorizerTest.AVG_SENTENCE_LENGTH >> 1) + + DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.AVG_SENTENCE_LENGTH); + StringBuilder sb = new StringBuilder(length * DictionaryVectorizerTest.AVG_WORD_LENGTH); for (int i = 0; i < length; i++) { - sb.append(getRandomString()).append(' '); + sb.append(DictionaryVectorizerTest.getRandomString()).append(' '); } - sb.append(getRandomDelimiter()); + sb.append(DictionaryVectorizerTest.getRandomDelimiter()); return sb.toString(); } public static String getRandomString() { - int length = (AVG_WORD_LENGTH >> 1) + random.nextInt(AVG_WORD_LENGTH); + int length = (DictionaryVectorizerTest.AVG_WORD_LENGTH >> 1) + DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.AVG_WORD_LENGTH); StringBuilder sb = new StringBuilder(length); for (int i = 0; i < length; i++) { - sb.append(CHARSET.charAt(random.nextInt(CHARSET.length()))); + sb.append(DictionaryVectorizerTest.CHARSET.charAt(DictionaryVectorizerTest.random.nextInt(DictionaryVectorizerTest.CHARSET.length()))); + } + if (DictionaryVectorizerTest.random.nextInt(10) == 0) { + sb.append(DictionaryVectorizerTest.ERRORSET.charAt(DictionaryVectorizerTest.random + .nextInt(DictionaryVectorizerTest.ERRORSET.length()))); } - if (random.nextInt(10) == 0) sb.append(ERRORSET.charAt(random - .nextInt(ERRORSET.length()))); return sb.toString(); } @@ -101,7 +103,7 @@ if (f.isDirectory()) { String[] contents = f.list(); for (String content : contents) { - rmr(f.toString() + File.separator + content); + DictionaryVectorizerTest.rmr(f.toString() + File.separator + content); } } f.delete(); @@ -111,31 +113,31 @@ @Override public void setUp() throws Exception { super.setUp(); - rmr("output"); - rmr("testdata"); + DictionaryVectorizerTest.rmr("output"); + DictionaryVectorizerTest.rmr("testdata"); Configuration conf = new Configuration(); fs = FileSystem.get(conf); } public void testCreateTermFrequencyVectors() throws IOException, - InterruptedException, - ClassNotFoundException, - URISyntaxException { + InterruptedException, + ClassNotFoundException, + URISyntaxException { Configuration conf = new Configuration(); String pathString = "testdata/documents/docs.file"; Path path = new Path(pathString); SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, - Text.class, Text.class); + Text.class, Text.class); - for (int i = 0; i < NUM_DOCS; i++) { + for (int i = 0; i < DictionaryVectorizerTest.NUM_DOCS; i++) { writer.append(new Text("Document::ID::" + i), new Text( - getRandomDocument())); + DictionaryVectorizerTest.getRandomDocument())); } writer.close(); Class<? extends Analyzer> analyzer = new StandardAnalyzer( - Version.LUCENE_CURRENT).getClass(); + Version.LUCENE_CURRENT).getClass(); DocumentProcessor.tokenizeDocuments(pathString, analyzer, - "output/tokenized-documents"); + "output/tokenized-documents"); DictionaryVectorizer.createTermFrequencyVectors("output/tokenized-documents", "output/wordcount", 2, 1, 0.0f, 1, 100, false); TFIDFConverter.processTfIdf("output/wordcount/vectors", "output/tfidf/", 100, 1, 99, 1.0f, false);