Repository: mahout Updated Branches: refs/heads/master 708cc4f2b -> 82e78a8c9
http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java ---------------------------------------------------------------------- diff --git a/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java b/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java deleted file mode 100644 index d61197c..0000000 --- a/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java +++ /dev/null @@ -1,174 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.text; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.lucene.search.TermQuery; -import org.apache.mahout.common.HadoopUtil; -import org.apache.mahout.text.doc.MultipleFieldsDocument; -import org.apache.mahout.text.doc.SingleFieldDocument; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -@Deprecated -public class SequenceFilesFromLuceneStorageDriverTest extends AbstractLuceneStorageTest { - - private SequenceFilesFromLuceneStorageDriver driver; - private LuceneStorageConfiguration lucene2SeqConf; - private String idField; - private List<String> fields; - private Path seqFilesOutputPath; - private Configuration conf; - - @Before - public void before() throws Exception { - conf = getConfiguration(); - conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," - + "org.apache.hadoop.io.serializer.WritableSerialization"); - - seqFilesOutputPath = new Path(getTestTempDirPath(), "seqfiles"); - idField = SingleFieldDocument.ID_FIELD; - fields = Collections.singletonList("field"); - - driver = new SequenceFilesFromLuceneStorageDriver() { - @Override - public LuceneStorageConfiguration newLucene2SeqConfiguration(Configuration configuration, List<Path> indexPaths, Path seqPath, String idField, List<String> fields) { - lucene2SeqConf = new LuceneStorageConfiguration(configuration, indexPaths, seqPath, idField, fields); - return lucene2SeqConf; - } - }; - } - - @After - public void after() throws IOException { - HadoopUtil.delete(conf, seqFilesOutputPath); - HadoopUtil.delete(conf, getIndexPath1()); - } - - @Test - public void testNewLucene2SeqConfiguration() { - lucene2SeqConf = driver.newLucene2SeqConfiguration(conf, - Collections.singletonList(new Path(getIndexPath1().toString())), - seqFilesOutputPath, - idField, - fields); - - assertEquals(conf, lucene2SeqConf.getConfiguration()); - assertEquals(Collections.singletonList(getIndexPath1()), lucene2SeqConf.getIndexPaths()); - assertEquals(seqFilesOutputPath, lucene2SeqConf.getSequenceFilesOutputPath()); - assertEquals(idField, lucene2SeqConf.getIdField()); - assertEquals(fields, lucene2SeqConf.getFields()); - } - - @Test - public void testRun() throws Exception { - List<MultipleFieldsDocument> docs = - Collections.singletonList(new MultipleFieldsDocument("123", "test 1", "test 2", "test 3")); - commitDocuments(getDirectory(getIndexPath1AsFile()), docs.get(0)); - - String queryField = "queryfield"; - String queryTerm = "queryterm"; - String maxHits = "500"; - String field1 = "field1"; - String field2 = "field2"; - - String[] args = { - "-i", getIndexPath1AsFile().toString(), - "-o", seqFilesOutputPath.toString(), - "-id", idField, - "-f", field1 + "," + field2, - "-q", queryField + ":" + queryTerm, - "-n", maxHits, - "-xm", "sequential" - }; - - driver.setConf(conf); - driver.run(args); - assertEquals(1, lucene2SeqConf.getIndexPaths().size()); - assertEquals(getIndexPath1().toUri().getPath(), lucene2SeqConf.getIndexPaths().get(0).toUri().getPath()); - assertEquals(seqFilesOutputPath, lucene2SeqConf.getSequenceFilesOutputPath()); - assertEquals(idField, lucene2SeqConf.getIdField()); - assertEquals(Arrays.asList(field1, field2), lucene2SeqConf.getFields()); - - assertTrue(lucene2SeqConf.getQuery() instanceof TermQuery); - assertEquals(queryField, ((TermQuery) lucene2SeqConf.getQuery()).getTerm().field()); - assertEquals(queryTerm, ((TermQuery) lucene2SeqConf.getQuery()).getTerm().text()); - assertEquals(new Integer(maxHits), (Integer) lucene2SeqConf.getMaxHits()); - } - - @Test - public void testRunOptionalArguments() throws Exception { - commitDocuments(getDirectory(getIndexPath1AsFile()), new SingleFieldDocument("1", "Mahout is cool")); - commitDocuments(getDirectory(getIndexPath1AsFile()), new SingleFieldDocument("2", "Mahout is cool")); - - String[] args = { - "-i", getIndexPath1AsFile().toString(), - "-o", seqFilesOutputPath.toString(), - "-id", idField, - "-f", StringUtils.join(fields, SequenceFilesFromLuceneStorageDriver.SEPARATOR_FIELDS) - }; - - driver.setConf(conf); - driver.run(args); - - assertEquals(1, lucene2SeqConf.getIndexPaths().size()); - assertEquals(getIndexPath1().toUri().getPath(), lucene2SeqConf.getIndexPaths().get(0).toUri().getPath()); - assertEquals(seqFilesOutputPath, lucene2SeqConf.getSequenceFilesOutputPath()); - assertEquals(idField, lucene2SeqConf.getIdField()); - assertEquals(fields, lucene2SeqConf.getFields()); - assertEquals(conf, lucene2SeqConf.getConfiguration()); - - assertEquals(SequenceFilesFromLuceneStorageDriver.DEFAULT_QUERY, lucene2SeqConf.getQuery()); - assertEquals(SequenceFilesFromLuceneStorageDriver.DEFAULT_MAX_HITS, lucene2SeqConf.getMaxHits()); - } - - @Test - public void testRunInvalidQuery() throws Exception { - commitDocuments(getDirectory(getIndexPath1AsFile()), new SingleFieldDocument("1", "Mahout is cool")); - commitDocuments(getDirectory(getIndexPath1AsFile()), new SingleFieldDocument("2", "Mahout is cool")); - - String[] args = { - "-i", getIndexPath1AsFile().toString(), - "-o", seqFilesOutputPath.toString(), - "-id", idField, - "-f", StringUtils.join(fields, SequenceFilesFromLuceneStorageDriver.SEPARATOR_FIELDS), - "-q", "invalid:query", - "-xm", "sequential" - }; - - driver.setConf(conf); - driver.run(args); - assertTrue(FileSystem.get(conf).exists(seqFilesOutputPath)); - //shouldn't be any real files in the seq files out path - } - - @Test - public void testHelp() throws Exception { - driver = new SequenceFilesFromLuceneStorageDriver(); - driver.run(new String[]{"--help"}); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJobTest.java ---------------------------------------------------------------------- diff --git a/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJobTest.java b/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJobTest.java deleted file mode 100644 index 94bec2b..0000000 --- a/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJobTest.java +++ /dev/null @@ -1,87 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.mahout.text; - -import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.Text; -import org.apache.mahout.common.HadoopUtil; -import org.apache.mahout.common.Pair; -import org.apache.mahout.text.doc.SingleFieldDocument; -import org.apache.mahout.text.doc.TestDocument; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -import static java.util.Arrays.asList; -@Deprecated -public class SequenceFilesFromLuceneStorageMRJobTest extends AbstractLuceneStorageTest { - - private SequenceFilesFromLuceneStorageMRJob lucene2seq; - private LuceneStorageConfiguration lucene2SeqConf; - - @Before - public void before() throws IOException { - lucene2seq = new SequenceFilesFromLuceneStorageMRJob(); - Configuration configuration = getConfiguration(); - Path seqOutputPath = new Path(getTestTempDirPath(), "seqOutputPath");//don't make the output directory - lucene2SeqConf = new LuceneStorageConfiguration(configuration, asList(getIndexPath1(), getIndexPath2()), - seqOutputPath, SingleFieldDocument.ID_FIELD, Collections.singletonList(SingleFieldDocument.FIELD)); - } - - @After - public void after() throws IOException { - HadoopUtil.delete(lucene2SeqConf.getConfiguration(), lucene2SeqConf.getSequenceFilesOutputPath()); - HadoopUtil.delete(lucene2SeqConf.getConfiguration(), lucene2SeqConf.getIndexPaths()); - } - - @Test - public void testRun() throws IOException { - //Two commit points, each in two diff. Directories - commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(0, 500)); - commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(1000, 1500)); - - commitDocuments(getDirectory(getIndexPath2AsFile()), docs.subList(500, 1000)); - commitDocuments(getDirectory(getIndexPath2AsFile()), docs.subList(1500, 2000)); - commitDocuments(getDirectory(getIndexPath1AsFile()), misshapenDocs); - lucene2seq.run(lucene2SeqConf); - - Iterator<Pair<Text, Text>> iterator = lucene2SeqConf.getSequenceFileIterator(); - Map<String, Text> map = new HashMap<>(); - while (iterator.hasNext()) { - Pair<Text, Text> next = iterator.next(); - map.put(next.getFirst().toString(), next.getSecond()); - } - assertEquals(docs.size() + misshapenDocs.size(), map.size()); - for (TestDocument doc : docs) { - Text value = map.get(doc.getId()); - assertNotNull(value); - assertEquals(value.toString(), doc.getField()); - } - for (TestDocument doc : misshapenDocs) { - Text value = map.get(doc.getId()); - assertNotNull(value); - assertEquals(value.toString(), doc.getField()); - } - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java ---------------------------------------------------------------------- diff --git a/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java b/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java deleted file mode 100644 index 09c2dd7..0000000 --- a/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java +++ /dev/null @@ -1,244 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.mahout.text; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.Text; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; -import org.apache.mahout.common.HadoopUtil; -import org.apache.mahout.common.Pair; -import org.apache.mahout.text.doc.MultipleFieldsDocument; -import org.apache.mahout.text.doc.NumericFieldDocument; -import org.apache.mahout.text.doc.SingleFieldDocument; -import org.apache.mahout.text.doc.TestDocument; -import org.apache.mahout.text.doc.UnstoredFieldsDocument; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -@Deprecated -public class SequenceFilesFromLuceneStorageTest extends AbstractLuceneStorageTest { - - private SequenceFilesFromLuceneStorage lucene2Seq; - private LuceneStorageConfiguration lucene2SeqConf; - private Path seqFilesOutputPath; - private Configuration configuration; - - @Before - public void before() throws IOException { - configuration = getConfiguration(); - seqFilesOutputPath = new Path(getTestTempDirPath(), "seqfiles"); - - lucene2Seq = new SequenceFilesFromLuceneStorage(); - lucene2SeqConf = new LuceneStorageConfiguration(configuration, - Arrays.asList(getIndexPath1(), getIndexPath2()), seqFilesOutputPath, - SingleFieldDocument.ID_FIELD, Collections.singletonList(SingleFieldDocument.FIELD)); - } - - @After - public void after() throws IOException { - HadoopUtil.delete(lucene2SeqConf.getConfiguration(), lucene2SeqConf.getSequenceFilesOutputPath()); - HadoopUtil.delete(lucene2SeqConf.getConfiguration(), lucene2SeqConf.getIndexPaths()); - } - - @Test - public void testRun2Directories() throws Exception { - //Two commit points, each in two diff. Directories - commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(0, 500)); - commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(1000, 1500)); - - commitDocuments(getDirectory(getIndexPath2AsFile()), docs.subList(500, 1000)); - commitDocuments(getDirectory(getIndexPath2AsFile()), docs.subList(1500, 2000)); - - commitDocuments(getDirectory(getIndexPath1AsFile()), misshapenDocs); - lucene2Seq.run(lucene2SeqConf); - - Iterator<Pair<Text, Text>> iterator = lucene2SeqConf.getSequenceFileIterator(); - Map<String, Text> map = new HashMap<>(); - while (iterator.hasNext()) { - Pair<Text, Text> next = iterator.next(); - map.put(next.getFirst().toString(), next.getSecond()); - } - assertEquals(docs.size() + misshapenDocs.size(), map.size()); - for (TestDocument doc : docs) { - Text value = map.get(doc.getId()); - assertNotNull(value); - assertEquals(value.toString(), doc.getField()); - } - for (TestDocument doc : misshapenDocs) { - Text value = map.get(doc.getId()); - assertNotNull(value); - assertEquals(value.toString(), doc.getField()); - } - } - - @Test(expected = IllegalArgumentException.class) - public void testRunUnstoredFields() throws IOException { - commitDocuments(getDirectory(getIndexPath1AsFile()), new UnstoredFieldsDocument("5", "This is test document 5")); - - LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(configuration, - Collections.singletonList(getIndexPath1()), seqFilesOutputPath, - SingleFieldDocument.ID_FIELD, Arrays.asList(UnstoredFieldsDocument.FIELD, UnstoredFieldsDocument.UNSTORED_FIELD)); - - lucene2Seq.run(lucene2SeqConf); - } - - @Test - public void testRunMaxHits() throws IOException { - commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(0, 500)); - commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(1000, 1500)); - - commitDocuments(getDirectory(getIndexPath2AsFile()), docs.subList(500, 1000)); - commitDocuments(getDirectory(getIndexPath2AsFile()), docs.subList(1500, 2000)); - - lucene2SeqConf.setMaxHits(3); - lucene2Seq.run(lucene2SeqConf); - - Iterator<Pair<Text, Text>> iterator = lucene2SeqConf.getSequenceFileIterator(); - assertTrue(iterator.hasNext()); - iterator.next(); - assertTrue(iterator.hasNext()); - iterator.next(); - assertTrue(iterator.hasNext()); - iterator.next(); - assertFalse(iterator.hasNext()); - } - - @Test - public void testRunQuery() throws IOException { - commitDocuments(getDirectory(getIndexPath1AsFile()), docs); - LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(configuration, - Collections.singletonList(getIndexPath1()), seqFilesOutputPath, - SingleFieldDocument.ID_FIELD, Collections.singletonList(SingleFieldDocument.FIELD)); - - Query query = new TermQuery(new Term(lucene2SeqConf.getFields().get(0), "599")); - - lucene2SeqConf.setQuery(query); - lucene2Seq.run(lucene2SeqConf); - - Iterator<Pair<Text, Text>> iterator = lucene2SeqConf.getSequenceFileIterator(); - assertTrue(iterator.hasNext()); - Pair<Text, Text> next = iterator.next(); - assertTrue(next.getSecond().toString().contains("599")); - assertFalse(iterator.hasNext()); - } - - @Test - public void testRunMultipleFields() throws IOException { - LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(configuration, - Collections.singletonList(getIndexPath1()), seqFilesOutputPath, - SingleFieldDocument.ID_FIELD, - Arrays.asList(MultipleFieldsDocument.FIELD, MultipleFieldsDocument.FIELD1, MultipleFieldsDocument.FIELD2)); - - MultipleFieldsDocument multipleFieldsDocument1 = - new MultipleFieldsDocument("1", "This is field 1-1", "This is field 1-2", "This is field 1-3"); - MultipleFieldsDocument multipleFieldsDocument2 = - new MultipleFieldsDocument("2", "This is field 2-1", "This is field 2-2", "This is field 2-3"); - MultipleFieldsDocument multipleFieldsDocument3 = - new MultipleFieldsDocument("3", "This is field 3-1", "This is field 3-2", "This is field 3-3"); - commitDocuments(getDirectory(getIndexPath1AsFile()), multipleFieldsDocument1, - multipleFieldsDocument2, multipleFieldsDocument3); - - lucene2Seq.run(lucene2SeqConf); - - Iterator<Pair<Text, Text>> iterator = lucene2SeqConf.getSequenceFileIterator(); - - assertMultipleFieldsDocumentEquals(multipleFieldsDocument1, iterator.next()); - assertMultipleFieldsDocumentEquals(multipleFieldsDocument2, iterator.next()); - assertMultipleFieldsDocumentEquals(multipleFieldsDocument3, iterator.next()); - } - - @Test - public void testRunNumericField() throws IOException { - LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(configuration, - Collections.singletonList(getIndexPath1()), seqFilesOutputPath, - SingleFieldDocument.ID_FIELD, Arrays.asList(NumericFieldDocument.FIELD, NumericFieldDocument.NUMERIC_FIELD)); - - NumericFieldDocument doc1 = new NumericFieldDocument("1", "This is field 1", 100); - NumericFieldDocument doc2 = new NumericFieldDocument("2", "This is field 2", 200); - NumericFieldDocument doc3 = new NumericFieldDocument("3", "This is field 3", 300); - - commitDocuments(getDirectory(getIndexPath1AsFile()), doc1, doc2, doc3); - - lucene2Seq.run(lucene2SeqConf); - - Iterator<Pair<Text, Text>> iterator = lucene2SeqConf.getSequenceFileIterator(); - - assertNumericFieldEquals(doc1, iterator.next()); - assertNumericFieldEquals(doc2, iterator.next()); - assertNumericFieldEquals(doc3, iterator.next()); - } - - @Test(expected = IllegalArgumentException.class) - public void testNonExistingIdField() throws IOException { - commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(0, 500)); - - lucene2SeqConf = new LuceneStorageConfiguration(configuration, - Collections.singletonList(getIndexPath1()), - seqFilesOutputPath, - "nonExistingField", - Collections.singletonList(SingleFieldDocument.FIELD)); - - lucene2Seq.run(lucene2SeqConf); - } - - @Test(expected = IllegalArgumentException.class) - public void testNonExistingField() throws IOException { - commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(0, 500)); - - lucene2SeqConf = new LuceneStorageConfiguration(configuration, - Collections.singletonList(getIndexPath1()), - seqFilesOutputPath, - SingleFieldDocument.ID_FIELD, - Arrays.asList(SingleFieldDocument.FIELD, "nonExistingField")); - - lucene2Seq.run(lucene2SeqConf); - } - - @Test(expected = IllegalArgumentException.class) - public void testIndexedButNotStoredField() throws IOException { - SingleFieldDocument document = new SingleFieldDocument("id", "field") { - @Override - public Document asLuceneDocument() { - Document document = super.asLuceneDocument(); - document.add(new TextField("indexed", "This text is indexed", Field.Store.NO)); - return document; - } - }; - commitDocuments(getDirectory(getIndexPath1AsFile()), document); - - lucene2SeqConf = new LuceneStorageConfiguration(configuration, - Collections.singletonList(getIndexPath1()), - seqFilesOutputPath, - SingleFieldDocument.ID_FIELD, - Arrays.asList(SingleFieldDocument.FIELD, "indexed")); - - lucene2Seq.run(lucene2SeqConf); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/src/conf/driver.classes.default.props ---------------------------------------------------------------------- diff --git a/src/conf/driver.classes.default.props b/src/conf/driver.classes.default.props index 69a9ba5..cc57f1f 100644 --- a/src/conf/driver.classes.default.props +++ b/src/conf/driver.classes.default.props @@ -13,7 +13,6 @@ org.apache.mahout.vectorizer.SparseVectorsFromSequenceFiles = seq2sparse: Sparse org.apache.mahout.vectorizer.EncodedVectorsFromSequenceFiles = seq2encoded: Encoded Sparse Vector generation from Text sequence files org.apache.mahout.text.WikipediaToSequenceFile = seqwiki : Wikipedia xml dump to sequence file org.apache.mahout.text.SequenceFilesFromMailArchives = seqmailarchives : Creates SequenceFile from a directory containing gzipped mail archives -org.apache.mahout.text.SequenceFilesFromLuceneStorageDriver = lucene2seq : Generate Text SequenceFiles from a Lucene index org.apache.mahout.clustering.streaming.tools.ResplitSequenceFiles = resplit : Splits a set of SequenceFiles into a number of equal splits org.apache.mahout.clustering.streaming.tools.ClusterQualitySummarizer = qualcluster : Runs clustering experiments and summarizes results in a CSV org.apache.mahout.classifier.df.tools.Describe = describe : Describe the fields and target variable in a data set
