MAHOUT-2042 and MAHOUT-2045 Delete directories which were moved/no longer in use
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/e0573de3 Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/e0573de3 Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/e0573de3 Branch: refs/heads/branch-0.14.0 Commit: e0573de33887e9d0909f1a5c34313680916b5aee Parents: 0908c52 Author: Trevor a.k.a @rawkintrevo <[email protected]> Authored: Wed Jun 27 09:51:06 2018 -0500 Committer: Trevor a.k.a @rawkintrevo <[email protected]> Committed: Wed Jun 27 09:51:06 2018 -0500 ---------------------------------------------------------------------- bin/mahout.bu | 395 --- community/mahout-mr/conf/arff.vector.props | 9 + community/mahout-mr/conf/canopy.props | 14 + community/mahout-mr/conf/cat.props | 4 + community/mahout-mr/conf/cleansvd.props | 3 + community/mahout-mr/conf/clusterdump.props | 0 community/mahout-mr/conf/clusterpp.props | 3 + .../mahout-mr/conf/driver.classes.default.props | 69 + .../mahout-mr/conf/evaluateFactorization.props | 0 .../conf/evaluateFactorizationParallel.props | 0 community/mahout-mr/conf/fkmeans.props | 17 + community/mahout-mr/conf/flink-config.yaml | 67 + community/mahout-mr/conf/itemsimilarity.props | 9 + community/mahout-mr/conf/kmeans.props | 13 + community/mahout-mr/conf/log4j.xml | 15 + community/mahout-mr/conf/lucene.vector.props | 0 community/mahout-mr/conf/matrixmult.props | 6 + community/mahout-mr/conf/parallelALS.props | 0 .../conf/predictFromFactorization.props | 0 .../mahout-mr/conf/recommendfactorized.props | 0 .../mahout-mr/conf/recommenditembased.props | 14 + community/mahout-mr/conf/rowid.props | 2 + community/mahout-mr/conf/rowsimilarity.props | 8 + community/mahout-mr/conf/runlogistic.props | 1 + community/mahout-mr/conf/seq2sparse.props | 15 + community/mahout-mr/conf/seqdirectory.props | 3 + community/mahout-mr/conf/seqdumper.props | 0 community/mahout-mr/conf/seqwiki.props | 0 community/mahout-mr/conf/splitDataset.props | 0 
community/mahout-mr/conf/ssvd.props | 14 + community/mahout-mr/conf/svd.props | 6 + community/mahout-mr/conf/trainlogistic.props | 2 + community/mahout-mr/conf/transpose.props | 2 + community/mahout-mr/conf/vectordump.props | 1 + .../integration/bin/prep_asf_mail_archives.sh | 106 + community/mahout-mr/integration/pom.xml | 198 ++ .../mahout/benchmark/BenchmarkRunner.java | 111 + .../apache/mahout/benchmark/CloneBenchmark.java | 62 + .../benchmark/ClosestCentroidBenchmark.java | 98 + .../mahout/benchmark/DistanceBenchmark.java | 104 + .../apache/mahout/benchmark/DotBenchmark.java | 191 ++ .../apache/mahout/benchmark/MinusBenchmark.java | 115 + .../apache/mahout/benchmark/PlusBenchmark.java | 115 + .../benchmark/SerializationBenchmark.java | 124 + .../apache/mahout/benchmark/TimesBenchmark.java | 115 + .../mahout/benchmark/VectorBenchmarks.java | 497 ++++ .../model/cassandra/CassandraDataModel.java | 465 ++++ .../taste/impl/model/hbase/HBaseDataModel.java | 497 ++++ .../jdbc/AbstractBooleanPrefJDBCDataModel.java | 137 ++ .../impl/model/jdbc/AbstractJDBCDataModel.java | 787 ++++++ .../model/jdbc/ConnectionPoolDataSource.java | 122 + .../impl/model/jdbc/GenericJDBCDataModel.java | 146 ++ .../jdbc/MySQLBooleanPrefJDBCDataModel.java | 161 ++ .../impl/model/jdbc/MySQLJDBCDataModel.java | 247 ++ .../PostgreSQLBooleanPrefJDBCDataModel.java | 146 ++ .../model/jdbc/PostgreSQLJDBCDataModel.java | 172 ++ .../model/jdbc/ReloadFromJDBCDataModel.java | 178 ++ .../jdbc/SQL92BooleanPrefJDBCDataModel.java | 221 ++ .../impl/model/jdbc/SQL92JDBCDataModel.java | 248 ++ .../impl/model/mongodb/MongoDBDataModel.java | 873 +++++++ .../AbstractJDBCInMemoryItemSimilarity.java | 132 + .../jdbc/AbstractJDBCItemSimilarity.java | 213 ++ .../jdbc/MySQLJDBCInMemoryItemSimilarity.java | 47 + .../jdbc/MySQLJDBCItemSimilarity.java | 103 + .../jdbc/SQL92JDBCInMemoryItemSimilarity.java | 51 + .../jdbc/SQL92JDBCItemSimilarity.java | 57 + .../mahout/cf/taste/web/RecommenderServlet.java | 215 ++ 
.../cf/taste/web/RecommenderSingleton.java | 57 + .../mahout/cf/taste/web/RecommenderWrapper.java | 126 + .../classifier/ConfusionMatrixDumper.java | 425 ++++ .../mahout/clustering/cdbw/CDbwEvaluator.java | 387 +++ .../clustering/conversion/InputDriver.java | 114 + .../clustering/conversion/InputMapper.java | 81 + .../clustering/evaluation/ClusterEvaluator.java | 196 ++ .../evaluation/RepresentativePointsDriver.java | 243 ++ .../evaluation/RepresentativePointsMapper.java | 117 + .../evaluation/RepresentativePointsReducer.java | 70 + .../mahout/clustering/lda/LDAPrintTopics.java | 229 ++ .../text/MailArchivesClusteringAnalyzer.java | 164 ++ .../text/MultipleTextFileInputFormat.java | 46 + .../mahout/text/PrefixAdditionFilter.java | 67 + .../mahout/text/SequenceFilesFromDirectory.java | 214 ++ .../text/SequenceFilesFromDirectoryFilter.java | 99 + .../text/SequenceFilesFromDirectoryMapper.java | 61 + .../text/SequenceFilesFromMailArchives.java | 369 +++ .../SequenceFilesFromMailArchivesMapper.java | 244 ++ .../mahout/text/TextParagraphSplittingJob.java | 73 + .../mahout/text/WholeFileRecordReader.java | 125 + .../mahout/text/WikipediaToSequenceFile.java | 210 ++ .../text/wikipedia/WikipediaAnalyzer.java | 49 + .../WikipediaDatasetCreatorDriver.java | 190 ++ .../WikipediaDatasetCreatorMapper.java | 142 ++ .../WikipediaDatasetCreatorReducer.java | 38 + .../mahout/text/wikipedia/WikipediaMapper.java | 179 ++ .../text/wikipedia/WikipediaXmlSplitter.java | 234 ++ .../mahout/text/wikipedia/XmlInputFormat.java | 164 ++ .../java/org/apache/mahout/utils/Bump125.java | 62 + .../org/apache/mahout/utils/MatrixDumper.java | 138 ++ .../apache/mahout/utils/SequenceFileDumper.java | 168 ++ .../org/apache/mahout/utils/SplitInput.java | 673 +++++ .../org/apache/mahout/utils/SplitInputJob.java | 213 ++ .../utils/clustering/AbstractClusterWriter.java | 160 ++ .../utils/clustering/CSVClusterWriter.java | 69 + .../mahout/utils/clustering/ClusterDumper.java | 328 +++ 
.../utils/clustering/ClusterDumperWriter.java | 100 + .../mahout/utils/clustering/ClusterWriter.java | 53 + .../utils/clustering/GraphMLClusterWriter.java | 216 ++ .../utils/clustering/JsonClusterWriter.java | 188 ++ .../apache/mahout/utils/email/MailOptions.java | 186 ++ .../mahout/utils/email/MailProcessor.java | 183 ++ .../apache/mahout/utils/io/ChunkedWrapper.java | 42 + .../apache/mahout/utils/io/ChunkedWriter.java | 86 + .../apache/mahout/utils/io/IOWriterWrapper.java | 45 + .../apache/mahout/utils/io/WrappedWriter.java | 31 + .../nlp/collocations/llr/BloomTokenFilter.java | 78 + .../mahout/utils/regex/AnalyzerTransformer.java | 75 + .../mahout/utils/regex/ChainTransformer.java | 55 + .../apache/mahout/utils/regex/FPGFormatter.java | 34 + .../mahout/utils/regex/IdentityFormatter.java | 26 + .../mahout/utils/regex/IdentityTransformer.java | 30 + .../utils/regex/RegexConverterDriver.java | 101 + .../mahout/utils/regex/RegexFormatter.java | 24 + .../apache/mahout/utils/regex/RegexMapper.java | 80 + .../mahout/utils/regex/RegexTransformer.java | 27 + .../apache/mahout/utils/regex/RegexUtils.java | 69 + .../utils/regex/URLDecodeTransformer.java | 43 + .../apache/mahout/utils/vectors/RowIdJob.java | 99 + .../apache/mahout/utils/vectors/TermEntry.java | 46 + .../apache/mahout/utils/vectors/TermInfo.java | 33 + .../mahout/utils/vectors/VectorDumper.java | 266 ++ .../mahout/utils/vectors/VectorHelper.java | 256 ++ .../mahout/utils/vectors/arff/ARFFIterator.java | 144 ++ .../mahout/utils/vectors/arff/ARFFModel.java | 76 + .../mahout/utils/vectors/arff/ARFFType.java | 62 + .../utils/vectors/arff/ARFFVectorIterable.java | 155 ++ .../mahout/utils/vectors/arff/Driver.java | 263 ++ .../utils/vectors/arff/MapBackedARFFModel.java | 282 +++ .../utils/vectors/csv/CSVVectorIterator.java | 69 + .../vectors/io/DelimitedTermInfoWriter.java | 73 + .../vectors/io/SequenceFileVectorWriter.java | 75 + .../mahout/utils/vectors/io/TermInfoWriter.java | 29 + 
.../utils/vectors/io/TextualVectorWriter.java | 70 + .../mahout/utils/vectors/io/VectorWriter.java | 52 + .../vectors/lucene/AbstractLuceneIterator.java | 140 ++ .../utils/vectors/lucene/CachedTermInfo.java | 79 + .../utils/vectors/lucene/ClusterLabels.java | 381 +++ .../mahout/utils/vectors/lucene/Driver.java | 349 +++ .../utils/vectors/lucene/LuceneIterable.java | 80 + .../utils/vectors/lucene/LuceneIterator.java | 99 + .../mahout/utils/vectors/lucene/TFDFMapper.java | 64 + .../vectors/lucene/TermInfoClusterInOut.java | 81 + .../MySQLJDBCInMemoryItemSimilarityTest.java | 79 + .../mahout/clustering/TestClusterDumper.java | 236 ++ .../mahout/clustering/TestClusterEvaluator.java | 321 +++ .../clustering/cdbw/TestCDbwEvaluator.java | 326 +++ .../MailArchivesClusteringAnalyzerTest.java | 66 + .../text/SequenceFilesFromMailArchivesTest.java | 240 ++ .../org/apache/mahout/text/TestPathFilter.java | 32 + .../text/TestSequenceFilesFromDirectory.java | 313 +++ .../mahout/text/doc/MultipleFieldsDocument.java | 58 + .../mahout/text/doc/NumericFieldDocument.java | 54 + .../mahout/text/doc/SingleFieldDocument.java | 63 + .../apache/mahout/text/doc/TestDocument.java | 29 + .../mahout/text/doc/UnstoredFieldsDocument.java | 43 + .../org/apache/mahout/utils/Bump125Test.java | 42 + .../org/apache/mahout/utils/SplitInputTest.java | 418 ++++ .../mahout/utils/email/MailProcessorTest.java | 72 + .../collocations/llr/BloomTokenFilterTest.java | 154 ++ .../mahout/utils/regex/RegexMapperTest.java | 104 + .../mahout/utils/regex/RegexUtilsTest.java | 61 + .../utils/vectors/RandomVectorIterable.java | 73 + .../mahout/utils/vectors/VectorHelperTest.java | 140 ++ .../mahout/utils/vectors/arff/ARFFTypeTest.java | 35 + .../vectors/arff/ARFFVectorIterableTest.java | 289 +++ .../mahout/utils/vectors/arff/DriverTest.java | 54 + .../vectors/arff/MapBackedARFFModelTest.java | 60 + .../vectors/csv/CSVVectorIteratorTest.java | 57 + .../utils/vectors/io/VectorWriterTest.java | 67 + 
.../vectors/lucene/CachedTermInfoTest.java | 121 + .../mahout/utils/vectors/lucene/DriverTest.java | 136 ++ .../vectors/lucene/LuceneIterableTest.java | 195 ++ .../integration/src/test/resources/date.arff | 18 + .../resources/expected-arff-dictionary-2.csv | 22 + .../test/resources/expected-arff-dictionary.csv | 22 + .../test/resources/expected-arff-schema-2.json | 1 + .../test/resources/expected-arff-schema.json | 1 + .../src/test/resources/non-numeric-1.arff | 24 + .../src/test/resources/non-numeric-2.arff | 24 + .../src/test/resources/quoted-id.arff | 9 + .../src/test/resources/sample-dense.arff | 20 + .../src/test/resources/sample-sparse.arff | 24 + .../integration/src/test/resources/sample.arff | 11 + .../integration/src/test/resources/test.mbox | 1038 ++++++++ .../appended-resources/supplemental-models.xml | 279 +++ .../src/images/logos/ mahout-powered.svg | 630 +++++ .../mahout-mr/src/images/logos/favicon.ico | Bin 0 -> 28838 bytes .../mahout-mr/src/images/logos/favicon128.png | Bin 0 -> 5259 bytes .../mahout-mr/src/images/logos/favicon16.png | Bin 0 -> 1009 bytes .../mahout-mr/src/images/logos/favicon32.png | Bin 0 -> 1847 bytes .../mahout-mr/src/images/logos/favicon64.png | Bin 0 -> 3148 bytes .../src/images/logos/mahout-logo-100.png | Bin 0 -> 19477 bytes .../src/images/logos/mahout-logo-200.png | Bin 0 -> 46360 bytes .../src/images/logos/mahout-logo-300.png | Bin 0 -> 70139 bytes .../src/images/logos/mahout-logo-400.png | Bin 0 -> 55468 bytes .../images/logos/mahout-logo-poweredby-100.png | Bin 0 -> 24623 bytes .../images/logos/mahout-logo-poweredby-55.png | Bin 0 -> 11684 bytes .../logos/mahout-logo-transparent-400.png | Bin 0 -> 61970 bytes .../mahout-mr/src/images/logos/mahout-logo.svg | 627 +++++ community/mahout-mr/src/main/assembly/src.xml | 64 + distribution/pom.xml | 407 ---- .../src/main/assembly/scala-2.10_spark-1.6.xml | 249 -- .../src/main/assembly/scala-2.11_spark-2.0.xml | 249 -- .../src/main/assembly/scala-2.11_spark-2.1.xml | 249 -- 
distribution/src/main/assembly/src.xml | 64 - hdfs/pom.xml | 246 -- .../java/org/apache/mahout/common/IOUtils.java | 194 -- .../org/apache/mahout/math/MatrixWritable.java | 202 -- .../org/apache/mahout/math/VarIntWritable.java | 86 - .../org/apache/mahout/math/VarLongWritable.java | 83 - .../java/org/apache/mahout/math/Varint.java | 167 -- .../org/apache/mahout/math/VectorWritable.java | 267 -- .../apache/mahout/math/MatrixWritableTest.java | 141 -- .../java/org/apache/mahout/math/VarintTest.java | 189 -- .../apache/mahout/math/VectorWritableTest.java | 116 - integration/bin/prep_asf_mail_archives.sh | 106 - integration/pom.xml | 198 -- .../mahout/benchmark/BenchmarkRunner.java | 111 - .../apache/mahout/benchmark/CloneBenchmark.java | 62 - .../benchmark/ClosestCentroidBenchmark.java | 98 - .../mahout/benchmark/DistanceBenchmark.java | 104 - .../apache/mahout/benchmark/DotBenchmark.java | 191 -- .../apache/mahout/benchmark/MinusBenchmark.java | 115 - .../apache/mahout/benchmark/PlusBenchmark.java | 115 - .../benchmark/SerializationBenchmark.java | 124 - .../apache/mahout/benchmark/TimesBenchmark.java | 115 - .../mahout/benchmark/VectorBenchmarks.java | 497 ---- .../model/cassandra/CassandraDataModel.java | 465 ---- .../taste/impl/model/hbase/HBaseDataModel.java | 497 ---- .../jdbc/AbstractBooleanPrefJDBCDataModel.java | 137 -- .../impl/model/jdbc/AbstractJDBCDataModel.java | 787 ------ .../model/jdbc/ConnectionPoolDataSource.java | 122 - .../impl/model/jdbc/GenericJDBCDataModel.java | 146 -- .../jdbc/MySQLBooleanPrefJDBCDataModel.java | 161 -- .../impl/model/jdbc/MySQLJDBCDataModel.java | 247 -- .../PostgreSQLBooleanPrefJDBCDataModel.java | 146 -- .../model/jdbc/PostgreSQLJDBCDataModel.java | 172 -- .../model/jdbc/ReloadFromJDBCDataModel.java | 178 -- .../jdbc/SQL92BooleanPrefJDBCDataModel.java | 221 -- .../impl/model/jdbc/SQL92JDBCDataModel.java | 248 -- .../impl/model/mongodb/MongoDBDataModel.java | 873 ------- .../AbstractJDBCInMemoryItemSimilarity.java | 132 - 
.../jdbc/AbstractJDBCItemSimilarity.java | 213 -- .../jdbc/MySQLJDBCInMemoryItemSimilarity.java | 47 - .../jdbc/MySQLJDBCItemSimilarity.java | 103 - .../jdbc/SQL92JDBCInMemoryItemSimilarity.java | 51 - .../jdbc/SQL92JDBCItemSimilarity.java | 57 - .../mahout/cf/taste/web/RecommenderServlet.java | 215 -- .../cf/taste/web/RecommenderSingleton.java | 57 - .../mahout/cf/taste/web/RecommenderWrapper.java | 126 - .../classifier/ConfusionMatrixDumper.java | 425 ---- .../mahout/clustering/cdbw/CDbwEvaluator.java | 387 --- .../clustering/conversion/InputDriver.java | 114 - .../clustering/conversion/InputMapper.java | 81 - .../clustering/evaluation/ClusterEvaluator.java | 196 -- .../evaluation/RepresentativePointsDriver.java | 243 -- .../evaluation/RepresentativePointsMapper.java | 117 - .../evaluation/RepresentativePointsReducer.java | 70 - .../mahout/clustering/lda/LDAPrintTopics.java | 229 -- .../text/MailArchivesClusteringAnalyzer.java | 164 -- .../text/MultipleTextFileInputFormat.java | 46 - .../mahout/text/PrefixAdditionFilter.java | 67 - .../mahout/text/SequenceFilesFromDirectory.java | 214 -- .../text/SequenceFilesFromDirectoryFilter.java | 99 - .../text/SequenceFilesFromDirectoryMapper.java | 61 - .../text/SequenceFilesFromMailArchives.java | 369 --- .../SequenceFilesFromMailArchivesMapper.java | 244 -- .../mahout/text/TextParagraphSplittingJob.java | 73 - .../mahout/text/WholeFileRecordReader.java | 125 - .../mahout/text/WikipediaToSequenceFile.java | 210 -- .../text/wikipedia/WikipediaAnalyzer.java | 49 - .../WikipediaDatasetCreatorDriver.java | 190 -- .../WikipediaDatasetCreatorMapper.java | 142 -- .../WikipediaDatasetCreatorReducer.java | 38 - .../mahout/text/wikipedia/WikipediaMapper.java | 179 -- .../text/wikipedia/WikipediaXmlSplitter.java | 234 -- .../mahout/text/wikipedia/XmlInputFormat.java | 164 -- .../java/org/apache/mahout/utils/Bump125.java | 62 - .../org/apache/mahout/utils/MatrixDumper.java | 138 -- .../apache/mahout/utils/SequenceFileDumper.java | 
168 -- .../org/apache/mahout/utils/SplitInput.java | 673 ----- .../org/apache/mahout/utils/SplitInputJob.java | 213 -- .../utils/clustering/AbstractClusterWriter.java | 160 -- .../utils/clustering/CSVClusterWriter.java | 69 - .../mahout/utils/clustering/ClusterDumper.java | 328 --- .../utils/clustering/ClusterDumperWriter.java | 100 - .../mahout/utils/clustering/ClusterWriter.java | 53 - .../utils/clustering/GraphMLClusterWriter.java | 216 -- .../utils/clustering/JsonClusterWriter.java | 188 -- .../apache/mahout/utils/email/MailOptions.java | 186 -- .../mahout/utils/email/MailProcessor.java | 183 -- .../apache/mahout/utils/io/ChunkedWrapper.java | 42 - .../apache/mahout/utils/io/ChunkedWriter.java | 86 - .../apache/mahout/utils/io/IOWriterWrapper.java | 45 - .../apache/mahout/utils/io/WrappedWriter.java | 31 - .../nlp/collocations/llr/BloomTokenFilter.java | 78 - .../mahout/utils/regex/AnalyzerTransformer.java | 75 - .../mahout/utils/regex/ChainTransformer.java | 55 - .../apache/mahout/utils/regex/FPGFormatter.java | 34 - .../mahout/utils/regex/IdentityFormatter.java | 26 - .../mahout/utils/regex/IdentityTransformer.java | 30 - .../utils/regex/RegexConverterDriver.java | 101 - .../mahout/utils/regex/RegexFormatter.java | 24 - .../apache/mahout/utils/regex/RegexMapper.java | 80 - .../mahout/utils/regex/RegexTransformer.java | 27 - .../apache/mahout/utils/regex/RegexUtils.java | 69 - .../utils/regex/URLDecodeTransformer.java | 43 - .../apache/mahout/utils/vectors/RowIdJob.java | 99 - .../apache/mahout/utils/vectors/TermEntry.java | 46 - .../apache/mahout/utils/vectors/TermInfo.java | 33 - .../mahout/utils/vectors/VectorDumper.java | 266 -- .../mahout/utils/vectors/VectorHelper.java | 256 -- .../mahout/utils/vectors/arff/ARFFIterator.java | 144 -- .../mahout/utils/vectors/arff/ARFFModel.java | 76 - .../mahout/utils/vectors/arff/ARFFType.java | 62 - .../utils/vectors/arff/ARFFVectorIterable.java | 155 -- .../mahout/utils/vectors/arff/Driver.java | 263 -- 
.../utils/vectors/arff/MapBackedARFFModel.java | 282 --- .../utils/vectors/csv/CSVVectorIterator.java | 69 - .../vectors/io/DelimitedTermInfoWriter.java | 73 - .../vectors/io/SequenceFileVectorWriter.java | 75 - .../mahout/utils/vectors/io/TermInfoWriter.java | 29 - .../utils/vectors/io/TextualVectorWriter.java | 70 - .../mahout/utils/vectors/io/VectorWriter.java | 52 - .../vectors/lucene/AbstractLuceneIterator.java | 140 -- .../utils/vectors/lucene/CachedTermInfo.java | 79 - .../utils/vectors/lucene/ClusterLabels.java | 381 --- .../mahout/utils/vectors/lucene/Driver.java | 349 --- .../utils/vectors/lucene/LuceneIterable.java | 80 - .../utils/vectors/lucene/LuceneIterator.java | 99 - .../mahout/utils/vectors/lucene/TFDFMapper.java | 64 - .../vectors/lucene/TermInfoClusterInOut.java | 81 - .../MySQLJDBCInMemoryItemSimilarityTest.java | 79 - .../mahout/clustering/TestClusterDumper.java | 236 -- .../mahout/clustering/TestClusterEvaluator.java | 321 --- .../clustering/cdbw/TestCDbwEvaluator.java | 326 --- .../MailArchivesClusteringAnalyzerTest.java | 66 - .../text/SequenceFilesFromMailArchivesTest.java | 240 -- .../org/apache/mahout/text/TestPathFilter.java | 32 - .../text/TestSequenceFilesFromDirectory.java | 313 --- .../mahout/text/doc/MultipleFieldsDocument.java | 58 - .../mahout/text/doc/NumericFieldDocument.java | 54 - .../mahout/text/doc/SingleFieldDocument.java | 63 - .../apache/mahout/text/doc/TestDocument.java | 29 - .../mahout/text/doc/UnstoredFieldsDocument.java | 43 - .../org/apache/mahout/utils/Bump125Test.java | 42 - .../org/apache/mahout/utils/SplitInputTest.java | 418 ---- .../mahout/utils/email/MailProcessorTest.java | 72 - .../collocations/llr/BloomTokenFilterTest.java | 154 -- .../mahout/utils/regex/RegexMapperTest.java | 104 - .../mahout/utils/regex/RegexUtilsTest.java | 61 - .../utils/vectors/RandomVectorIterable.java | 73 - .../mahout/utils/vectors/VectorHelperTest.java | 140 -- .../mahout/utils/vectors/arff/ARFFTypeTest.java | 35 - 
.../vectors/arff/ARFFVectorIterableTest.java | 289 --- .../mahout/utils/vectors/arff/DriverTest.java | 54 - .../vectors/arff/MapBackedARFFModelTest.java | 60 - .../vectors/csv/CSVVectorIteratorTest.java | 57 - .../utils/vectors/io/VectorWriterTest.java | 67 - .../vectors/lucene/CachedTermInfoTest.java | 121 - .../mahout/utils/vectors/lucene/DriverTest.java | 136 -- .../vectors/lucene/LuceneIterableTest.java | 195 -- integration/src/test/resources/date.arff | 18 - .../resources/expected-arff-dictionary-2.csv | 22 - .../test/resources/expected-arff-dictionary.csv | 22 - .../test/resources/expected-arff-schema-2.json | 1 - .../test/resources/expected-arff-schema.json | 1 - .../src/test/resources/non-numeric-1.arff | 24 - .../src/test/resources/non-numeric-2.arff | 24 - integration/src/test/resources/quoted-id.arff | 9 - .../src/test/resources/sample-dense.arff | 20 - .../src/test/resources/sample-sparse.arff | 24 - integration/src/test/resources/sample.arff | 11 - integration/src/test/resources/test.mbox | 1038 -------- math-scala/pom.xml | 244 -- .../classifier/naivebayes/NBClassifier.scala | 119 - .../mahout/classifier/naivebayes/NBModel.scala | 215 -- .../classifier/naivebayes/NaiveBayes.scala | 383 --- .../classifier/stats/ClassifierStats.scala | 467 ---- .../classifier/stats/ConfusionMatrix.scala | 459 ---- .../common/io/GenericMatrixKryoSerializer.scala | 188 -- .../mahout/common/io/VectorKryoSerializer.scala | 248 -- .../apache/mahout/drivers/MahoutDriver.scala | 44 - .../mahout/drivers/MahoutOptionParser.scala | 220 -- .../org/apache/mahout/logging/package.scala | 73 - .../apache/mahout/math/algorithms/Fitter.scala | 27 - .../apache/mahout/math/algorithms/Model.scala | 26 - .../math/algorithms/SupervisedFitter.scala | 29 - .../math/algorithms/SupervisedModel.scala | 26 - .../math/algorithms/UnsupervisedFitter.scala | 28 - .../math/algorithms/UnsupervisedModel.scala | 24 - .../math/algorithms/clustering/Canopy.scala | 157 -- 
.../algorithms/clustering/ClusteringModel.scala | 45 - .../common/distance/DistanceMetrics.scala | 48 - .../algorithms/preprocessing/AsFactor.scala | 129 - .../algorithms/preprocessing/MeanCenter.scala | 91 - .../preprocessing/PreprocessorModel.scala | 58 - .../preprocessing/StandardScaler.scala | 108 - .../regression/CochraneOrcuttModel.scala | 151 -- .../regression/LinearRegressorModel.scala | 178 -- .../regression/OrdinaryLeastSquaresModel.scala | 71 - .../algorithms/regression/RegressorModel.scala | 66 - .../regression/tests/AutocorrelationTests.scala | 57 - .../regression/tests/FittnessTests.scala | 133 - .../apache/mahout/math/backend/Backend.scala | 33 - .../mahout/math/backend/RootSolverFactory.scala | 84 - .../mahout/math/backend/SolverFactory.scala | 55 - .../mahout/math/backend/incore/package.scala | 17 - .../mahout/math/backend/jvm/JvmBackend.scala | 51 - .../mahout/math/cf/SimilarityAnalysis.scala | 453 ---- .../apache/mahout/math/decompositions/ALS.scala | 141 -- .../apache/mahout/math/decompositions/DQR.scala | 78 - .../mahout/math/decompositions/DSPCA.scala | 162 -- .../mahout/math/decompositions/DSSVD.scala | 100 - .../mahout/math/decompositions/SSVD.scala | 167 -- .../mahout/math/decompositions/package.scala | 141 -- .../org/apache/mahout/math/drm/BCast.scala | 24 - .../org/apache/mahout/math/drm/CacheHint.scala | 36 - .../mahout/math/drm/CheckpointedDrm.scala | 43 - .../mahout/math/drm/CheckpointedOps.scala | 49 - .../mahout/math/drm/DistributedContext.scala | 27 - .../mahout/math/drm/DistributedEngine.scala | 268 -- .../mahout/math/drm/DrmDoubleScalarOps.scala | 37 - .../org/apache/mahout/math/drm/DrmLike.scala | 60 - .../org/apache/mahout/math/drm/DrmLikeOps.scala | 140 -- .../apache/mahout/math/drm/RLikeDrmOps.scala | 172 -- .../math/drm/logical/AbstractBinaryOp.scala | 44 - .../math/drm/logical/AbstractUnaryOp.scala | 32 - .../math/drm/logical/CheckpointAction.scala | 48 - .../apache/mahout/math/drm/logical/OpAB.scala | 47 - 
.../mahout/math/drm/logical/OpABAnyKey.scala | 48 - .../apache/mahout/math/drm/logical/OpABt.scala | 48 - .../apache/mahout/math/drm/logical/OpAewB.scala | 52 - .../mahout/math/drm/logical/OpAewScalar.scala | 55 - .../math/drm/logical/OpAewUnaryFunc.scala | 50 - .../math/drm/logical/OpAewUnaryFuncFusion.scala | 67 - .../apache/mahout/math/drm/logical/OpAt.scala | 43 - .../apache/mahout/math/drm/logical/OpAtA.scala | 42 - .../mahout/math/drm/logical/OpAtAnyKey.scala | 40 - .../apache/mahout/math/drm/logical/OpAtB.scala | 48 - .../apache/mahout/math/drm/logical/OpAtx.scala | 49 - .../apache/mahout/math/drm/logical/OpAx.scala | 48 - .../mahout/math/drm/logical/OpCbind.scala | 48 - .../mahout/math/drm/logical/OpCbindScalar.scala | 42 - .../mahout/math/drm/logical/OpMapBlock.scala | 48 - .../apache/mahout/math/drm/logical/OpPar.scala | 23 - .../mahout/math/drm/logical/OpRbind.scala | 46 - .../mahout/math/drm/logical/OpRowRange.scala | 44 - .../math/drm/logical/OpTimesLeftMatrix.scala | 51 - .../math/drm/logical/OpTimesRightMatrix.scala | 51 - .../mahout/math/drm/logical/TEwFunc.scala | 37 - .../org/apache/mahout/math/drm/package.scala | 375 --- .../mahout/math/indexeddataset/BiMap.scala | 128 - .../math/indexeddataset/IndexedDataset.scala | 61 - .../math/indexeddataset/ReaderWriter.scala | 117 - .../mahout/math/indexeddataset/Schema.scala | 105 - .../apache/mahout/math/scalabindings/MMul.scala | 295 --- .../math/scalabindings/MahoutCollections.scala | 46 - .../scalabindings/MatlabLikeMatrixOps.scala | 66 - .../math/scalabindings/MatlabLikeOps.scala | 35 - .../math/scalabindings/MatlabLikeTimesOps.scala | 28 - .../scalabindings/MatlabLikeVectorOps.scala | 73 - .../mahout/math/scalabindings/MatrixOps.scala | 332 --- .../scalabindings/RLikeDoubleScalarOps.scala | 63 - .../math/scalabindings/RLikeMatrixOps.scala | 172 -- .../mahout/math/scalabindings/RLikeOps.scala | 38 - .../math/scalabindings/RLikeVectorOps.scala | 110 - .../mahout/math/scalabindings/VectorOps.scala | 174 
-- .../mahout/math/scalabindings/package.scala | 477 ---- .../org/apache/mahout/nlp/tfidf/TFIDF.scala | 112 - .../org/apache/mahout/util/IOUtilsScala.scala | 64 - .../classifier/naivebayes/NBTestBase.scala | 291 --- .../stats/ClassifierStatsTestBase.scala | 257 -- .../math/algorithms/ClusteringSuiteBase.scala | 48 - .../math/algorithms/PreprocessorSuiteBase.scala | 118 - .../math/algorithms/RegressionSuiteBase.scala | 180 -- .../algorithms/RegressionTestsSuiteBase.scala | 126 - .../mahout/math/backend/BackendSuite.scala | 59 - .../decompositions/DecompositionsSuite.scala | 113 - .../DistributedDecompositionsSuiteBase.scala | 219 -- .../mahout/math/drm/DrmLikeOpsSuiteBase.scala | 153 -- .../mahout/math/drm/DrmLikeSuiteBase.scala | 74 - .../mahout/math/drm/RLikeDrmOpsSuiteBase.scala | 655 ----- .../scalabindings/MahoutCollectionsSuite.scala | 42 - .../mahout/math/scalabindings/MathSuite.scala | 267 -- .../MatlabLikeMatrixOpsSuite.scala | 67 - .../math/scalabindings/MatrixOpsSuite.scala | 228 -- .../scalabindings/RLikeMatrixOpsSuite.scala | 369 --- .../scalabindings/RLikeVectorOpsSuite.scala | 72 - .../math/scalabindings/VectorOpsSuite.scala | 110 - .../apache/mahout/nlp/tfidf/TFIDFtestBase.scala | 184 -- .../mahout/test/DistributedMahoutSuite.scala | 28 - .../mahout/test/LoggerConfiguration.scala | 16 - .../org/apache/mahout/test/MahoutSuite.scala | 54 - math/pom.xml | 256 -- .../math/buffer/ValueTypeBufferConsumer.java.t | 42 - .../math/function/KeyTypeObjectProcedure.java.t | 50 - .../math/function/KeyTypeProcedure.java.t | 46 - .../function/KeyTypeValueTypeProcedure.java.t | 49 - .../function/ObjectValueTypeProcedure.java.t | 49 - .../math/function/ValueTypeComparator.java.t | 81 - .../math/list/AbstractValueTypeList.java.t | 851 ------- .../mahout/math/list/ValueTypeArrayList.java.t | 659 ----- .../math/map/AbstractKeyTypeObjectMap.java.t | 467 ---- .../math/map/AbstractKeyTypeValueTypeMap.java.t | 509 ---- .../math/map/AbstractObjectValueTypeMap.java.t | 516 
---- .../math/map/OpenKeyTypeObjectHashMap.java.t | 548 ----- .../math/map/OpenKeyTypeValueTypeHashMap.java.t | 632 ----- .../math/map/OpenObjectValueTypeHashMap.java.t | 567 ----- .../mahout/math/set/AbstractKeyTypeSet.java.t | 181 -- .../mahout/math/set/OpenKeyTypeHashSet.java.t | 423 ---- .../apache/mahout/collections/Arithmetic.java | 489 ---- .../apache/mahout/collections/Constants.java | 75 - .../org/apache/mahout/common/RandomUtils.java | 100 - .../org/apache/mahout/common/RandomWrapper.java | 105 - .../org/apache/mahout/math/AbstractMatrix.java | 834 ------- .../org/apache/mahout/math/AbstractVector.java | 684 ------ .../java/org/apache/mahout/math/Algebra.java | 73 - .../java/org/apache/mahout/math/Arrays.java | 662 ----- .../org/apache/mahout/math/BinarySearch.java | 403 --- .../mahout/math/CardinalityException.java | 30 - .../java/org/apache/mahout/math/Centroid.java | 89 - .../mahout/math/CholeskyDecomposition.java | 227 -- .../org/apache/mahout/math/ConstantVector.java | 177 -- .../apache/mahout/math/DelegatingVector.java | 336 --- .../org/apache/mahout/math/DenseMatrix.java | 193 -- .../mahout/math/DenseSymmetricMatrix.java | 62 - .../org/apache/mahout/math/DenseVector.java | 442 ---- .../org/apache/mahout/math/DiagonalMatrix.java | 378 --- .../org/apache/mahout/math/FileBasedMatrix.java | 185 -- .../math/FileBasedSparseBinaryMatrix.java | 535 ---- .../mahout/math/FunctionalMatrixView.java | 99 - .../org/apache/mahout/math/IndexException.java | 30 - .../apache/mahout/math/LengthCachingVector.java | 35 - .../java/org/apache/mahout/math/Matrices.java | 167 -- .../java/org/apache/mahout/math/Matrix.java | 413 ---- .../org/apache/mahout/math/MatrixSlice.java | 36 - .../org/apache/mahout/math/MatrixTimesOps.java | 35 - .../apache/mahout/math/MatrixVectorView.java | 292 --- .../java/org/apache/mahout/math/MatrixView.java | 160 -- .../java/org/apache/mahout/math/MurmurHash.java | 158 -- .../org/apache/mahout/math/MurmurHash3.java | 84 - 
.../org/apache/mahout/math/NamedVector.java | 328 --- .../apache/mahout/math/OldQRDecomposition.java | 234 -- .../mahout/math/OrderedIntDoubleMapping.java | 265 -- .../mahout/math/OrthonormalityVerifier.java | 46 - .../apache/mahout/math/PermutedVectorView.java | 250 -- .../apache/mahout/math/PersistentObject.java | 58 - .../org/apache/mahout/math/PivotedMatrix.java | 288 --- .../main/java/org/apache/mahout/math/QR.java | 27 - .../org/apache/mahout/math/QRDecomposition.java | 181 -- .../mahout/math/RandomAccessSparseVector.java | 303 --- .../apache/mahout/math/RandomTrinaryMatrix.java | 146 -- .../math/SequentialAccessSparseVector.java | 379 --- .../mahout/math/SingularValueDecomposition.java | 669 ----- .../java/org/apache/mahout/math/Sorting.java | 2297 ------------------ .../apache/mahout/math/SparseColumnMatrix.java | 220 -- .../org/apache/mahout/math/SparseMatrix.java | 245 -- .../org/apache/mahout/math/SparseRowMatrix.java | 289 --- .../java/org/apache/mahout/math/Swapper.java | 35 - .../mahout/math/TransposedMatrixView.java | 147 -- .../org/apache/mahout/math/UpperTriangular.java | 160 -- .../java/org/apache/mahout/math/Vector.java | 434 ---- .../mahout/math/VectorBinaryAggregate.java | 481 ---- .../apache/mahout/math/VectorBinaryAssign.java | 667 ----- .../org/apache/mahout/math/VectorIterable.java | 56 - .../java/org/apache/mahout/math/VectorView.java | 238 -- .../org/apache/mahout/math/WeightedVector.java | 87 - .../mahout/math/WeightedVectorComparator.java | 54 - .../math/als/AlternatingLeastSquaresSolver.java | 116 - ...itFeedbackAlternatingLeastSquaresSolver.java | 171 -- .../math/decomposer/AsyncEigenVerifier.java | 80 - .../mahout/math/decomposer/EigenStatus.java | 50 - .../math/decomposer/SimpleEigenVerifier.java | 41 - .../math/decomposer/SingularVectorVerifier.java | 25 - .../math/decomposer/hebbian/EigenUpdater.java | 25 - .../math/decomposer/hebbian/HebbianSolver.java | 342 --- .../math/decomposer/hebbian/HebbianUpdater.java | 71 - 
.../math/decomposer/hebbian/TrainingState.java | 143 -- .../math/decomposer/lanczos/LanczosSolver.java | 213 -- .../math/decomposer/lanczos/LanczosState.java | 107 - .../org/apache/mahout/math/flavor/BackEnum.java | 26 - .../apache/mahout/math/flavor/MatrixFlavor.java | 82 - .../math/flavor/TraversingStructureEnum.java | 48 - .../math/function/DoubleDoubleFunction.java | 98 - .../mahout/math/function/DoubleFunction.java | 48 - .../mahout/math/function/FloatFunction.java | 36 - .../apache/mahout/math/function/Functions.java | 1730 ------------- .../mahout/math/function/IntFunction.java | 41 - .../math/function/IntIntDoubleFunction.java | 43 - .../mahout/math/function/IntIntFunction.java | 25 - .../org/apache/mahout/math/function/Mult.java | 71 - .../math/function/ObjectObjectProcedure.java | 40 - .../mahout/math/function/ObjectProcedure.java | 47 - .../apache/mahout/math/function/PlusMult.java | 123 - .../math/function/SquareRootFunction.java | 26 - .../mahout/math/function/TimesFunction.java | 77 - .../mahout/math/function/VectorFunction.java | 27 - .../mahout/math/function/package-info.java | 4 - .../apache/mahout/math/jet/math/Arithmetic.java | 328 --- .../apache/mahout/math/jet/math/Constants.java | 49 - .../apache/mahout/math/jet/math/Polynomial.java | 98 - .../mahout/math/jet/math/package-info.java | 5 - .../random/AbstractContinousDistribution.java | 51 - .../random/AbstractDiscreteDistribution.java | 27 - .../math/jet/random/AbstractDistribution.java | 87 - .../mahout/math/jet/random/Exponential.java | 81 - .../apache/mahout/math/jet/random/Gamma.java | 302 --- .../math/jet/random/NegativeBinomial.java | 106 - .../apache/mahout/math/jet/random/Normal.java | 110 - .../apache/mahout/math/jet/random/Poisson.java | 296 --- .../apache/mahout/math/jet/random/Uniform.java | 164 -- .../math/jet/random/engine/MersenneTwister.java | 275 --- .../math/jet/random/engine/RandomEngine.java | 169 -- .../math/jet/random/engine/package-info.java | 7 - 
.../math/jet/random/sampling/RandomSampler.java | 503 ---- .../org/apache/mahout/math/jet/stat/Gamma.java | 681 ------ .../mahout/math/jet/stat/Probability.java | 203 -- .../mahout/math/jet/stat/package-info.java | 5 - .../apache/mahout/math/list/AbstractList.java | 247 -- .../mahout/math/list/AbstractObjectList.java | 80 - .../mahout/math/list/ObjectArrayList.java | 419 ---- .../mahout/math/list/SimpleLongArrayList.java | 102 - .../apache/mahout/math/list/package-info.java | 144 -- .../apache/mahout/math/map/HashFunctions.java | 115 - .../org/apache/mahout/math/map/OpenHashMap.java | 652 ----- .../org/apache/mahout/math/map/PrimeFinder.java | 145 -- .../mahout/math/map/QuickOpenIntIntHashMap.java | 215 -- .../apache/mahout/math/map/package-info.java | 250 -- .../org/apache/mahout/math/package-info.java | 4 - .../math/random/AbstractSamplerFunction.java | 39 - .../mahout/math/random/ChineseRestaurant.java | 111 - .../apache/mahout/math/random/Empirical.java | 124 - .../apache/mahout/math/random/IndianBuffet.java | 157 -- .../org/apache/mahout/math/random/Missing.java | 59 - .../apache/mahout/math/random/MultiNormal.java | 118 - .../apache/mahout/math/random/Multinomial.java | 202 -- .../org/apache/mahout/math/random/Normal.java | 40 - .../mahout/math/random/PoissonSampler.java | 67 - .../org/apache/mahout/math/random/Sampler.java | 25 - .../mahout/math/random/WeightedThing.java | 71 - .../org/apache/mahout/math/set/AbstractSet.java | 188 -- .../org/apache/mahout/math/set/HashUtils.java | 56 - .../org/apache/mahout/math/set/OpenHashSet.java | 548 ----- .../math/solver/ConjugateGradientSolver.java | 213 -- .../mahout/math/solver/EigenDecomposition.java | 892 ------- .../mahout/math/solver/JacobiConditioner.java | 47 - .../org/apache/mahout/math/solver/LSMR.java | 565 ----- .../mahout/math/solver/Preconditioner.java | 36 - .../mahout/math/ssvd/SequentialBigSvd.java | 69 - .../apache/mahout/math/stats/LogLikelihood.java | 220 -- 
.../math/stats/OnlineExponentialAverage.java | 62 - .../mahout/math/stats/OnlineSummarizer.java | 93 - .../math/list/ValueTypeArrayListTest.java.t | 237 -- .../map/OpenKeyTypeObjectHashMapTest.java.t | 431 ---- .../map/OpenKeyTypeValueTypeHashMapTest.java.t | 379 --- .../map/OpenObjectValueTypeHashMapTest.java.t | 423 ---- .../math/set/OpenKeyTypeHashSetTest.java.t | 179 -- .../apache/mahout/common/RandomUtilsTest.java | 81 - .../apache/mahout/math/AbstractVectorTest.java | 658 ----- .../org/apache/mahout/math/CentroidTest.java | 72 - .../mahout/math/CholeskyDecompositionTest.java | 152 -- .../apache/mahout/math/DenseSymmetricTest.java | 65 - .../apache/mahout/math/DiagonalMatrixTest.java | 92 - .../apache/mahout/math/FileBasedMatrixTest.java | 89 - .../math/FileBasedSparseBinaryMatrixTest.java | 95 - .../org/apache/mahout/math/FunctionTest.java | 133 - .../org/apache/mahout/math/MahoutTestCase.java | 109 - .../org/apache/mahout/math/MatricesTest.java | 123 - .../java/org/apache/mahout/math/MatrixTest.java | 645 ----- .../mahout/math/MatrixVectorViewTest.java | 58 - .../org/apache/mahout/math/MurmurHash3Test.java | 48 - .../org/apache/mahout/math/MurmurHashTest.java | 120 - .../mahout/math/OldQRDecompositionTest.java | 187 -- .../mahout/math/PermutedVectorViewTest.java | 105 - .../apache/mahout/math/PivotedMatrixTest.java | 65 - .../apache/mahout/math/QRDecompositionTest.java | 280 --- .../org/apache/mahout/math/TestDenseMatrix.java | 45 - .../org/apache/mahout/math/TestDenseVector.java | 47 - .../org/apache/mahout/math/TestMatrixView.java | 470 ---- .../math/TestOrderedIntDoubleMapping.java | 104 - .../math/TestRandomAccessSparseVector.java | 65 - .../math/TestSequentialAccessSparseVector.java | 62 - .../math/TestSingularValueDecomposition.java | 327 --- .../mahout/math/TestSparseColumnMatrix.java | 37 - .../apache/mahout/math/TestSparseMatrix.java | 101 - .../apache/mahout/math/TestSparseRowMatrix.java | 180 -- .../org/apache/mahout/math/TestVectorView.java | 314 
--- .../apache/mahout/math/UpperTriangularTest.java | 54 - .../math/VectorBinaryAggregateCostTest.java | 330 --- .../mahout/math/VectorBinaryAggregateTest.java | 143 -- .../mahout/math/VectorBinaryAssignCostTest.java | 243 -- .../mahout/math/VectorBinaryAssignTest.java | 75 - .../java/org/apache/mahout/math/VectorTest.java | 1135 --------- .../apache/mahout/math/WeightedVectorTest.java | 88 - .../als/AlternatingLeastSquaresSolverTest.java | 151 -- .../mahout/math/decomposer/SolverTest.java | 177 -- .../decomposer/hebbian/TestHebbianSolver.java | 207 -- .../decomposer/lanczos/TestLanczosSolver.java | 97 - .../math/jet/random/DistributionChecks.java | 118 - .../mahout/math/jet/random/ExponentialTest.java | 102 - .../mahout/math/jet/random/GammaTest.java | 131 - .../math/jet/random/NegativeBinomialTest.java | 60 - .../mahout/math/jet/random/NormalTest.java | 71 - .../jet/random/engine/MersenneTwisterTest.java | 704 ------ .../apache/mahout/math/jet/stat/GammaTest.java | 138 -- .../mahout/math/jet/stat/ProbabilityTest.java | 196 -- .../mahout/math/list/ObjectArrayListTest.java | 51 - .../math/random/ChineseRestaurantTest.java | 158 -- .../mahout/math/random/EmpiricalTest.java | 78 - .../mahout/math/random/IndianBuffetTest.java | 43 - .../mahout/math/random/MultiNormalTest.java | 81 - .../mahout/math/random/MultinomialTest.java | 269 -- .../apache/mahout/math/random/NormalTest.java | 62 - .../mahout/math/random/PoissonSamplerTest.java | 56 - .../mahout/math/randomized/RandomBlasting.java | 355 --- .../apache/mahout/math/set/HashUtilsTest.java | 90 - .../math/solver/EigenDecompositionTest.java | 120 - .../org/apache/mahout/math/solver/LSMRTest.java | 105 - .../solver/TestConjugateGradientSolver.java | 231 -- .../mahout/math/ssvd/SequentialBigSvdTest.java | 86 - .../mahout/math/stats/LogLikelihoodTest.java | 197 -- .../stats/OnlineExponentialAverageTest.java | 69 - .../mahout/math/stats/OnlineSummarizerTest.java | 108 - math/src/test/resources/beta-test-data.csv | 1005 
-------- math/src/test/resources/hanging-svd.tsv | 90 - .../resources/negative-binomial-test-data.csv | 62 - math/src/test/resources/words.txt | 1168 --------- pom.xml.bu | 1252 ---------- refactor-readme.md | 90 - spark/pom.xml | 271 --- spark/src/main/assembly/dependency-reduced.xml | 51 - .../classifier/naivebayes/SparkNaiveBayes.scala | 170 -- .../org/apache/mahout/common/DrmMetadata.scala | 73 - .../apache/mahout/common/HDFSPathSearch.scala | 81 - .../org/apache/mahout/common/HDFSUtil.scala | 28 - .../apache/mahout/common/Hadoop2HDFSUtil.scala | 83 - .../mahout/drivers/ItemSimilarityDriver.scala | 213 -- .../mahout/drivers/MahoutSparkDriver.scala | 103 - .../drivers/MahoutSparkOptionParser.scala | 47 - .../mahout/drivers/RowSimilarityDriver.scala | 148 -- .../apache/mahout/drivers/TestNBDriver.scala | 108 - .../drivers/TextDelimitedReaderWriter.scala | 336 --- .../apache/mahout/drivers/TrainNBDriver.scala | 111 - .../sparkbindings/SparkDistributedContext.scala | 30 - .../mahout/sparkbindings/SparkEngine.scala | 387 --- .../apache/mahout/sparkbindings/blas/ABt.scala | 339 --- .../apache/mahout/sparkbindings/blas/AewB.scala | 239 -- .../mahout/sparkbindings/blas/AinCoreB.scala | 63 - .../apache/mahout/sparkbindings/blas/At.scala | 85 - .../apache/mahout/sparkbindings/blas/AtA.scala | 271 --- .../apache/mahout/sparkbindings/blas/AtB.scala | 358 --- .../apache/mahout/sparkbindings/blas/Ax.scala | 63 - .../mahout/sparkbindings/blas/CbindAB.scala | 126 - .../mahout/sparkbindings/blas/DrmRddOps.scala | 43 - .../mahout/sparkbindings/blas/MapBlock.scala | 43 - .../apache/mahout/sparkbindings/blas/Par.scala | 56 - .../mahout/sparkbindings/blas/RbindAB.scala | 50 - .../mahout/sparkbindings/blas/Slicing.scala | 27 - .../mahout/sparkbindings/blas/package.scala | 217 -- .../drm/CheckpointedDrmSpark.scala | 224 -- .../drm/CheckpointedDrmSparkOps.scala | 16 - .../mahout/sparkbindings/drm/DrmRddInput.scala | 41 - .../mahout/sparkbindings/drm/SparkBCast.scala | 27 - 
.../mahout/sparkbindings/drm/package.scala | 112 - .../indexeddataset/IndexedDatasetSpark.scala | 129 - .../io/MahoutKryoRegistrator.scala | 76 - .../io/UnsupportedSerializer.scala | 31 - .../io/WritableKryoSerializer.scala | 47 - .../apache/mahout/sparkbindings/package.scala | 299 --- .../mahout/cf/SimilarityAnalysisSuite.scala | 447 ---- .../naivebayes/NBSparkTestSuite.scala | 159 -- .../stats/ClassifierStatsSparkTestSuite.scala | 26 - .../drivers/ItemSimilarityDriverSuite.scala | 832 ------- .../drivers/RowSimilarityDriverSuite.scala | 139 -- .../TextDelimitedReaderWriterSuite.scala | 53 - .../math/algorithms/ClusteringSuite.scala | 25 - .../math/algorithms/PreprocessorSuite.scala | 24 - .../math/algorithms/RegressionSuite.scala | 25 - .../math/algorithms/RegressionTestsSuite.scala | 25 - .../DistributedDecompositionsSuite.scala | 32 - .../mahout/nlp/tfidf/TFIDFSparkTestSuite.scala | 25 - .../sparkbindings/SparkBindingsSuite.scala | 52 - .../mahout/sparkbindings/blas/BlasSuite.scala | 208 -- .../sparkbindings/drm/DrmLikeOpsSuite.scala | 57 - .../mahout/sparkbindings/drm/DrmLikeSuite.scala | 162 -- .../sparkbindings/drm/RLikeDrmOpsSuite.scala | 179 -- .../mahout/sparkbindings/io/IOSuite.scala | 195 -- .../test/DistributedSparkSuite.scala | 83 - .../test/LoggerConfiguration.scala | 30 - src/conf/arff.vector.props | 9 - src/conf/canopy.props | 14 - src/conf/cat.props | 4 - src/conf/cleansvd.props | 3 - src/conf/clusterdump.props | 0 src/conf/clusterpp.props | 3 - src/conf/driver.classes.default.props | 69 - src/conf/evaluateFactorization.props | 0 src/conf/evaluateFactorizationParallel.props | 0 src/conf/fkmeans.props | 17 - src/conf/flink-config.yaml | 67 - src/conf/itemsimilarity.props | 9 - src/conf/kmeans.props | 13 - src/conf/log4j.xml | 15 - src/conf/lucene.vector.props | 0 src/conf/matrixmult.props | 6 - src/conf/parallelALS.props | 0 src/conf/predictFromFactorization.props | 0 src/conf/recommendfactorized.props | 0 src/conf/recommenditembased.props | 14 - 
src/conf/rowid.props | 2 - src/conf/rowsimilarity.props | 8 - src/conf/runlogistic.props | 1 - src/conf/seq2sparse.props | 15 - src/conf/seqdirectory.props | 3 - src/conf/seqdumper.props | 0 src/conf/seqwiki.props | 0 src/conf/splitDataset.props | 0 src/conf/ssvd.props | 14 - src/conf/svd.props | 6 - src/conf/trainlogistic.props | 2 - src/conf/transpose.props | 2 - src/conf/vectordump.props | 1 - .../appended-resources/supplemental-models.xml | 279 --- src/main/images/logos/ mahout-powered.svg | 630 ----- src/main/images/logos/favicon.ico | Bin 28838 -> 0 bytes src/main/images/logos/favicon128.png | Bin 5259 -> 0 bytes src/main/images/logos/favicon16.png | Bin 1009 -> 0 bytes src/main/images/logos/favicon32.png | Bin 1847 -> 0 bytes src/main/images/logos/favicon64.png | Bin 3148 -> 0 bytes src/main/images/logos/mahout-logo-100.png | Bin 19477 -> 0 bytes src/main/images/logos/mahout-logo-200.png | Bin 46360 -> 0 bytes src/main/images/logos/mahout-logo-300.png | Bin 70139 -> 0 bytes src/main/images/logos/mahout-logo-400.png | Bin 55468 -> 0 bytes .../images/logos/mahout-logo-poweredby-100.png | Bin 24623 -> 0 bytes .../images/logos/mahout-logo-poweredby-55.png | Bin 11684 -> 0 bytes .../logos/mahout-logo-transparent-400.png | Bin 61970 -> 0 bytes src/main/images/logos/mahout-logo.svg | 627 ----- 847 files changed, 25722 insertions(+), 103784 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/bin/mahout.bu ---------------------------------------------------------------------- diff --git a/bin/mahout.bu b/bin/mahout.bu deleted file mode 100755 index 20f9c3d..0000000 --- a/bin/mahout.bu +++ /dev/null @@ -1,395 +0,0 @@ -#!/bin/bash -# -# The Mahout command script -# -# Environment Variables -# -# MAHOUT_JAVA_HOME The java implementation to use. Overrides JAVA_HOME. -# -# MAHOUT_HEAPSIZE The maximum amount of heap to use, in MB. -# Default is 4000. 
-# -# HADOOP_CONF_DIR The location of a hadoop config directory -# -# MAHOUT_OPTS Extra Java runtime options. -# -# MAHOUT_CONF_DIR The location of the program short-name to class name -# mappings and the default properties files -# defaults to "$MAHOUT_HOME/src/conf" -# -# MAHOUT_LOCAL set to anything other than an empty string to force -# mahout to run locally even if -# HADOOP_CONF_DIR and HADOOP_HOME are set -# -# MAHOUT_CORE set to anything other than an empty string to force -# mahout to run in developer 'core' mode, just as if the -# -core option was presented on the command-line -# Command-line Options -# -# -core -core is used to switch into 'developer mode' when -# running mahout locally. If specified, the classes -# from the 'target/classes' directories in each project -# are used. Otherwise classes will be retrieved from -# jars in the binary release collection or *-job.jar files -# found in build directories. When running on hadoop -# the job files will always be used. - -# -#/** -# * Licensed to the Apache Software Foundation (ASF) under one or more -# * contributor license agreements. See the NOTICE file distributed with -# * this work for additional information regarding copyright ownership. -# * The ASF licenses this file to You under the Apache License, Version 2.0 -# * (the "License"); you may not use this file except in compliance with -# * the License. You may obtain a copy of the License at -# * -# * http://www.apache.org/licenses/LICENSE-2.0 -# * -# * Unless required by applicable law or agreed to in writing, software -# * distributed under the License is distributed on an "AS IS" BASIS, -# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# * See the License for the specific language governing permissions and -# * limitations under the License. -# */ - -cygwin=false -case "`uname`" in -CYGWIN*) cygwin=true;; -esac - -# Check that mahout home is set, if not set it to one dir up. 
- -# resolve links - $0 may be a softlink -THIS="$0" -while [ -h "$THIS" ]; do - ls=`ls -ld "$THIS"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '.*/.*' > /dev/null; then - THIS="$link" - else - THIS=`dirname "$THIS"`/"$link" - fi -done - -IS_CORE=0 -if [ "$1" == "-core" ] ; then - IS_CORE=1 - shift -fi - -if [ "$1" == "-spark" ]; then - SPARK=1 - shift -fi - -if [ "$1" == "spark-shell" ]; then - SPARK=1 -fi - -if [ "$1" == "spark-itemsimilarity" ]; then - SPARK=1 -fi - -if [ "$1" == "spark-rowsimilarity" ]; then - SPARK=1 -fi - -if [ "$1" == "spark-trainnb" ]; then - SPARK=1 -fi - -if [ "$1" == "spark-testnb" ]; then - SPARK=1 -fi - -if [ "$MAHOUT_CORE" != "" ]; then - IS_CORE=1 -fi - -if [ "$1" == "h2o-node" ]; then - H2O=1 -fi - -# some directories -THIS_DIR=`dirname "$THIS"` -MAHOUT_HOME=`cd "$THIS_DIR/.." ; pwd` - -# some Java parameters -if [ "$MAHOUT_JAVA_HOME" != "" ]; then - #echo "run java in $MAHOUT_JAVA_HOME" - JAVA_HOME=$MAHOUT_JAVA_HOME -fi - -if [ "$JAVA_HOME" = "" ]; then - echo "Error: JAVA_HOME is not set." - exit 1 -fi - -JAVA=$JAVA_HOME/bin/java -JAVA_HEAP_MAX=-Xmx4g - -# check envvars which might override default args -if [ "$MAHOUT_HEAPSIZE" != "" ]; then - #echo "run with heapsize $MAHOUT_HEAPSIZE" - JAVA_HEAP_MAX="-Xmx""$MAHOUT_HEAPSIZE""m" - #echo $JAVA_HEAP_MAX -fi - -if [ "x$MAHOUT_CONF_DIR" = "x" ]; then - if [ -d $MAHOUT_HOME/src/conf ]; then - MAHOUT_CONF_DIR=$MAHOUT_HOME/src/conf - else - if [ -d $MAHOUT_HOME/conf ]; then - MAHOUT_CONF_DIR=$MAHOUT_HOME/conf - else - echo No MAHOUT_CONF_DIR found - fi - fi -fi - - -# CLASSPATH initially contains $MAHOUT_CONF_DIR, or defaults to $MAHOUT_HOME/src/conf -CLASSPATH=${CLASSPATH}:$MAHOUT_CONF_DIR - -if [ "$MAHOUT_LOCAL" != "" ]; then - echo "MAHOUT_LOCAL is set, so we don't add HADOOP_CONF_DIR to classpath." -elif [ -n "$HADOOP_CONF_DIR" ] ; then - echo "MAHOUT_LOCAL is not set; adding HADOOP_CONF_DIR to classpath." 
- CLASSPATH=${CLASSPATH}:$HADOOP_CONF_DIR -fi - -CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar - -# so that filenames w/ spaces are handled correctly in loops below -IFS= - -if [ $IS_CORE == 0 ] -then - # add release dependencies to CLASSPATH - for f in $MAHOUT_HOME/lib/*.jar; do - CLASSPATH=${CLASSPATH}:$f; - done - - if [ "$SPARK" != "1" ]; then - if [$SPARK_HOME == ""]; then - echo "Have you set SPARK_HOME ?" - fi - # add dev targets if they exist - for f in $MAHOUT_HOME/examples/target/mahout-examples-*-job.jar $MAHOUT_HOME/mahout-examples-*-job.jar ; do - CLASSPATH=${CLASSPATH}:$f; - done - fi - - # add scala dev target - for f in $MAHOUT_HOME/math-scala/target/mahout-math-scala_*.jar ; do - CLASSPATH=${CLASSPATH}:$f; - done - - if [ "$H2O" == "1" ]; then - for f in $MAHOUT_HOME/hdfs/target/mahout-hdfs-*.jar; do - CLASSPATH=${CLASSPATH}:$f; - done - - for f in $MAHOUT_HOME/h2o/target/mahout-h2o*.jar; do - CLASSPATH=${CLASSPATH}:$f; - done - - fi - - # add jars for running from the command line if we requested shell or spark CLI driver - if [ "$SPARK" == "1" ]; then - - for f in $MAHOUT_HOME/lib/mahout-hdfs-*.jar ; do - CLASSPATH=${CLASSPATH}:$f; - done - - for f in $MAHOUT_HOME/lib/mahout-core-*.jar ; do - CLASSPATH=${CLASSPATH}:$f; - done - - for f in $MAHOUT_HOME/lib/spark_*.jar ; do - CLASSPATH=${CLASSPATH}:$f; - done - - for f in $MAHOUT_HOME/lib/spark-cli_*.jar ; do - CLASSPATH=${CLASSPATH}:$f; - done - - # viennacl jars- may or may not be available depending on build profile - for f in $MAHOUT_HOME/viennacl/target/mahout-native-viennacl_*.jar ; do - CLASSPATH=${CLASSPATH}:$f; - done - - # viennacl jars- may or may not be available depending on build profile - for f in $MAHOUT_HOME/viennacl-omp/target/mahout-native-viennacl-omp_*.jar ; do - CLASSPATH=${CLASSPATH}:$f; - done - - - SPARK_CP_BIN="${MAHOUT_HOME}/bin/compute-classpath.sh" - if [ -x "${SPARK_CP_BIN}" ]; then - SPARK_CLASSPATH=$("${SPARK_CP_BIN}" 2>/dev/null) - 
CLASSPATH="${CLASSPATH}:${SPARK_CLASSPATH}" - else - echo "Cannot find Spark classpath. Is 'SPARK_HOME' set?" - exit -1 - fi - - SPARK_ASSEMBLY_BIN="${MAHOUT_HOME}/bin/mahout-spark-class.sh" - if [ -x "${SPARK_ASSEMBLY_BIN}" ]; then - SPARK_ASSEMBLY_CLASSPATH=$("${SPARK_ASSEMBLY_BIN}" 2>/dev/null) - CLASSPATH="${CLASSPATH}:${SPARK_ASSEMBLY_BIN}" - else - echo "Cannot find Spark assembly classpath. Is 'SPARK_HOME' set?" - exit -1 - fi - fi - - # add vcl jars at any point. - # viennacl jars- may or may not be available depending on build profile - for f in $MAHOUT_HOME/viennacl/target/mahout-native-viennacl_*.jar ; do - CLASSPATH=${CLASSPATH}:$f; - done - - # viennacl jars- may or may not be available depending on build profile - for f in $MAHOUT_HOME/viennacl-omp/target/mahout-native-viennacl-omp_*.jar ; do - CLASSPATH=${CLASSPATH}:$f; - done - - # add release dependencies to CLASSPATH - for f in $MAHOUT_HOME/lib/*.jar; do - CLASSPATH=${CLASSPATH}:$f; - done -else - CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/math/target/classes - CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/hdfs/target/classes - CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/mr/target/classes - CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/integration/target/classes - CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/examples/target/classes - CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/math-scala/target/classes - CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/spark/target/classes - CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/spark-shell/target/classes - CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/h2o/target/classes -fi - -# add development dependencies to CLASSPATH -if [ "$SPARK" != "1" ]; then - for f in $MAHOUT_HOME/examples/target/dependency/*.jar; do - CLASSPATH=${CLASSPATH}:$f; - done -fi - - -# cygwin path translation -if $cygwin; then - CLASSPATH=`cygpath -p -w "$CLASSPATH"` -fi - -# restore ordinary behaviour -unset IFS -JARS=$(echo "$MAHOUT_HOME"/*.jar | tr ' ' ',') -case "$1" in - (spark-shell) - save_stty=$(stty -g 2>/dev/null); - $SPARK_HOME/bin/spark-shell --jars "$JARS" 
-i $MAHOUT_HOME/bin/load-shell.scala --conf spark.kryo.referenceTracking=false --conf spark.kryo.registrator=org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator --conf spark.kryoserializer.buffer=32k --conf spark.kryoserializer.buffer.max=600m --conf spark.serializer=org.apache.spark.serializer.KryoSerializer $@ - stty sane; stty $save_stty - ;; - # Spark CLI drivers go here - (spark-itemsimilarity) - shift - "$JAVA" $JAVA_HEAP_MAX -classpath "$CLASSPATH" "org.apache.mahout.drivers.ItemSimilarityDriver" "$@" - ;; - (spark-rowsimilarity) - shift - "$JAVA" $JAVA_HEAP_MAX -classpath "$CLASSPATH" "org.apache.mahout.drivers.RowSimilarityDriver" "$@" - ;; - (spark-trainnb) - shift - "$JAVA" $JAVA_HEAP_MAX -classpath "$CLASSPATH" "org.apache.mahout.drivers.TrainNBDriver" "$@" - ;; - (spark-testnb) - shift - "$JAVA" $JAVA_HEAP_MAX -classpath "$CLASSPATH" "org.apache.mahout.drivers.TestNBDriver" "$@" - ;; - - (h2o-node) - shift - "$JAVA" $JAVA_HEAP_MAX -classpath "$CLASSPATH" "water.H2O" -md5skip "$@" -name mah2out - ;; - (*) - - # default log directory & file - if [ "$MAHOUT_LOG_DIR" = "" ]; then - MAHOUT_LOG_DIR="$MAHOUT_HOME/logs" - fi - if [ "$MAHOUT_LOGFILE" = "" ]; then - MAHOUT_LOGFILE='mahout.log' - fi - - #Fix log path under cygwin - if $cygwin; then - MAHOUT_LOG_DIR=`cygpath -p -w "$MAHOUT_LOG_DIR"` - fi - - MAHOUT_OPTS="$MAHOUT_OPTS -Dhadoop.log.dir=$MAHOUT_LOG_DIR" - MAHOUT_OPTS="$MAHOUT_OPTS -Dhadoop.log.file=$MAHOUT_LOGFILE" - - - if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then - MAHOUT_OPTS="$MAHOUT_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH" - fi - - CLASS=org.apache.mahout.driver.MahoutDriver - - for f in $MAHOUT_HOME/examples/target/mahout-examples-*-job.jar $MAHOUT_HOME/mahout-examples-*-job.jar ; do - if [ -e "$f" ]; then - MAHOUT_JOB=$f - fi - done - - # run it - - HADOOP_BINARY=$(PATH="${HADOOP_HOME:-${HADOOP_PREFIX}}/bin:$PATH" which hadoop 2>/dev/null) - if [ -x "$HADOOP_BINARY" ] ; then - HADOOP_BINARY_CLASSPATH=$("$HADOOP_BINARY" classpath) - fi - 
if [ ! -x "$HADOOP_BINARY" ] || [ "$MAHOUT_LOCAL" != "" ] ; then - if [ ! -x "$HADOOP_BINARY" ] ; then - echo "hadoop binary is not in PATH,HADOOP_HOME/bin,HADOOP_PREFIX/bin, running locally" - elif [ "$MAHOUT_LOCAL" != "" ] ; then - echo "MAHOUT_LOCAL is set, running locally" - fi - CLASSPATH="${CLASSPATH}:${MAHOUT_HOME}/lib/hadoop/*" - case $1 in - (classpath) - echo $CLASSPATH - ;; - (*) - exec "$JAVA" $JAVA_HEAP_MAX $MAHOUT_OPTS -classpath "$CLASSPATH" $CLASS "$@" - esac - else - echo "Running on hadoop, using $HADOOP_BINARY and HADOOP_CONF_DIR=$HADOOP_CONF_DIR" - - if [ "$MAHOUT_JOB" = "" ] ; then - echo "ERROR: Could not find mahout-examples-*.job in $MAHOUT_HOME or $MAHOUT_HOME/examples/target, please run 'mvn install' to create the .job file" - exit 1 - else - case "$1" in - (hadoop) - shift - export HADOOP_CLASSPATH=$MAHOUT_CONF_DIR:${HADOOP_CLASSPATH}:$CLASSPATH - exec "$HADOOP_BINARY" "$@" - ;; - (classpath) - echo $CLASSPATH - ;; - (*) - echo "MAHOUT-JOB: $MAHOUT_JOB" - export HADOOP_CLASSPATH=$MAHOUT_CONF_DIR:${HADOOP_CLASSPATH} - exec "$HADOOP_BINARY" jar $MAHOUT_JOB $CLASS "$@" - esac - fi - fi - ;; -esac - http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/arff.vector.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/arff.vector.props b/community/mahout-mr/conf/arff.vector.props new file mode 100644 index 0000000..c8faebf --- /dev/null +++ b/community/mahout-mr/conf/arff.vector.props @@ -0,0 +1,9 @@ +# The following parameters must be specified +#d|input = /path/to/input +#o|output = /path/to/output +#t|dictOut = /path/to/dictionaryFileOrDirectory + +# The following parameters all have default values if not specified +#m|max = <Max number of vectors to output. Defaults to Long.MAX_VALUE> +#e|outputWriter <Defaults to 'seq' for SequenceFileVectorWriter> +#l|delimiter <Delimiter for outputing the dictionary. 
Defaults to '\t'> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/canopy.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/canopy.props b/community/mahout-mr/conf/canopy.props new file mode 100644 index 0000000..f79f1e9 --- /dev/null +++ b/community/mahout-mr/conf/canopy.props @@ -0,0 +1,14 @@ +# The following parameters must be specified +#i|input = /path/to/input +#o|output = /path/to/output +#t1|t1 = <T1 threshold value> +#t2|t2 = <T2 threshold value> + +# The following parameters all have default values if not specified +#ow|overwrite = <clear output directory if present> +#cl|clustering = <cluster points if present> +#dm|distance = <distance measure class name. Default: SquaredEuclideanDistanceMeasure> + + + + http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/cat.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/cat.props b/community/mahout-mr/conf/cat.props new file mode 100644 index 0000000..6b1ddb1 --- /dev/null +++ b/community/mahout-mr/conf/cat.props @@ -0,0 +1,4 @@ +#lambda|lambda = +#passes|passes = +#lambda|lambda = +#passes|passes = http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/cleansvd.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/cleansvd.props b/community/mahout-mr/conf/cleansvd.props new file mode 100644 index 0000000..0c4e804 --- /dev/null +++ b/community/mahout-mr/conf/cleansvd.props @@ -0,0 +1,3 @@ +#ci|corpusInput = +#ei|eigenInput = +#o|output = http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/clusterdump.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/clusterdump.props 
b/community/mahout-mr/conf/clusterdump.props new file mode 100644 index 0000000..e69de29 http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/clusterpp.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/clusterpp.props b/community/mahout-mr/conf/clusterpp.props new file mode 100644 index 0000000..5b96a89 --- /dev/null +++ b/community/mahout-mr/conf/clusterpp.props @@ -0,0 +1,3 @@ +# The following parameters must be specified +#i|input = /path/to/initial/cluster/output +#o|output = /path/to/output http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/driver.classes.default.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/driver.classes.default.props b/community/mahout-mr/conf/driver.classes.default.props new file mode 100644 index 0000000..cb37efb --- /dev/null +++ b/community/mahout-mr/conf/driver.classes.default.props @@ -0,0 +1,69 @@ +#Utils +org.apache.mahout.utils.vectors.VectorDumper = vectordump : Dump vectors from a sequence file to text +org.apache.mahout.utils.clustering.ClusterDumper = clusterdump : Dump cluster output to text +org.apache.mahout.utils.SequenceFileDumper = seqdumper : Generic Sequence File dumper +org.apache.mahout.utils.vectors.lucene.Driver = lucene.vector : Generate Vectors from a Lucene index +org.apache.mahout.utils.vectors.arff.Driver = arff.vector : Generate Vectors from an ARFF file or directory +org.apache.mahout.utils.vectors.RowIdJob = rowid : Map SequenceFile<Text,VectorWritable> to {SequenceFile<IntWritable,VectorWritable>, SequenceFile<IntWritable,Text>} +org.apache.mahout.utils.SplitInput = split : Split Input data into test and train sets +org.apache.mahout.utils.MatrixDumper = matrixdump : Dump matrix in CSV format +org.apache.mahout.utils.regex.RegexConverterDriver = regexconverter : Convert text files on a per line basis based on 
regular expressions +org.apache.mahout.text.SequenceFilesFromDirectory = seqdirectory : Generate sequence files (of Text) from a directory +org.apache.mahout.vectorizer.SparseVectorsFromSequenceFiles = seq2sparse: Sparse Vector generation from Text sequence files +org.apache.mahout.vectorizer.EncodedVectorsFromSequenceFiles = seq2encoded: Encoded Sparse Vector generation from Text sequence files +org.apache.mahout.text.WikipediaToSequenceFile = seqwiki : Wikipedia xml dump to sequence file +org.apache.mahout.text.SequenceFilesFromMailArchives = seqmailarchives : Creates SequenceFile from a directory containing gzipped mail archives +org.apache.mahout.clustering.streaming.tools.ResplitSequenceFiles = resplit : Splits a set of SequenceFiles into a number of equal splits +org.apache.mahout.clustering.streaming.tools.ClusterQualitySummarizer = qualcluster : Runs clustering experiments and summarizes results in a CSV +org.apache.mahout.classifier.df.tools.Describe = describe : Describe the fields and target variable in a data set + +#Math +org.apache.mahout.math.hadoop.TransposeJob = transpose : Take the transpose of a matrix +org.apache.mahout.math.hadoop.MatrixMultiplicationJob = matrixmult : Take the product of two matrices +org.apache.mahout.math.hadoop.decomposer.DistributedLanczosSolver = svd : Lanczos Singular Value Decomposition +org.apache.mahout.math.hadoop.decomposer.EigenVerificationJob = cleansvd : Cleanup and verification of SVD output +org.apache.mahout.math.hadoop.similarity.cooccurrence.RowSimilarityJob = rowsimilarity : Compute the pairwise similarities of the rows of a matrix +org.apache.mahout.math.hadoop.similarity.VectorDistanceSimilarityJob = vecdist : Compute the distances between a set of Vectors (or Cluster or Canopy, they must fit in memory) and a list of Vectors +org.apache.mahout.math.hadoop.stochasticsvd.SSVDCli = ssvd : Stochastic SVD + +#Clustering +org.apache.mahout.clustering.kmeans.KMeansDriver = kmeans : K-means clustering 
+org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver = fkmeans : Fuzzy K-means clustering +org.apache.mahout.clustering.lda.cvb.CVB0Driver = cvb : LDA via Collapsed Variation Bayes (0th deriv. approx) +org.apache.mahout.clustering.lda.cvb.InMemoryCollapsedVariationalBayes0 = cvb0_local : LDA via Collapsed Variation Bayes, in memory locally. +org.apache.mahout.clustering.canopy.CanopyDriver = canopy : Canopy clustering +org.apache.mahout.clustering.spectral.kmeans.SpectralKMeansDriver = spectralkmeans : Spectral k-means clustering +org.apache.mahout.clustering.topdown.postprocessor.ClusterOutputPostProcessorDriver = clusterpp : Groups Clustering Output In Clusters +org.apache.mahout.clustering.streaming.mapreduce.StreamingKMeansDriver = streamingkmeans : Streaming k-means clustering + +#Classification +#new bayes +org.apache.mahout.classifier.naivebayes.training.TrainNaiveBayesJob = trainnb : Train the Vector-based Bayes classifier +org.apache.mahout.classifier.naivebayes.test.TestNaiveBayesDriver = testnb : Test the Vector-based Bayes classifier + +#SGD +org.apache.mahout.classifier.sgd.TrainLogistic = trainlogistic : Train a logistic regression using stochastic gradient descent +org.apache.mahout.classifier.sgd.RunLogistic = runlogistic : Run a logistic regression model against CSV data +org.apache.mahout.classifier.sgd.PrintResourceOrFile = cat : Print a file or resource as the logistic regression models would see it +org.apache.mahout.classifier.sgd.TrainAdaptiveLogistic = trainAdaptiveLogistic : Train an AdaptivelogisticRegression model +org.apache.mahout.classifier.sgd.ValidateAdaptiveLogistic = validateAdaptiveLogistic : Validate an AdaptivelogisticRegression model against hold-out data set +org.apache.mahout.classifier.sgd.RunAdaptiveLogistic = runAdaptiveLogistic : Score new production data using a probably trained and validated AdaptivelogisticRegression model +#HMM +org.apache.mahout.classifier.sequencelearning.hmm.BaumWelchTrainer = baumwelch : 
Baum-Welch algorithm for unsupervised HMM training +org.apache.mahout.classifier.sequencelearning.hmm.ViterbiEvaluator = viterbi : Viterbi decoding of hidden states from given output states sequence +org.apache.mahout.classifier.sequencelearning.hmm.RandomSequenceGenerator = hmmpredict : Generate random sequence of observations by given HMM +#Classifier Utils +org.apache.mahout.classifier.ConfusionMatrixDumper = cmdump : Dump confusion matrix in HTML or text formats + +#Recommenders +org.apache.mahout.cf.taste.hadoop.als.DatasetSplitter = splitDataset : split a rating dataset into training and probe parts +org.apache.mahout.cf.taste.hadoop.als.FactorizationEvaluator = evaluateFactorization : compute RMSE and MAE of a rating matrix factorization against probes +org.apache.mahout.cf.taste.hadoop.similarity.item.ItemSimilarityJob = itemsimilarity : Compute the item-item-similarities for item-based collaborative filtering +org.apache.mahout.cf.taste.hadoop.item.RecommenderJob = recommenditembased : Compute recommendations using item-based collaborative filtering +org.apache.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob = parallelALS : ALS-WR factorization of a rating matrix +org.apache.mahout.cf.taste.hadoop.als.RecommenderJob = recommendfactorized : Compute recommendations using the factorization of a rating matrix +prepare20newsgroups = deprecated : Try the new vector backed naivebayes classifier see examples/bin/classify-20newsgroups.sh +trainclassifier = deprecated : Try the new vector backed naivebayes classifier see examples/bin/classify-20newsgroups.sh +testclassifier = deprecated : Try the new vector backed naivebayes classifier see examples/bin/classify-20newsgroups.sh +lda = deprecated : Try the new Collapsed Variation Bayes LDA, try bin/mahout cvb or bin/mahout cvb0_local +ldatopics = deprecated : Try the new Collapsed Variation Bayes LDA, try bin/mahout cvb or bin/mahout cvb0_local 
http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/evaluateFactorization.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/evaluateFactorization.props b/community/mahout-mr/conf/evaluateFactorization.props new file mode 100644 index 0000000..e69de29 http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/evaluateFactorizationParallel.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/evaluateFactorizationParallel.props b/community/mahout-mr/conf/evaluateFactorizationParallel.props new file mode 100644 index 0000000..e69de29 http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/fkmeans.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/fkmeans.props b/community/mahout-mr/conf/fkmeans.props new file mode 100644 index 0000000..ad994d6 --- /dev/null +++ b/community/mahout-mr/conf/fkmeans.props @@ -0,0 +1,17 @@ +# The following parameters must be specified +#i|input = /path/to/input +#c|clusters = /path/to/initial/clusters +#o|output = /path/to/output +#m|m = <the coefficient normalization factor > 1.0> +#x|max = <the maximum number of iterations to attempt> + +# The following parameters all have default values if not specified +#ow|overwrite = <clear output directory if present> +#dm|distance = <distance measure class name. Default: SquaredEuclideanDistanceMeasure> +#cd|convergenceDelta = <the convergence threshold. Default: 0.5> +#u|numMap = <the number of mapper tasks to launch. Default: 10> +#r|numReduce = <the number of reduce tasks to launch. Default: 2> +#cl|clustering = <cluster points if present> +#e|emitMostLikely = <emit most likely cluster if clustering. Default: true> +#t|threshold = <threshold if clustering and not emitMostLikely. 
Default: 0.0> +#rs|randomSeed =<Value to seed RNG with if set> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/flink-config.yaml ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/flink-config.yaml b/community/mahout-mr/conf/flink-config.yaml new file mode 100644 index 0000000..968cb04 --- /dev/null +++ b/community/mahout-mr/conf/flink-config.yaml @@ -0,0 +1,67 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + + +#============================================================================== +# Common +#============================================================================== + +# The number of task slots that each TaskManager offers. Each slot runs one parallel pipeline. + +taskmanager.numberOfTaskSlots: 1 + +# The parallelism used for programs that did not specify any other parallelism. 
+ +parallelism.default: 4 + +#============================================================================== +# Advanced +#============================================================================== + +# The number of buffers for the network stack. +# +# taskmanager.network.numberOfBuffers: 2048 + + +# Directories for temporary files. +# +# Add a delimited list for multiple directories, using the system directory +# delimiter (colon ':' on unix) or a comma, e.g.: +# /data1/tmp:/data2/tmp:/data3/tmp +# +# Note: Each directory entry is read from and written to by a different I/O +# thread. You can include the same directory multiple times in order to create +# multiple I/O threads against that directory. This is for example relevant for +# high-throughput RAIDs. +# +# If not specified, the system-specific Java temporary directory (java.io.tmpdir +# property) is taken. +# +taskmanager.tmp.dirs: /tmp/mahoutcache + + +# Path to the Hadoop configuration directory. +# +# This configuration is used when writing into HDFS. Unless specified otherwise, +# HDFS file creation will use HDFS default settings with respect to block-size, +# replication factor, etc. +# +# You can also directly specify the paths to hdfs-default.xml and hdfs-site.xml +# via keys 'fs.hdfs.hdfsdefault' and 'fs.hdfs.hdfssite'. 
+# +# fs.hdfs.hadoopconf: /path/to/hadoop/conf/ http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/itemsimilarity.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/itemsimilarity.props b/community/mahout-mr/conf/itemsimilarity.props new file mode 100644 index 0000000..fdc3322 --- /dev/null +++ b/community/mahout-mr/conf/itemsimilarity.props @@ -0,0 +1,9 @@ +# The following parameters must be specified +#i|input = /path/to/input +#o|output = /path/to/output +#s|similarityClassname = <Name of distributed similarity class to instantiate> + +# The following parameters all have default values if not specified +#m|maxSimilaritiesPerItem = <try to cap the number of similar items per item to this number. Default: 100> +#mo|maxCooccurrencesPerItem = <try to cap the number of cooccurrences per item to this number. Default: 100> +#b|booleanData = <Treat input as without pref values. Default: false> http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/kmeans.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/kmeans.props b/community/mahout-mr/conf/kmeans.props new file mode 100644 index 0000000..1b54e80 --- /dev/null +++ b/community/mahout-mr/conf/kmeans.props @@ -0,0 +1,13 @@ +# The following parameters must be specified +#i|input = /path/to/input +#c|clusters = /path/to/initial/clusters +#o|output = /path/to/output +#x|max = <the maximum number of iterations to attempt> + +# The following parameters all have default values if not specified +#ow|overwrite = <clear output directory if present> +#cl|clustering = <cluster points if present> +#dm|distance = <distance measure class name. Default: SquaredEuclideanDistanceMeasure> +#cd|convergenceDelta = <the convergence threshold. Default: 0.5> +#r|numReduce = <the number of reduce tasks to launch. 
Default: 1> +#rs|randomSeed =<Value to seed RNG with if set> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/log4j.xml ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/log4j.xml b/community/mahout-mr/conf/log4j.xml new file mode 100644 index 0000000..6231b48 --- /dev/null +++ b/community/mahout-mr/conf/log4j.xml @@ -0,0 +1,15 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd"> +<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/"> + <appender class="org.apache.log4j.ConsoleAppender" name="console"> + <param value="System.err" name="target"/> + <layout class="org.apache.log4j.PatternLayout"> + <param value="%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n" name="ConversionPattern"/> + </layout> + </appender> + <!-- Change the level below to adjust logging levels. --> + <root> + <level value="info"/> + <appender-ref ref="console"/> + </root> +</log4j:configuration> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/lucene.vector.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/lucene.vector.props b/community/mahout-mr/conf/lucene.vector.props new file mode 100644 index 0000000..e69de29 http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/matrixmult.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/matrixmult.props b/community/mahout-mr/conf/matrixmult.props new file mode 100644 index 0000000..95218b3 --- /dev/null +++ b/community/mahout-mr/conf/matrixmult.props @@ -0,0 +1,6 @@ +#nra|numRowsA = +#nca|numColsA = +#nrb|numRowsB = +#ncb|numColsB = +#ia|inputPathA = +#ib|inputPathB = http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/parallelALS.props 
---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/parallelALS.props b/community/mahout-mr/conf/parallelALS.props new file mode 100644 index 0000000..e69de29 http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/predictFromFactorization.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/predictFromFactorization.props b/community/mahout-mr/conf/predictFromFactorization.props new file mode 100644 index 0000000..e69de29 http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/recommendfactorized.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/recommendfactorized.props b/community/mahout-mr/conf/recommendfactorized.props new file mode 100644 index 0000000..e69de29 http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/recommenditembased.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/recommenditembased.props b/community/mahout-mr/conf/recommenditembased.props new file mode 100644 index 0000000..68375d9 --- /dev/null +++ b/community/mahout-mr/conf/recommenditembased.props @@ -0,0 +1,14 @@ +# The following parameters must be specified +#i|input = /path/to/input +#o|output = /path/to/output + +# The following parameters all have default values if not specified +#n|numRecommendations = <Number of recommendations per user. Default: 10> +#u|usersFile = <File of users to recommend for. Default: null> +#i|itemsFile = <File of items to recommend for. Default: null> +#f|filterFile = <File containing comma-separated userID,itemID pairs. Used to exclude the item from the recommendations for that user. Default: null> +#b|booleanData = <Treat input as without pref values. 
Default: false> +#mp|maxPrefsPerUser = <Maximum number of preferences considered per user in final recommendation phase. Default: 10> +#m|maxSimilaritiesPerItem = <Maximum number of similarities considered per item. Default: 100> +#mo|maxCooccurrencesPerItem = <try to cap the number of cooccurrences per item to this number. Default: 100> +#s|similarityClassname = <Name of distributed similarity class to instantiate. Default: org.apache.mahout.math.hadoop.similarity.vector.DistributedCooccurrenceVectorSimilarity> http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/rowid.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/rowid.props b/community/mahout-mr/conf/rowid.props new file mode 100644 index 0000000..a1a040e --- /dev/null +++ b/community/mahout-mr/conf/rowid.props @@ -0,0 +1,2 @@ +#i|input = +#o|output = http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/rowsimilarity.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/rowsimilarity.props b/community/mahout-mr/conf/rowsimilarity.props new file mode 100644 index 0000000..4774bcd --- /dev/null +++ b/community/mahout-mr/conf/rowsimilarity.props @@ -0,0 +1,8 @@ +# The following parameters must be specified +#i|input = /path/to/input +#o|output = /path/to/output +#r|numberOfColumns = <Number of columns in the input matrix> +#s|similarityClassname = <Name of distributed similarity class to instantiate> + +# The following parameters all have default values if not specified +#m|maxSimilaritiesPerRow = <Number of maximum similarities per row. 
Default: 100> http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/runlogistic.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/runlogistic.props b/community/mahout-mr/conf/runlogistic.props new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/community/mahout-mr/conf/runlogistic.props @@ -0,0 +1 @@ + \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/seq2sparse.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/seq2sparse.props b/community/mahout-mr/conf/seq2sparse.props new file mode 100644 index 0000000..a50a139 --- /dev/null +++ b/community/mahout-mr/conf/seq2sparse.props @@ -0,0 +1,15 @@ +#o|output = +#i|input = +#s|minSupport = +#a|analyzerName = +#chunk|chunkSize = +#md|minDF = +#x|maxDFPercent = +#wt|weight = +#n|norm = +#ml|minLLR = +#nr|numReducers = +#ng|maxNGramSize = +#w|overwrite = +#h|help = +#seq|sequentialAccessVector = http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/seqdirectory.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/seqdirectory.props b/community/mahout-mr/conf/seqdirectory.props new file mode 100644 index 0000000..21c0d8f --- /dev/null +++ b/community/mahout-mr/conf/seqdirectory.props @@ -0,0 +1,3 @@ +#i|input = +#o|output = +#c|charset = http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/seqdumper.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/seqdumper.props b/community/mahout-mr/conf/seqdumper.props new file mode 100644 index 0000000..e69de29 http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/seqwiki.props 
---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/seqwiki.props b/community/mahout-mr/conf/seqwiki.props new file mode 100644 index 0000000..e69de29 http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/splitDataset.props ---------------------------------------------------------------------- diff --git a/community/mahout-mr/conf/splitDataset.props b/community/mahout-mr/conf/splitDataset.props new file mode 100644 index 0000000..e69de29
