Merge branch 'master' of https://github.com/apache/mahout
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/b391c765 Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/b391c765 Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/b391c765 Branch: refs/heads/master Commit: b391c76502a5294284fe761de86f952fd91434a5 Parents: 1cfa8ea eb70eb8 Author: Andrew Musselman <[email protected]> Authored: Tue Aug 16 22:47:14 2016 -0600 Committer: Andrew Musselman <[email protected]> Committed: Tue Aug 16 22:47:14 2016 -0600 ---------------------------------------------------------------------- .gitignore | 1 + .travis.yml | 39 + CHANGELOG | 6 +- LICENSE.txt | 198 +- NOTICE.txt | 14 +- README.md | 66 + README.txt | 18 - bin/compute-classpath.sh | 186 + bin/mahout | 10 +- bin/mahout-load-spark-env.sh | 40 + bin/mahout-spark-class.sh | 80 + buildtools/pom.xml | 4 +- conf/flink-config.yaml | 67 + distribution/pom.xml | 6 +- distribution/src/main/assembly/bin.xml | 20 +- doap_Mahout.rdf | 4 +- examples/bin/README.txt | 6 +- examples/bin/classify-20newsgroups.sh | 8 +- examples/bin/classify-wikipedia.sh | 25 +- examples/bin/cluster-reuters.sh | 10 +- examples/bin/cluster-syntheticcontrol.sh | 26 +- examples/bin/create-rf-data.sh | 36 - examples/bin/factorize-movielens-1M.sh | 9 +- examples/bin/factorize-netflix.sh | 6 +- examples/bin/resources/bank-full.csv | 45212 +++++++++++++++++ examples/bin/resources/cf-data-purchase.txt | 7 + examples/bin/resources/cf-data-view.txt | 12 + examples/bin/resources/donut-test.csv | 41 + examples/bin/resources/donut.csv | 41 + examples/bin/resources/test-data.csv | 61 + examples/bin/run-rf.sh | 89 - examples/pom.xml | 2 +- .../mahout/classifier/NewsgroupHelper.java | 3 +- .../mahout/classifier/df/BreimanExample.java | 224 - .../classifier/df/mapreduce/BuildForest.java | 258 - .../classifier/df/mapreduce/TestForest.java | 313 - flink/pom.xml | 197 + flink/src/main/resources/log4j.properties | 8 + .../mahout/flinkbindings/FlinkByteBCast.scala | 90 + .../flinkbindings/FlinkDistributedContext.scala | 50 + .../mahout/flinkbindings/FlinkEngine.scala | 416 + .../mahout/flinkbindings/blas/FlinkOpAewB.scala | 60 + .../flinkbindings/blas/FlinkOpAewScalar.scala | 106 + .../mahout/flinkbindings/blas/FlinkOpAt.scala | 72 + .../mahout/flinkbindings/blas/FlinkOpAtA.scala | 222 + .../mahout/flinkbindings/blas/FlinkOpAtB.scala | 88 + .../mahout/flinkbindings/blas/FlinkOpAx.scala | 96 + .../flinkbindings/blas/FlinkOpCBind.scala | 117 + .../flinkbindings/blas/FlinkOpMapBlock.scala | 47 + .../flinkbindings/blas/FlinkOpRBind.scala | 41 + .../flinkbindings/blas/FlinkOpRowRange.scala | 46 + .../blas/FlinkOpTimesRightMatrix.scala | 81 + .../mahout/flinkbindings/blas/package.scala | 125 + .../drm/CheckpointedFlinkDrm.scala | 302 + .../drm/CheckpointedFlinkDrmOps.scala | 34 + .../mahout/flinkbindings/drm/FlinkDrm.scala | 108 + .../mahout/flinkbindings/io/DrmMetadata.scala | 71 + .../flinkbindings/io/HDFSPathSearch.scala | 83 + .../mahout/flinkbindings/io/HDFSUtil.scala | 33 + .../flinkbindings/io/Hadoop2HDFSUtil.scala | 89 + .../apache/mahout/flinkbindings/package.scala | 114 + .../flinkbindings/DistributedFlinkSuite.scala | 74 + .../mahout/flinkbindings/DrmLikeOpsSuite.scala | 73 + .../flinkbindings/FlinkByteBCastSuite.scala | 38 + .../mahout/flinkbindings/RLikeOpsSuite.scala | 326 + .../mahout/flinkbindings/UseCasesSuite.scala | 141 + .../mahout/flinkbindings/blas/LATestSuite.scala | 211 + .../flinkbindings/examples/ReadCsvExample.scala | 39 + .../standard/DrmLikeOpsSuite.scala | 28 + .../flinkbindings/standard/DrmLikeSuite.scala | 28 + .../FlinkDistributedDecompositionsSuite.scala | 221 + .../standard/NaiveBayesTestSuite.scala | 29 + .../standard/RLikeDrmOpsSuite.scala | 28 + h2o/pom.xml | 2 +- .../apache/mahout/h2obindings/ops/AewUnary.java | 13 +- .../org/apache/mahout/h2obindings/ops/Atx.java | 2 +- .../mahout/h2obindings/ops/CbindScalar.java | 8 +- .../apache/mahout/h2o/common/DrmMetadata.scala | 14 +- .../apache/mahout/h2obindings/H2OEngine.scala | 67 +- .../h2obindings/drm/CheckpointedDrmH2O.scala | 9 +- .../mahout/h2obindings/ops/MapBlockHelper.scala | 2 +- hdfs/pom.xml | 10 +- integration/pom.xml | 2 +- .../mahout/text/LuceneIndexFileNameFilter.java | 62 - .../apache/mahout/text/LuceneIndexHelper.java | 41 - .../mahout/text/LuceneSegmentInputFormat.java | 80 - .../mahout/text/LuceneSegmentInputSplit.java | 107 - .../mahout/text/LuceneSegmentRecordReader.java | 103 - .../apache/mahout/text/LuceneSeqFileHelper.java | 54 - .../mahout/text/LuceneStorageConfiguration.java | 333 - .../text/MailArchivesClusteringAnalyzer.java | 31 +- .../text/ReadOnlyFileSystemDirectory.java | 355 - .../text/SequenceFilesFromLuceneStorage.java | 139 - .../SequenceFilesFromLuceneStorageDriver.java | 140 - .../SequenceFilesFromLuceneStorageMRJob.java | 66 - .../SequenceFilesFromLuceneStorageMapper.java | 83 - .../text/wikipedia/WikipediaAnalyzer.java | 17 +- .../mahout/utils/ConcatenateVectorsJob.java | 118 - .../mahout/utils/ConcatenateVectorsReducer.java | 102 - .../mahout/utils/regex/AnalyzerTransformer.java | 3 +- .../vectors/lucene/AbstractLuceneIterator.java | 2 +- .../utils/vectors/lucene/CachedTermInfo.java | 2 +- .../utils/vectors/lucene/ClusterLabels.java | 19 +- .../mahout/utils/vectors/lucene/Driver.java | 3 +- .../mahout/clustering/TestClusterDumper.java | 6 +- .../mahout/text/AbstractLuceneStorageTest.java | 107 - .../text/LuceneSegmentInputFormatTest.java | 85 - .../text/LuceneSegmentInputSplitTest.java | 88 - .../text/LuceneSegmentRecordReaderTest.java | 121 - .../text/LuceneStorageConfigurationTest.java | 49 - ...equenceFilesFromLuceneStorageDriverTest.java | 174 - ...SequenceFilesFromLuceneStorageMRJobTest.java | 87 - .../SequenceFilesFromLuceneStorageTest.java | 244 - .../mahout/utils/TestConcatenateVectorsJob.java | 99 - .../collocations/llr/BloomTokenFilterTest.java | 9 +- .../vectors/lucene/CachedTermInfoTest.java | 6 +- .../mahout/utils/vectors/lucene/DriverTest.java | 17 +- .../vectors/lucene/LuceneIterableTest.java | 8 +- math-scala/pom.xml | 32 +- .../classifier/naivebayes/NBClassifier.scala | 6 +- .../mahout/classifier/naivebayes/NBModel.scala | 2 - .../classifier/naivebayes/NaiveBayes.scala | 19 +- .../classifier/stats/ConfusionMatrix.scala | 15 +- .../common/io/GenericMatrixKryoSerializer.scala | 188 + .../mahout/common/io/VectorKryoSerializer.scala | 248 + .../apache/mahout/math/decompositions/ALS.scala | 9 +- .../apache/mahout/math/decompositions/DQR.scala | 7 +- .../mahout/math/decompositions/DSPCA.scala | 6 +- .../mahout/math/decompositions/DSSVD.scala | 12 +- .../org/apache/mahout/math/drm/CacheHint.scala | 17 + .../mahout/math/drm/CheckpointedDrm.scala | 12 +- .../mahout/math/drm/CheckpointedOps.scala | 5 +- .../mahout/math/drm/DistributedContext.scala | 2 +- .../mahout/math/drm/DistributedEngine.scala | 69 +- .../mahout/math/drm/DrmDoubleScalarOps.scala | 8 +- .../org/apache/mahout/math/drm/DrmLike.scala | 9 +- .../org/apache/mahout/math/drm/DrmLikeOps.scala | 29 +- .../apache/mahout/math/drm/RLikeDrmOps.scala | 27 +- .../math/drm/logical/AbstractBinaryOp.scala | 40 +- .../math/drm/logical/AbstractUnaryOp.scala | 7 +- .../math/drm/logical/CheckpointAction.scala | 4 +- .../apache/mahout/math/drm/logical/OpAB.scala | 8 +- .../mahout/math/drm/logical/OpABAnyKey.scala | 9 +- .../apache/mahout/math/drm/logical/OpABt.scala | 8 +- .../apache/mahout/math/drm/logical/OpAewB.scala | 10 +- .../mahout/math/drm/logical/OpAewScalar.scala | 8 +- .../math/drm/logical/OpAewUnaryFunc.scala | 11 +- .../math/drm/logical/OpAewUnaryFuncFusion.scala | 9 +- .../apache/mahout/math/drm/logical/OpAt.scala | 8 + .../apache/mahout/math/drm/logical/OpAtA.scala | 8 +- .../mahout/math/drm/logical/OpAtAnyKey.scala | 8 +- .../apache/mahout/math/drm/logical/OpAtB.scala | 8 +- .../apache/mahout/math/drm/logical/OpAtx.scala | 8 + .../apache/mahout/math/drm/logical/OpAx.scala | 8 +- .../mahout/math/drm/logical/OpCbind.scala | 10 +- .../mahout/math/drm/logical/OpCbindScalar.scala | 9 +- .../mahout/math/drm/logical/OpMapBlock.scala | 13 +- .../apache/mahout/math/drm/logical/OpPar.scala | 9 +- .../mahout/math/drm/logical/OpRbind.scala | 10 +- .../mahout/math/drm/logical/OpRowRange.scala | 8 + .../math/drm/logical/OpTimesLeftMatrix.scala | 10 +- .../math/drm/logical/OpTimesRightMatrix.scala | 9 +- .../org/apache/mahout/math/drm/package.scala | 75 +- .../mahout/math/indexeddataset/Schema.scala | 13 +- .../apache/mahout/math/scalabindings/MMul.scala | 53 +- .../mahout/math/scalabindings/MatrixOps.scala | 80 +- .../mahout/math/scalabindings/RLikeOps.scala | 2 +- .../math/scalabindings/RLikeVectorOps.scala | 2 +- .../mahout/math/scalabindings/VectorOps.scala | 2 +- .../mahout/math/scalabindings/package.scala | 84 +- .../DistributedDecompositionsSuiteBase.scala | 4 +- .../mahout/math/drm/DrmLikeOpsSuiteBase.scala | 20 +- .../mahout/math/drm/DrmLikeSuiteBase.scala | 1 - .../mahout/math/drm/RLikeDrmOpsSuiteBase.scala | 17 + .../mahout/math/scalabindings/MathSuite.scala | 50 +- .../math/scalabindings/MatrixOpsSuite.scala | 12 + .../scalabindings/RLikeMatrixOpsSuite.scala | 10 + .../scalabindings/RLikeVectorOpsSuite.scala | 40 +- .../math/scalabindings/VectorOpsSuite.scala | 11 + .../apache/mahout/nlp/tfidf/TFIDFtestBase.scala | 2 +- math/pom.xml | 8 +- .../org/apache/mahout/math/DenseVector.java | 99 +- .../java/org/apache/mahout/math/Matrices.java | 28 +- .../java/org/apache/mahout/math/MatrixView.java | 4 +- .../mahout/math/RandomAccessSparseVector.java | 159 +- .../org/apache/mahout/math/SparseMatrix.java | 59 +- .../java/org/apache/mahout/math/Vector.java | 14 +- .../mahout/math/VectorBinaryAggregate.java | 3 +- .../java/org/apache/mahout/math/VectorView.java | 4 +- .../apache/mahout/math/flavor/MatrixFlavor.java | 10 +- .../math/TestRandomAccessSparseVector.java | 2 +- .../java/org/apache/mahout/math/VectorTest.java | 13 +- mr/pom.xml | 10 +- .../cf/taste/impl/model/file/FileDataModel.java | 2 +- .../apache/mahout/classifier/df/Bagging.java | 1 + .../apache/mahout/classifier/df/DFUtils.java | 1 + .../mahout/classifier/df/DecisionForest.java | 1 + .../mahout/classifier/df/ErrorEstimate.java | 1 + .../df/builder/DecisionTreeBuilder.java | 1 + .../df/builder/DefaultTreeBuilder.java | 1 + .../classifier/df/builder/TreeBuilder.java | 1 + .../apache/mahout/classifier/df/data/Data.java | 1 + .../classifier/df/data/DataConverter.java | 1 + .../mahout/classifier/df/data/DataLoader.java | 1 + .../mahout/classifier/df/data/DataUtils.java | 1 + .../mahout/classifier/df/data/Dataset.java | 1 + .../classifier/df/data/DescriptorException.java | 1 + .../classifier/df/data/DescriptorUtils.java | 1 + .../mahout/classifier/df/data/Instance.java | 1 + .../df/data/conditions/Condition.java | 1 + .../classifier/df/data/conditions/Equals.java | 1 + .../df/data/conditions/GreaterOrEquals.java | 1 + .../classifier/df/data/conditions/Lesser.java | 1 + .../mahout/classifier/df/mapreduce/Builder.java | 1 + .../classifier/df/mapreduce/Classifier.java | 1 + .../classifier/df/mapreduce/MapredMapper.java | 1 + .../classifier/df/mapreduce/MapredOutput.java | 1 + .../df/mapreduce/inmem/InMemBuilder.java | 1 + .../df/mapreduce/inmem/InMemInputFormat.java | 1 + .../df/mapreduce/inmem/InMemMapper.java | 1 + .../df/mapreduce/partial/PartialBuilder.java | 1 + .../df/mapreduce/partial/Step1Mapper.java | 1 + .../classifier/df/mapreduce/partial/TreeID.java | 1 + .../classifier/df/node/CategoricalNode.java | 2 +- .../apache/mahout/classifier/df/node/Leaf.java | 1 + .../apache/mahout/classifier/df/node/Node.java | 1 + .../classifier/df/node/NumericalNode.java | 1 + .../classifier/df/ref/SequentialBuilder.java | 1 + .../classifier/df/split/DefaultIgSplit.java | 1 + .../mahout/classifier/df/split/IgSplit.java | 1 + .../mahout/classifier/df/split/OptIgSplit.java | 1 + .../classifier/df/split/RegressionSplit.java | 1 + .../mahout/classifier/df/split/Split.java | 1 + .../classifier/df/tools/ForestVisualizer.java | 1 + .../mahout/classifier/df/tools/Frequencies.java | 1 + .../classifier/df/tools/FrequenciesJob.java | 1 + .../classifier/df/tools/TreeVisualizer.java | 1 + .../mahout/classifier/df/tools/UDistrib.java | 1 + .../mahout/clustering/AbstractCluster.java | 2 +- .../mahout/common/lucene/AnalyzerUtils.java | 4 +- .../mahout/vectorizer/DictionaryVectorizer.java | 14 +- .../org/apache/mahout/vectorizer/TFIDF.java | 4 +- .../encoders/LuceneTextValueEncoder.java | 10 +- .../classifier/df/DecisionForestTest.java | 2 +- .../df/builder/DecisionTreeBuilderTest.java | 2 +- .../df/builder/DefaultTreeBuilderTest.java | 2 +- .../df/builder/InfiniteRecursionTest.java | 2 +- .../classifier/df/data/DataConverterTest.java | 2 +- .../classifier/df/data/DataLoaderTest.java | 2 +- .../mahout/classifier/df/data/DataTest.java | 2 +- .../mahout/classifier/df/data/DatasetTest.java | 2 +- .../classifier/df/data/DescriptorUtilsTest.java | 2 +- .../apache/mahout/classifier/df/data/Utils.java | 1 + .../mapreduce/inmem/InMemInputFormatTest.java | 2 +- .../df/mapreduce/inmem/InMemInputSplitTest.java | 2 +- .../mapreduce/partial/PartialBuilderTest.java | 2 +- .../df/mapreduce/partial/Step1MapperTest.java | 2 +- .../df/mapreduce/partial/TreeIDTest.java | 2 +- .../mahout/classifier/df/node/NodeTest.java | 2 +- .../classifier/df/split/DefaultIgSplitTest.java | 2 +- .../df/split/RegressionSplitTest.java | 2 +- .../classifier/df/tools/VisualizerTest.java | 2 +- .../apache/mahout/math/hadoop/MathHelper.java | 12 +- .../encoders/TextValueEncoderTest.java | 3 +- pom.xml | 67 +- runtests.sh | 44 + spark-shell/pom.xml | 3 +- .../sparkbindings/shell/MahoutSparkILoop.scala | 9 +- spark/pom.xml | 6 +- spark/src/main/assembly/dependency-reduced.xml | 1 + .../classifier/naivebayes/SparkNaiveBayes.scala | 5 +- .../org/apache/mahout/common/DrmMetadata.scala | 14 +- .../apache/mahout/common/Hadoop1HDFSUtil.scala | 8 +- .../mahout/drivers/MahoutSparkDriver.scala | 6 +- .../drivers/MahoutSparkOptionParser.scala | 6 +- .../mahout/drivers/RowSimilarityDriver.scala | 5 +- .../drivers/TextDelimitedReaderWriter.scala | 50 +- .../apache/mahout/drivers/TrainNBDriver.scala | 25 +- .../sparkbindings/SparkDistributedContext.scala | 2 +- .../mahout/sparkbindings/SparkEngine.scala | 220 +- .../apache/mahout/sparkbindings/blas/ABt.scala | 54 +- .../apache/mahout/sparkbindings/blas/AewB.scala | 53 +- .../mahout/sparkbindings/blas/AinCoreB.scala | 13 +- .../apache/mahout/sparkbindings/blas/At.scala | 2 +- .../apache/mahout/sparkbindings/blas/AtA.scala | 14 +- .../apache/mahout/sparkbindings/blas/AtB.scala | 37 +- .../apache/mahout/sparkbindings/blas/Ax.scala | 7 +- .../mahout/sparkbindings/blas/CbindAB.scala | 16 +- .../mahout/sparkbindings/blas/DrmRddOps.scala | 3 +- .../mahout/sparkbindings/blas/MapBlock.scala | 11 +- .../apache/mahout/sparkbindings/blas/Par.scala | 22 +- .../mahout/sparkbindings/blas/RbindAB.scala | 13 +- .../mahout/sparkbindings/blas/Slicing.scala | 2 +- .../mahout/sparkbindings/blas/package.scala | 28 +- .../drm/CheckpointedDrmSpark.scala | 62 +- .../drm/CheckpointedDrmSparkOps.scala | 6 +- .../mahout/sparkbindings/drm/DrmRddInput.scala | 7 +- .../mahout/sparkbindings/drm/package.scala | 54 +- .../io/GenericMatrixKryoSerializer.scala | 189 - .../io/MahoutKryoRegistrator.scala | 1 + .../sparkbindings/io/VectorKryoSerializer.scala | 252 - .../apache/mahout/sparkbindings/package.scala | 16 +- .../TextDelimitedReaderWriterSuite.scala | 53 + .../sparkbindings/SparkBindingsSuite.scala | 6 +- .../sparkbindings/drm/DrmLikeOpsSuite.scala | 32 +- .../mahout/sparkbindings/drm/DrmLikeSuite.scala | 4 +- .../mahout/sparkbindings/io/IOSuite.scala | 6 +- .../test/DistributedSparkSuite.scala | 12 +- src/conf/driver.classes.default.props | 4 - 309 files changed, 52317 insertions(+), 5288 deletions(-) ----------------------------------------------------------------------
