MAHOUT-1759: Deprecate Random Forests, this closes apache/mahout#173
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/1ffa3a46 Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/1ffa3a46 Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/1ffa3a46 Branch: refs/heads/master Commit: 1ffa3a4609f58f9144a69422b202ae53cd8ef6c9 Parents: 48bfb64 Author: smarthi <[email protected]> Authored: Fri Nov 6 00:15:09 2015 -0500 Committer: smarthi <[email protected]> Committed: Fri Nov 6 00:15:09 2015 -0500 ---------------------------------------------------------------------- examples/bin/README.txt | 1 - examples/bin/create-rf-data.sh | 36 --- examples/bin/run-rf.sh | 89 ------ .../mahout/classifier/df/BreimanExample.java | 224 ------------- .../classifier/df/mapreduce/BuildForest.java | 258 --------------- .../classifier/df/mapreduce/TestForest.java | 313 ------------------- .../apache/mahout/classifier/df/Bagging.java | 1 + .../apache/mahout/classifier/df/DFUtils.java | 1 + .../mahout/classifier/df/DecisionForest.java | 1 + .../mahout/classifier/df/ErrorEstimate.java | 1 + .../df/builder/DecisionTreeBuilder.java | 1 + .../df/builder/DefaultTreeBuilder.java | 1 + .../classifier/df/builder/TreeBuilder.java | 1 + .../apache/mahout/classifier/df/data/Data.java | 1 + .../classifier/df/data/DataConverter.java | 1 + .../mahout/classifier/df/data/DataLoader.java | 1 + .../mahout/classifier/df/data/DataUtils.java | 1 + .../mahout/classifier/df/data/Dataset.java | 1 + .../classifier/df/data/DescriptorException.java | 1 + .../classifier/df/data/DescriptorUtils.java | 1 + .../mahout/classifier/df/data/Instance.java | 1 + .../df/data/conditions/Condition.java | 1 + .../classifier/df/data/conditions/Equals.java | 1 + .../df/data/conditions/GreaterOrEquals.java | 1 + .../classifier/df/data/conditions/Lesser.java | 1 + .../mahout/classifier/df/mapreduce/Builder.java | 1 + .../classifier/df/mapreduce/Classifier.java | 1 + .../classifier/df/mapreduce/MapredMapper.java | 1 + .../classifier/df/mapreduce/MapredOutput.java | 1 + .../df/mapreduce/inmem/InMemBuilder.java | 1 + .../df/mapreduce/inmem/InMemInputFormat.java | 1 + .../df/mapreduce/inmem/InMemMapper.java | 1 + .../df/mapreduce/partial/PartialBuilder.java | 1 + .../df/mapreduce/partial/Step1Mapper.java | 1 + .../classifier/df/mapreduce/partial/TreeID.java | 1 + .../classifier/df/node/CategoricalNode.java | 2 +- .../apache/mahout/classifier/df/node/Leaf.java | 1 + .../apache/mahout/classifier/df/node/Node.java | 1 + .../classifier/df/node/NumericalNode.java | 1 + .../classifier/df/ref/SequentialBuilder.java | 1 + .../classifier/df/split/DefaultIgSplit.java | 1 + .../mahout/classifier/df/split/IgSplit.java | 1 + .../mahout/classifier/df/split/OptIgSplit.java | 1 + .../classifier/df/split/RegressionSplit.java | 1 + .../mahout/classifier/df/split/Split.java | 1 + .../classifier/df/tools/ForestVisualizer.java | 1 + .../mahout/classifier/df/tools/Frequencies.java | 1 + .../classifier/df/tools/FrequenciesJob.java | 1 + .../classifier/df/tools/TreeVisualizer.java | 1 + .../mahout/classifier/df/tools/UDistrib.java | 1 + .../classifier/df/DecisionForestTest.java | 2 +- .../df/builder/DecisionTreeBuilderTest.java | 2 +- .../df/builder/DefaultTreeBuilderTest.java | 2 +- .../df/builder/InfiniteRecursionTest.java | 2 +- .../classifier/df/data/DataConverterTest.java | 2 +- .../classifier/df/data/DataLoaderTest.java | 2 +- .../mahout/classifier/df/data/DataTest.java | 2 +- .../mahout/classifier/df/data/DatasetTest.java | 2 +- .../classifier/df/data/DescriptorUtilsTest.java | 2 +- .../apache/mahout/classifier/df/data/Utils.java | 1 + .../mapreduce/inmem/InMemInputFormatTest.java | 2 +- .../df/mapreduce/inmem/InMemInputSplitTest.java | 2 +- .../mapreduce/partial/PartialBuilderTest.java | 2 +- .../df/mapreduce/partial/Step1MapperTest.java | 2 +- .../df/mapreduce/partial/TreeIDTest.java | 2 +- .../mahout/classifier/df/node/NodeTest.java | 2 +- .../classifier/df/split/DefaultIgSplitTest.java | 2 +- .../df/split/RegressionSplitTest.java | 2 +- .../classifier/df/tools/VisualizerTest.java | 2 +- src/conf/driver.classes.default.props | 2 - 70 files changed, 63 insertions(+), 942 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/examples/bin/README.txt ---------------------------------------------------------------------- diff --git a/examples/bin/README.txt b/examples/bin/README.txt index f47ab44..503a687 100644 --- a/examples/bin/README.txt +++ b/examples/bin/README.txt @@ -7,5 +7,4 @@ cluster-reuters.sh -- Cluster the Reuters data set using a variety of algorithms cluster-syntheticcontrol.sh -- Cluster the Synthetic Control data set. Downloads the data set automatically. factorize-movielens-1m.sh -- Run the Alternating Least Squares Recommender on the Grouplens data set (size 1M). factorize-netflix.sh -- (Deprecated due to lack of availability of the data set) Run the ALS Recommender on the Netflix data set. -run-rf.sh -- Create some synthetic data, build a random forest, and test performance. spark-document-classifier.mscala -- A mahout-shell script which trains and tests a Naive Bayes model on the Wikipedia XML dump and defines simple methods to classify new text. \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/examples/bin/create-rf-data.sh ---------------------------------------------------------------------- diff --git a/examples/bin/create-rf-data.sh b/examples/bin/create-rf-data.sh deleted file mode 100755 index 6e72829..0000000 --- a/examples/bin/create-rf-data.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# -# Create synthetic data set with four numeric fields and a boolean label. -# -# Requires scala, and is run from run-rf.sh. - -exec scala "$0" "$@" -!# -val r = new scala.util.Random() -val pw = new java.io.PrintWriter(args(1)) -val numRows = args(0).toInt -(1 to numRows).foreach(e => - pw.println(r.nextDouble() + "," + - r.nextDouble() + "," + - r.nextDouble() + "," + - r.nextDouble() + "," + - (if (r.nextBoolean()) 1 else 0)) -) -pw.close() - http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/examples/bin/run-rf.sh ---------------------------------------------------------------------- diff --git a/examples/bin/run-rf.sh b/examples/bin/run-rf.sh deleted file mode 100755 index e52a3b9..0000000 --- a/examples/bin/run-rf.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/bin/bash -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# -# Requires scala, and for $HADOOP_HOME to be set. -# -# Creates test data for random forest classifier, splits data into train -# and test sets, trains the classifier on the train set, then tests -# model on test set. -# -# To run: change into the mahout directory and type: -# ./examples/bin/run-rf.sh <num-rows> - - -if [ $# -ne 1 ] -then - echo -e "\nThis script takes one parameter, the number of rows of random data to generate.\n" - echo -e "Syntax: $0 <number-of-rows-of-sample-data> \n" - exit -1 -fi - -WORK_DIR=/tmp/mahout-work-${USER} -INPUT="${WORK_DIR}/input" -mkdir -p $INPUT -INPUT_PATH="${INPUT}/rf-input.csv" - -# Set commands for dfs -source ./examples/bin/set-dfs-commands.sh - -# Create test data -numrows=$1 -echo "Writing random data to $INPUT_PATH" -./examples/bin/create-rf-data.sh $numrows $INPUT_PATH - -# Put the test file in HDFS -if [ "$HADOOP_HOME" != "" ] && [ "$MAHOUT_LOCAL" == "" ] ; then - echo "Copying random data to HDFS" - set +e - $DFSRM $WORK_DIR - $DFS -mkdir -p $INPUT - set -e - $DFS -put $INPUT_PATH $INPUT -fi - -# Split original file into train and test -echo "Creating training and holdout set with a random 60-40 split of the generated vector dataset" -./bin/mahout split \ - -i $INPUT \ - --trainingOutput ${WORK_DIR}/train.csv \ - --testOutput ${WORK_DIR}/test.csv \ - --randomSelectionPct 40 --overwrite -xm sequential - -# Describe input file schema -# Note: "-d 4 N L" indicates four numerical fields and one label, as built by the step above. -./bin/mahout describe -p $INPUT_PATH -f ${WORK_DIR}/info -d 4 N L - -# Train rf model -echo -echo "Training random forest." -echo -./bin/mahout buildforest -DXmx10000m -Dmapred.max.split.size=1000000 -d ${WORK_DIR}/train.csv -ds ${WORK_DIR}/info -sl 7 -p -t 500 -o ${WORK_DIR}/forest - -# Test predictions -echo -echo "Testing predictions on test set." -echo -./bin/mahout testforest -DXmx10000m -Dmapred.output.compress=false -i ${WORK_DIR}/test.csv -ds ${WORK_DIR}/info -m ${WORK_DIR}/forest -a -mr -o ${WORK_DIR}/predictions - -# Remove old files -if [ "$HADOOP_HOME" != "" ] && [ "$MAHOUT_LOCAL" == "" ] -then - $DFSRM $WORK_DIR -fi -rm -r $WORK_DIR - http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/examples/src/main/java/org/apache/mahout/classifier/df/BreimanExample.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/mahout/classifier/df/BreimanExample.java b/examples/src/main/java/org/apache/mahout/classifier/df/BreimanExample.java deleted file mode 100644 index 8d2c1cd..0000000 --- a/examples/src/main/java/org/apache/mahout/classifier/df/BreimanExample.java +++ /dev/null @@ -1,224 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.classifier.df; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Random; - -import org.apache.commons.cli2.CommandLine; -import org.apache.commons.cli2.Group; -import org.apache.commons.cli2.Option; -import org.apache.commons.cli2.OptionException; -import org.apache.commons.cli2.builder.ArgumentBuilder; -import org.apache.commons.cli2.builder.DefaultOptionBuilder; -import org.apache.commons.cli2.builder.GroupBuilder; -import org.apache.commons.cli2.commandline.Parser; -import org.apache.commons.math3.util.FastMath; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.mahout.common.CommandLineUtil; -import org.apache.mahout.common.RandomUtils; -import org.apache.mahout.classifier.df.builder.DefaultTreeBuilder; -import org.apache.mahout.classifier.df.data.Data; -import org.apache.mahout.classifier.df.data.DataLoader; -import org.apache.mahout.classifier.df.data.Dataset; -import org.apache.mahout.classifier.df.ref.SequentialBuilder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Test procedure as described in Breiman's paper.<br> - * <b>Leo Breiman: Random Forests. Machine Learning 45(1): 5-32 (2001)</b> - */ -public class BreimanExample extends Configured implements Tool { - - private static final Logger log = LoggerFactory.getLogger(BreimanExample.class); - - /** sum test error */ - private double sumTestErrM; - - private double sumTestErrOne; - - /** mean time to build a forest with m=log2(M)+1 */ - private long sumTimeM; - - /** mean time to build a forest with m=1 */ - private long sumTimeOne; - - /** mean number of nodes for all the trees grown with m=log2(M)+1 */ - private long numNodesM; - - /** mean number of nodes for all the trees grown with m=1 */ - private long numNodesOne; - - /** - * runs one iteration of the procedure. - * - * @param rng - * random numbers generator - * @param data - * training data - * @param m - * number of random variables to select at each tree-node - * @param nbtrees - * number of trees to grow - */ - private void runIteration(Random rng, Data data, int m, int nbtrees) { - - log.info("Splitting the data"); - Data train = data.clone(); - Data test = train.rsplit(rng, (int) (data.size() * 0.1)); - - DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder(); - - SequentialBuilder forestBuilder = new SequentialBuilder(rng, treeBuilder, train); - - // grow a forest with m = log2(M)+1 - treeBuilder.setM(m); - - long time = System.currentTimeMillis(); - log.info("Growing a forest with m={}", m); - DecisionForest forestM = forestBuilder.build(nbtrees); - sumTimeM += System.currentTimeMillis() - time; - numNodesM += forestM.nbNodes(); - - // grow a forest with m=1 - treeBuilder.setM(1); - - time = System.currentTimeMillis(); - log.info("Growing a forest with m=1"); - DecisionForest forestOne = forestBuilder.build(nbtrees); - sumTimeOne += System.currentTimeMillis() - time; - numNodesOne += forestOne.nbNodes(); - - // compute the test set error (Selection Error), and mean tree error (One Tree Error), - double[] testLabels = test.extractLabels(); - double[][] predictions = new double[test.size()][]; - - forestM.classify(test, predictions); - double[] sumPredictions = new double[test.size()]; - Arrays.fill(sumPredictions, 0.0); - for (int i = 0; i < predictions.length; i++) { - for (int j = 0; j < predictions[i].length; j++) { - sumPredictions[i] += predictions[i][j]; - } - } - sumTestErrM += ErrorEstimate.errorRate(testLabels, sumPredictions); - - forestOne.classify(test, predictions); - Arrays.fill(sumPredictions, 0.0); - for (int i = 0; i < predictions.length; i++) { - for (int j = 0; j < predictions[i].length; j++) { - sumPredictions[i] += predictions[i][j]; - } - } - sumTestErrOne += ErrorEstimate.errorRate(testLabels, sumPredictions); - } - - public static void main(String[] args) throws Exception { - ToolRunner.run(new Configuration(), new BreimanExample(), args); - } - - @Override - public int run(String[] args) throws IOException { - - DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); - ArgumentBuilder abuilder = new ArgumentBuilder(); - GroupBuilder gbuilder = new GroupBuilder(); - - Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true).withArgument( - abuilder.withName("path").withMinimum(1).withMaximum(1).create()).withDescription("Data path").create(); - - Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true).withArgument( - abuilder.withName("dataset").withMinimum(1).withMaximum(1).create()).withDescription("Dataset path") - .create(); - - Option nbtreesOpt = obuilder.withLongName("nbtrees").withShortName("t").withRequired(true).withArgument( - abuilder.withName("nbtrees").withMinimum(1).withMaximum(1).create()).withDescription( - "Number of trees to grow, each iteration").create(); - - Option nbItersOpt = obuilder.withLongName("iterations").withShortName("i").withRequired(true) - .withArgument(abuilder.withName("numIterations").withMinimum(1).withMaximum(1).create()) - .withDescription("Number of times to repeat the test").create(); - - Option helpOpt = obuilder.withLongName("help").withDescription("Print out help").withShortName("h") - .create(); - - Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt).withOption( - nbItersOpt).withOption(nbtreesOpt).withOption(helpOpt).create(); - - Path dataPath; - Path datasetPath; - int nbTrees; - int nbIterations; - - try { - Parser parser = new Parser(); - parser.setGroup(group); - CommandLine cmdLine = parser.parse(args); - - if (cmdLine.hasOption("help")) { - CommandLineUtil.printHelp(group); - return -1; - } - - String dataName = cmdLine.getValue(dataOpt).toString(); - String datasetName = cmdLine.getValue(datasetOpt).toString(); - nbTrees = Integer.parseInt(cmdLine.getValue(nbtreesOpt).toString()); - nbIterations = Integer.parseInt(cmdLine.getValue(nbItersOpt).toString()); - - dataPath = new Path(dataName); - datasetPath = new Path(datasetName); - } catch (OptionException e) { - log.error("Error while parsing options", e); - CommandLineUtil.printHelp(group); - return -1; - } - - // load the data - FileSystem fs = dataPath.getFileSystem(new Configuration()); - Dataset dataset = Dataset.load(getConf(), datasetPath); - Data data = DataLoader.loadData(dataset, fs, dataPath); - - // take m to be the first integer less than log2(M) + 1, where M is the - // number of inputs - int m = (int) Math.floor(FastMath.log(2.0, data.getDataset().nbAttributes()) + 1); - - Random rng = RandomUtils.getRandom(); - for (int iteration = 0; iteration < nbIterations; iteration++) { - log.info("Iteration {}", iteration); - runIteration(rng, data, m, nbTrees); - } - - log.info("********************************************"); - log.info("Random Input Test Error : {}", sumTestErrM / nbIterations); - log.info("Single Input Test Error : {}", sumTestErrOne / nbIterations); - log.info("Mean Random Input Time : {}", DFUtils.elapsedTime(sumTimeM / nbIterations)); - log.info("Mean Single Input Time : {}", DFUtils.elapsedTime(sumTimeOne / nbIterations)); - log.info("Mean Random Input Num Nodes : {}", numNodesM / nbIterations); - log.info("Mean Single Input Num Nodes : {}", numNodesOne / nbIterations); - - return 0; - } - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java b/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java deleted file mode 100644 index d945f39..0000000 --- a/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.classifier.df.mapreduce; - -import java.io.IOException; - -import org.apache.commons.cli2.CommandLine; -import org.apache.commons.cli2.Group; -import org.apache.commons.cli2.Option; -import org.apache.commons.cli2.OptionException; -import org.apache.commons.cli2.builder.ArgumentBuilder; -import org.apache.commons.cli2.builder.DefaultOptionBuilder; -import org.apache.commons.cli2.builder.GroupBuilder; -import org.apache.commons.cli2.commandline.Parser; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.mahout.common.CommandLineUtil; -import org.apache.mahout.classifier.df.DFUtils; -import org.apache.mahout.classifier.df.DecisionForest; -import org.apache.mahout.classifier.df.builder.DecisionTreeBuilder; -import org.apache.mahout.classifier.df.data.Data; -import org.apache.mahout.classifier.df.data.DataLoader; -import org.apache.mahout.classifier.df.data.Dataset; -import org.apache.mahout.classifier.df.mapreduce.inmem.InMemBuilder; -import org.apache.mahout.classifier.df.mapreduce.partial.PartialBuilder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Tool to builds a Random Forest using any given dataset (in UCI format). Can use either the in-mem mapred or - * partial mapred implementations. Stores the forest in the given output directory - */ -public class BuildForest extends Configured implements Tool { - - private static final Logger log = LoggerFactory.getLogger(BuildForest.class); - - private Path dataPath; - - private Path datasetPath; - - private Path outputPath; - - private Integer m; // Number of variables to select at each tree-node - - private boolean complemented; // tree is complemented - - private Integer minSplitNum; // minimum number for split - - private Double minVarianceProportion; // minimum proportion of the total variance for split - - private int nbTrees; // Number of trees to grow - - private Long seed; // Random seed - - private boolean isPartial; // use partial data implementation - - @Override - public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException { - - DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); - ArgumentBuilder abuilder = new ArgumentBuilder(); - GroupBuilder gbuilder = new GroupBuilder(); - - Option dataOpt = obuilder.withLongName("data").withShortName("d").withRequired(true) - .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create()) - .withDescription("Data path").create(); - - Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true) - .withArgument(abuilder.withName("dataset").withMinimum(1).withMaximum(1).create()) - .withDescription("Dataset path").create(); - - Option selectionOpt = obuilder.withLongName("selection").withShortName("sl").withRequired(false) - .withArgument(abuilder.withName("m").withMinimum(1).withMaximum(1).create()) - .withDescription("Optional, Number of variables to select randomly at each tree-node.\n" - + "For classification problem, the default is square root of the number of explanatory variables.\n" - + "For regression problem, the default is 1/3 of the number of explanatory variables.").create(); - - Option noCompleteOpt = obuilder.withLongName("no-complete").withShortName("nc").withRequired(false) - .withDescription("Optional, The tree is not complemented").create(); - - Option minSplitOpt = obuilder.withLongName("minsplit").withShortName("ms").withRequired(false) - .withArgument(abuilder.withName("minsplit").withMinimum(1).withMaximum(1).create()) - .withDescription("Optional, The tree-node is not divided, if the branching data size is " - + "smaller than this value.\nThe default is 2.").create(); - - Option minPropOpt = obuilder.withLongName("minprop").withShortName("mp").withRequired(false) - .withArgument(abuilder.withName("minprop").withMinimum(1).withMaximum(1).create()) - .withDescription("Optional, The tree-node is not divided, if the proportion of the " - + "variance of branching data is smaller than this value.\n" - + "In the case of a regression problem, this value is used. " - + "The default is 1/1000(0.001).").create(); - - Option seedOpt = obuilder.withLongName("seed").withShortName("sd").withRequired(false) - .withArgument(abuilder.withName("seed").withMinimum(1).withMaximum(1).create()) - .withDescription("Optional, seed value used to initialise the Random number generator").create(); - - Option partialOpt = obuilder.withLongName("partial").withShortName("p").withRequired(false) - .withDescription("Optional, use the Partial Data implementation").create(); - - Option nbtreesOpt = obuilder.withLongName("nbtrees").withShortName("t").withRequired(true) - .withArgument(abuilder.withName("nbtrees").withMinimum(1).withMaximum(1).create()) - .withDescription("Number of trees to grow").create(); - - Option outputOpt = obuilder.withLongName("output").withShortName("o").withRequired(true) - .withArgument(abuilder.withName("path").withMinimum(1).withMaximum(1).create()) - .withDescription("Output path, will contain the Decision Forest").create(); - - Option helpOpt = obuilder.withLongName("help").withShortName("h") - .withDescription("Print out help").create(); - - Group group = gbuilder.withName("Options").withOption(dataOpt).withOption(datasetOpt) - .withOption(selectionOpt).withOption(noCompleteOpt).withOption(minSplitOpt) - .withOption(minPropOpt).withOption(seedOpt).withOption(partialOpt).withOption(nbtreesOpt) - .withOption(outputOpt).withOption(helpOpt).create(); - - try { - Parser parser = new Parser(); - parser.setGroup(group); - CommandLine cmdLine = parser.parse(args); - - if (cmdLine.hasOption("help")) { - CommandLineUtil.printHelp(group); - return -1; - } - - isPartial = cmdLine.hasOption(partialOpt); - String dataName = cmdLine.getValue(dataOpt).toString(); - String datasetName = cmdLine.getValue(datasetOpt).toString(); - String outputName = cmdLine.getValue(outputOpt).toString(); - nbTrees = Integer.parseInt(cmdLine.getValue(nbtreesOpt).toString()); - - if (cmdLine.hasOption(selectionOpt)) { - m = Integer.parseInt(cmdLine.getValue(selectionOpt).toString()); - } - complemented = !cmdLine.hasOption(noCompleteOpt); - if (cmdLine.hasOption(minSplitOpt)) { - minSplitNum = Integer.parseInt(cmdLine.getValue(minSplitOpt).toString()); - } - if (cmdLine.hasOption(minPropOpt)) { - minVarianceProportion = Double.parseDouble(cmdLine.getValue(minPropOpt).toString()); - } - if (cmdLine.hasOption(seedOpt)) { - seed = Long.valueOf(cmdLine.getValue(seedOpt).toString()); - } - - if (log.isDebugEnabled()) { - log.debug("data : {}", dataName); - log.debug("dataset : {}", datasetName); - log.debug("output : {}", outputName); - log.debug("m : {}", m); - log.debug("complemented : {}", complemented); - log.debug("minSplitNum : {}", minSplitNum); - log.debug("minVarianceProportion : {}", minVarianceProportion); - log.debug("seed : {}", seed); - log.debug("nbtrees : {}", nbTrees); - log.debug("isPartial : {}", isPartial); - } - - dataPath = new Path(dataName); - datasetPath = new Path(datasetName); - outputPath = new Path(outputName); - - } catch (OptionException e) { - log.error("Exception", e); - CommandLineUtil.printHelp(group); - return -1; - } - - buildForest(); - - return 0; - } - - private void buildForest() throws IOException, ClassNotFoundException, InterruptedException { - // make sure the output path does not exist - FileSystem ofs = outputPath.getFileSystem(getConf()); - if (ofs.exists(outputPath)) { - log.error("Output path already exists"); - return; - } - - DecisionTreeBuilder treeBuilder = new DecisionTreeBuilder(); - if (m != null) { - treeBuilder.setM(m); - } - treeBuilder.setComplemented(complemented); - if (minSplitNum != null) { - treeBuilder.setMinSplitNum(minSplitNum); - } - if (minVarianceProportion != null) { - treeBuilder.setMinVarianceProportion(minVarianceProportion); - } - - Builder forestBuilder; - - if (isPartial) { - log.info("Partial Mapred implementation"); - forestBuilder = new PartialBuilder(treeBuilder, dataPath, datasetPath, seed, getConf()); - } else { - log.info("InMem Mapred implementation"); - forestBuilder = new InMemBuilder(treeBuilder, dataPath, datasetPath, seed, getConf()); - } - - forestBuilder.setOutputDirName(outputPath.getName()); - - log.info("Building the forest..."); - long time = System.currentTimeMillis(); - - DecisionForest forest = forestBuilder.build(nbTrees); - if (forest == null) { - return; - } - - time = System.currentTimeMillis() - time; - log.info("Build Time: {}", DFUtils.elapsedTime(time)); - log.info("Forest num Nodes: {}", forest.nbNodes()); - log.info("Forest mean num Nodes: {}", forest.meanNbNodes()); - log.info("Forest mean max Depth: {}", forest.meanMaxDepth()); - - // store the decision forest in the output path - Path forestPath = new Path(outputPath, "forest.seq"); - log.info("Storing the forest in: {}", forestPath); - DFUtils.storeWritable(getConf(), forestPath, forest); - } - - protected static Data loadData(Configuration conf, Path dataPath, Dataset dataset) throws IOException { - log.info("Loading the data..."); - FileSystem fs = dataPath.getFileSystem(conf); - Data data = DataLoader.loadData(dataset, fs, dataPath); - log.info("Data Loaded"); - - return data; - } - - public static void main(String[] args) throws Exception { - ToolRunner.run(new Configuration(), new BuildForest(), args); - } - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java ---------------------------------------------------------------------- diff --git a/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java b/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java deleted file mode 100644 index db39215..0000000 --- a/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java +++ /dev/null @@ -1,313 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.classifier.df.mapreduce; - -import org.apache.commons.cli2.CommandLine; -import org.apache.commons.cli2.Group; -import org.apache.commons.cli2.Option; -import org.apache.commons.cli2.OptionException; -import org.apache.commons.cli2.builder.ArgumentBuilder; -import org.apache.commons.cli2.builder.DefaultOptionBuilder; -import org.apache.commons.cli2.builder.GroupBuilder; -import org.apache.commons.cli2.commandline.Parser; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.mahout.classifier.ClassifierResult; -import org.apache.mahout.classifier.RegressionResultAnalyzer; -import org.apache.mahout.classifier.ResultAnalyzer; -import org.apache.mahout.classifier.df.DFUtils; -import org.apache.mahout.classifier.df.DecisionForest; -import org.apache.mahout.classifier.df.data.DataConverter; -import org.apache.mahout.classifier.df.data.Dataset; -import org.apache.mahout.classifier.df.data.Instance; -import org.apache.mahout.common.CommandLineUtil; -import org.apache.mahout.common.RandomUtils; -import org.apache.mahout.common.commandline.DefaultOptionCreator; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.Random; -import java.util.Scanner; - -/** - * Tool to classify a Dataset using a previously built Decision Forest - */ -public class TestForest extends Configured implements Tool { - - private static final Logger log = LoggerFactory.getLogger(TestForest.class); - - private FileSystem dataFS; - private Path dataPath; // test data path - - private Path datasetPath; - - private Path modelPath; // path where the forest is stored - - private FileSystem outFS; - private Path outputPath; // path to predictions file, if null do not output the predictions - - private boolean analyze; // analyze the classification results ? - - private boolean useMapreduce; // use the mapreduce classifier ? - - @Override - public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException { - - DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); - ArgumentBuilder abuilder = new ArgumentBuilder(); - GroupBuilder gbuilder = new GroupBuilder(); - - Option inputOpt = DefaultOptionCreator.inputOption().create(); - - Option datasetOpt = obuilder.withLongName("dataset").withShortName("ds").withRequired(true).withArgument( - abuilder.withName("dataset").withMinimum(1).withMaximum(1).create()).withDescription("Dataset path") - .create(); - - Option modelOpt = obuilder.withLongName("model").withShortName("m").withRequired(true).withArgument( - abuilder.withName("path").withMinimum(1).withMaximum(1).create()). - withDescription("Path to the Decision Forest").create(); - - Option outputOpt = DefaultOptionCreator.outputOption().create(); - - Option analyzeOpt = obuilder.withLongName("analyze").withShortName("a").withRequired(false).create(); - - Option mrOpt = obuilder.withLongName("mapreduce").withShortName("mr").withRequired(false).create(); - - Option helpOpt = DefaultOptionCreator.helpOption(); - - Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(datasetOpt).withOption(modelOpt) - .withOption(outputOpt).withOption(analyzeOpt).withOption(mrOpt).withOption(helpOpt).create(); - - try { - Parser parser = new Parser(); - parser.setGroup(group); - CommandLine cmdLine = parser.parse(args); - - if (cmdLine.hasOption("help")) { - CommandLineUtil.printHelp(group); - return -1; - } - - String dataName = cmdLine.getValue(inputOpt).toString(); - String datasetName = cmdLine.getValue(datasetOpt).toString(); - String modelName = cmdLine.getValue(modelOpt).toString(); - String outputName = cmdLine.hasOption(outputOpt) ? cmdLine.getValue(outputOpt).toString() : null; - analyze = cmdLine.hasOption(analyzeOpt); - useMapreduce = cmdLine.hasOption(mrOpt); - - if (log.isDebugEnabled()) { - log.debug("inout : {}", dataName); - log.debug("dataset : {}", datasetName); - log.debug("model : {}", modelName); - log.debug("output : {}", outputName); - log.debug("analyze : {}", analyze); - log.debug("mapreduce : {}", useMapreduce); - } - - dataPath = new Path(dataName); - datasetPath = new Path(datasetName); - modelPath = new Path(modelName); - if (outputName != null) { - outputPath = new Path(outputName); - } - } catch (OptionException e) { - log.warn(e.toString(), e); - CommandLineUtil.printHelp(group); - return -1; - } - - testForest(); - - return 0; - } - - private void testForest() throws IOException, ClassNotFoundException, InterruptedException { - - // make sure the output file does not exist - if (outputPath != null) { - outFS = outputPath.getFileSystem(getConf()); - if (outFS.exists(outputPath)) { - throw new IllegalArgumentException("Output path already exists"); - } - } - - // make sure the decision forest exists - FileSystem mfs = modelPath.getFileSystem(getConf()); - if (!mfs.exists(modelPath)) { - throw new IllegalArgumentException("The forest path does not exist"); - } - - // make sure the test data exists - dataFS = dataPath.getFileSystem(getConf()); - if (!dataFS.exists(dataPath)) { - throw new IllegalArgumentException("The Test data path does not exist"); - } - - if (useMapreduce) { - mapreduce(); - } else { - sequential(); - } - - } - - private void mapreduce() throws ClassNotFoundException, IOException, InterruptedException { - if (outputPath == null) { - throw new IllegalArgumentException("You must specify the ouputPath when using the mapreduce implementation"); - } - - Classifier classifier = new Classifier(modelPath, dataPath, datasetPath, outputPath, getConf()); - - classifier.run(); - - if (analyze) { - double[][] results = classifier.getResults(); - if (results != null) { - Dataset dataset = Dataset.load(getConf(), datasetPath); - if (dataset.isNumerical(dataset.getLabelId())) { - RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer(); - regressionAnalyzer.setInstances(results); - log.info("{}", regressionAnalyzer); - } else { - ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown"); - for (double[] res : results) { - analyzer.addInstance(dataset.getLabelString(res[0]), - new ClassifierResult(dataset.getLabelString(res[1]), 1.0)); - } - log.info("{}", analyzer); - } - } - } - } - - private void sequential() throws IOException { - - log.info("Loading the forest..."); - DecisionForest forest = DecisionForest.load(getConf(), modelPath); - - if (forest == null) { - log.error("No Decision Forest found!"); - return; - } - - // load the dataset - Dataset dataset = Dataset.load(getConf(), datasetPath); - DataConverter converter = new DataConverter(dataset); - - log.info("Sequential classification..."); - long time = System.currentTimeMillis(); - - Random rng = RandomUtils.getRandom(); - - List<double[]> resList = new ArrayList<>(); - if (dataFS.getFileStatus(dataPath).isDir()) { - //the input is a directory of files - testDirectory(outputPath, converter, forest, dataset, resList, rng); - } else { - // the input is one single file - testFile(dataPath, outputPath, converter, forest, dataset, resList, rng); - } - - time = System.currentTimeMillis() - time; - log.info("Classification Time: {}", DFUtils.elapsedTime(time)); - - if (analyze) { - if (dataset.isNumerical(dataset.getLabelId())) { - RegressionResultAnalyzer regressionAnalyzer = new RegressionResultAnalyzer(); - double[][] results = new double[resList.size()][2]; - regressionAnalyzer.setInstances(resList.toArray(results)); - log.info("{}", regressionAnalyzer); - } else { - ResultAnalyzer analyzer = new ResultAnalyzer(Arrays.asList(dataset.labels()), "unknown"); - for (double[] r : resList) { - analyzer.addInstance(dataset.getLabelString(r[0]), - new ClassifierResult(dataset.getLabelString(r[1]), 1.0)); - } - log.info("{}", analyzer); - } - } - } - - private void testDirectory(Path outPath, - DataConverter converter, - DecisionForest forest, - Dataset dataset, - Collection<double[]> results, - Random rng) throws IOException { - Path[] infiles = DFUtils.listOutputFiles(dataFS, dataPath); - - for (Path path : infiles) { - log.info("Classifying : {}", path); - Path outfile = outPath != null ? new Path(outPath, path.getName()).suffix(".out") : null; - testFile(path, outfile, converter, forest, dataset, results, rng); - } - } - - private void testFile(Path inPath, - Path outPath, - DataConverter converter, - DecisionForest forest, - Dataset dataset, - Collection<double[]> results, - Random rng) throws IOException { - // create the predictions file - FSDataOutputStream ofile = null; - - if (outPath != null) { - ofile = outFS.create(outPath); - } - - try (FSDataInputStream input = dataFS.open(inPath)){ - Scanner scanner = new Scanner(input, "UTF-8"); - - while (scanner.hasNextLine()) { - String line = scanner.nextLine(); - if (!line.isEmpty()) { - - Instance instance = converter.convert(line); - double prediction = forest.classify(dataset, rng, instance); - - if (ofile != null) { - ofile.writeChars(Double.toString(prediction)); // write the prediction - ofile.writeChar('\n'); - } - - results.add(new double[]{dataset.getLabel(instance), prediction}); - } - } - - scanner.close(); - } - } - - public static void main(String[] args) throws Exception { - ToolRunner.run(new Configuration(), new TestForest(), args); - } - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/Bagging.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/Bagging.java b/mr/src/main/java/org/apache/mahout/classifier/df/Bagging.java index 0ec5b55..f79a429 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/Bagging.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/Bagging.java @@ -29,6 +29,7 @@ import java.util.Random; /** * Builds a tree using bagging */ +@Deprecated public class Bagging { private static final Logger log = LoggerFactory.getLogger(Bagging.class); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java b/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java index 86f99b6..c94292c 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/DFUtils.java @@ -37,6 +37,7 @@ import org.apache.mahout.common.iterator.sequencefile.PathFilters; /** * Utility class that contains various helper methods */ +@Deprecated public final class DFUtils { private DFUtils() { http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java b/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java index bb4153e..c11cf34 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/DecisionForest.java @@ -39,6 +39,7 @@ import java.util.Random; /** * Represents a forest of decision trees. */ +@Deprecated public class DecisionForest implements Writable { private final List<Node> trees; http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/ErrorEstimate.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/ErrorEstimate.java b/mr/src/main/java/org/apache/mahout/classifier/df/ErrorEstimate.java index 2a7facc..13cd386 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/ErrorEstimate.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/ErrorEstimate.java @@ -22,6 +22,7 @@ import com.google.common.base.Preconditions; /** * Various methods to compute from the output of a random forest */ +@Deprecated public final class ErrorEstimate { private ErrorEstimate() { http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java index 8a7d945..9f84e9c 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilder.java @@ -41,6 +41,7 @@ import java.util.Random; * A classification tree is built when the criterion variable is the categorical attribute.<br> * A regression tree is built when the criterion variable is the numerical attribute. */ +@Deprecated public class DecisionTreeBuilder implements TreeBuilder { private static final Logger log = LoggerFactory.getLogger(DecisionTreeBuilder.class); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilder.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilder.java index f03698d..3392fb1 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilder.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilder.java @@ -41,6 +41,7 @@ import java.util.Random; * <br><br> * This class can be used when the criterion variable is the categorical attribute. */ +@Deprecated public class DefaultTreeBuilder implements TreeBuilder { private static final Logger log = LoggerFactory.getLogger(DefaultTreeBuilder.class); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/builder/TreeBuilder.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/builder/TreeBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/builder/TreeBuilder.java index 3d4c6d6..bf686a4 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/builder/TreeBuilder.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/builder/TreeBuilder.java @@ -25,6 +25,7 @@ import java.util.Random; /** * Abstract base class for TreeBuilders */ +@Deprecated public interface TreeBuilder { /** http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java index c68ce52..77e5ed5 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/Data.java @@ -30,6 +30,7 @@ import java.util.Random; * vectors (subset, count,...) * */ +@Deprecated public class Data implements Cloneable { private final List<Instance> instances; http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/DataConverter.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataConverter.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataConverter.java index 318c0d0..f1bdc95 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataConverter.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataConverter.java @@ -26,6 +26,7 @@ import java.util.regex.Pattern; /** * Converts String to Instance using a Dataset */ +@Deprecated public class DataConverter { private static final Pattern COMMA_SPACE = Pattern.compile("[, ]"); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java index c8d9dcd..c62dcac 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java @@ -45,6 +45,7 @@ import java.util.regex.Pattern; * adds an IGNORED first attribute that will contain a unique id for each instance, which is the line number * of the instance in the input data */ +@Deprecated public final class DataLoader { private static final Logger log = LoggerFactory.getLogger(DataLoader.class); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java index 3eb126c..0889370 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DataUtils.java @@ -26,6 +26,7 @@ import java.util.Random; /** * Helper methods that deals with data lists and arrays of values */ +@Deprecated public final class DataUtils { private DataUtils() { } http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java index 413389f..a392669 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java @@ -39,6 +39,7 @@ import java.util.Map; /** * Contains information about the attributes. */ +@Deprecated public class Dataset { /** http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorException.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorException.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorException.java index f4419f0..e7a10ff 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorException.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorException.java @@ -20,6 +20,7 @@ package org.apache.mahout.classifier.df.data; /** * Exception thrown when parsing a descriptor */ +@Deprecated public class DescriptorException extends Exception { public DescriptorException(String msg) { super(msg); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java index f2e0ce4..aadedbd 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/DescriptorUtils.java @@ -27,6 +27,7 @@ import java.util.Locale; /** * Contains various methods that deal with descriptor strings */ +@Deprecated public final class DescriptorUtils { private static final Splitter SPACE = Splitter.on(' ').omitEmptyStrings(); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/Instance.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/Instance.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/Instance.java index 3abf124..6a23cb8 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/data/Instance.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/Instance.java @@ -22,6 +22,7 @@ import org.apache.mahout.math.Vector; /** * Represents one data instance. */ +@Deprecated public class Instance { /** attributes, except LABEL and IGNORED */ http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Condition.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Condition.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Condition.java index b199834..c16ca3f 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Condition.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Condition.java @@ -22,6 +22,7 @@ import org.apache.mahout.classifier.df.data.Instance; /** * Condition on Instance */ +@Deprecated public abstract class Condition { /** http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Equals.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Equals.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Equals.java index 73f4ef6..c51082b 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Equals.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Equals.java @@ -22,6 +22,7 @@ import org.apache.mahout.classifier.df.data.Instance; /** * True if a given attribute has a given value */ +@Deprecated public class Equals extends Condition { private final int attr; http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/GreaterOrEquals.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/GreaterOrEquals.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/GreaterOrEquals.java index 2db3f2e..3e3d1a4 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/GreaterOrEquals.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/GreaterOrEquals.java @@ -22,6 +22,7 @@ import org.apache.mahout.classifier.df.data.Instance; /** * True if a given attribute has a value "greater or equal" than a given value */ +@Deprecated public class GreaterOrEquals extends Condition { private final int attr; http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Lesser.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Lesser.java b/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Lesser.java index 4e49eb7..577cb24 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Lesser.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/data/conditions/Lesser.java @@ -22,6 +22,7 @@ import org.apache.mahout.classifier.df.data.Instance; /** * True if a given attribute has a value "lesser" than a given value */ +@Deprecated public class Lesser extends Condition { private final int attr; http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Builder.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Builder.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Builder.java index da2448f..32d7b5c 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Builder.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Builder.java @@ -47,6 +47,7 @@ import java.util.Comparator; * </ul> * */ +@Deprecated public abstract class Builder { private static final Logger log = LoggerFactory.getLogger(Builder.class); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java index bdbaf2b..1a35cfe 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/Classifier.java @@ -54,6 +54,7 @@ import java.util.Random; /** * Mapreduce implementation that classifies the Input data using a previousely built decision forest */ +@Deprecated public class Classifier { private static final Logger log = LoggerFactory.getLogger(Classifier.class); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredMapper.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredMapper.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredMapper.java index cfd93cd..4d0f3f1 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredMapper.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredMapper.java @@ -28,6 +28,7 @@ import java.io.IOException; /** * Base class for Mapred mappers. Loads common parameters from the job */ +@Deprecated public class MapredMapper<KEYIN,VALUEIN,KEYOUT,VALUEOUT> extends Mapper<KEYIN,VALUEIN,KEYOUT,VALUEOUT> { private boolean noOutput; http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredOutput.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredOutput.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredOutput.java index b177ce5..56cabb2 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredOutput.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/MapredOutput.java @@ -30,6 +30,7 @@ import java.util.Arrays; * Used by various implementation to return the results of a build.<br> * Contains a grown tree and and its oob predictions. */ +@Deprecated public class MapredOutput implements Writable, Cloneable { private Node tree; http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java index 4c33e73..86d4404 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemBuilder.java @@ -44,6 +44,7 @@ import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable; * MapReduce implementation where each mapper loads a full copy of the data in-memory. The forest trees are * splitted across all the mappers */ +@Deprecated public class InMemBuilder extends Builder { public InMemBuilder(TreeBuilder treeBuilder, Path dataPath, Path datasetPath, Long seed, Configuration conf) { http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java index 51e5a3e..c3b2fa3 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java @@ -45,6 +45,7 @@ import org.slf4j.LoggerFactory; * each input split contains a subset of the trees.<br> * The number of splits is equal to the number of requested splits */ +@Deprecated public class InMemInputFormat extends InputFormat<IntWritable,NullWritable> { private static final Logger log = LoggerFactory.getLogger(InMemInputSplit.class); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemMapper.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemMapper.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemMapper.java index 9e7e176..2fc67ba 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemMapper.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemMapper.java @@ -42,6 +42,7 @@ import java.util.Random; * In-memory mapper that grows the trees using a full copy of the data loaded in-memory. The number of trees * to grow is determined by the current InMemInputSplit. */ +@Deprecated public class InMemMapper extends MapredMapper<IntWritable,NullWritable,IntWritable,MapredOutput> { private static final Logger log = LoggerFactory.getLogger(InMemMapper.class); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.java index 1c9a13b..9236af3 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilder.java @@ -45,6 +45,7 @@ import java.util.List; /** * Builds a random forest using partial data. Each mapper uses only the data given by its InputSplit */ +@Deprecated public class PartialBuilder extends Builder { private static final Logger log = LoggerFactory.getLogger(PartialBuilder.class); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java index 648472c..9474236 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1Mapper.java @@ -42,6 +42,7 @@ import java.util.Random; * First step of the Partial Data Builder. Builds the trees using the data available in the InputSplit. * Predict the oob classes for each tree in its growing partition (input split). */ +@Deprecated public class Step1Mapper extends MapredMapper<LongWritable,Text,TreeID,MapredOutput> { private static final Logger log = LoggerFactory.getLogger(Step1Mapper.class); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/TreeID.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/TreeID.java b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/TreeID.java index d0ed5df..c296061 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/TreeID.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/mapreduce/partial/TreeID.java @@ -23,6 +23,7 @@ import org.apache.hadoop.io.LongWritable; /** * Indicates both the tree and the data partition used to grow the tree */ +@Deprecated public class TreeID extends LongWritable implements Cloneable { public static final int MAX_TREEID = 100000; http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/node/CategoricalNode.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/node/CategoricalNode.java b/mr/src/main/java/org/apache/mahout/classifier/df/node/CategoricalNode.java index 3484866..1f91842 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/node/CategoricalNode.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/node/CategoricalNode.java @@ -25,7 +25,7 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.Arrays; - +@Deprecated public class CategoricalNode extends Node { private int attr; http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/node/Leaf.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/node/Leaf.java b/mr/src/main/java/org/apache/mahout/classifier/df/node/Leaf.java index 285a134..3360bb5 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/node/Leaf.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/node/Leaf.java @@ -26,6 +26,7 @@ import java.io.IOException; /** * Represents a Leaf node */ +@Deprecated public class Leaf extends Node { private static final double EPSILON = 1.0e-6; http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/node/Node.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/node/Node.java b/mr/src/main/java/org/apache/mahout/classifier/df/node/Node.java index cb6deb2..73d516d 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/node/Node.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/node/Node.java @@ -27,6 +27,7 @@ import java.io.IOException; /** * Represents an abstract node of a decision tree */ +@Deprecated public abstract class Node implements Writable { protected enum Type { http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/node/NumericalNode.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/node/NumericalNode.java b/mr/src/main/java/org/apache/mahout/classifier/df/node/NumericalNode.java index 19b3e57..aa02089 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/node/NumericalNode.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/node/NumericalNode.java @@ -26,6 +26,7 @@ import java.io.IOException; /** * Represents a node that splits using a numerical attribute */ +@Deprecated public class NumericalNode extends Node { /** numerical attribute to split for */ private int attr; http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java b/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java index d7f023b..7ef907e 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/ref/SequentialBuilder.java @@ -32,6 +32,7 @@ import java.util.Random; /** * Builds a Random Decision Forest using a given TreeBuilder to grow the trees */ +@Deprecated public class SequentialBuilder { private static final Logger log = LoggerFactory.getLogger(SequentialBuilder.class); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/split/DefaultIgSplit.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/split/DefaultIgSplit.java b/mr/src/main/java/org/apache/mahout/classifier/df/split/DefaultIgSplit.java index 38d3007..3f1cfdf 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/split/DefaultIgSplit.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/split/DefaultIgSplit.java @@ -25,6 +25,7 @@ import java.util.Arrays; /** * Default, not optimized, implementation of IgSplit */ +@Deprecated public class DefaultIgSplit extends IgSplit { /** used by entropy() */ http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/split/IgSplit.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/split/IgSplit.java b/mr/src/main/java/org/apache/mahout/classifier/df/split/IgSplit.java index da37cf3..aff94e1 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/split/IgSplit.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/split/IgSplit.java @@ -22,6 +22,7 @@ import org.apache.mahout.classifier.df.data.Data; /** * Computes the best split using the Information Gain measure */ +@Deprecated public abstract class IgSplit { static final double LOG2 = Math.log(2.0); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/split/OptIgSplit.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/split/OptIgSplit.java b/mr/src/main/java/org/apache/mahout/classifier/df/split/OptIgSplit.java index 7b15d2a..e3ab95f 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/split/OptIgSplit.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/split/OptIgSplit.java @@ -50,6 +50,7 @@ import java.util.TreeSet; * pw.close() * } */ +@Deprecated public class OptIgSplit extends IgSplit { private static final int MAX_NUMERIC_SPLITS = 16; http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java b/mr/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java index 2974bcb..38695a3 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java @@ -29,6 +29,7 @@ import java.util.Comparator; * Regression problem implementation of IgSplit. This class can be used when the criterion variable is the numerical * attribute. */ +@Deprecated public class RegressionSplit extends IgSplit { /** http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/split/Split.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/split/Split.java b/mr/src/main/java/org/apache/mahout/classifier/df/split/Split.java index bf079de..2a6a322 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/split/Split.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/split/Split.java @@ -22,6 +22,7 @@ import java.util.Locale; /** * Contains enough information to identify each split */ +@Deprecated public final class Split { private final int attr; http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/tools/ForestVisualizer.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/tools/ForestVisualizer.java b/mr/src/main/java/org/apache/mahout/classifier/df/tools/ForestVisualizer.java index 3b9d4ee..b421c4e 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/tools/ForestVisualizer.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/tools/ForestVisualizer.java @@ -42,6 +42,7 @@ import org.slf4j.LoggerFactory; /** * This tool is to visualize the Decision Forest */ +@Deprecated public final class ForestVisualizer { private static final Logger log = LoggerFactory.getLogger(ForestVisualizer.class); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/tools/Frequencies.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/tools/Frequencies.java b/mr/src/main/java/org/apache/mahout/classifier/df/tools/Frequencies.java index 4586540..c37af4e 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/tools/Frequencies.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/tools/Frequencies.java @@ -42,6 +42,7 @@ import java.util.Arrays; * Compute the frequency distribution of the "class label"<br> * This class can be used when the criterion variable is the categorical attribute. */ +@Deprecated public final class Frequencies extends Configured implements Tool { private static final Logger log = LoggerFactory.getLogger(Frequencies.class); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/tools/FrequenciesJob.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/tools/FrequenciesJob.java b/mr/src/main/java/org/apache/mahout/classifier/df/tools/FrequenciesJob.java index d02d974..9d7e2ff 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/tools/FrequenciesJob.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/tools/FrequenciesJob.java @@ -53,6 +53,7 @@ import java.util.Arrays; * Temporary class used to compute the frequency distribution of the "class attribute".<br> * This class can be used when the criterion variable is the categorical attribute. */ +@Deprecated public class FrequenciesJob { private static final Logger log = LoggerFactory.getLogger(FrequenciesJob.class); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/tools/TreeVisualizer.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/tools/TreeVisualizer.java b/mr/src/main/java/org/apache/mahout/classifier/df/tools/TreeVisualizer.java index d82b383..07928ab 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/tools/TreeVisualizer.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/tools/TreeVisualizer.java @@ -34,6 +34,7 @@ import org.apache.mahout.classifier.df.node.NumericalNode; /** * This tool is to visualize the Decision tree */ +@Deprecated public final class TreeVisualizer { private TreeVisualizer() {} http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java b/mr/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java index 06876e1..e1b55ab 100644 --- a/mr/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java +++ b/mr/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java @@ -52,6 +52,7 @@ import org.slf4j.LoggerFactory; * partitions.<br> * This class can be used when the criterion variable is the categorical attribute. */ +@Deprecated public final class UDistrib { private static final Logger log = LoggerFactory.getLogger(UDistrib.class); http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java ---------------------------------------------------------------------- diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java index f1ec07f..036d473 100644 --- a/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java +++ b/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java @@ -32,7 +32,7 @@ import org.apache.mahout.common.RandomUtils; import org.junit.Test; import com.google.common.collect.Lists; - +@Deprecated public final class DecisionForestTest extends MahoutTestCase { private static final String[] TRAIN_DATA = {"sunny,85,85,FALSE,no", http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilderTest.java ---------------------------------------------------------------------- diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilderTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilderTest.java index 85244c8..56b4787 100644 --- a/mr/src/test/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilderTest.java +++ b/mr/src/test/java/org/apache/mahout/classifier/df/builder/DecisionTreeBuilderTest.java @@ -25,7 +25,7 @@ import org.apache.commons.lang3.ArrayUtils; import org.apache.mahout.common.MahoutTestCase; import org.apache.mahout.common.RandomUtils; import org.junit.Test; - +@Deprecated public final class DecisionTreeBuilderTest extends MahoutTestCase { /** http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilderTest.java ---------------------------------------------------------------------- diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilderTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilderTest.java index 78fe65f..87fd44b 100644 --- a/mr/src/test/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilderTest.java +++ b/mr/src/test/java/org/apache/mahout/classifier/df/builder/DefaultTreeBuilderTest.java @@ -24,7 +24,7 @@ import org.apache.commons.lang3.ArrayUtils; import org.apache.mahout.common.MahoutTestCase; import org.apache.mahout.common.RandomUtils; import org.junit.Test; - +@Deprecated public final class DefaultTreeBuilderTest extends MahoutTestCase { /** http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/builder/InfiniteRecursionTest.java ---------------------------------------------------------------------- diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/builder/InfiniteRecursionTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/builder/InfiniteRecursionTest.java index 16e7499..8ebc721 100644 --- a/mr/src/test/java/org/apache/mahout/classifier/df/builder/InfiniteRecursionTest.java +++ b/mr/src/test/java/org/apache/mahout/classifier/df/builder/InfiniteRecursionTest.java @@ -26,7 +26,7 @@ import org.apache.mahout.classifier.df.data.Utils; import org.junit.Test; import java.util.Random; - +@Deprecated public final class InfiniteRecursionTest extends MahoutTestCase { private static final double[][] dData = { http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/data/DataConverterTest.java ---------------------------------------------------------------------- diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/data/DataConverterTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/data/DataConverterTest.java index 39858cf..dfae61d 100644 --- a/mr/src/test/java/org/apache/mahout/classifier/df/data/DataConverterTest.java +++ b/mr/src/test/java/org/apache/mahout/classifier/df/data/DataConverterTest.java @@ -22,7 +22,7 @@ import java.util.Random; import org.apache.mahout.common.MahoutTestCase; import org.apache.mahout.common.RandomUtils; import org.junit.Test; - +@Deprecated public final class DataConverterTest extends MahoutTestCase { private static final int ATTRIBUTE_COUNT = 10; http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/data/DataLoaderTest.java ---------------------------------------------------------------------- diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/data/DataLoaderTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/data/DataLoaderTest.java index dce23db..aeb69fc 100644 --- a/mr/src/test/java/org/apache/mahout/classifier/df/data/DataLoaderTest.java +++ b/mr/src/test/java/org/apache/mahout/classifier/df/data/DataLoaderTest.java @@ -28,7 +28,7 @@ import org.apache.mahout.common.MahoutTestCase; import org.apache.mahout.common.RandomUtils; import org.apache.mahout.classifier.df.data.Dataset.Attribute; import org.junit.Test; - +@Deprecated public final class DataLoaderTest extends MahoutTestCase { private Random rng; http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/data/DataTest.java ---------------------------------------------------------------------- diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/data/DataTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/data/DataTest.java index 86e4461..70ed7f6 100644 --- a/mr/src/test/java/org/apache/mahout/classifier/df/data/DataTest.java +++ b/mr/src/test/java/org/apache/mahout/classifier/df/data/DataTest.java @@ -24,7 +24,7 @@ import org.apache.mahout.common.MahoutTestCase; import org.apache.mahout.common.RandomUtils; import org.apache.mahout.classifier.df.data.conditions.Condition; import org.junit.Test; - +@Deprecated public class DataTest extends MahoutTestCase { private static final int ATTRIBUTE_COUNT = 10; http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/data/DatasetTest.java ---------------------------------------------------------------------- diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/data/DatasetTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/data/DatasetTest.java index 3cdf65a..e5c9ee7 100644 --- a/mr/src/test/java/org/apache/mahout/classifier/df/data/DatasetTest.java +++ b/mr/src/test/java/org/apache/mahout/classifier/df/data/DatasetTest.java @@ -18,7 +18,7 @@ package org.apache.mahout.classifier.df.data; import org.apache.mahout.common.MahoutTestCase; import org.junit.Test; - +@Deprecated public final class DatasetTest extends MahoutTestCase { @Test http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/data/DescriptorUtilsTest.java ---------------------------------------------------------------------- diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/data/DescriptorUtilsTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/data/DescriptorUtilsTest.java index 121e1f8..619f067 100644 --- a/mr/src/test/java/org/apache/mahout/classifier/df/data/DescriptorUtilsTest.java +++ b/mr/src/test/java/org/apache/mahout/classifier/df/data/DescriptorUtilsTest.java @@ -23,7 +23,7 @@ import org.apache.mahout.common.MahoutTestCase; import org.apache.mahout.common.RandomUtils; import org.apache.mahout.classifier.df.data.Dataset.Attribute; import org.junit.Test; - +@Deprecated public final class DescriptorUtilsTest extends MahoutTestCase { /** http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java ---------------------------------------------------------------------- diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java b/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java index 1cf8b6a..9b51ec9 100644 --- a/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java +++ b/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java @@ -36,6 +36,7 @@ import org.apache.mahout.common.MahoutTestCase; * Helper methods used by the tests * */ +@Deprecated public final class Utils { private Utils() {} http://git-wip-us.apache.org/repos/asf/mahout/blob/1ffa3a46/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormatTest.java ---------------------------------------------------------------------- diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormatTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormatTest.java index 0a4a034..6a17aa2 100644 --- a/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormatTest.java +++ b/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormatTest.java @@ -28,7 +28,7 @@ import org.apache.mahout.classifier.df.mapreduce.Builder; import org.apache.mahout.classifier.df.mapreduce.inmem.InMemInputFormat.InMemInputSplit; import org.apache.mahout.classifier.df.mapreduce.inmem.InMemInputFormat.InMemRecordReader; import org.junit.Test; - +@Deprecated public final class InMemInputFormatTest extends MahoutTestCase { @Test
