Author: gsingers
Date: Fri Nov 4 21:56:39 2011
New Revision: 1197803
URL: http://svn.apache.org/viewvc?rev=1197803&view=rev
Log:
MAHOUT-344: added minhash to build-asf-email.sh and to driver.classes.props
Modified:
mahout/trunk/examples/bin/build-asf-email.sh
mahout/trunk/src/conf/driver.classes.props
Modified: mahout/trunk/examples/bin/build-asf-email.sh
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/bin/build-asf-email.sh?rev=1197803&r1=1197802&r2=1197803&view=diff
==============================================================================
--- mahout/trunk/examples/bin/build-asf-email.sh (original)
+++ mahout/trunk/examples/bin/build-asf-email.sh Fri Nov 4 21:56:39 2011
@@ -61,11 +61,12 @@ if [ "x$alg" == "xrecommender" ]; then
elif [ "x$alg" == "xclustering" ]; then
MAIL_OUT="$OUT/clustering/seq-files"
SEQ2SP="$OUT/clustering/seq2sparse"
- algorithm=( kmeans dirichlet )
+ algorithm=( kmeans dirichlet minhash )
echo "Please select a number to choose the corresponding algorithm to run"
echo "1. ${algorithm[0]}"
echo "2. ${algorithm[1]}"
+ echo "3. ${algorithm[2]}"
read -p "Enter your choice : " choice
echo "ok. You chose $choice and we'll use ${algorithm[$choice-1]}"
@@ -89,6 +90,10 @@ elif [ "x$alg" == "xclustering" ]; then
CLUST_OUT="$OUT/clustering/dirichlet"
echo "Running Dirichlet"
$MAHOUT dirichlet --input "$SEQ2SP/tfidf-vectors" --output $CLUST_OUT -k 50 --maxIter 20 --distanceMeasure org.apache.mahout.common.distance.CosineDistanceMeasure --method mapreduce
+ elif [ "x$nbalg" == "xminhash" ]; then
+ CLUST_OUT="$OUT/clustering/minhash"
+ echo "Running Minhash"
+ $MAHOUT minhash --input "$SEQ2SP/tfidf-vectors" --output $CLUST_OUT
fi
#classification
Modified: mahout/trunk/src/conf/driver.classes.props
URL:
http://svn.apache.org/viewvc/mahout/trunk/src/conf/driver.classes.props?rev=1197803&r1=1197802&r2=1197803&view=diff
==============================================================================
--- mahout/trunk/src/conf/driver.classes.props (original)
+++ mahout/trunk/src/conf/driver.classes.props Fri Nov 4 21:56:39 2011
@@ -5,6 +5,7 @@ org.apache.mahout.cf.taste.hadoop.als.Da
org.apache.mahout.cf.taste.hadoop.als.FactorizationEvaluator = evaluateFactorization : compute RMSE and MAE of a rating matrix factorization against probes
org.apache.mahout.clustering.kmeans.KMeansDriver = kmeans : K-means clustering
org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver = fkmeans : Fuzzy K-means clustering
+org.apache.mahout.clustering.minhash.MinHashDriver = minhash : Run Minhash clustering
org.apache.mahout.clustering.lda.LDADriver = lda : Latent Dirchlet Allocation
org.apache.mahout.clustering.lda.LDAPrintTopics = ldatopics : LDA Print Topics
org.apache.mahout.fpm.pfpgrowth.FPGrowthDriver = fpg : Frequent Pattern Growth
@@ -48,4 +49,4 @@ org.apache.mahout.utils.SplitInput = spl
org.apache.mahout.classifier.naivebayes.training.TrainNaiveBayesJob = trainnb : Train the Vector-based Bayes classifier
org.apache.mahout.classifier.naivebayes.test.TestNaiveBayesDriver = testnb : Test the Vector-based Bayes classifier
org.apache.mahout.classifier.ConfusionMatrixDumper = cmdump : Dump confusion matrix in HTML or text formats
-org.apache.mahout.utils.MatrixDumper = matrixdump : Dump matrix in CSV format
\ No newline at end of file
+org.apache.mahout.utils.MatrixDumper = matrixdump : Dump matrix in CSV format