Author: gsingers
Date: Fri Nov  4 21:56:39 2011
New Revision: 1197803

URL: http://svn.apache.org/viewvc?rev=1197803&view=rev
Log:
MAHOUT-344: added minhash to build-asf-email.sh and to driver.classes.props

Modified:
    mahout/trunk/examples/bin/build-asf-email.sh
    mahout/trunk/src/conf/driver.classes.props

Modified: mahout/trunk/examples/bin/build-asf-email.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/build-asf-email.sh?rev=1197803&r1=1197802&r2=1197803&view=diff
==============================================================================
--- mahout/trunk/examples/bin/build-asf-email.sh (original)
+++ mahout/trunk/examples/bin/build-asf-email.sh Fri Nov  4 21:56:39 2011
@@ -61,11 +61,12 @@ if [ "x$alg" == "xrecommender" ]; then
 elif [ "x$alg" == "xclustering" ]; then
   MAIL_OUT="$OUT/clustering/seq-files"
   SEQ2SP="$OUT/clustering/seq2sparse"
-  algorithm=( kmeans dirichlet )
+  algorithm=( kmeans dirichlet minhash )
 
   echo "Please select a number to choose the corresponding algorithm to run"
   echo "1. ${algorithm[0]}"
   echo "2. ${algorithm[1]}"
+  echo "3. ${algorithm[2]}"
   read -p "Enter your choice : " choice
 
   echo "ok. You chose $choice and we'll use ${algorithm[$choice-1]}"
@@ -89,6 +90,10 @@ elif [ "x$alg" == "xclustering" ]; then
     CLUST_OUT="$OUT/clustering/dirichlet"
     echo "Running Dirichlet"
     $MAHOUT dirichlet --input "$SEQ2SP/tfidf-vectors" --output $CLUST_OUT -k 50 --maxIter 20 --distanceMeasure org.apache.mahout.common.distance.CosineDistanceMeasure --method mapreduce
+  elif [ "x$nbalg" == "xminhash"  ]; then
+    CLUST_OUT="$OUT/clustering/minhash"
+    echo "Running Minhash"
+    $MAHOUT minhash --input "$SEQ2SP/tfidf-vectors" --output $CLUST_OUT
   fi
 
 #classification

Modified: mahout/trunk/src/conf/driver.classes.props
URL: http://svn.apache.org/viewvc/mahout/trunk/src/conf/driver.classes.props?rev=1197803&r1=1197802&r2=1197803&view=diff
==============================================================================
--- mahout/trunk/src/conf/driver.classes.props (original)
+++ mahout/trunk/src/conf/driver.classes.props Fri Nov  4 21:56:39 2011
@@ -5,6 +5,7 @@ org.apache.mahout.cf.taste.hadoop.als.Da
 org.apache.mahout.cf.taste.hadoop.als.FactorizationEvaluator = evaluateFactorization : compute RMSE and MAE of a rating matrix factorization against probes
 org.apache.mahout.clustering.kmeans.KMeansDriver = kmeans : K-means clustering
 org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver = fkmeans : Fuzzy K-means clustering
+org.apache.mahout.clustering.minhash.MinHashDriver = minhash : Run Minhash clustering
 org.apache.mahout.clustering.lda.LDADriver = lda : Latent Dirchlet Allocation
 org.apache.mahout.clustering.lda.LDAPrintTopics = ldatopics : LDA Print Topics
 org.apache.mahout.fpm.pfpgrowth.FPGrowthDriver = fpg : Frequent Pattern Growth
@@ -48,4 +49,4 @@ org.apache.mahout.utils.SplitInput = spl
 org.apache.mahout.classifier.naivebayes.training.TrainNaiveBayesJob = trainnb : Train the Vector-based Bayes classifier
 org.apache.mahout.classifier.naivebayes.test.TestNaiveBayesDriver = testnb : Test the Vector-based Bayes classifier
 org.apache.mahout.classifier.ConfusionMatrixDumper = cmdump : Dump confusion matrix in HTML or text formats
-org.apache.mahout.utils.MatrixDumper = matrixdump : Dump matrix in CSV format
\ No newline at end of file
+org.apache.mahout.utils.MatrixDumper = matrixdump : Dump matrix in CSV format


Reply via email to