Author: robinanil
Date: Mon Jun  4 01:39:29 2012
New Revision: 1345814

URL: http://svn.apache.org/viewvc?rev=1345814&view=rev
Log:
MAHOUT-1006 Final changes, fixes some flag issues and adds an option in example 
script to run classifier in cnaivebayes and naivebayes mode

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
    mahout/trunk/examples/bin/classify-20newsgroups.sh

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java?rev=1345814&r1=1345813&r2=1345814&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java Mon Jun  4 01:39:29 2012
@@ -72,7 +72,7 @@ public class TestNaiveBayesDriver extend
     addOption(addOption(DefaultOptionCreator.overwriteOption().create()));
     addOption("model", "m", "The path to the model built during training", true);
     addOption(buildOption("testComplementary", "c", "test complementary?", false, false, String.valueOf(false)));
-    addOption(buildOption("runSequential", "seq", "run sequential?", true, false, String.valueOf(false)));
+    addOption(buildOption("runSequential", "seq", "run sequential?", false, false, String.valueOf(false)));
     addOption("labelIndex", "l", "The path to the location of the label index", true);
     Map<String, List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
@@ -82,8 +82,8 @@ public class TestNaiveBayesDriver extend
       HadoopUtil.delete(getConf(), getOutputPath());
     }
     
-    boolean complementary = parsedArgs.containsKey("testComplementary");
-    boolean sequential = Boolean.parseBoolean(getOption("runSequential"));
+    boolean complementary = hasOption("testComplementary");
+    boolean sequential = hasOption("runSequential");
     if (sequential) {
       FileSystem fs = FileSystem.get(getConf());
       NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf());

Modified: mahout/trunk/examples/bin/classify-20newsgroups.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/classify-20newsgroups.sh?rev=1345814&r1=1345813&r2=1345814&view=diff
==============================================================================
--- mahout/trunk/examples/bin/classify-20newsgroups.sh (original)
+++ mahout/trunk/examples/bin/classify-20newsgroups.sh Mon Jun  4 01:39:29 2012
@@ -34,14 +34,15 @@ fi
 START_PATH=`pwd`
 
 WORK_DIR=/tmp/mahout-work-${USER}
-algorithm=( naivebayes sgd clean)
+algorithm=( cnaivebayes naivebayes sgd clean)
 if [ -n "$1" ]; then
   choice=$1
 else
   echo "Please select a number to choose the corresponding task to run"
   echo "1. ${algorithm[0]}"
   echo "2. ${algorithm[1]}"
-  echo "3. ${algorithm[2]} -- cleans up the work area in $WORK_DIR"
+  echo "3. ${algorithm[2]}"
+  echo "4. ${algorithm[3]} -- cleans up the work area in $WORK_DIR"
   read -p "Enter your choice : " choice
 fi
 
@@ -68,9 +69,15 @@ cd ../..
 
 set -e
 
-if [ "x$alg" == "xnaivebayes" ]; then
+if [ "x$alg" == "xnaivebayes"  -o  "x$alg" == "xcnaivebayes" ]; then
+  c=""
+  
+  if [ "x$alg" == "xcnaivebayes" ]; then
+    c=" -c"
+  fi
+  
   set -x
-  echo "Preparing Training Data"
+  echo "Preparing 20newsgroups data"
   rm -rf ${WORK_DIR}/20news-all
   mkdir ${WORK_DIR}/20news-all
   cp -R ${WORK_DIR}/20news-bydate/*/* ${WORK_DIR}/20news-all
@@ -85,7 +92,7 @@ if [ "x$alg" == "xnaivebayes" ]; then
     -i ${WORK_DIR}/20news-seq \
     -o ${WORK_DIR}/20news-vectors  -lnorm -nv  -wt tfidf
 
-  echo "Creating training and holdout set with a random 20% split of whole dataset"
+  echo "Creating training and holdout set with a random 80-20 split of the generated vector dataset"
   ./bin/mahout split \
     -i ${WORK_DIR}/20news-vectors/tfidf-vectors \
     --trainingOutput ${WORK_DIR}/20news-train-vectors \
@@ -97,7 +104,7 @@ if [ "x$alg" == "xnaivebayes" ]; then
     -i ${WORK_DIR}/20news-train-vectors -el \
     -o ${WORK_DIR}/model \
     -li ${WORK_DIR}/labelindex \
-    -ow -c
+    -ow $c
   
   echo "Self testing on training set"
 
@@ -105,7 +112,7 @@ if [ "x$alg" == "xnaivebayes" ]; then
     -i ${WORK_DIR}/20news-train-vectors\
     -m ${WORK_DIR}/model \
     -l ${WORK_DIR}/labelindex \
-    -ow -o ${WORK_DIR}/20news-testing 
+    -ow -o ${WORK_DIR}/20news-testing $c 
 
   echo "Testing on holdout set"
 
@@ -113,7 +120,7 @@ if [ "x$alg" == "xnaivebayes" ]; then
     -i ${WORK_DIR}/20news-test-vectors\
     -m ${WORK_DIR}/model \
     -l ${WORK_DIR}/labelindex \
-    -ow -o ${WORK_DIR}/20news-testing 
+    -ow -o ${WORK_DIR}/20news-testing $c
 
 elif [ "x$alg" == "xsgd" ]; then
   if [ ! -e "/tmp/news-group.model" ]; then


Reply via email to