Author: drew
Date: Mon May 31 02:04:01 2010
New Revision: 949649

URL: http://svn.apache.org/viewvc?rev=949649&view=rev
Log:
MAHOUT-398: eliminated separate tfidf directory for tfidf vector output.

Modified:
    mahout/trunk/examples/bin/build-reuters.sh
    mahout/trunk/utils/src/main/java/org/apache/mahout/text/SparseVectorsFromSequenceFiles.java

Modified: mahout/trunk/examples/bin/build-reuters.sh
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/bin/build-reuters.sh?rev=949649&r1=949648&r2=949649&view=diff
==============================================================================
--- mahout/trunk/examples/bin/build-reuters.sh (original)
+++ mahout/trunk/examples/bin/build-reuters.sh Mon May 31 02:04:01 2010
@@ -41,9 +41,9 @@ cd ../..
 ./bin/mahout seqdirectory -i ./examples/bin/work/reuters-out/ -o ./examples/bin/work/reuters-out-seqdir -c UTF-8 -chunk 5
 
 # to use k-Means clustering, uncomment the next three lines
-#./bin/mahout seq2sparse -i ./examples/bin/work/reuters-out-seqdir/ -o ./examples/bin/work/reuters-out-seqdir-sparse
-#./bin/mahout kmeans -i ./examples/bin/work/reuters-out-seqdir-sparse/tfidf/tfidf-vectors/ -c ./examples/bin/work/clusters -o ./examples/bin/work/reuters-kmeans -x 10 -k 20 -ow
-#./bin/mahout clusterdump -s examples/bin/work/reuters-kmeans/clusters-10 -d examples/bin/work/reuters-out-seqdir-sparse/dictionary.file-0 -dt sequencefile -b 100 -n 20
+./bin/mahout seq2sparse -i ./examples/bin/work/reuters-out-seqdir/ -o ./examples/bin/work/reuters-out-seqdir-sparse
+./bin/mahout kmeans -i ./examples/bin/work/reuters-out-seqdir-sparse/tfidf-vectors/ -c ./examples/bin/work/clusters -o ./examples/bin/work/reuters-kmeans -x 10 -k 20 -ow
+./bin/mahout clusterdump -s examples/bin/work/reuters-kmeans/clusters-10 -d examples/bin/work/reuters-out-seqdir-sparse/dictionary.file-0 -dt sequencefile -b 100 -n 20
 
 # to use LDA clustering, uncomment the next three lines
 #./bin/mahout seq2sparse -i ./examples/bin/work/reuters-out-seqdir/ -o ./examples/bin/work/reuters-out-seqdir-sparse -wt tf -seq -nr 3

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/text/SparseVectorsFromSequenceFiles.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/text/SparseVectorsFromSequenceFiles.java?rev=949649&r1=949648&r2=949649&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/text/SparseVectorsFromSequenceFiles.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/text/SparseVectorsFromSequenceFiles.java Mon May 31 02:04:01 2010
@@ -223,7 +223,7 @@ public final class SparseVectorsFromSequ
       if (processIdf) {
         TFIDFConverter.processTfIdf(
           new Path(outputDir, DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER),
-          new Path(outputDir, TFIDFConverter.TFIDF_OUTPUT_FOLDER), chunkSize, minDf, maxDFPercent, norm,
+          outputDir, chunkSize, minDf, maxDFPercent, norm,
           sequentialAccessOutput, reduceTasks);
       }
     } catch (OptionException e) {


Reply via email to