Author: srowen
Date: Thu Mar 31 21:38:45 2011
New Revision: 1087447
URL: http://svn.apache.org/viewvc?rev=1087447&view=rev
Log:
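MAHOUT-647: Fix two small seq2sparse bugs: (1) CollocDriver now sets LLRReducer.MIN_LLR on the Configuration before the Job is constructed from it, rather than after; (2) TFIDFPartialVectorReducer now computes the document frequency as a floating-point percentage (df * 100.0 / vectorCount) before comparing it with maxDfPercent.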
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFPartialVectorReducer.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java?rev=1087447&r1=1087446&r2=1087447&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
Thu Mar 31 21:38:45 2011
@@ -253,7 +253,8 @@ public final class CollocDriver extends
Configuration conf = new Configuration(baseConf);
conf.setLong(LLRReducer.NGRAM_TOTAL, nGramTotal);
conf.setBoolean(EMIT_UNIGRAMS, emitUnigrams);
-
+ conf.setFloat(LLRReducer.MIN_LLR, minLLRValue);
+
Job job = new Job(conf);
job.setJobName(CollocDriver.class.getSimpleName() + ".computeNGrams: " +
output);
job.setJarByClass(CollocDriver.class);
@@ -274,7 +275,6 @@ public final class CollocDriver extends
job.setReducerClass(LLRReducer.class);
job.setNumReduceTasks(reduceTasks);
- conf.setFloat(LLRReducer.MIN_LLR, minLLRValue);
job.waitForCompletion(true);
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFPartialVectorReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFPartialVectorReducer.java?rev=1087447&r1=1087446&r2=1087447&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFPartialVectorReducer.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFPartialVectorReducer.java
Thu Mar 31 21:38:45 2011
@@ -78,7 +78,7 @@ public class TFIDFPartialVectorReducer e
continue;
}
long df = dictionary.get(e.index());
- if (df / vectorCount > maxDfPercent) {
+ if (df * 100.0 / vectorCount > maxDfPercent) {
continue;
}
if (df < minDf) {