Author: robinanil
Date: Sun May 13 18:36:21 2012
New Revision: 1337959
URL: http://svn.apache.org/viewvc?rev=1337959&view=rev
Log:
MAHOUT-1014 Moving newsgroups helper to a common location
Added:
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java
- copied, changed from r1337932,
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/NewsgroupHelper.java
Removed:
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/NewsgroupHelper.java
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SGDHelper.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
Copied:
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java
(from r1337932,
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/NewsgroupHelper.java)
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/NewsgroupHelper.java&r1=1337932&r2=1337959&rev=1337959&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/NewsgroupHelper.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java
Sun May 13 18:36:21 2012
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.mahout.classifier.sgd;
+package org.apache.mahout.classifier;
import com.google.common.base.Charsets;
import com.google.common.collect.ConcurrentHashMultiset;
@@ -45,7 +45,7 @@ import java.util.Date;
import java.util.Locale;
import java.util.Random;
-final class NewsgroupHelper {
+public final class NewsgroupHelper {
private static final SimpleDateFormat[] DATE_FORMATS = {
new SimpleDateFormat("", Locale.ENGLISH),
@@ -63,19 +63,19 @@ final class NewsgroupHelper {
private final FeatureVectorEncoder encoder = new StaticWordValueEncoder("body");
private final FeatureVectorEncoder bias = new ConstantValueEncoder("Intercept");
- FeatureVectorEncoder getEncoder() {
+ public FeatureVectorEncoder getEncoder() {
return encoder;
}
- FeatureVectorEncoder getBias() {
+ public FeatureVectorEncoder getBias() {
return bias;
}
- Random getRandom() {
+ public Random getRandom() {
return rand;
}
- Vector encodeFeatureVector(File file, int actual, int leakType, Multiset<String> overallCounts)
+ public Vector encodeFeatureVector(File file, int actual, int leakType, Multiset<String> overallCounts)
throws IOException {
long date = (long) (1000 * (DATE_REFERENCE + actual * MONTH + 1 * WEEK * rand.nextDouble()));
Multiset<String> words = ConcurrentHashMultiset.create();
@@ -113,7 +113,7 @@ final class NewsgroupHelper {
return v;
}
- private static void countWords(Analyzer analyzer,
+ public static void countWords(Analyzer analyzer,
Collection<String> words,
Reader in,
Multiset<String> overallCounts) throws IOException {
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SGDHelper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SGDHelper.java?rev=1337959&r1=1337958&r2=1337959&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SGDHelper.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SGDHelper.java
Sun May 13 18:36:21 2012
@@ -17,9 +17,14 @@
package org.apache.mahout.classifier.sgd;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Multiset;
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.mahout.classifier.NewsgroupHelper;
import org.apache.mahout.ep.State;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.Vector;
@@ -27,12 +32,9 @@ import org.apache.mahout.math.function.D
import org.apache.mahout.math.function.Functions;
import org.apache.mahout.vectorizer.encoders.Dictionary;
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.Set;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Multiset;
public final class SGDHelper {
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java?rev=1337959&r1=1337958&r2=1337959&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java
Sun May 13 18:36:21 2012
@@ -17,9 +17,13 @@
package org.apache.mahout.classifier.sgd;
-import com.google.common.collect.HashMultiset;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Multiset;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.Arrays;
+import java.util.List;
+
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
@@ -29,16 +33,14 @@ import org.apache.commons.cli2.builder.G
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.util.HelpFormatter;
import org.apache.mahout.classifier.ClassifierResult;
+import org.apache.mahout.classifier.NewsgroupHelper;
import org.apache.mahout.classifier.ResultAnalyzer;
import org.apache.mahout.math.Vector;
import org.apache.mahout.vectorizer.encoders.Dictionary;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.util.Arrays;
-import java.util.List;
+import com.google.common.collect.HashMultiset;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Multiset;
/**
* Run the 20 news groups test data through SGD, as trained by {@link org.apache.mahout.classifier.sgd.TrainNewsGroups}.
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java?rev=1337959&r1=1337958&r2=1337959&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
Sun May 13 18:36:21 2012
@@ -17,20 +17,22 @@
package org.apache.mahout.classifier.sgd;
-import com.google.common.collect.HashMultiset;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Multiset;
-import com.google.common.collect.Ordering;
-import org.apache.mahout.ep.State;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.vectorizer.encoders.Dictionary;
-
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
+import org.apache.mahout.classifier.NewsgroupHelper;
+import org.apache.mahout.ep.State;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.vectorizer.encoders.Dictionary;
+
+import com.google.common.collect.HashMultiset;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Multiset;
+import com.google.common.collect.Ordering;
+
/**
* Reads and trains an adaptive logistic regression model on the 20 newsgroups data.
* The first command line argument gives the path of the directory holding the
training