Author: tommaso
Date: Fri Jan 8 09:51:16 2016
New Revision: 1723671
URL: http://svn.apache.org/viewvc?rev=1723671&view=rev
Log:
OPENNLP-777 - NaiveBayesModel is now always smoothed; removed DocumentCategorizerNB,
since Naive Bayes is to be enabled via the training settings
Removed:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerNB.java
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesCorrectnessTest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java?rev=1723671&r1=1723670&r2=1723671&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java
Fri Jan 8 09:51:16 2016
@@ -32,14 +32,11 @@ import opennlp.tools.ml.model.IndexHashT
/**
* Class implementing the multinomial Naive Bayes classifier model.
- *
- *
*/
public class NaiveBayesModel extends AbstractModel {
protected double[] outcomeTotals;
protected long vocabulary;
- private static boolean isSmoothed = true; // Turn this off only for
testing/validation
public NaiveBayesModel(Context[] params, String[] predLabels,
IndexHashTable<String> pmap, String[] outcomeNames) {
super(params, predLabels, pmap, outcomeNames);
@@ -126,7 +123,7 @@ public class NaiveBayesModel extends Abs
int oid = activeOutcomes[ai];
double numerator = oid == i ? activeParameters[ai++] * value : 0;
double denominator = outcomeTotals[i];
- probabilities.addIn(i, getProbability(numerator, denominator,
vocabulary), 1);
+ probabilities.addIn(i, getProbability(numerator, denominator,
vocabulary, true), 1);
}
}
}
@@ -145,7 +142,7 @@ public class NaiveBayesModel extends Abs
return prior;
}
- private static double getProbability(double numerator, double denominator,
double vocabulary) {
+ private static double getProbability(double numerator, double denominator,
double vocabulary, boolean isSmoothed) {
if (isSmoothed)
return getSmoothedProbability(numerator, denominator, vocabulary);
else if (denominator == 0 || denominator < Double.MIN_VALUE)
@@ -154,14 +151,6 @@ public class NaiveBayesModel extends Abs
return 1.0 * (numerator) / (denominator);
}
- static void setSmoothed(boolean flag) {
- isSmoothed = flag;
- }
-
- static boolean isSmoothed() {
- return isSmoothed;
- }
-
private static double getSmoothedProbability(double numerator, double
denominator, double vocabulary) {
final double delta = 0.05; // Lidstone smoothing
final double featureVocabularySize = vocabulary;
@@ -186,4 +175,4 @@ public class NaiveBayesModel extends Abs
System.out.println();
}
}
-}
+}
\ No newline at end of file
Modified:
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java?rev=1723671&r1=1723670&r2=1723671&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
(original)
+++
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
Fri Jan 8 09:51:16 2016
@@ -22,6 +22,8 @@ import java.io.IOException;
import java.util.Set;
import java.util.SortedMap;
+import opennlp.tools.ml.AbstractTrainer;
+import opennlp.tools.ml.naivebayes.NaiveBayesTrainer;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ObjectStreamUtils;
import opennlp.tools.util.TrainingParameters;
@@ -43,11 +45,12 @@ public class DocumentCategorizerNBTest {
TrainingParameters params = new TrainingParameters();
params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100));
params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(0));
+ params.put(AbstractTrainer.ALGORITHM_PARAM,
NaiveBayesTrainer.NAIVE_BAYES_VALUE);
- DoccatModel model = DocumentCategorizerNB.train("x-unspecified", samples,
+ DoccatModel model = DocumentCategorizerME.train("x-unspecified", samples,
params, new BagOfWordsFeatureGenerator());
- DocumentCategorizer doccat = new DocumentCategorizerNB(model);
+ DocumentCategorizer doccat = new DocumentCategorizerME(model);
double aProbs[] = doccat.categorize("a");
assertEquals("1", doccat.getBestCategory(aProbs));
Modified:
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesCorrectnessTest.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesCorrectnessTest.java?rev=1723671&r1=1723670&r2=1723671&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesCorrectnessTest.java
(original)
+++
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesCorrectnessTest.java
Fri Jan 8 09:51:16 2016
@@ -26,11 +26,10 @@ import opennlp.tools.ml.model.MaxentMode
import opennlp.tools.ml.model.TwoPassDataIndexer;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ObjectStreamUtils;
+import org.junit.Test;
import static org.junit.Assert.assertEquals;
-import org.junit.Test;
-
/**
* Test for naive bayes classification correctness without smoothing
*/
@@ -39,72 +38,59 @@ public class NaiveBayesCorrectnessTest {
@Test
public void testNaiveBayes1() throws IOException {
- NaiveBayesModel.setSmoothed(false); // Naive Bayes should always be run
with smoothing, taken out here for mathematical verification
-
NaiveBayesModel model =
- (NaiveBayesModel)new NaiveBayesTrainer().trainModel(new
TwoPassDataIndexer(createTrainingStream(), 1, false));
+ (NaiveBayesModel) new NaiveBayesTrainer().trainModel(new
TwoPassDataIndexer(createTrainingStream(), 1, false));
String label = "politics";
- String[] context = { "bow=united", "bow=nations" };
+ String[] context = {"bow=united", "bow=nations"};
Event event = new Event(label, context);
- testModel(model, event, 1.0);
-
- NaiveBayesModel.setSmoothed(true); // Turning smoothing back on to avoid
interfering with other tests
+ // testModel(model, event, 1.0); // Expected value without smoothing
+ testModel(model, event, 0.9681650180264167); // Expected value with
smoothing
}
@Test
public void testNaiveBayes2() throws IOException {
- NaiveBayesModel.setSmoothed(false); // Naive Bayes should always be run
with smoothing, taken out here for mathematical verification
-
NaiveBayesModel model =
- (NaiveBayesModel)new NaiveBayesTrainer().trainModel(new
TwoPassDataIndexer(createTrainingStream(), 1, false));
+ (NaiveBayesModel) new NaiveBayesTrainer().trainModel(new
TwoPassDataIndexer(createTrainingStream(), 1, false));
String label = "sports";
- String[] context = { "bow=manchester", "bow=united" };
+ String[] context = {"bow=manchester", "bow=united"};
Event event = new Event(label, context);
- testModel(model, event, 1.0);
-
- NaiveBayesModel.setSmoothed(true); // Turning smoothing back on to avoid
interfering with other tests
+ // testModel(model, event, 1.0); // Expected value without smoothing
+ testModel(model, event, 0.9658833555831029); // Expected value with
smoothing
}
@Test
public void testNaiveBayes3() throws IOException {
- NaiveBayesModel.setSmoothed(false); // Naive Bayes should always be run
with smoothing, but I am taking it out here just for mathematical verification
-
NaiveBayesModel model =
- (NaiveBayesModel)new NaiveBayesTrainer().trainModel(new
TwoPassDataIndexer(createTrainingStream(), 1, false));
+ (NaiveBayesModel) new NaiveBayesTrainer().trainModel(new
TwoPassDataIndexer(createTrainingStream(), 1, false));
String label = "politics";
- String[] context = { "bow=united" };
+ String[] context = {"bow=united"};
Event event = new Event(label, context);
- testModel(model, event, 2.0/3.0);
-
- NaiveBayesModel.setSmoothed(true); // Turning smoothing back on to avoid
interfering with other tests
+ //testModel(model, event, 2.0/3.0); // Expected value without smoothing
+ testModel(model, event, 0.6655036407766989); // Expected value with
smoothing
}
@Test
public void testNaiveBayes4() throws IOException {
- NaiveBayesModel.setSmoothed(false); // Naive Bayes should always be run
with smoothing, but I am taking it out here just for mathematical verification
-
NaiveBayesModel model =
- (NaiveBayesModel)new NaiveBayesTrainer().trainModel(new
TwoPassDataIndexer(createTrainingStream(), 1, false));
+ (NaiveBayesModel) new NaiveBayesTrainer().trainModel(new
TwoPassDataIndexer(createTrainingStream(), 1, false));
String label = "politics";
- String[] context = { };
+ String[] context = {};
Event event = new Event(label, context);
- testModel(model, event, 7.0/12.0);
-
- NaiveBayesModel.setSmoothed(true); // Turning smoothing back on to avoid
interfering with other tests
+ testModel(model, event, 7.0 / 12.0);
}
@@ -131,22 +117,22 @@ public class NaiveBayesCorrectnessTest {
List<Event> trainingEvents = new ArrayList<Event>();
String label1 = "politics";
- String[] context1 = { "bow=the", "bow=united", "bow=nations" };
+ String[] context1 = {"bow=the", "bow=united", "bow=nations"};
trainingEvents.add(new Event(label1, context1));
String label2 = "politics";
- String[] context2 = { "bow=the", "bow=united", "bow=states", "bow=and" };
+ String[] context2 = {"bow=the", "bow=united", "bow=states", "bow=and"};
trainingEvents.add(new Event(label2, context2));
String label3 = "sports";
- String[] context3 = { "bow=manchester", "bow=united" };
+ String[] context3 = {"bow=manchester", "bow=united"};
trainingEvents.add(new Event(label3, context3));
String label4 = "sports";
- String[] context4 = { "bow=manchester", "bow=and", "bow=barca" };
+ String[] context4 = {"bow=manchester", "bow=and", "bow=barca"};
trainingEvents.add(new Event(label4, context4));
return ObjectStreamUtils.createObjectStream(trainingEvents);
}
-}
+}
\ No newline at end of file
Modified:
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java?rev=1723671&r1=1723670&r2=1723671&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java
(original)
+++
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java
Fri Jan 8 09:51:16 2016
@@ -31,13 +31,11 @@ import static org.junit.Assert.assertNot
* Tests for persisting and reading naive bayes models
*/
public class NaiveBayesModelReadWriteTest {
-
@Test
public void testBinaryModelPersistence() throws Exception {
NaiveBayesModel model = (NaiveBayesModel) new
NaiveBayesTrainer().trainModel(new TwoPassDataIndexer(
NaiveBayesCorrectnessTest.createTrainingStream(), 1, false));
- Path path = Paths.get(getClass().getResource("/").getFile());
- Path tempFile = Files.createTempFile(path, "bnb-", ".bin");
+ Path tempFile = Files.createTempFile("bnb-", ".bin");
File file = tempFile.toFile();
NaiveBayesModelWriter modelWriter = new BinaryNaiveBayesModelWriter(model,
file);
modelWriter.persist();
@@ -51,8 +49,7 @@ public class NaiveBayesModelReadWriteTes
public void testTextModelPersistence() throws Exception {
NaiveBayesModel model = (NaiveBayesModel) new
NaiveBayesTrainer().trainModel(new TwoPassDataIndexer(
NaiveBayesCorrectnessTest.createTrainingStream(), 1, false));
- Path path = Paths.get(getClass().getResource("/").getFile());
- Path tempFile = Files.createTempFile(path, "ptnb-", ".txt");
+ Path tempFile = Files.createTempFile("ptnb-", ".txt");
File file = tempFile.toFile();
NaiveBayesModelWriter modelWriter = new
PlainTextNaiveBayesModelWriter(model, file);
modelWriter.persist();
@@ -61,6 +58,4 @@ public class NaiveBayesModelReadWriteTes
AbstractModel abstractModel = reader.constructModel();
assertNotNull(abstractModel);
}
-
-
}
\ No newline at end of file