Author: tommaso
Date: Fri Jan  8 09:51:16 2016
New Revision: 1723671

URL: http://svn.apache.org/viewvc?rev=1723671&view=rev
Log:
OPENNLP-777 - NaiveBayesModel is now always smoothed; removed 
DocumentCategorizerNB, since Naive Bayes is to be enabled via settings

Removed:
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerNB.java
Modified:
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java
    
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
    
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesCorrectnessTest.java
    
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java

Modified: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java?rev=1723671&r1=1723670&r2=1723671&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/ml/naivebayes/NaiveBayesModel.java
 Fri Jan  8 09:51:16 2016
@@ -32,14 +32,11 @@ import opennlp.tools.ml.model.IndexHashT
 
 /**
  * Class implementing the multinomial Naive Bayes classifier model.
- *
- *
  */
 public class NaiveBayesModel extends AbstractModel {
 
   protected double[] outcomeTotals;
   protected long vocabulary;
-  private static boolean isSmoothed = true; // Turn this off only for 
testing/validation
 
   public NaiveBayesModel(Context[] params, String[] predLabels, 
IndexHashTable<String> pmap, String[] outcomeNames) {
     super(params, predLabels, pmap, outcomeNames);
@@ -126,7 +123,7 @@ public class NaiveBayesModel extends Abs
           int oid = activeOutcomes[ai];
           double numerator = oid == i ? activeParameters[ai++] * value : 0;
           double denominator = outcomeTotals[i];
-          probabilities.addIn(i, getProbability(numerator, denominator, 
vocabulary), 1);
+          probabilities.addIn(i, getProbability(numerator, denominator, 
vocabulary, true), 1);
         }
       }
     }
@@ -145,7 +142,7 @@ public class NaiveBayesModel extends Abs
     return prior;
   }
 
-  private static double getProbability(double numerator, double denominator, 
double vocabulary) {
+  private static double getProbability(double numerator, double denominator, 
double vocabulary, boolean isSmoothed) {
     if (isSmoothed)
       return getSmoothedProbability(numerator, denominator, vocabulary);
     else if (denominator == 0 || denominator < Double.MIN_VALUE)
@@ -154,14 +151,6 @@ public class NaiveBayesModel extends Abs
       return 1.0 * (numerator) / (denominator);
   }
 
-  static void setSmoothed(boolean flag) {
-    isSmoothed = flag;
-  }
-
-  static boolean isSmoothed() {
-    return isSmoothed;
-  }
-
   private static double getSmoothedProbability(double numerator, double 
denominator, double vocabulary) {
     final double delta = 0.05; // Lidstone smoothing
     final double featureVocabularySize = vocabulary;
@@ -186,4 +175,4 @@ public class NaiveBayesModel extends Abs
       System.out.println();
     }
   }
-}
+}
\ No newline at end of file

Modified: 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java?rev=1723671&r1=1723670&r2=1723671&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/doccat/DocumentCategorizerNBTest.java
 Fri Jan  8 09:51:16 2016
@@ -22,6 +22,8 @@ import java.io.IOException;
 import java.util.Set;
 import java.util.SortedMap;
 
+import opennlp.tools.ml.AbstractTrainer;
+import opennlp.tools.ml.naivebayes.NaiveBayesTrainer;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.ObjectStreamUtils;
 import opennlp.tools.util.TrainingParameters;
@@ -43,11 +45,12 @@ public class DocumentCategorizerNBTest {
     TrainingParameters params = new TrainingParameters();
     params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(100));
     params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(0));
+    params.put(AbstractTrainer.ALGORITHM_PARAM, 
NaiveBayesTrainer.NAIVE_BAYES_VALUE);
 
-    DoccatModel model = DocumentCategorizerNB.train("x-unspecified", samples,
+    DoccatModel model = DocumentCategorizerME.train("x-unspecified", samples,
         params, new BagOfWordsFeatureGenerator());
 
-    DocumentCategorizer doccat = new DocumentCategorizerNB(model);
+    DocumentCategorizer doccat = new DocumentCategorizerME(model);
 
     double aProbs[] = doccat.categorize("a");
     assertEquals("1", doccat.getBestCategory(aProbs));

Modified: 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesCorrectnessTest.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesCorrectnessTest.java?rev=1723671&r1=1723670&r2=1723671&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesCorrectnessTest.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesCorrectnessTest.java
 Fri Jan  8 09:51:16 2016
@@ -26,11 +26,10 @@ import opennlp.tools.ml.model.MaxentMode
 import opennlp.tools.ml.model.TwoPassDataIndexer;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.ObjectStreamUtils;
+import org.junit.Test;
 
 import static org.junit.Assert.assertEquals;
 
-import org.junit.Test;
-
 /**
  * Test for naive bayes classification correctness without smoothing
  */
@@ -39,72 +38,59 @@ public class NaiveBayesCorrectnessTest {
   @Test
   public void testNaiveBayes1() throws IOException {
 
-    NaiveBayesModel.setSmoothed(false); // Naive Bayes should always be run 
with smoothing, taken out here for mathematical verification
-
     NaiveBayesModel model =
-        (NaiveBayesModel)new NaiveBayesTrainer().trainModel(new 
TwoPassDataIndexer(createTrainingStream(), 1, false));
+        (NaiveBayesModel) new NaiveBayesTrainer().trainModel(new 
TwoPassDataIndexer(createTrainingStream(), 1, false));
 
     String label = "politics";
-    String[] context = { "bow=united", "bow=nations" };
+    String[] context = {"bow=united", "bow=nations"};
     Event event = new Event(label, context);
 
-    testModel(model, event, 1.0);
-
-    NaiveBayesModel.setSmoothed(true); // Turning smoothing back on to avoid 
interfering with other tests
+    // testModel(model, event, 1.0);  // Expected value without smoothing
+    testModel(model, event, 0.9681650180264167);   // Expected value with 
smoothing
 
   }
 
   @Test
   public void testNaiveBayes2() throws IOException {
 
-    NaiveBayesModel.setSmoothed(false); // Naive Bayes should always be run 
with smoothing, taken out here for mathematical verification
-
     NaiveBayesModel model =
-        (NaiveBayesModel)new NaiveBayesTrainer().trainModel(new 
TwoPassDataIndexer(createTrainingStream(), 1, false));
+        (NaiveBayesModel) new NaiveBayesTrainer().trainModel(new 
TwoPassDataIndexer(createTrainingStream(), 1, false));
 
     String label = "sports";
-    String[] context = { "bow=manchester", "bow=united" };
+    String[] context = {"bow=manchester", "bow=united"};
     Event event = new Event(label, context);
 
-    testModel(model, event, 1.0);
-
-    NaiveBayesModel.setSmoothed(true); // Turning smoothing back on to avoid 
interfering with other tests
+    // testModel(model, event, 1.0);  // Expected value without smoothing
+    testModel(model, event, 0.9658833555831029);   // Expected value with 
smoothing
 
   }
 
   @Test
   public void testNaiveBayes3() throws IOException {
 
-    NaiveBayesModel.setSmoothed(false); // Naive Bayes should always be run 
with smoothing, but I am taking it out here just for mathematical verification
-
     NaiveBayesModel model =
-        (NaiveBayesModel)new NaiveBayesTrainer().trainModel(new 
TwoPassDataIndexer(createTrainingStream(), 1, false));
+        (NaiveBayesModel) new NaiveBayesTrainer().trainModel(new 
TwoPassDataIndexer(createTrainingStream(), 1, false));
 
     String label = "politics";
-    String[] context = { "bow=united" };
+    String[] context = {"bow=united"};
     Event event = new Event(label, context);
 
-    testModel(model, event, 2.0/3.0);
-
-    NaiveBayesModel.setSmoothed(true); // Turning smoothing back on to avoid 
interfering with other tests
+    //testModel(model, event, 2.0/3.0);  // Expected value without smoothing
+    testModel(model, event, 0.6655036407766989);  // Expected value with 
smoothing
 
   }
 
   @Test
   public void testNaiveBayes4() throws IOException {
 
-    NaiveBayesModel.setSmoothed(false); // Naive Bayes should always be run 
with smoothing, but I am taking it out here just for mathematical verification
-
     NaiveBayesModel model =
-        (NaiveBayesModel)new NaiveBayesTrainer().trainModel(new 
TwoPassDataIndexer(createTrainingStream(), 1, false));
+        (NaiveBayesModel) new NaiveBayesTrainer().trainModel(new 
TwoPassDataIndexer(createTrainingStream(), 1, false));
 
     String label = "politics";
-    String[] context = { };
+    String[] context = {};
     Event event = new Event(label, context);
 
-    testModel(model, event, 7.0/12.0);
-
-    NaiveBayesModel.setSmoothed(true); // Turning smoothing back on to avoid 
interfering with other tests
+    testModel(model, event, 7.0 / 12.0);
 
   }
 
@@ -131,22 +117,22 @@ public class NaiveBayesCorrectnessTest {
     List<Event> trainingEvents = new ArrayList<Event>();
 
     String label1 = "politics";
-    String[] context1 = { "bow=the", "bow=united", "bow=nations" };
+    String[] context1 = {"bow=the", "bow=united", "bow=nations"};
     trainingEvents.add(new Event(label1, context1));
 
     String label2 = "politics";
-    String[] context2 = { "bow=the", "bow=united", "bow=states", "bow=and" };
+    String[] context2 = {"bow=the", "bow=united", "bow=states", "bow=and"};
     trainingEvents.add(new Event(label2, context2));
 
     String label3 = "sports";
-    String[] context3 = { "bow=manchester", "bow=united" };
+    String[] context3 = {"bow=manchester", "bow=united"};
     trainingEvents.add(new Event(label3, context3));
 
     String label4 = "sports";
-    String[] context4 = { "bow=manchester", "bow=and", "bow=barca" };
+    String[] context4 = {"bow=manchester", "bow=and", "bow=barca"};
     trainingEvents.add(new Event(label4, context4));
 
     return ObjectStreamUtils.createObjectStream(trainingEvents);
   }
 
-}
+}
\ No newline at end of file

Modified: 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java?rev=1723671&r1=1723670&r2=1723671&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/ml/naivebayes/NaiveBayesModelReadWriteTest.java
 Fri Jan  8 09:51:16 2016
@@ -31,13 +31,11 @@ import static org.junit.Assert.assertNot
  * Tests for persisting and reading naive bayes models
  */
 public class NaiveBayesModelReadWriteTest {
-
   @Test
   public void testBinaryModelPersistence() throws Exception {
     NaiveBayesModel model = (NaiveBayesModel) new 
NaiveBayesTrainer().trainModel(new TwoPassDataIndexer(
         NaiveBayesCorrectnessTest.createTrainingStream(), 1, false));
-    Path path = Paths.get(getClass().getResource("/").getFile());
-    Path tempFile = Files.createTempFile(path, "bnb-", ".bin");
+    Path tempFile = Files.createTempFile("bnb-", ".bin");
     File file = tempFile.toFile();
     NaiveBayesModelWriter modelWriter = new BinaryNaiveBayesModelWriter(model, 
file);
     modelWriter.persist();
@@ -51,8 +49,7 @@ public class NaiveBayesModelReadWriteTes
   public void testTextModelPersistence() throws Exception {
     NaiveBayesModel model = (NaiveBayesModel) new 
NaiveBayesTrainer().trainModel(new TwoPassDataIndexer(
         NaiveBayesCorrectnessTest.createTrainingStream(), 1, false));
-    Path path = Paths.get(getClass().getResource("/").getFile());
-    Path tempFile = Files.createTempFile(path, "ptnb-", ".txt");
+    Path tempFile = Files.createTempFile("ptnb-", ".txt");
     File file = tempFile.toFile();
     NaiveBayesModelWriter modelWriter = new 
PlainTextNaiveBayesModelWriter(model, file);
     modelWriter.persist();
@@ -61,6 +58,4 @@ public class NaiveBayesModelReadWriteTes
     AbstractModel abstractModel = reader.constructModel();
     assertNotNull(abstractModel);
   }
-
-
 }
\ No newline at end of file


Reply via email to