Repository: opennlp
Updated Branches:
  refs/heads/LangDetect 21a1f84b4 -> 4a3a707b8


Repair test cases


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/4a3a707b
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/4a3a707b
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/4a3a707b

Branch: refs/heads/LangDetect
Commit: 4a3a707b8a84861ffda47b992ebb3ac026d3cd90
Parents: 21a1f84
Author: Jörn Kottmann <[email protected]>
Authored: Tue Jun 6 11:49:29 2017 +0200
Committer: Jörn Kottmann <[email protected]>
Committed: Tue Jun 6 11:49:29 2017 +0200

----------------------------------------------------------------------
 .../LanguageDetectorContextGenerator.java       | 22 +++++++++++++-------
 .../LanguageDetectorContextGeneratorTest.java   | 17 +++++----------
 .../LanguageDetectorCrossValidatorTest.java     |  4 ++--
 3 files changed, 21 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a3a707b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
index dcfe0e9..b28c601 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
@@ -25,6 +25,12 @@ import opennlp.tools.util.StringList;
 import opennlp.tools.util.StringUtil;
 import opennlp.tools.util.normalizer.AggregateCharSequenceNormalizer;
 import opennlp.tools.util.normalizer.CharSequenceNormalizer;
+import opennlp.tools.util.normalizer.EmojiCharSequenceNormalizer;
+import opennlp.tools.util.normalizer.NumberCharSequenceNormalizer;
+import opennlp.tools.util.normalizer.ShrinkCharSequenceNormalizer;
+import opennlp.tools.util.normalizer.TwitterCharSequenceNormalizer;
+import opennlp.tools.util.normalizer.UnicodeCharSequenceNormalizer;
+import opennlp.tools.util.normalizer.UrlCharSequenceNormalizer;
 
 /**
  * Context generator for document categorizer
@@ -40,12 +46,12 @@ class LanguageDetectorContextGenerator {
     this.maxLength = maxLength;
 
     this.normalizer = new AggregateCharSequenceNormalizer(
-        // EmojiCharSequenceNormalizer.getInstance(),
-        //UrlCharSequenceNormalizer.getInstance(),
-        //TwitterCharSequenceNormalizer.getInstance(),
-        //NumberCharSequenceNormalizer.getInstance(),
-        //UnicodeCharSequenceNormalizer.getInstance(),
-        //ShrinkCharSequenceNormalizer.getInstance());
+        EmojiCharSequenceNormalizer.getInstance(),
+        UrlCharSequenceNormalizer.getInstance(),
+        TwitterCharSequenceNormalizer.getInstance(),
+        NumberCharSequenceNormalizer.getInstance(),
+        UnicodeCharSequenceNormalizer.getInstance(),
+        ShrinkCharSequenceNormalizer.getInstance()
     );
   }
 
@@ -53,7 +59,7 @@ class LanguageDetectorContextGenerator {
    * Initializes the current instance with min 2 length and max 5 length of ngrams.
    */
   LanguageDetectorContextGenerator() {
-    this(3, 3);
+    this(2, 3);
   }
 
   public String[] getContext(String document) {
@@ -66,7 +72,7 @@ class LanguageDetectorContextGenerator {
 
     for (StringList tokenList : model) {
       if (tokenList.size() > 0) {
-        context.add("ng=" + StringUtil.toLowerCase(tokenList.getToken(0)));
+        context.add(StringUtil.toLowerCase(tokenList.getToken(0)));
       }
     }
     return context.toArray(new String[context.size()]);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a3a707b/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorContextGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorContextGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorContextGeneratorTest.java
index 787dc1e..f6c8b18 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorContextGeneratorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorContextGeneratorTest.java
@@ -34,17 +34,10 @@ public class LanguageDetectorContextGeneratorTest {
 
     Collection<String> features = Arrays.asList(cg.getContext(doc));
 
-    Assert.assertEquals(38, features.size());
-    Assert.assertTrue(features.contains("ng=ab"));
-    Assert.assertTrue(features.contains("ng=abc"));
-    Assert.assertTrue(features.contains("ng=abcd"));
-    Assert.assertTrue(features.contains("ng=abcde"));
-    Assert.assertTrue(features.contains("ng=abcde"));
-
-    Assert.assertTrue(features.contains("ng= f"));
-    Assert.assertTrue(features.contains("ng= fg"));
-    Assert.assertTrue(features.contains("ng= fgh"));
-    Assert.assertTrue(features.contains("ng= fghi"));
-
+    Assert.assertEquals(21, features.size());
+    Assert.assertTrue(features.contains("ab"));
+    Assert.assertTrue(features.contains("abc"));
+    Assert.assertTrue(features.contains("e f"));
+    Assert.assertTrue(features.contains(" fg"));
   }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a3a707b/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorCrossValidatorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorCrossValidatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorCrossValidatorTest.java
index 8e814e8..cb38b27 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorCrossValidatorTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/langdetect/LanguageDetectorCrossValidatorTest.java
@@ -56,8 +56,8 @@ public class LanguageDetectorCrossValidatorTest {
 
     cv.evaluate(sampleStream, 2);
 
-    Assert.assertEquals(58, cv.getDocumentCount());
-    Assert.assertEquals(0.83, cv.getDocumentAccuracy(), 0.01);
+    Assert.assertEquals(99, cv.getDocumentCount());
+    Assert.assertEquals(0.98989898989899, cv.getDocumentAccuracy(), 0.01);
   }
 
 }

Reply via email to