Author: ogrisel
Date: Thu Apr  7 07:32:38 2011
New Revision: 1089761

URL: http://svn.apache.org/viewvc?rev=1089761&view=rev
Log:
STANBOL-153: switch back to SimpleTokenizer.INSTANCE for NER

Modified:
    
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/EngineCore.java
    
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java

Modified: 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/EngineCore.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/EngineCore.java?rev=1089761&r1=1089760&r2=1089761&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/EngineCore.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/EngineCore.java
 Thu Apr  7 07:32:38 2011
@@ -37,8 +37,8 @@ import opennlp.tools.namefind.NameFinder
 import opennlp.tools.namefind.TokenNameFinderModel;
 import opennlp.tools.sentdetect.SentenceDetectorME;
 import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.tokenize.SimpleTokenizer;
 import opennlp.tools.tokenize.Tokenizer;
-import opennlp.tools.tokenize.WhitespaceTokenizer;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.Span;
 
@@ -58,8 +58,8 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
 
-/** Core of our EnhancementEngine, separated from the OSGi service
- *  to make it easier to test this.
+/**
+ * Core of our EnhancementEngine, separated from the OSGi service to make it easier to test this.
  */
 public class EngineCore implements EnhancementEngine {
     protected static final String TEXT_PLAIN_MIMETYPE = "text/plain";
@@ -88,7 +88,7 @@ public class EngineCore implements Enhan
         locationNameModel = buildNameModel("location", OntologicalClasses.DBPEDIA_PLACE);
         organizationNameModel = buildNameModel("organization", OntologicalClasses.DBPEDIA_ORGANISATION);
     }
-    
+
     protected InputStream lookupModelStream(String modelRelativePath) throws IOException {
         return dataFileProvider.getInputStream(bundleSymbolicName, modelRelativePath, DATA_FILE_COMMENTS);
     }
@@ -239,9 +239,8 @@ public class EngineCore implements Enhan
         Span[] sentenceSpans = sentenceDetector.sentPosDetect(textWithDots);
 
         NameFinderME finder = new NameFinderME(nameFinderModel);
-
+        Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
         Map<String,List<NameOccurrence>> nameOccurrences = new LinkedHashMap<String,List<NameOccurrence>>();
-        Tokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
         for (int i = 0; i < sentenceSpans.length; i++) {
             String sentence = sentenceSpans[i].getCoveredText(text).toString().trim();
 
@@ -316,4 +315,4 @@ public class EngineCore implements Enhan
         }
         return CANNOT_ENHANCE;
     }
-}
\ No newline at end of file
+}

Modified: 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java?rev=1089761&r1=1089760&r2=1089761&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
 Thu Apr  7 07:32:38 2011
@@ -114,7 +114,7 @@ public class TestNamedEntityExtractionEn
         assertEquals("Patrick Marshall", secondOccurrence.name);
         assertEquals(33, secondOccurrence.start.intValue());
         assertEquals(49, secondOccurrence.end.intValue());
-        assertEquals(0.85, secondOccurrence.confidence, 0.005);
+        assertEquals(0.997, secondOccurrence.confidence, 0.005);
     }
 
     @Test


Reply via email to