Author: ogrisel
Date: Thu Apr 7 07:32:38 2011
New Revision: 1089761
URL: http://svn.apache.org/viewvc?rev=1089761&view=rev
Log:
STANBOL-153: switch back to SimpleTokenizer.INSTANCE for NER
Modified:
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/EngineCore.java
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
Modified:
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/EngineCore.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/EngineCore.java?rev=1089761&r1=1089760&r2=1089761&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/EngineCore.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/EngineCore.java
Thu Apr 7 07:32:38 2011
@@ -37,8 +37,8 @@ import opennlp.tools.namefind.NameFinder
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.Tokenizer;
-import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.Span;
@@ -58,8 +58,8 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
-/** Core of our EnhancementEngine, separated from the OSGi service
- * to make it easier to test this.
+/**
+ * Core of our EnhancementEngine, separated from the OSGi service to make it easier to test this.
*/
public class EngineCore implements EnhancementEngine {
protected static final String TEXT_PLAIN_MIMETYPE = "text/plain";
@@ -88,7 +88,7 @@ public class EngineCore implements Enhan
locationNameModel = buildNameModel("location",
OntologicalClasses.DBPEDIA_PLACE);
organizationNameModel = buildNameModel("organization",
OntologicalClasses.DBPEDIA_ORGANISATION);
}
-
+
protected InputStream lookupModelStream(String modelRelativePath) throws
IOException {
return dataFileProvider.getInputStream(bundleSymbolicName,
modelRelativePath, DATA_FILE_COMMENTS);
}
@@ -239,9 +239,8 @@ public class EngineCore implements Enhan
Span[] sentenceSpans = sentenceDetector.sentPosDetect(textWithDots);
NameFinderME finder = new NameFinderME(nameFinderModel);
-
+ Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
Map<String,List<NameOccurrence>> nameOccurrences = new
LinkedHashMap<String,List<NameOccurrence>>();
- Tokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
for (int i = 0; i < sentenceSpans.length; i++) {
String sentence =
sentenceSpans[i].getCoveredText(text).toString().trim();
@@ -316,4 +315,4 @@ public class EngineCore implements Enhan
}
return CANNOT_ENHANCE;
}
-}
\ No newline at end of file
+}
Modified:
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java?rev=1089761&r1=1089760&r2=1089761&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
Thu Apr 7 07:32:38 2011
@@ -114,7 +114,7 @@ public class TestNamedEntityExtractionEn
assertEquals("Patrick Marshall", secondOccurrence.name);
assertEquals(33, secondOccurrence.start.intValue());
assertEquals(49, secondOccurrence.end.intValue());
- assertEquals(0.85, secondOccurrence.confidence, 0.005);
+ assertEquals(0.997, secondOccurrence.confidence, 0.005);
}
@Test