impl: NEREngineConfig.java NEREngineCore.java NameOccurrence.java

rwesten Fri, 16 Nov 2012 06:31:03 -0800

Author: rwesten
Date: Fri Nov 16 14:30:35 2012
New Revision: 1410367

URL: http://svn.apache.org/viewvc?rev=1410367&view=rev
Log:
STANBOL-797: the opennlp-ner engine now also adds NER annotations to the 
AnalysedText content part. This feature is only active if an AnalysedText 
content part is already present; the engine does NOT create one.


Modified:
    
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineConfig.java
    
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
    
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NameOccurrence.java

Modified: 
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineConfig.java
URL: 
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineConfig.java?rev=1410367&r1=1410366&r2=1410367&view=diff
==============================================================================
--- 
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineConfig.java
 (original)
+++ 
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineConfig.java
 Fri Nov 16 14:30:35 2012
@@ -6,12 +6,15 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.concurrent.CopyOnWriteArrayList;
 
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.stanbol.commons.opennlp.OpenNLP;
+import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
 import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
 import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
 
@@ -32,10 +35,11 @@ public class NEREngineConfig {
     }
     
     /**
-     * Holds the mappings of rdf:type used by concepts to dc:type values used
-     * by TextAnnotations. 
+     * Holds the configured {@link NerTag}s - the mappings from the
+     * named entity name to the {@link UriRef} type used for the
+     * <code>dc:type</code> value for <code>fise:TextAnnotation</code>s
      */
-    private Map<String,UriRef> typeMappings = new 
HashMap<String,UriRef>(DEFAULT_ENTITY_TYPE_MAPPINGS);
+    private TagSet<NerTag> nerTagSet = new TagSet<NerTag>("NER TagSet");
     
     private Map<String,Collection<String>> additionalNerModels = new 
HashMap<String,Collection<String>>();
     /**
@@ -50,6 +54,12 @@ public class NEREngineConfig {
     
     private String defaultLanguage;
     
+    public NEREngineConfig(){
+        for(Entry<String,UriRef> mapping : 
DEFAULT_ENTITY_TYPE_MAPPINGS.entrySet()){
+            nerTagSet.addTag(new NerTag(mapping.getKey(), mapping.getValue()));
+        }
+    }
+    
     public synchronized void addCustomNameFinderModel(String lang, String 
modelFileName){
         if(lang == null || lang.isEmpty()){
             throw new IllegalArgumentException("The parsed lanaguage MUST NOT 
be NULL or empty!");
@@ -115,17 +125,40 @@ public class NEREngineConfig {
         Collection<String> modelNames = additionalNerModels.get(lang);
         return modelNames == null ? Collections.EMPTY_LIST : modelNames;
     }
-    
-    public UriRef getMappedType(String namedEntityType){
-        return typeMappings.get(namedEntityType);
+    /**
+     * Getter for the {@link NerTag} of the parsed Named Entity
+     * name. If not yet present a new {@link NerTag} (with no
+     * <code>dc:type</code> mapping) is created and added to the
+     * configuration.
+     * @param namedEntityType the NamedEntity name.
+     * @return the NerTag. Guaranteed to be not <code>null</code>
+     * @throws IllegalArgumentException if the parsed NamedEntity
+     * type is <code>null</code> or an empty String.
+     */
+    public NerTag getNerTag(String namedEntityType){
+        if(namedEntityType == null || namedEntityType.isEmpty()){
+            throw new IllegalArgumentException("The parsed NamedEntity string 
MUST NOT be NULL nor empty!");
+        }
+        NerTag tag = nerTagSet.getTag(namedEntityType);
+        if(tag == null){
+            tag = new NerTag(namedEntityType);
+            nerTagSet.addTag(tag);
+        }
+        return tag;
     }
+    /**
+     * Setter for a NamedEntity name &gt; <code>dc:type</code>
+     * mapping.
+     * @param namedEntityType the Named Entity type (as
+     * used by the OpenNLP NameFinder model)
+     * @param dcType the <code>dc:type</code> used for the
+     * NamedEntity or <code>null</code> if none
+     * @throws IllegalArgumentException if the parsed NamedEntity
+     * type is <code>null</code> or an empty String.
+     */
     public void setMappedType(String namedEntityType,UriRef dcType){
         if(namedEntityType != null && !namedEntityType.isEmpty()){
-            if(dcType == null){
-                typeMappings.remove(namedEntityType);
-            } else {
-                typeMappings.put(namedEntityType, dcType);
-            }
+            nerTagSet.addTag(new NerTag(namedEntityType, dcType));
         } else {
             throw new IllegalArgumentException("The parsed NamedEntity type 
MUST NOT be NULL nor empty!");
         }

Modified: 
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
URL: 
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java?rev=1410367&r1=1410366&r2=1410367&view=diff
==============================================================================
--- 
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
 (original)
+++ 
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
 Fri Nov 16 14:30:35 2012
@@ -16,6 +16,7 @@
  */
 package org.apache.stanbol.enhancer.engines.opennlp.impl;
 
+import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.NER_ANNOTATION;
 import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
 import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
@@ -54,11 +55,15 @@ import org.apache.clerezza.rdf.core.impl
 import org.apache.commons.lang.StringUtils;
 import org.apache.stanbol.commons.opennlp.OpenNLP;
 import 
org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider;
+import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
 import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
 import org.apache.stanbol.enhancer.nlp.model.AnalysedTextUtils;
+import org.apache.stanbol.enhancer.nlp.model.Chunk;
 import org.apache.stanbol.enhancer.nlp.model.Section;
 import org.apache.stanbol.enhancer.nlp.model.Sentence;
 import org.apache.stanbol.enhancer.nlp.model.Token;
+import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
 import org.apache.stanbol.enhancer.servicesapi.Blob;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -90,6 +95,7 @@ public abstract class NEREngineCore 
     
     protected NEREngineConfig config;
     
+    
     /** Comments about our models */
     public static final Map<String, String> DATA_FILE_COMMENTS;
     static {
@@ -486,18 +492,22 @@ public abstract class NEREngineCore 
                 for (int k = nameSpans[j].getStart(); k < 
nameSpans[j].getEnd(); k++) {
                     confidence *= probs[k];
                 }
-                int absoluteStart = 
tokens.get(nameSpans[j].getStart()).getStart();
-                int absoluteEnd = absoluteStart + name.length();
-                UriRef mappedType = 
config.getMappedType(nameSpans[j].getType());
-                NameOccurrence occurrence = new NameOccurrence(name, 
absoluteStart, absoluteEnd, 
-                    mappedType, context, confidence);
-
+                int start = tokens.get(nameSpans[j].getStart()).getStart();
+                int end = start + name.length();
+                NerTag nerTag = config.getNerTag(nameSpans[j].getType());
+                //create the occurrence for writing fise:TextAnnotations
+                NameOccurrence occurrence = new NameOccurrence(name, start, 
end, nerTag.getType(),
+                    context, confidence);
                 List<NameOccurrence> occurrences = nameOccurrences.get(name);
                 if (occurrences == null) {
                     occurrences = new ArrayList<NameOccurrence>();
                 }
                 occurrences.add(occurrence);
                 nameOccurrences.put(name, occurrences);
+                //add also the NerAnnotation to the AnalysedText
+                Chunk chunk = at.addChunk(start, end);
+                //TODO: build AnnotationModel based on the configured Mappings
+                chunk.addAnnotation(NER_ANNOTATION, Value.value(nerTag, 
confidence));
             }
         }
         finder.clearAdaptiveData();
@@ -553,9 +563,9 @@ public abstract class NEREngineCore 
                 int start = tokenSpans[nameSpans[j].getStart()].getStart();
                 int absoluteStart = sentenceSpans[i].getStart() + start;
                 int absoluteEnd = absoluteStart + name.length();
-                UriRef mappedType = 
config.getMappedType(nameSpans[j].getType());
+                NerTag nerTag = config.getNerTag(nameSpans[j].getType());
                 NameOccurrence occurrence = new NameOccurrence(name, 
absoluteStart, absoluteEnd, 
-                    mappedType, context, confidence);
+                    nerTag.getType(),context, confidence);
 
                 List<NameOccurrence> occurrences = nameOccurrences.get(name);
                 if (occurrences == null) {

Modified: 
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NameOccurrence.java
URL: 
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NameOccurrence.java?rev=1410367&r1=1410366&r2=1410367&view=diff
==============================================================================
--- 
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NameOccurrence.java
 (original)
+++ 
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NameOccurrence.java
 Fri Nov 16 14:30:35 2012
@@ -21,7 +21,9 @@ import org.apache.clerezza.rdf.core.UriR
 public class NameOccurrence {
 
     public final String name;
-
+    
+    public final UriRef type;
+    
     public final Integer start;
 
     public final Integer end;
@@ -30,14 +32,12 @@ public class NameOccurrence {
 
     public final Double confidence;
 
-    public final UriRef type;
-
-    public NameOccurrence(String name, Integer start, Integer end, UriRef type,
-            String context, Double confidence) {
-        this.start = start;
-        this.end = end;
+    public NameOccurrence(String name, Integer start, Integer end,
+            UriRef type,String context, Double confidence) {
         this.name = name;
         this.type = type;
+        this.start = start;
+        this.end = end;
         this.context = context;
         this.confidence = confidence;
     }

svn commit: r1410367 - in /stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl: NEREngineConfig.java NEREngineCore.java NameOccurrence.java

Reply via email to