Author: rwesten
Date: Sun Nov 24 11:42:04 2013
New Revision: 1544960
URL: http://svn.apache.org/r1544960
Log:
STANBOL-1219: merged implementation to the trunk
Modified:
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
stanbol/trunk/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties
Modified:
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java?rev=1544960&r1=1544959&r2=1544960&view=diff
==============================================================================
---
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
(original)
+++
stanbol/trunk/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
Sun Nov 24 11:42:04 2013
@@ -140,13 +140,26 @@ import org.slf4j.LoggerFactory;
"de;uc=MATCH", //in German all Nouns are upper case
"es;lc=Noun", //the OpenNLP POS tagger for Spanish does not
support ProperNouns
"nl;lc=Noun"}), //same for Dutch
- //@Property(name=DEFAULT_MATCHING_LANGUAGE,value=""), //will only be used
when adding alt label support
+ @Property(name=EntityCoMentionEngine.ADJUST_EXISTING_SUGGESTION_CONFIDENCE,
+ doubleValue=EntityCoMentionEngine.DEFAULT_CONFIDENCE_ADJUSTEMENT),
@Property(name=SERVICE_RANKING,intValue=0)
})
@Service(value=EnhancementEngine.class)
public class EntityCoMentionEngine extends
AbstractEnhancementEngine<RuntimeException,RuntimeException> implements
ServiceProperties {
+ /**
+ * Property used to configure if/how confidence values of existing
suggestions
+ * are modified if a co-mention is detected for a
fise:TextAnnotation.<p>
+ * Values MUST be in the range [0..1); the
+ * {@link #DEFAULT_CONFIDENCE_ADJUSTEMENT default} is <code>0.33</code>
<p>
+ * Added with <a
href="https://issues.apache.org/jira/browse/STANBOL-1219">STANBOL-1219</a>
+ */
+ public static final String ADJUST_EXISTING_SUGGESTION_CONFIDENCE =
"enhancer.engines.comention.adjustExistingConfidence";
/**
+ * Default value for {@link #ADJUST_EXISTING_SUGGESTION_CONFIDENCE}
+ */
+ public static final double DEFAULT_CONFIDENCE_ADJUSTEMENT = 0.33;
+ /**
* first of the post processing engines (note STANBOL-1218)
*/
private static final Integer ENGINE_ORDERING =
ServiceProperties.ORDERING_POST_PROCESSING + 80;
@@ -166,6 +179,8 @@ public class EntityCoMentionEngine exten
@Reference
protected LabelTokenizer labelTokenizer;
+ private double confidenceAdjustmentFactor;
+
// private BundleContext bundleContext;
/**
* EntityLinking configuration used for Co-Mention extractions
@@ -208,6 +223,29 @@ public class EntityCoMentionEngine exten
for(UriRef mappedUri : mappedUris){
linkerConfig.setTypeMapping(mappedUri.getUnicodeString(), null);
}
+ //parse confidence adjustment value (STANBOL-1219)
+ Object value = properties.get(ADJUST_EXISTING_SUGGESTION_CONFIDENCE);
+ final double confidenceAdjustment;
+ if(value == null){
+ confidenceAdjustment = DEFAULT_CONFIDENCE_ADJUSTEMENT;
+ } else if(value instanceof Number){
+ confidenceAdjustment = ((Number)value).doubleValue();
+ } else {
+ try {
+ confidenceAdjustment =
Double.parseDouble(value.toString());
+ } catch (NumberFormatException e){
+ throw new
ConfigurationException(ADJUST_EXISTING_SUGGESTION_CONFIDENCE,
+ "The confidence adjustement value for
existing suggestions "
+ + "MUST BE a double value in the range
[0..1)", e);
+ }
+ }
+ if(confidenceAdjustment < 0 || confidenceAdjustment >= 1){
+ throw new
ConfigurationException(ADJUST_EXISTING_SUGGESTION_CONFIDENCE,
+ "The confidence adjustement value for existing
suggestions "
+ + "MUST BE a double value in the range [0..1)
(parsed: "
+ + confidenceAdjustment +")!");
+ }
+ confidenceAdjustmentFactor = 1 - confidenceAdjustment;
//get the metadata later set to the enhancement engine
}
/**
@@ -330,6 +368,7 @@ public class EntityCoMentionEngine exten
if(!ignore){
//collect confidence values of co-mentions
Double maxConfidence = null;
+ Double maxExistingConfidence = null;
if(textAnnotation == null){ //not found ... create a new
TextAnnotation for the co-mention
textAnnotation =
EnhancementEngineHelper.createTextEnhancement(ci, this);
metadata.add(new TripleImpl(textAnnotation,
@@ -369,6 +408,26 @@ public class EntityCoMentionEngine exten
maxConfidence = confidnece;
}
}
+ Map<NonLiteral, Double> existingSuggestions = new
HashMap<NonLiteral,Double>();
+ if(maxConfidence != null && confidenceAdjustmentFactor
< 1){
+ //adapt confidence of existing annotations
+ for(Iterator<Triple> esIt =
metadata.filter(null, DC_RELATION, textAnnotation);esIt.hasNext();){
+ NonLiteral existingSuggestion =
esIt.next().getSubject();
+
existingSuggestions.put(existingSuggestion,
+
EnhancementEngineHelper.get(metadata, existingSuggestion,
+
ENHANCER_CONFIDENCE, Double.class, literalFactory));
+ }
+ for(Entry<NonLiteral,Double> entry :
existingSuggestions.entrySet()){
+ if(entry.getValue() != null){
+ double adjustedConfidence =
entry.getValue() * confidenceAdjustmentFactor;
+ if(maxExistingConfidence ==
null || adjustedConfidence > maxExistingConfidence){
+ maxExistingConfidence =
adjustedConfidence;
+ }
+
EnhancementEngineHelper.set(metadata, entry.getKey(),
+
ENHANCER_CONFIDENCE, adjustedConfidence, literalFactory);
+ }
+ }
+ }
//add the suggestions of the initial mention to this
one
Set<Resource> values = new HashSet<Resource>();
for(Iterator<Triple> suggestions =
metadata.filter(null, DC_RELATION, initialMention); suggestions.hasNext();){
@@ -382,13 +441,29 @@ public class EntityCoMentionEngine exten
metadata.add(new TripleImpl(textAnnotation,
DC_RELATION, initialMention));
//metadata.add(new TripleImpl(initialMention,
DC_RELATION, textAnnotation));
}
- //finally add the collected dc:types of initial mentions
to the textAnnotation
+ // Adapt the dc:type values of the fise:TextAnnotation:
+ // - if the suggestions added by this engine have the max confidence,
+ //   use the dc:type values of the initial mention
+ // - if the original suggestions have a higher confidence, keep the
+ //   existing dc:type values
+ // - if both have the same confidence, add all dc:types
+ boolean removeExistingDcTypes = maxConfidence != null &&
(maxExistingConfidence == null ||
+ maxConfidence.compareTo(maxExistingConfidence)
>= 0);
+ boolean addCoMentionDcTypes = maxExistingConfidence ==
null ||
+ (maxConfidence != null &&
maxConfidence.compareTo(maxExistingConfidence) >= 1);
Iterator<UriRef> existingDcTypesIt =
getReferences(metadata, textAnnotation, DC_TYPE);
while(existingDcTypesIt.hasNext()){ //do not add existing
- dcTypes.remove(existingDcTypesIt.next());
+ //remove dc:type triples if they are not re-added later
and
+ //removeExistingDcTypes == true
+ if((!dcTypes.remove(existingDcTypesIt.next()) ||
!addCoMentionDcTypes )
+ && removeExistingDcTypes){
+ existingDcTypesIt.remove(); //remove the dcType
+ }
}
- for(UriRef dcType : dcTypes){ //add missing
- metadata.add(new TripleImpl(textAnnotation, DC_TYPE,
dcType));
+ if(addCoMentionDcTypes){
+ for(UriRef dcType : dcTypes){ //add missing
+ metadata.add(new TripleImpl(textAnnotation,
DC_TYPE, dcType));
+ }
}
//TODO: support also Entities
if(maxConfidence != null){ //set the confidence value (if
known)
Modified:
stanbol/trunk/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1544960&r1=1544959&r2=1544960&view=diff
==============================================================================
---
stanbol/trunk/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties
(original)
+++
stanbol/trunk/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties
Sun Nov 24 11:42:04 2013
@@ -58,3 +58,10 @@ used in addition to the language detecte
configuration is an empty string to search for labels without any language
defined, but for some data \
sets (such as DBpedia.org) that add languages to any labels it might improve
results to change this \
configuration (e.g. to 'en' in the case of DBpedia.org).
+
+enhancer.engines.comention.adjustExistingConfidence.name=Confidence Adjustment
+enhancer.engines.comention.adjustExistingConfidence.description=Used to adjust
the \
+confidence of existing suggestions for fise:TextAnnotation where a Co-Mention
is \
+detected by this Engine. Values MUST BE in the range [0..1) (default: 0.33).
Setting \
+this to 0.0 will deactivate this feature. The {adjusted-confidence} :=
{confidence} * \
+(1 - {value}) .. where {value} is the value configured for this property.