Author: rwesten
Date: Thu Mar 13 13:10:39 2014
New Revision: 1577150
URL: http://svn.apache.org/r1577150
Log:
implementation of STANBOL-1292 for 0.12.1
Modified:
stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/config/EntityLinkerConfig.java
stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/engine/EntityLinkingEngine.java
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngine.java
Modified:
stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/config/EntityLinkerConfig.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/config/EntityLinkerConfig.java?rev=1577150&r1=1577149&r2=1577150&view=diff
==============================================================================
---
stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/config/EntityLinkerConfig.java
(original)
+++
stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/config/EntityLinkerConfig.java
Thu Mar 13 13:10:39 2014
@@ -217,6 +217,12 @@ public class EntityLinkerConfig {
* based on the entity ranking (popularity of the entity within the
knowledge base)
*/
public static final String RANK_EQUAL_SCORES_BASED_ON_ENTITY_RANKINGS =
"enhancer.engines.linking.useEntityRankings";
+ /**
+ * Configuration key used to enable/disable the inclusion of the <code>fise:entity-ranking</code>
+ * property in the <code>fise:EntityAnnotation</code>s created by the linking engine.
+ */
+ public static final String WRITE_ENTITY_RANKINGS =
"enhancer.engines.linking.writeEntityRankings";
+
/**
* The default number for the maximum number of terms suggested for a word
@@ -466,6 +472,13 @@ public class EntityLinkerConfig {
* higher ranking (popularity) do have an higher score.
*/
public static final boolean
DEFAULT_RANK_EQUAL_SCORES_BASED_ON_ENTITY_RANKINGS = true;
+
+ /**
+ * Default for {@link #WRITE_ENTITY_RANKINGS}: the <code>fise:entity-ranking</code> property
+ * is not added to <code>fise:EntityAnnotation</code>s.
+ */
+ public static final boolean DEFAULT_WRITE_ENTITY_RANKINGS = false;
+
/**
* If Tokens match is determined by comparing them using some algorithm.
* Results need to be in the range [0..1]. This factor defines the minimum
@@ -490,6 +503,8 @@ public class EntityLinkerConfig {
private boolean rankEqualScoresBasedOnEntityRankings =
DEFAULT_RANK_EQUAL_SCORES_BASED_ON_ENTITY_RANKINGS;
+ private boolean writeEntityRankings = DEFAULT_WRITE_ENTITY_RANKINGS;
+
/**
* Default constructor the initializes the configuration with the
* default values
@@ -908,6 +923,16 @@ public class EntityLinkerConfig {
linkerConfig.setRankEqualScoresBasedOnEntityRankings(
DEFAULT_RANK_EQUAL_SCORES_BASED_ON_ENTITY_RANKINGS);
}
+ //init WRITE ENTITY RANKINGS (STANBOL-1292)
+ value = configuration.get(WRITE_ENTITY_RANKINGS);
+ if(value instanceof Boolean){
+
linkerConfig.setWriteEntityRankings(((Boolean)value).booleanValue());
+ } else if (value != null){
+
linkerConfig.setWriteEntityRankings(Boolean.parseBoolean(value.toString()));
+ } else {
+ linkerConfig.setWriteEntityRankings(DEFAULT_WRITE_ENTITY_RANKINGS);
+ }
+
//init the list of whitelisted/blacklisted types
value = configuration.get(ENTITY_TYPES);
List<String> entityTypesConfig; //first collect and cleanup the config
@@ -1518,6 +1543,29 @@ public class EntityLinkerConfig {
}
/**
+ * Getter for the state if <code>fise:entity-ranking</code> values should
+ * be added to <code>fise:EntityAnnotation</code>s (if rankings are available
+ * for the linked datasets).
+ * @return the write entity ranking state
+ */
+ public boolean isWriteEntityRankings() {
+ return writeEntityRankings;
+ }
+ /**
+ * Setter for the {@link #WRITE_ENTITY_RANKINGS} state.
+ * @param writeEntityRankings the state. Pass <code>null</code> to reset
+ * to the default ({@link #DEFAULT_WRITE_ENTITY_RANKINGS})
+ */
+ public void setWriteEntityRankings(Boolean writeEntityRankings) {
+ if(writeEntityRankings == null){
+ this.writeEntityRankings = DEFAULT_WRITE_ENTITY_RANKINGS;
+ } else {
+ this.writeEntityRankings = writeEntityRankings;
+ }
+ }
+
+
+ /**
* Adds an type to the blacklist
*/
public final void addBlacklistType(UriRef type, Integer order) {
Modified:
stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/engine/EntityLinkingEngine.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/engine/EntityLinkingEngine.java?rev=1577150&r1=1577149&r2=1577150&view=diff
==============================================================================
---
stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/engine/EntityLinkingEngine.java
(original)
+++
stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/engine/EntityLinkingEngine.java
Thu Mar 13 13:10:39 2014
@@ -44,6 +44,7 @@ import org.apache.clerezza.rdf.core.Trip
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.impl.TypedLiteralImpl;
import org.apache.commons.lang.StringUtils;
import org.apache.felix.scr.annotations.ReferenceCardinality;
import org.apache.felix.scr.annotations.ReferencePolicy;
@@ -66,6 +67,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -96,6 +98,10 @@ public class EntityLinkingEngine impleme
*/
public static final Integer DEFAULT_ORDER =
ServiceProperties.ORDERING_DEFAULT - 10;
+ private static final UriRef XSD_DOUBLE = new
UriRef("http://www.w3.org/2001/XMLSchema#double");
+
+ private static final UriRef ENHANCER_ENTITY_RANKING = new
UriRef(NamespaceEnum.fise + "entity-ranking");
+
/**
* The name of this engine
*/
@@ -267,7 +273,8 @@ public class EntityLinkingEngine impleme
//write results (requires a write lock)
ci.getLock().writeLock().lock();
try {
- writeEnhancements(ci, entityLinker.getLinkedEntities().values(),
language);
+ writeEnhancements(ci, entityLinker.getLinkedEntities().values(),
language,
+ linkerConfig.isWriteEntityRankings());
} finally {
ci.getLock().writeLock().unlock();
}
@@ -280,7 +287,8 @@ public class EntityLinkingEngine impleme
* @param linkedEntities
* @param language
*/
- private void writeEnhancements(ContentItem ci, Collection<LinkedEntity>
linkedEntities, String language) {
+ private void writeEnhancements(ContentItem ci, Collection<LinkedEntity>
linkedEntities,
+ String language, boolean writeRankings) {
Language languageObject = null;
if(language != null && !language.isEmpty()){
languageObject = new Language(language);
@@ -359,6 +367,15 @@ public class EntityLinkingEngine impleme
originInfo.getKey(),value));
}
}
+ if(writeRankings){
+ Float ranking = suggestion.getEntity().getEntityRanking();
+ if(ranking != null){
+ metadata.add(new TripleImpl(entityAnnotation,
+ ENHANCER_ENTITY_RANKING,
+ //write the float as double
+ new TypedLiteralImpl(ranking.toString(),
XSD_DOUBLE)));
+ }
+ }
//in case dereferencing of Entities is enabled we need also to
//add the RDF data for entities
if(linkerConfig.isDereferenceEntitiesEnabled() &&
Modified:
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngine.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngine.java?rev=1577150&r1=1577149&r2=1577150&view=diff
==============================================================================
---
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngine.java
(original)
+++
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngine.java
Thu Mar 13 13:10:39 2014
@@ -16,7 +16,6 @@
*/
package org.apache.stanbol.enhancer.engines.lucenefstlinking;
-import static
org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion.ENTITY_RANK_COMPARATOR;
import static
org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText;
import static
org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage;
import static
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getSelectionContext;
@@ -31,7 +30,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@@ -40,14 +38,11 @@ import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeMap;
-import java.util.Map.Entry;
import org.apache.clerezza.rdf.core.Language;
import org.apache.clerezza.rdf.core.Literal;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.PlainLiteral;
-import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
@@ -58,14 +53,12 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.OpenBitSet;
import org.apache.solr.core.SolrCore;
-import org.apache.stanbol.enhancer.engines.entitylinking.Entity;
import org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcher;
import
org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig;
import
org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig;
import
org.apache.stanbol.enhancer.engines.entitylinking.engine.EntityLinkingEngine;
import org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity;
import org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion;
-import
org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity.Occurrence;
import
org.apache.stanbol.enhancer.engines.lucenefstlinking.TaggingSession.Corpus;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
@@ -92,6 +85,8 @@ public class FstLinkingEngine implements
private static final Map<String,Object> SERVICE_PROPERTIES =
Collections.unmodifiableMap(Collections
.singletonMap(ServiceProperties.ENHANCEMENT_ENGINE_ORDERING,
(Object) ENGINE_ORDERING));
+ private static final UriRef ENHANCER_ENTITY_RANKING = new
UriRef(NamespaceEnum.fise + "entity-ranking");
+
private final LiteralFactory literalFactory = LiteralFactory.getInstance();
protected final String name;
@@ -226,7 +221,8 @@ public class FstLinkingEngine implements
}
ci.getLock().writeLock().lock();
try {
- writeEnhancements(ci,at.getSpan(),tags.values(),language);
+ writeEnhancements(ci,at.getSpan(),tags.values(),language,
+ elConfig.isWriteEntityRankings());
} finally {
ci.getLock().writeLock().unlock();
}
@@ -515,7 +511,8 @@ public class FstLinkingEngine implements
* @param tags
* @param language
*/
- private void writeEnhancements(ContentItem ci, String text,
Collection<Tag> tags, String language) {
+ private void writeEnhancements(ContentItem ci, String text,
Collection<Tag> tags,
+ String language, boolean writeRankings) {
Language languageObject = null;
if(language != null && !language.isEmpty()){
languageObject = new Language(language);
@@ -589,6 +586,15 @@ public class FstLinkingEngine implements
// originInfo.getKey(),value));
// }
// }
+ if(writeRankings){
+ Double ranking = match.getRanking();
+ if(ranking != null){
+ metadata.add(new TripleImpl(entityAnnotation,
+ ENHANCER_ENTITY_RANKING,
+ literalFactory.createTypedLiteral(ranking)));
+ }
+ }
+
//TODO: dereferencing
// if(linkerConfig.isDereferenceEntitiesEnabled() &&
// dereferencedEntitis.add(entity.getUri())){ //not yet
dereferenced