Author: rwesten
Date: Fri May 25 11:34:33 2012
New Revision: 1342580

URL: http://svn.apache.org/viewvc?rev=1342580&view=rev
Log:
STANBOL-612: The KeywordLinkingEngine now validates the enhancement by using the 
EnhancementStructureHelper Utility. Also added validation for STANBOL-625

Added:
    
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/
    
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngineTest.java
   (contents, props changed)
      - copied, changed from r1340995, 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/TestTaxonomyLinker.java
Modified:
    
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java

Modified: 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java?rev=1342580&r1=1342579&r2=1342580&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
 Fri May 25 11:34:33 2012
@@ -240,7 +240,11 @@ public class KeywordLinkingEngine 
         unbind = "disableOfflineMode", 
         strategy = ReferenceStrategy.EVENT)
     private OfflineMode offlineMode;
-    private String referencedSiteName;
+    /**
+     * The name of the reference site ('local' or 'entityhub') if the
+     * Entityhub is used for enhancing
+     */
+    protected String referencedSiteName;
 
     /**
      * Called by the ConfigurationAdmin to bind the {@link #offlineMode} if 
the service becomes available

Copied: 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngineTest.java
 (from r1340995, 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/TestTaxonomyLinker.java)
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngineTest.java?p2=incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngineTest.java&p1=incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/TestTaxonomyLinker.java&r1=1340995&r2=1342580&rev=1342580&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/TestTaxonomyLinker.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngineTest.java
 Fri May 25 11:34:33 2012
@@ -14,9 +14,18 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.stanbol.enhancer.engines.keywordextraction;
+package org.apache.stanbol.enhancer.engines.keywordextraction.engine;
 
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_CREATOR;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_EXTRACTED_FROM;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENTITYANNOTATION;
+import static 
org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllTextAnnotations;
+import static 
org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateEntityAnnotation;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
@@ -24,15 +33,24 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
 import opennlp.tools.tokenize.SimpleTokenizer;
 
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.TypedLiteral;
 import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.stanbol.commons.opennlp.OpenNLP;
 import org.apache.stanbol.commons.opennlp.TextAnalyzer.TextAnalyzerConfig;
 import 
org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
+import 
org.apache.stanbol.enhancer.engines.keywordextraction.engine.KeywordLinkingEngine;
 import 
org.apache.stanbol.enhancer.engines.keywordextraction.impl.ClasspathDataFileProvider;
 import 
org.apache.stanbol.enhancer.engines.keywordextraction.impl.TestSearcherImpl;
 import 
org.apache.stanbol.enhancer.engines.keywordextraction.linking.EntityLinker;
@@ -43,23 +61,31 @@ import org.apache.stanbol.enhancer.engin
 import 
org.apache.stanbol.enhancer.engines.keywordextraction.linking.impl.OpenNlpAnalysedContentFactory;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
 import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper;
 import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
 import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
 import org.apache.stanbol.entityhub.servicesapi.model.Representation;
 import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * TODO: convert this to an integration test!
  * @author Rupert Westenthaler
  */
-public class TestTaxonomyLinker {
+public class KeywordLinkingEngineTest {
+    
+    private final static Logger log = 
LoggerFactory.getLogger(KeywordLinkingEngineTest.class);
 
     /**
      * The context for the tests (same as in TestOpenNLPEnhancementEngine)
@@ -72,10 +98,12 @@ public class TestTaxonomyLinker {
     
     private static final ContentItemFactory ciFactory = 
InMemoryContentItemFactory.getInstance();
     
+    private static final String TEST_REFERENCED_SITE_NAME = "dummRefSiteName";
+    
     static TestSearcherImpl searcher;
     static ValueFactory factory = InMemoryValueFactory.getInstance();
     private static OpenNLP openNLP;
-    
+        
     public static final String NAME = NamespaceEnum.rdfs+"label";
     public static final String TYPE = NamespaceEnum.rdf+"type";
     public static final String REDIRECT = NamespaceEnum.rdfs+"seeAlso";
@@ -115,6 +143,16 @@ public class TestTaxonomyLinker {
         rep.addNaturalText(NAME, "Otago");
         rep.addReference(TYPE, 
OntologicalClasses.DBPEDIA_PLACE.getUnicodeString());
         searcher.addEntity(rep);
+        //add a 2nd Otago (Place and University)
+        rep = factory.createRepresentation("urn:test:Otago_Texas");
+        rep.addNaturalText(NAME, "Otago (Texas)");
+        rep.addNaturalText(NAME, "Otago");
+        rep.addReference(TYPE, 
OntologicalClasses.DBPEDIA_PLACE.getUnicodeString());
+        searcher.addEntity(rep);
+        rep = factory.createRepresentation("urn:test:UniversityOfOtago_Texas");
+        rep.addNaturalText(NAME, "University of Otago (Texas)");
+        rep.addReference(TYPE, 
OntologicalClasses.DBPEDIA_ORGANISATION.getUnicodeString());
+        searcher.addEntity(rep);
     }
 
     @Before
@@ -132,7 +170,11 @@ public class TestTaxonomyLinker {
     public static ContentItem getContentItem(final String id, final String 
text) throws IOException {
         return ciFactory.createContentItem(new UriRef(id),new 
StringSource(text));
     }
-
+    /**
+     * This tests the EntityLinker functionality (if the expected Entities
+     * are linked)
+     * @throws Exception
+     */
     @Test
     public void testTaxonomyLinker() throws Exception {
         OpenNlpAnalysedContentFactory acf = 
OpenNlpAnalysedContentFactory.getInstance(openNLP,
@@ -150,7 +192,7 @@ public class TestTaxonomyLinker {
         expectedResults.put("New Zealand", new ArrayList<String>(
                 Arrays.asList("urn:test:NewZealand")));
         expectedResults.put("University of Otago", new ArrayList<String>(
-                Arrays.asList("urn:test:UniversityOfOtago")));
+                
Arrays.asList("urn:test:UniversityOfOtago","urn:test:UniversityOfOtago_Texas")));
         for(LinkedEntity linkedEntity : linker.getLinkedEntities().values()){
             List<String> expectedSuggestions = 
expectedResults.remove(linkedEntity.getSelectedText());
             assertNotNull("LinkedEntity "+linkedEntity.getSelectedText()+
@@ -176,5 +218,81 @@ public class TestTaxonomyLinker {
             }
         }
     }
-
+    /**
+     * This tests if the Enhancements created by the Engine conform to the
+     * rules defined for the Stanbol Enhancement Structure.
+     * @throws IOException
+     * @throws EngineException
+     */
+    @Test
+    public void testEngine() throws IOException, EngineException {
+        EntityLinkerConfig linkerConfig = new EntityLinkerConfig();
+        linkerConfig.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
+        KeywordLinkingEngine engine = 
KeywordLinkingEngine.createInstance(openNLP, searcher, new 
TextAnalyzerConfig(), 
+            linkerConfig);
+        engine.referencedSiteName = TEST_REFERENCED_SITE_NAME;
+        ContentItem ci = ciFactory.createContentItem(new 
StringSource(TEST_TEXT));
+        //tells the engine that this is an English text
+        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new 
PlainLiteralImpl("en")));
+        //compute the enhancements
+        engine.computeEnhancements(ci);
+        //validate the enhancement results
+        Map<UriRef,Resource> expectedValues = new HashMap<UriRef,Resource>();
+        expectedValues.put(ENHANCER_EXTRACTED_FROM, ci.getUri());
+        
expectedValues.put(DC_CREATOR,LiteralFactory.getInstance().createTypedLiteral(
+            engine.getClass().getName()));
+        //validate created fise:TextAnnotations
+        int numTextAnnotations = validateAllTextAnnotations(ci.getMetadata(), 
TEST_TEXT, expectedValues);
+        assertEquals("Four fise:TextAnnotations are expected by this Test", 4, 
numTextAnnotations);
+        //validate created fise:EntityAnnotations
+        int numEntityAnnotations = validateAllEntityAnnotations(ci, 
expectedValues);
+        assertEquals("Five fise:EntityAnnotations are expected by this Test", 
5, numEntityAnnotations);
+    }
+    /**
+     * Similar to {@link 
EnhancementStructureHelper#validateAllEntityAnnotations(org.apache.clerezza.rdf.core.TripleCollection,
 Map)}
+     * but in addition checks fise:confidence [0..1] and entityhub:site 
properties
+     * @param ci
+     * @param expectedValues
+     * @return
+     */
+    private static int validateAllEntityAnnotations(ContentItem ci, 
Map<UriRef,Resource> expectedValues){
+        Iterator<Triple> entityAnnotationIterator = 
ci.getMetadata().filter(null,
+                RDF_TYPE, ENHANCER_ENTITYANNOTATION);
+        int entityAnnotationCount = 0;
+        while (entityAnnotationIterator.hasNext()) {
+            UriRef entityAnnotation = (UriRef) 
entityAnnotationIterator.next().getSubject();
+            // test if selected Text is added
+            validateEntityAnnotation(ci.getMetadata(), entityAnnotation, 
expectedValues);
+            //validate also that the confidence is between [0..1]
+            Iterator<Triple> confidenceIterator = 
ci.getMetadata().filter(entityAnnotation, ENHANCER_CONFIDENCE, null);
+            //NOTE: the fact that fise:confidence values are TypedLiterals of 
type xsd:double
+            //      is already validated at this point
+            //      Also that there are only [0..1] confidence values
+            assertTrue("Expected fise:confidence value is missing 
(entityAnnotation "
+                    +entityAnnotation+")",confidenceIterator.hasNext());
+            Double confidence = 
LiteralFactory.getInstance().createObject(Double.class,
+                (TypedLiteral)confidenceIterator.next().getObject());
+            assertTrue("fise:confidence MUST BE <= 1 (value= '"+confidence
+                    + "',entityAnnotation " +entityAnnotation+")",
+                    1.0 >= confidence.doubleValue());
+            assertTrue("fise:confidence MUST BE >= 0 (value= '"+confidence
+                    +"',entityAnnotation "+entityAnnotation+")",
+                    0.0 <= confidence.doubleValue());
+            //Test the entityhub:site property (STANBOL-625)
+            UriRef ENTITYHUB_SITE = new UriRef(RdfResourceEnum.site.getUri());
+            Iterator<Triple> entitySiteIterator = 
ci.getMetadata().filter(entityAnnotation, 
+                ENTITYHUB_SITE, null);
+            assertTrue("Expected entityhub:site value is missing 
(entityAnnotation "
+                    +entityAnnotation+")",entitySiteIterator.hasNext());
+            Resource siteResource = entitySiteIterator.next().getObject();
+            assertTrue("entityhub:site values MUST BE Literals", siteResource 
instanceof Literal);
+            assertEquals("'"+TEST_REFERENCED_SITE_NAME+"' is expected as "
+                + "entityhub:site value", TEST_REFERENCED_SITE_NAME, 
+                ((Literal)siteResource).getLexicalForm());
+            assertFalse("entityhub:site MUST HAVE only a single value", 
entitySiteIterator.hasNext());
+            entityAnnotationCount++;
+        }
+        return entityAnnotationCount;
+        
+    }
 }

Propchange: 
incubator/stanbol/trunk/enhancer/engines/keywordextraction/src/test/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngineTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain


Reply via email to