Added: stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineTest.java URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineTest.java?rev=1526402&view=auto ============================================================================== --- stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineTest.java (added) +++ stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineTest.java Thu Sep 26 06:57:10 2013 @@ -0,0 +1,311 @@ +package org.apache.stanbol.enhancer.engines.lucenefstlinking; + +import static org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig.PROCESSED_LANGUAGES; +import static org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig.PROCESS_ONLY_PROPER_NOUNS_STATE; +import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_CREATOR; +import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE; +import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_EXTRACTED_FROM; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Dictionary; +import java.util.HashMap; +import java.util.Hashtable; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import org.apache.clerezza.rdf.core.Literal; +import org.apache.clerezza.rdf.core.LiteralFactory; 
+import org.apache.clerezza.rdf.core.Resource; +import org.apache.clerezza.rdf.core.UriRef; +import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl; +import org.apache.clerezza.rdf.core.impl.TripleImpl; +import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; +import org.apache.solr.core.SolrCore; +import org.apache.stanbol.commons.solr.IndexReference; +import org.apache.stanbol.commons.solr.managed.ManagedSolrServer; +import org.apache.stanbol.commons.solr.managed.standalone.StandaloneEmbeddedSolrServerProvider; +import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory; +import org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig; +import org.apache.stanbol.enhancer.engines.entitylinking.config.LanguageProcessingConfig; +import org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.RedirectProcessingMode; +import org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig; +import org.apache.stanbol.enhancer.engines.lucenefstlinking.CorpusCreationTask; +import org.apache.stanbol.enhancer.engines.lucenefstlinking.CorpusInfo; +import org.apache.stanbol.enhancer.engines.lucenefstlinking.FieldEncodingEnum; +import org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngine; +import org.apache.stanbol.enhancer.engines.lucenefstlinking.IndexConfiguration; +import org.apache.stanbol.enhancer.engines.lucenefstlinking.cache.FastLRUCacheManager; +import org.apache.stanbol.enhancer.engines.lucenefstlinking.cache.SolrEntityCache; +import org.apache.stanbol.enhancer.nlp.json.AnalyzedTextParser; +import org.apache.stanbol.enhancer.nlp.model.AnalysedText; +import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory; +import org.apache.stanbol.enhancer.nlp.model.AnalysedTextUtils; +import org.apache.stanbol.enhancer.nlp.pos.Pos; +import org.apache.stanbol.enhancer.nlp.utils.LanguageConfiguration; +import 
org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper; +import org.apache.stanbol.enhancer.servicesapi.Blob; +import org.apache.stanbol.enhancer.servicesapi.ContentItem; +import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory; +import org.apache.stanbol.enhancer.servicesapi.EngineException; +import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine; +import org.apache.stanbol.enhancer.servicesapi.impl.StreamSource; +import org.apache.stanbol.enhancer.servicesapi.rdf.Properties; +import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper; +import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory; +import org.apache.stanbol.entityhub.servicesapi.model.Representation; +import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory; +import org.apache.stanbol.entityhub.servicesapi.util.ModelUtils; +import org.apache.stanbol.entityhub.yard.solr.impl.SolrYard; +import org.apache.stanbol.entityhub.yard.solr.impl.SolrYardConfig; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class FstLinkingEngineTest { + + private final static Logger log = LoggerFactory.getLogger(FstLinkingEngineTest.class); + + /** + * The SolrYard used for {@link #testSetup()} to check if {@link #REQUIRED_ENTITIES} + * are present in the data.<p> + * NOTE that the {@link FstLinkingEngine} DOES NOT require a SolrYard, but directly + * operates on the #core + */ + protected static SolrYard yard; + protected static SolrCore core; + private static IndexConfiguration fstConfig; + /** + * The SolrDirectoryManager also tested within this unit test + */ + public static final String TEST_YARD_ID = "dbpedia"; + public static final String TEST_SOLR_CORE_NAME = "dbpedia"; + public static final String TEST_SOLR_CORE_CONFIGURATION = "dbpedia_26k.solrindex.bz2"; + protected static final 
String TEST_INDEX_REL_PATH = File.separatorChar + "target" + File.separatorChar + + ManagedSolrServer.DEFAULT_SOLR_DATA_DIR; + + public static final String TEST_TEXT_FILE = "merkel.txt"; + public static final String TEST_TEXT_NLP_FILE = "merkel_nlp.json"; + + private static final Literal EN_LANGUAGE = LiteralFactory.getInstance().createTypedLiteral("en"); + + protected static final String DBPEDIA = "http://dbpedia.org/resource/"; + + /** + * List used in {@link #testSetup()} to validate that all expected entities + * are contained in the SolrYard initialised based on the + * {@link #TEST_SOLR_CORE_CONFIGURATION}. + */ + private static final List<String> REQUIRED_ENTITIES = Arrays.asList( + DBPEDIA+"Christian_Democratic_Union_(Germany)", DBPEDIA+"Angela_Merkel", + DBPEDIA+"Germany", DBPEDIA+"Social_Democratic_Party_of_Germany", + DBPEDIA+"Greece"); + + private ContentItemFactory cif = InMemoryContentItemFactory.getInstance(); + private AnalysedTextFactory atf = AnalysedTextFactory.getDefaultInstance(); + private ContentItem ci; + private String content; + + /** + * Used with the {@link EnhancementStructureHelper} to validate Enhancement + * results + */ + private static Map<UriRef,Resource> EXPECTED_ENHANCEMENT_VALUES; + static{ + EXPECTED_ENHANCEMENT_VALUES = new HashMap<UriRef,Resource>(); + EXPECTED_ENHANCEMENT_VALUES.put(DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral( + FstLinkingEngine.class.getName())); + //adding null as expected for confidence makes it a required property + EXPECTED_ENHANCEMENT_VALUES.put(Properties.ENHANCER_CONFIDENCE, null); + } + + + @BeforeClass + public static void setup() throws Exception { + // get the working directory + // use property substitution to test this feature! + String prefix = System.getProperty("basedir") == null ? "." 
: "${basedir}"; + String solrServerDir = prefix + TEST_INDEX_REL_PATH; + log.info("Test Solr Server Directory: {}", solrServerDir); + System.setProperty(ManagedSolrServer.MANAGED_SOLR_DIR_PROPERTY, solrServerDir); + SolrYardConfig config = new SolrYardConfig(TEST_YARD_ID, TEST_SOLR_CORE_NAME); + config.setAllowInitialisation(false); + config.setIndexConfigurationName(TEST_SOLR_CORE_CONFIGURATION); //the dbpedia default data + config.setAllowInitialisation(true); //init from datafile provider + config.setName("DBpedia.org default data"); + config.setDescription("Data used for FstLinkingEngie tests"); + // create the Yard used for the tests + IndexReference solrIndexRef = IndexReference.parse(config.getSolrServerLocation()); + + SolrServer server = StandaloneEmbeddedSolrServerProvider.getInstance().getSolrServer( + solrIndexRef, config.getIndexConfigurationName()); + Assert.assertNotNull("Unable to initialise SolrServer for testing",server); + core = ((EmbeddedSolrServer)server).getCoreContainer().getCore( + solrIndexRef.getIndex()); + Assert.assertNotNull("Unable to get SolrCore '" + config.getIndexConfigurationName() + + "' from SolrServer "+server, core); + yard = new SolrYard(server,config,null); + //setup the index configuration + LanguageConfiguration langConf = new LanguageConfiguration("not.used", + new String[]{"en;field=rdfs:label;generate=true"}); + fstConfig = new IndexConfiguration(langConf, core, FieldEncodingEnum.SolrYard); + fstConfig.setExecutorService(Executors.newFixedThreadPool(1)); + fstConfig.setTypeField("rdf:type"); + fstConfig.setRankingField("entityhub:entityRank"); + //fstConfig.setEntityCacheManager(new FastLRUCacheManager(2048)); + //activate the FST config + fstConfig.activate(); //activate this configuration + + //now create the FST modles and wait until finished + List<Future<?>> creationTasks = new ArrayList<Future<?>>(); + for(CorpusInfo corpus : fstConfig.getCorpora()){ + //check if the fst does not exist and the fstInfo allows 
creation + if(!corpus.isFstFile() && corpus.allowCreation){ + //create a task on the FST corpus creation service + creationTasks.add(fstConfig.getExecutorService().submit( + new CorpusCreationTask(fstConfig, corpus))); + } + } + for(Future<?> future : creationTasks){ //wait for completion + future.get(); + } + //validate that the index contains the expected entities + validateTestIndex(); + } + + private static void validateTestIndex() throws Exception { + log.info("check availability of {} entities", REQUIRED_ENTITIES.size()); + for(String context : REQUIRED_ENTITIES){ + log.debug(" > check Entity {}",context); + Representation rep = yard.getRepresentation(context); + assertNotNull(rep); + assertEquals(rep.getId(),context); + if(log.isDebugEnabled()){ + log.debug("Data for Entity {}: \n {}",rep.getId(), + ModelUtils.getRepresentationInfo(rep)); + } + } + log.info(" ... all Entities present"); + } + + + @AfterClass + public static void cleanup() throws Exception { + if(yard != null){ + yard.close(); + } + yard = null; + } + + /** + * Initialises the {@link #ci} and {@link #content} fields for tests. 
+ * It creates a ContentItem containing a '<code>text/plain</code>' + * {@link Blob} for the {@value #TEST_TEXT_FILE} and an {@link AnalysedText} + * filled with the NLP analysis results stored in + * {@link #TEST_TEXT_NLP_FILE} + * @return the {@link ContentItem} as used for the tests + * @throws IOException on any IO related error while reading the test files + */ + @Before + public void setupTest() throws IOException { + //create a contentItem for the plain text used for testing + InputStream is = FstLinkingEngineTest.class.getClassLoader().getResourceAsStream(TEST_TEXT_FILE); + Assert.assertNotNull("Unable to load '"+TEST_TEXT_FILE+"' via classpath",is); + ContentItem ci = cif.createContentItem(new StreamSource(is,"text/plain")); + AnalysedText at = atf.createAnalysedText(ci, ci.getBlob()); + is.close(); + //parse the prepared NLP results and add it to the ContentItem + is = FstLinkingEngineTest.class.getClassLoader().getResourceAsStream(TEST_TEXT_NLP_FILE); + Assert.assertNotNull("Unable to load '"+TEST_TEXT_NLP_FILE+"' via classpath",is); + AnalyzedTextParser.getDefaultInstance().parse(is, Charset.forName("UTF-8"), at); + is.close(); + //set the language of the contentItem + ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, + EN_LANGUAGE)); + //set the contentItem and also the content + this.ci = ci; + this.content = at.getText().toString(); + } + @After + public void cleanupTest() { + ci = null; + content = null; + } + + @Test + public void testFstLinkingWithProperNouns() throws Exception { + Dictionary<String,Object> dict = new Hashtable<String,Object>(); + dict.put(PROCESSED_LANGUAGES, Arrays.asList("en;lmmtip;uc=LINK;prob=0.75;pprob=0.75")); + dict.put(PROCESS_ONLY_PROPER_NOUNS_STATE, true); + TextProcessingConfig tpc = TextProcessingConfig.createInstance(dict); + EntityLinkerConfig elc = new EntityLinkerConfig(); + elc.setMinFoundTokens(2);//this is assumed by this test + elc.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW); + 
FstLinkingEngine engine = new FstLinkingEngine("proper-noun-linking", + fstConfig, tpc, elc); + processConentItem(engine); + validateEnhancements(); + } + + @Test + public void testFstLinkingWithNouns() throws Exception { + Dictionary<String,Object> dict = new Hashtable<String,Object>(); + dict.put(PROCESSED_LANGUAGES, Arrays.asList("en;lmmtip;uc=LINK;prob=0.75;pprob=0.75")); + dict.put(PROCESS_ONLY_PROPER_NOUNS_STATE, false); + TextProcessingConfig tpc = TextProcessingConfig.createInstance(dict); + EntityLinkerConfig elc = new EntityLinkerConfig(); + elc.setMinFoundTokens(2);//this is assumed by this test + elc.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW); + FstLinkingEngine engine = new FstLinkingEngine("proper-noun-linking", + fstConfig, tpc, elc); + processConentItem(engine); + validateEnhancements(); + + } + + /** + * @param expected + */ + private int[] validateEnhancements() { + Map<UriRef,Resource> expected = new HashMap<UriRef,Resource>(EXPECTED_ENHANCEMENT_VALUES); + expected.put(ENHANCER_EXTRACTED_FROM, ci.getUri()); + int[] num = new int[2]; + num[0] = EnhancementStructureHelper.validateAllTextAnnotations(ci.getMetadata(), + content, expected); + log.info(" ... validated {} fise:TextAnnotation",num[0]); + num[1] = EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(), + expected); + log.info(" ... validated {} fise:EntityAnnotation",num[1]); + return num; + } + + /** + * Processes the {@link #ci} with the parsed engine. + * @param engine + * @return returns {@link #ci} as convenience + * @throws EngineException + */ + private ContentItem processConentItem(FstLinkingEngine engine) throws EngineException { + Assert.assertEquals("The FST Linking engine is expected to enhance the " + + "test ContentItem EnhancementEngine.ENHANCE_ASYNC", + EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(ci)); + engine.computeEnhancements(ci); + return ci; + } +}
Added: stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/impl/DbpediaDefaultDataFileProvider.java URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/impl/DbpediaDefaultDataFileProvider.java?rev=1526402&view=auto ============================================================================== --- stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/impl/DbpediaDefaultDataFileProvider.java (added) +++ stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/impl/DbpediaDefaultDataFileProvider.java Thu Sep 26 06:57:10 2013 @@ -0,0 +1,33 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ +package org.apache.stanbol.enhancer.engines.lucenefstlinking.impl; + +import org.apache.stanbol.commons.solr.managed.standalone.ClassPathDataFileProvider; +import org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineTest; +/** + * Ensures that the {@link FstLinkingEngineTest#TEST_SOLR_CORE_CONFIGURATION} + * is loaded via the classpath + * + */ +public class DbpediaDefaultDataFileProvider extends ClassPathDataFileProvider { + + private static final String DATA_FILES_DIR = "org/apache/stanbol/data/site/dbpedia/default/index/"; + + public DbpediaDefaultDataFileProvider() { + super(null,DATA_FILES_DIR); + } +} Added: stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/resources/META-INF/services/org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/resources/META-INF/services/org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider?rev=1526402&view=auto ============================================================================== --- stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/resources/META-INF/services/org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider (added) +++ stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/resources/META-INF/services/org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider Thu Sep 26 06:57:10 2013 @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.stanbol.enhancer.engines.lucenefstlinking.impl.DbpediaDefaultDataFileProvider Modified: stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/resources/log4j.properties URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/resources/log4j.properties?rev=1526402&r1=1526401&r2=1526402&view=diff ============================================================================== --- stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/resources/log4j.properties (original) +++ stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/resources/log4j.properties Thu Sep 26 06:57:10 2013 @@ -21,4 +21,4 @@ log4j.appender.stdout=org.apache.log4j.C log4j.appender.stdout.Target=System.out log4j.appender.stdout.layout=org.apache.log4j.PatternLayout log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n -log4j.logger.org.apache.stanbol.enhancer.engines.keywordextraction=DEBUG \ No newline at end of file +log4j.logger.org.apache.stanbol.enhancer.engines.lucenefstlinking=DEBUG \ No newline at end of file Added: stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/resources/merkel.txt URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/resources/merkel.txt?rev=1526402&view=auto ============================================================================== --- stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/resources/merkel.txt (added) +++ stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/resources/merkel.txt Thu Sep 26 
06:57:10 2013 @@ -0,0 +1 @@ +There has been a worried response in Greece to the Sunday's election in Germany. The win of Chancellor Angela Merkel and the CSU means that there will not be a radical change in European policy. Greeks would have preferred SPD candidate Peer Steinbrueck, whose party lost Sunday. \ No newline at end of file Added: stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/resources/merkel_nlp.json URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/resources/merkel_nlp.json?rev=1526402&view=auto ============================================================================== --- stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/resources/merkel_nlp.json (added) +++ stanbol/trunk/enhancement-engines/lucenefstlinking/src/test/resources/merkel_nlp.json Thu Sep 26 06:57:10 2013 @@ -0,0 +1,644 @@ +{ + "spans" : [ { + "type" : "Text", + "start" : 0, + "end" : 279 + }, { + "type" : "Sentence", + "start" : 0, + "end" : 80 + }, { + "type" : "Token", + "start" : 0, + "end" : 5, + "stanbol.enhancer.nlp.pos" : { + "tag" : "EX", + "pos" : 190, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + }, + "stanbol.enhancer.nlp.morpho" : { + "lemma" : "there", + "pos" : [ { + "tag" : "EX", + "pos" : 190 + } ], + "class" : "org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures" + } + }, { + "type" : "Token", + "start" : 6, + "end" : 9, + "stanbol.enhancer.nlp.pos" : { + "tag" : "VBZ", + "pos" : 238, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + }, + "stanbol.enhancer.nlp.morpho" : { + "lemma" : "have", + "pos" : [ { + "tag" : "VBZ", + "pos" : 238 + } ], + "class" : "org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures" + } + }, { + "type" : "Token", + "start" : 10, + "end" : 14, + "stanbol.enhancer.nlp.pos" : { + "tag" : "VBN", + "pos" : 237, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + }, + "stanbol.enhancer.nlp.morpho" : { + "lemma" : "be", + "pos" : [ { + "tag" : "VBN", 
+ "pos" : 237 + } ], + "class" : "org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures" + } + }, { + "type" : "Token", + "start" : 15, + "end" : 16, + "stanbol.enhancer.nlp.pos" : { + "tag" : "DT", + "pos" : 57, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 17, + "end" : 24, + "stanbol.enhancer.nlp.pos" : { + "tag" : "JJ", + "lc" : 2, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 25, + "end" : 33, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NN", + "pos" : [ 45, 163 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 34, + "end" : 36, + "stanbol.enhancer.nlp.pos" : { + "tag" : "IN", + "pos" : [ 12, 37 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Chunk", + "start" : 37, + "end" : 43, + "stanbol.enhancer.nlp.ner" : { + "tag" : "LOCATION", + "uri" : "http://dbpedia.org/ontology/Place", + "class" : "org.apache.stanbol.enhancer.nlp.ner.NerTag" + } + }, { + "type" : "Token", + "start" : 37, + "end" : 43, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NNP", + "pos" : [ 53, 163 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 44, + "end" : 46, + "stanbol.enhancer.nlp.pos" : { + "tag" : "TO", + "lc" : 3, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 47, + "end" : 50, + "stanbol.enhancer.nlp.pos" : { + "tag" : "DT", + "pos" : 57, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Chunk", + "start" : 51, + "end" : 57, + "stanbol.enhancer.nlp.ner" : { + "tag" : "DATE", + "class" : "org.apache.stanbol.enhancer.nlp.ner.NerTag" + } + }, { + "type" : "Token", + "start" : 51, + "end" : 57, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NNP", + "pos" : [ 53, 163 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" 
: 57, + "end" : 59, + "stanbol.enhancer.nlp.pos" : { + "tag" : "POS", + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 60, + "end" : 68, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NN", + "pos" : [ 45, 163 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 69, + "end" : 71, + "stanbol.enhancer.nlp.pos" : { + "tag" : "IN", + "pos" : [ 12, 37 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Chunk", + "start" : 72, + "end" : 79, + "stanbol.enhancer.nlp.ner" : { + "tag" : "LOCATION", + "uri" : "http://dbpedia.org/ontology/Place", + "class" : "org.apache.stanbol.enhancer.nlp.ner.NerTag" + } + }, { + "type" : "Token", + "start" : 72, + "end" : 79, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NNP", + "pos" : [ 53, 163 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 79, + "end" : 80, + "stanbol.enhancer.nlp.pos" : { + "tag" : ".", + "pos" : 119, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Sentence", + "start" : 81, + "end" : 194 + }, { + "type" : "Token", + "start" : 81, + "end" : 84, + "stanbol.enhancer.nlp.pos" : { + "tag" : "DT", + "pos" : 57, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + }, + "stanbol.enhancer.nlp.morpho" : { + "lemma" : "the", + "pos" : [ { + "tag" : "DT", + "pos" : 57 + } ], + "class" : "org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures" + } + }, { + "type" : "Token", + "start" : 85, + "end" : 88, + "stanbol.enhancer.nlp.pos" : { + "tag" : "VB", + "pos" : 233, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 89, + "end" : 91, + "stanbol.enhancer.nlp.pos" : { + "tag" : "IN", + "pos" : [ 12, 37 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 92, + "end" : 102, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NNP", 
+ "pos" : [ 53, 163 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Chunk", + "start" : 103, + "end" : 116, + "stanbol.enhancer.nlp.ner" : { + "tag" : "PERSON", + "uri" : "http://dbpedia.org/ontology/Person", + "class" : "org.apache.stanbol.enhancer.nlp.ner.NerTag" + } + }, { + "type" : "Token", + "start" : 103, + "end" : 109, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NNP", + "pos" : [ 53, 163 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 110, + "end" : 116, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NNP", + "pos" : [ 53, 163 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 117, + "end" : 120, + "stanbol.enhancer.nlp.pos" : { + "tag" : "CC", + "pos" : 31, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 121, + "end" : 124, + "stanbol.enhancer.nlp.pos" : { + "tag" : "DT", + "pos" : 57, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Chunk", + "start" : 125, + "end" : 128, + "stanbol.enhancer.nlp.ner" : { + "tag" : "ORGANIZATION", + "uri" : "http://dbpedia.org/ontology/Organisation", + "class" : "org.apache.stanbol.enhancer.nlp.ner.NerTag" + } + }, { + "type" : "Token", + "start" : 125, + "end" : 128, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NNP", + "pos" : [ 53, 163 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 129, + "end" : 134, + "stanbol.enhancer.nlp.pos" : { + "tag" : "VBZ", + "pos" : 238, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + }, + "stanbol.enhancer.nlp.morpho" : { + "lemma" : "mean", + "pos" : [ { + "tag" : "VBZ", + "pos" : 238 + } ], + "class" : "org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures" + } + }, { + "type" : "Token", + "start" : 135, + "end" : 139, + "stanbol.enhancer.nlp.pos" : { + "tag" : "IN", + "pos" : [ 12, 37 ], + "class" : 
"org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 140, + "end" : 145, + "stanbol.enhancer.nlp.pos" : { + "tag" : "EX", + "pos" : 190, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 146, + "end" : 150, + "stanbol.enhancer.nlp.pos" : { + "tag" : "MD", + "pos" : 219, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 151, + "end" : 154, + "stanbol.enhancer.nlp.pos" : { + "tag" : "RB", + "lc" : 4, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 155, + "end" : 157, + "stanbol.enhancer.nlp.pos" : { + "tag" : "VB", + "pos" : 233, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 158, + "end" : 159, + "stanbol.enhancer.nlp.pos" : { + "tag" : "DT", + "pos" : 57, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 160, + "end" : 167, + "stanbol.enhancer.nlp.pos" : { + "tag" : "JJ", + "lc" : 2, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 168, + "end" : 174, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NN", + "pos" : [ 45, 163 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 175, + "end" : 177, + "stanbol.enhancer.nlp.pos" : { + "tag" : "IN", + "pos" : [ 12, 37 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Chunk", + "start" : 178, + "end" : 186, + "stanbol.enhancer.nlp.ner" : { + "tag" : "MISC", + "class" : "org.apache.stanbol.enhancer.nlp.ner.NerTag" + } + }, { + "type" : "Token", + "start" : 178, + "end" : 186, + "stanbol.enhancer.nlp.pos" : { + "tag" : "JJ", + "lc" : 2, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + }, + "stanbol.enhancer.nlp.morpho" : { + "lemma" : "european", + "pos" : [ { + "tag" : "JJ", + "lc" : 2 + } ], + "class" 
: "org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures" + } + }, { + "type" : "Token", + "start" : 187, + "end" : 193, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NN", + "pos" : [ 45, 163 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 193, + "end" : 194, + "stanbol.enhancer.nlp.pos" : { + "tag" : ".", + "pos" : 119, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Sentence", + "start" : 195, + "end" : 279 + }, { + "type" : "Chunk", + "start" : 195, + "end" : 201, + "stanbol.enhancer.nlp.ner" : { + "tag" : "MISC", + "class" : "org.apache.stanbol.enhancer.nlp.ner.NerTag" + } + }, { + "type" : "Token", + "start" : 195, + "end" : 201, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NNPS", + "pos" : [ 53, 157 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 202, + "end" : 207, + "stanbol.enhancer.nlp.pos" : { + "tag" : "MD", + "pos" : 219, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 208, + "end" : 212, + "stanbol.enhancer.nlp.pos" : { + "tag" : "VB", + "pos" : 233, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 213, + "end" : 222, + "stanbol.enhancer.nlp.pos" : { + "tag" : "VBN", + "pos" : 237, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + }, + "stanbol.enhancer.nlp.morpho" : { + "lemma" : "prefer", + "pos" : [ { + "tag" : "VBN", + "pos" : 237 + } ], + "class" : "org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures" + } + }, { + "type" : "Chunk", + "start" : 223, + "end" : 226, + "stanbol.enhancer.nlp.ner" : { + "tag" : "ORGANIZATION", + "uri" : "http://dbpedia.org/ontology/Organisation", + "class" : "org.apache.stanbol.enhancer.nlp.ner.NerTag" + } + }, { + "type" : "Token", + "start" : 223, + "end" : 226, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NN", + "pos" : [ 45, 163 ], + "class" : 
"org.apache.stanbol.enhancer.nlp.pos.PosTag" + }, + "stanbol.enhancer.nlp.morpho" : { + "lemma" : "spd", + "pos" : [ { + "tag" : "NN", + "pos" : [ 45, 163 ] + } ], + "class" : "org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures" + } + }, { + "type" : "Token", + "start" : 227, + "end" : 236, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NN", + "pos" : [ 45, 163 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Chunk", + "start" : 237, + "end" : 253, + "stanbol.enhancer.nlp.ner" : { + "tag" : "PERSON", + "uri" : "http://dbpedia.org/ontology/Person", + "class" : "org.apache.stanbol.enhancer.nlp.ner.NerTag" + } + }, { + "type" : "Token", + "start" : 237, + "end" : 241, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NNP", + "pos" : [ 53, 163 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 242, + "end" : 253, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NNP", + "pos" : [ 53, 163 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 253, + "end" : 254, + "stanbol.enhancer.nlp.pos" : { + "tag" : ",", + "pos" : 140, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 255, + "end" : 260, + "stanbol.enhancer.nlp.pos" : { + "tag" : "WP$", + "pos" : [ 105, 109 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 261, + "end" : 266, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NN", + "pos" : [ 45, 163 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 267, + "end" : 271, + "stanbol.enhancer.nlp.pos" : { + "tag" : "VBD", + "pos" : 237, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + }, + "stanbol.enhancer.nlp.morpho" : { + "lemma" : "lose", + "pos" : [ { + "tag" : "VBD", + "pos" : 237 + } ], + "class" : "org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures" + } + }, { + "type" : "Chunk", + 
"start" : 272, + "end" : 278, + "stanbol.enhancer.nlp.ner" : { + "tag" : "DATE", + "class" : "org.apache.stanbol.enhancer.nlp.ner.NerTag" + } + }, { + "type" : "Token", + "start" : 272, + "end" : 278, + "stanbol.enhancer.nlp.pos" : { + "tag" : "NNP", + "pos" : [ 53, 163 ], + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + }, { + "type" : "Token", + "start" : 278, + "end" : 279, + "stanbol.enhancer.nlp.pos" : { + "tag" : ".", + "pos" : 119, + "class" : "org.apache.stanbol.enhancer.nlp.pos.PosTag" + } + } ] +} \ No newline at end of file
