Author: rwesten Date: Wed Sep 19 13:43:19 2012 New Revision: 1387596 URL: http://svn.apache.org/viewvc?rev=1387596&view=rev Log: STANBOL-733: all Engines now support a default instance; added an nlp-chain configuration to the launcher; LanguageConfiguration utility now supports parameters (currently used to optionally parse the name of the OpenNLP model)
Added: incubator/stanbol/branches/stanbol-nlp-processing/nlp-launcher/src/main/resources/resources/config/org.apache.stanbol.enhancer.chain.list.impl.ListChain-nlpchain.config Modified: incubator/stanbol/branches/stanbol-nlp-processing/data/sentiment/sentiwordnet/src/main/resources/org/apache/stanbol/data/sentiment/sentiwordnet/ (props changed) incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/src/main/java/org/apache/stanbol/enhancer/engines/nlp2rdf/engine/Nlp2RdfMetadataEngine.java incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/ChunkingEngine.java incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/POSTaggingEngine.java incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java Propchange: incubator/stanbol/branches/stanbol-nlp-processing/data/sentiment/sentiwordnet/src/main/resources/org/apache/stanbol/data/sentiment/sentiwordnet/ ------------------------------------------------------------------------------ --- svn:ignore (added) +++ svn:ignore Wed Sep 19 13:43:19 2012 @@ -0,0 +1,3 @@ +LICENSE.SentiWordNet + +SentiWordNet_3.0.0_20120206.txt Modified: incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/src/main/java/org/apache/stanbol/enhancer/engines/nlp2rdf/engine/Nlp2RdfMetadataEngine.java URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/src/main/java/org/apache/stanbol/enhancer/engines/nlp2rdf/engine/Nlp2RdfMetadataEngine.java?rev=1387596&r1=1387595&r2=1387596&view=diff ============================================================================== --- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/src/main/java/org/apache/stanbol/enhancer/engines/nlp2rdf/engine/Nlp2RdfMetadataEngine.java (original) +++ incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/src/main/java/org/apache/stanbol/enhancer/engines/nlp2rdf/engine/Nlp2RdfMetadataEngine.java Wed Sep 19 13:43:19 2012 @@ -43,6 +43,7 @@ import org.apache.stanbol.enhancer.servi import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine; import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper; import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine; +import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum; import org.osgi.service.cm.ConfigurationException; import org.osgi.service.component.ComponentContext; import org.slf4j.Logger; @@ -58,7 +59,8 @@ import org.slf4j.LoggerFactory; public class Nlp2RdfMetadataEngine extends AbstractEnhancementEngine<RuntimeException,RuntimeException> { private final Logger log = LoggerFactory.getLogger(Nlp2RdfMetadataEngine.class); - + //TODO: replace this with a reald ontology + private final static UriRef SENTIMENT_PROPERTY = new UriRef(NamespaceEnum.fise+"sentiment-value"); private final LiteralFactory lf = LiteralFactory.getInstance(); /** @@ -159,10 +161,16 @@ public class Nlp2RdfMetadataEngine exten writePos(metadata, span, current); writePhrase(metadata, span, current); //OlIA does not include Sentiments -// Value<SentimentTag> sentiment = span.getAnnotation(NlpAnnotations.sentimentAnnotation); -// if(sentiment != null){ -// -// } + + Value<SentimentTag> sentiment = 
span.getAnnotation(NlpAnnotations.sentimentAnnotation); + if(sentiment != null){ + double sentimentVal = sentiment.probability(); + if(sentiment.value().isNegative()) { + sentimentVal = sentimentVal * -1; + } + metadata.add(new TripleImpl(current, SENTIMENT_PROPERTY, + lf.createTypedLiteral(sentimentVal))); + } } } finally { ci.getLock().writeLock().unlock(); Modified: incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/ChunkingEngine.java URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/ChunkingEngine.java?rev=1387596&r1=1387595&r2=1387596&view=diff ============================================================================== --- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/ChunkingEngine.java (original) +++ incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/ChunkingEngine.java Wed Sep 19 13:43:19 2012 @@ -60,12 +60,14 @@ import org.apache.stanbol.enhancer.nlp.m import org.apache.stanbol.enhancer.nlp.model.annotation.Value; import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag; import org.apache.stanbol.enhancer.nlp.pos.PosTag; +import org.apache.stanbol.enhancer.nlp.utils.LanguageConfiguration; import org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper; import org.apache.stanbol.enhancer.servicesapi.ContentItem; import org.apache.stanbol.enhancer.servicesapi.EngineException; import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine; import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper; import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine; 
+import org.osgi.framework.Constants; import org.osgi.service.cm.ConfigurationException; import org.osgi.service.component.ComponentContext; import org.slf4j.Logger; @@ -81,13 +83,19 @@ import org.slf4j.LoggerFactory; * The noun phrase detector requires a {@link org.apache.stanbol.enhancer.engines.opennlp.pos.model.POSContentPart} to * be present in the content item and will extend each {@link org.apache.stanbol.enhancer.engines.opennlp.pos.model.POSSentence} * with an array of chunks. - * <p/> - * Author: Sebastian Schaffert + * + * @author Sebastian Schaffert */ -@Component(immediate = true, metatype = true, configurationFactory = true, policy = ConfigurationPolicy.REQUIRE) +@Component(immediate = true, metatype = true, + configurationFactory = true, //allow multiple instances to be configured + policy = ConfigurationPolicy.OPTIONAL) //create the default instance with the default config @Service @Properties(value={ - @Property(name= EnhancementEngine.PROPERTY_NAME,value="chunker") + @Property(name=EnhancementEngine.PROPERTY_NAME,value="opennlp-chunker"), + @Property(name=ChunkingEngine.CONFIG_LANGUAGES, + value = {"de;model=OpenNLP_1.5.1-German-Chunker-TigerCorps07.zip","*"}), + @Property(name=ChunkingEngine.MIN_CHUNK_SCORE), + @Property(name=Constants.SERVICE_RANKING,intValue=-100) //give the default instance a ranking < 0 }) public class ChunkingEngine extends AbstractEnhancementEngine<RuntimeException,RuntimeException> { @@ -95,19 +103,18 @@ public class ChunkingEngine extends Abst * Language configuration. Takes a list of ISO language codes of supported languages. Currently supported * are the languages given as default value. 
*/ - @Property(value = {"de;OpenNLP_1.5.1-German-Chunker-TigerCorps07.zip","*"}) public static final String CONFIG_LANGUAGES = "org.apache.stanbol.enhancer.chunker.languages"; - @Property public static final String MIN_CHUNK_SCORE = "org.apache.stanbol.enhancer.chunker.minScore"; public static final String[] AVAILABLE_LANGUAGES = new String[] {"en","de"}; + private static final String MODEL_PARAM_NAME = "model"; private static Logger log = LoggerFactory.getLogger(ChunkingEngine.class); - - private Double minChunkScore; + private LanguageConfiguration languageConfiguration = new LanguageConfiguration(CONFIG_LANGUAGES, + new String []{"de;"+MODEL_PARAM_NAME+"=OpenNLP_1.5.1-German-Chunker-TigerCorps07.zip","*"}); @Reference private OpenNLP openNLP; @@ -117,23 +124,7 @@ public class ChunkingEngine extends Abst */ private PhraseTagSetRegistry tagSetRegistry = PhraseTagSetRegistry.getInstance(); - /** - * Holds as key explicitly enabled languages and as value the name of the - * OpenNLP model used for Chunking. If the value is <code>null</code> this - * indicates that the default model ( - * provided by {@link OpenNLP#getChunkerModel(String)}) will be used.<p> - * NOTE: a configured language does not automatically mean that also the - * requested model is available. 
- */ - private Map<String,String> configuredLanguages; - /** - * Languages that are explicitly excluded - */ - private Set<String> excludedLanguages; - /** - * if '*' is used as language configuration - */ - private boolean allowAll; + private Double minChunkScore; /** @@ -378,62 +369,12 @@ public class ChunkingEngine extends Abst } //read the language configuration - configuredLanguages = new HashMap<String,String>(); - excludedLanguages = new HashSet<String>(); - allowAll = false; - if(properties.get(CONFIG_LANGUAGES) != null) { - String[] languages = (String[])properties.get(CONFIG_LANGUAGES); - - for(String lang : languages) { - String modelName; - int seperatorIndex = lang.indexOf(';'); - if(seperatorIndex >= 0){ - if(seperatorIndex <lang.length()-2){ - modelName = lang.substring(seperatorIndex+1).trim(); - } else { - modelName = null; - } - lang = lang.substring(0, seperatorIndex).trim(); - } else { - modelName = null; - } - if(lang.charAt(0) == '!'){ //exclude - lang = lang.substring(1); - if(configuredLanguages.containsKey(lang)){ - throw new ConfigurationException(CONFIG_LANGUAGES, - "Langauge '"+lang+"' is both included and excluded (config: " - + Arrays.toString(languages)+""); - } - excludedLanguages.add(lang); - if(modelName != null){ - log.warn("Parsed model names are ignored for excluded languages " - + "(langauge: {}, modelName: {})!", lang,modelName); - } - } else if("*".equals(lang)){ - allowAll = true; - if(modelName != null){ - log.warn("A parsed model name is ignored for the wildcard " - + "langauge (modelName: {})!", lang,modelName); - } - } else if(!lang.isEmpty()){ - if(excludedLanguages.contains(lang)){ - throw new ConfigurationException(CONFIG_LANGUAGES, - "Langauge '"+lang+"' is both included and excluded (config: " - + Arrays.toString(languages)+""); - } - configuredLanguages.put(lang,modelName); - } - } - } else { - allowAll = true; - } + languageConfiguration.setConfiguration(properties); } @Deactivate protected void 
deactivate(ComponentContext context){ - this.allowAll = false; - this.configuredLanguages = null; - this.excludedLanguages = null; + this.languageConfiguration.setDefault(); this.minChunkScore = null; super.deactivate(context); } @@ -450,9 +391,7 @@ public class ChunkingEngine extends Abst * language is not configured as beeing processed. */ boolean isLangaugeConfigured(String language, boolean exception){ - boolean state = allowAll ? - (!excludedLanguages.contains(language)) : - configuredLanguages.containsKey(language); + boolean state = languageConfiguration.isLanguage(language); if(!state && exception){ throw new IllegalStateException("Language "+language+" is not included " + "by the LanguageConfiguration of this engine (name "+ getName() @@ -465,7 +404,7 @@ public class ChunkingEngine extends Abst private ChunkerME initChunker(String language) { isLangaugeConfigured(language, true); //check if the parsed language is ok - String modelName = configuredLanguages.get(language); + String modelName = languageConfiguration.getParameter(language, MODEL_PARAM_NAME); ChunkerModel model; try { if(modelName == null){ // the default model Modified: incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/POSTaggingEngine.java URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/POSTaggingEngine.java?rev=1387596&r1=1387595&r2=1387596&view=diff ============================================================================== --- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/POSTaggingEngine.java (original) +++ 
incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/POSTaggingEngine.java Wed Sep 19 13:43:19 2012 @@ -76,6 +76,7 @@ import org.apache.stanbol.enhancer.servi import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper; import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper; import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine; +import org.osgi.framework.Constants; import org.osgi.service.cm.ConfigurationException; import org.osgi.service.component.ComponentContext; import org.slf4j.Logger; @@ -85,14 +86,18 @@ import org.slf4j.LoggerFactory; * A german language POS tagger. Requires that the content item has a text/plain part and a * language id of "de". Adds a POSContentPart to the content item that can be used for further * processing by other modules. - * <p/> - * Author: Sebastian Schaffert + * + * @author Sebastian Schaffert */ -@Component(immediate = true, metatype = true, configurationFactory = true, policy = ConfigurationPolicy.REQUIRE) +@Component(immediate = true, metatype = true, + configurationFactory = true, //allow multiple instances + policy = ConfigurationPolicy.OPTIONAL) //create a default instance with the default configuration @Service @Properties(value={ - @Property(name= EnhancementEngine.PROPERTY_NAME,value="pos-tagger") + @Property(name= EnhancementEngine.PROPERTY_NAME,value="opennlp-pos"), + @Property(name=POSTaggingEngine.CONFIG_LANGUAGES, value = {"*"},cardinality=Integer.MAX_VALUE), + @Property(name=Constants.SERVICE_RANKING,intValue=-100) //give the default instance a ranking < 0 }) public class POSTaggingEngine extends AbstractEnhancementEngine<RuntimeException,RuntimeException> { @@ -101,17 +106,18 @@ public class POSTaggingEngine extends Ab * Language configuration. Takes a list of ISO language codes of supported languages. 
Currently supported * are the languages given as default value. */ - @Property(value = {"en","de","da","es","sv","pt","nl"}) public static final String CONFIG_LANGUAGES = "org.apache.stanbol.enhancer.pos.languages"; - - public static final String[] AVAILABLE_LANGUAGES = new String[] {"en","de","da","es","sv","pt","nl"}; + /** + * The parameter name used to configure the name of the OpenNLP model used for pos tagging + */ + private static final String MODEL_NAME_PARAM = "model"; private static Logger log = LoggerFactory.getLogger(POSTaggingEngine.class); //Langauge configuration - private LanguageConfiguration languageConfig = new LanguageConfiguration(CONFIG_LANGUAGES, AVAILABLE_LANGUAGES); + private LanguageConfiguration languageConfig = new LanguageConfiguration(CONFIG_LANGUAGES,new String[]{"*"}); // private Set<String> configuredLanguages; // private Set<String> excludedLanguages; // private boolean allowAll; @@ -391,8 +397,14 @@ public class POSTaggingEngine extends Ab return null; } private POSTagger getPOSTagger(String language) { + String modelName = languageConfig.getParameter(language,MODEL_NAME_PARAM); try { - POSModel model = openNLP.getPartOfSpeachModel(language); + POSModel model; + if(modelName == null){ //use the default + model = openNLP.getPartOfSpeachModel(language); + } else { + model = openNLP.getModel(POSModel.class, modelName, null); + } if(model != null) { log.debug("POS Tagger Model {} for lanugage '{}' version: {}", new Object[]{model.getClass().getSimpleName(), Modified: incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java?rev=1387596&r1=1387595&r2=1387596&view=diff 
============================================================================== --- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java (original) +++ incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java Wed Sep 19 13:43:19 2012 @@ -110,7 +110,10 @@ public class SentiWSComponent { } //all resources available ... start the service if(loadedSentiWsFiles.equals(sentiWsFileNames)){ + log.info("register Sentiment Classifier for SentiWs (german)"); registerService(); + } else { + log.info("loaded {} (required: {})",loadedSentiWsFiles,sentiWsFileNames); } //remove registration return true; @@ -142,7 +145,7 @@ public class SentiWSComponent { Dictionary<String,Object> serviceProperties = new Hashtable<String,Object>(); serviceProperties.put("language", "de"); //set the language BundleContext bc = bundleContext; - if(bc != null){ + if(bc != null && sentiWsClassifierService == null){ sentiWsClassifierService = bc.registerService( SentimentClassifier.class.getName(), sentiWsClassifier, serviceProperties); Modified: incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java?rev=1387596&r1=1387595&r2=1387596&view=diff ============================================================================== --- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java 
(original) +++ incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java Wed Sep 19 13:43:19 2012 @@ -54,6 +54,7 @@ import org.apache.stanbol.enhancer.servi import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper; import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper; import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine; +import org.osgi.framework.Constants; import org.osgi.service.cm.ConfigurationException; import org.osgi.service.component.ComponentContext; import org.slf4j.Logger; @@ -87,25 +88,30 @@ import java.util.Set; * <p/> * Author: Sebastian Schaffert */ -@Component(immediate = true, metatype = true, configurationFactory = true, policy = ConfigurationPolicy.REQUIRE) +@Component(immediate = true, metatype = true, + configurationFactory = true, //allow multiple instances + policy = ConfigurationPolicy.OPTIONAL) //create a default instance with the default configuration @Service @Properties(value={ - @Property(name= EnhancementEngine.PROPERTY_NAME,value="sentiment") + @Property(name= EnhancementEngine.PROPERTY_NAME,value="sentiment-wordclassifier"), + @Property(name=SentimentEngine.CONFIG_LANGUAGES,value={SentimentEngine.DEFAULT_LANGUAGE_CONFIG}), + @Property(name=SentimentEngine.CONFIG_ADJECTIVES, + boolValue=SentimentEngine.DEFAULT_PROCESS_ADJECTIVES_ONLY), + @Property(name=SentimentEngine.CONFIG_MIN_POS_CONFIDENCE, + doubleValue = SentimentEngine.DEFAULT_MIN_POS_CONFIDNECE), + @Property(name=Constants.SERVICE_RANKING,intValue=-100) //give the default instance a ranking < 0 }) - public class SentimentEngine extends AbstractEnhancementEngine<RuntimeException,RuntimeException> { /** * Language configuration. Takes a list of ISO language codes of supported languages. Currently supported * are the languages given as default value. 
*/ - @Property(value={SentimentEngine.DEFAULT_LANGUAGE_CONFIG}) public static final String CONFIG_LANGUAGES = "org.apache.stanbol.enhancer.sentiment.languages"; /** * When set to true, only adjectives and nouns will be considered in sentiment analysis. */ - @Property(boolValue = SentimentEngine.DEFAULT_PROCESS_ADJECTIVES_ONLY ) public static final String CONFIG_ADJECTIVES = "org.apache.stanbol.enhancer.sentiment.adjectives"; /** * POS tags that are not selected by {@link SentimentClassifier#isAdjective(PosTag)} @@ -114,11 +120,8 @@ public class SentimentEngine extends Ab * that Words that do have a suitable TAG are still considered if the * confidence of the fitting tag is >= {min-pos-confidence}/2 */ - @Property(doubleValue = SentimentEngine.DEFAULT_MIN_POS_CONFIDNECE) public static final String CONFIG_MIN_POS_CONFIDENCE = "org.apache.stanbol.enhancer.sentiment.min-pos-confidence"; - @Property(boolValue=true) - public static final String DEBUG_SENTIMENTS = "debug"; boolean debugSentiments; public static final String DEFAULT_LANGUAGE_CONFIG = "*"; @@ -131,9 +134,9 @@ public class SentimentEngine extends Ab * {@link LexicalCategory#Noun Noun} if {@link #CONFIG_ADJECTIVES} is * deactivated) - default: 0.8<p> */ - private static final double DEFAULT_MIN_POS_CONFIDNECE = 0.8; + public static final double DEFAULT_MIN_POS_CONFIDNECE = 0.8; - private static final boolean DEFAULT_PROCESS_ADJECTIVES_ONLY = false; + public static final boolean DEFAULT_PROCESS_ADJECTIVES_ONLY = false; private static Logger log = LoggerFactory.getLogger(SentimentEngine.class); @@ -156,10 +159,12 @@ public class SentimentEngine extends Ab protected void bindClassifier(SentimentClassifier classifier){ log.info(" ... 
bind Sentiment Classifier {} for language {}", classifier.getClass().getSimpleName(),classifier.getLanguage()); - SentimentClassifier old = classifiers.put(classifier.getLanguage(), classifier); - if(old != null){ - log.warn("Replaced Sentiment Classifier for language {} (old: {}, new: {}", - new Object[]{old.getLanguage(),old,classifier}); + synchronized (classifiers) { + SentimentClassifier old = classifiers.put(classifier.getLanguage(), classifier); + if(old != null){ + log.warn("Replaced Sentiment Classifier for language {} (old: {}, new: {}", + new Object[]{old.getLanguage(),old,classifier}); + } } } /** unbind method for {@link #classifiers} */ @@ -285,41 +290,6 @@ public class SentimentEngine extends Ab // } finally { // ci.getLock().writeLock().unlock(); // } -// if(debugSentiments){ -// Iterator<Sentence> sentences = analysedText.getSentences(); -// if(sentences.hasNext()){ -// while(sentences.hasNext()){ -// Sentence sent = sentences.next(); -// log.info("Sentence: {}", sent.getSpan()); -// tokens = sent.getTokens(); -// double positive = 0.0; -// double negaitve = 0.0; -// while (tokens.hasNext()){ -// Token token = tokens.next(); -// Value<SentimentTag> sentiment = token.getAnnotation(NlpAnnotations.sentimentAnnotation); -// if(sentiment != null){ -// if(sentiment.value().isPositive()){ -// positive = positive+sentiment.probability(); -// } else { -// negaitve = negaitve+sentiment.probability(); -// } -// Value<PosTag> posTag = token.getAnnotation(NlpAnnotations.POSAnnotation); -// log.info(" - {} '{}'[{}] - value: {}", -// new Object []{ -// sentiment.value().isPositive()?"positive":"negative", -// token.getSpan(), -// posTag != null ? 
posTag.value(): "POS unknown", -// sentiment.probability() -// }); -// } -// } -// log.info(" > positive: {} | negative: {} | sum: {}", -// new Object []{positive, negaitve, (positive - negaitve)}); -// } -// } else { -// -// } -// } } @@ -365,12 +335,6 @@ public class SentimentEngine extends Ab "The configured minimum POS confidence value '" +minPOSConfidence+"' MUST BE > 0 and < 1!"); } - - //TODO: just for testing - value = properties.get(DEBUG_SENTIMENTS); - debugSentiments = value instanceof Boolean ? (Boolean)value : - value != null ? Boolean.parseBoolean(value.toString()) : - false; } @Deactivate Modified: incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java?rev=1387596&r1=1387595&r2=1387596&view=diff ============================================================================== --- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java (original) +++ incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java Wed Sep 19 13:43:19 2012 @@ -4,7 +4,9 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Dictionary; +import java.util.HashMap; import java.util.HashSet; +import java.util.Map; import java.util.Set; import org.osgi.service.cm.ConfigurationException; @@ -27,10 +29,12 @@ import org.osgi.service.cm.Configuration */ public class LanguageConfiguration { + private static final Map<String,String> EMPTY_PARAMS = Collections.emptyMap(); + private final String property; private final Collection<String> defaultConfig; //Langauge configuration - private 
Set<String> configuredLanguages = new HashSet<String>(); + private Map<String,Map<String,String>> configuredLanguages = new HashMap<String,Map<String,String>>(); private Set<String> excludedLanguages = new HashSet<String>(); private boolean allowAll; @@ -88,10 +92,13 @@ public class LanguageConfiguration { if(value == null){ continue; //ignore null values } - String lang = value.toString().trim().toLowerCase(); + String line = value.toString().trim(); + int sepIndex = line.indexOf(';'); + String lang = sepIndex < 0 ? line : line.substring(0, sepIndex).trim(); + lang = lang.toLowerCase(); if(lang.charAt(0) == '!'){ //exclude lang = lang.substring(1); - if(configuredLanguages.contains(lang)){ + if(configuredLanguages.containsKey(lang)){ throw new ConfigurationException(property, "Langauge '"+lang+"' is both included and excluded (config: " + config+")"); @@ -105,10 +112,47 @@ public class LanguageConfiguration { "Langauge '"+lang+"' is both included and excluded (config: " + config+")"); } - configuredLanguages.add(lang); + if(sepIndex >= 0){ + + } + configuredLanguages.put(lang,sepIndex >= 0 && sepIndex < line.length()-2 ? + parseParameters(line.substring(sepIndex, line.length()).trim()) : + EMPTY_PARAMS); + } + } + } + /** + * Parses optional parameters <code>{key}[={value}];{key2}[={value2}]</code>. Using + * the same key multiple times will override the previouse value + * @param paramString + * @return + * @throws ConfigurationException + */ + private Map<String,String> parseParameters(String paramString) throws ConfigurationException { + Map<String,String> params = new HashMap<String,String>(); + for(String param : paramString.split(";")){ + param = param.trim(); + int equalsPos = param.indexOf('='); + if(equalsPos == 0){ + throw new ConfigurationException(property, + "Parameter '"+param+"' has empty key!"); + } + String key = equalsPos > 0 ? 
param.substring(0, equalsPos).trim() : param; + String value; + if(equalsPos > 0){ + if(equalsPos < param.length()-2) { + value = param.substring(equalsPos+1).trim(); + } else { + value = ""; + } + } else { + value = null; } + params.put(key, value); } + return params.isEmpty() ? EMPTY_PARAMS : Collections.unmodifiableMap(params); } + /** * Checks if the parsed language is included in the configuration * @param language the language @@ -117,9 +161,19 @@ public class LanguageConfiguration { public boolean isLanguage(String language){ return allowAll ? (!excludedLanguages.contains(language)) : - configuredLanguages.contains(language); + configuredLanguages.containsKey(language); } /** + * Returns parsed parameters if <code>{@link #isLanguage(String)} == true</code> + * @param language the language + * @return the parameters or <code>null</code> if none or the parsed language + * is not active. + */ + public Map<String,String> getParameters(String language){ + return isLanguage(language) ? configuredLanguages.get(language) : null; + } + + /** * Resets the configuration to the default (as parsed in the constructor) */ public void setDefault() { @@ -130,6 +184,18 @@ public class LanguageConfiguration { // within the constructor } } + /** + * Returns the value of the parameter for the language (if present and the + * langage is active) + * @param language the language + * @param paramName the name of the param + * @return the param or <code>null</code> if not present OR the language + * is not active. + */ + public String getParameter(String language, String paramName) { + Map<String,String> params = getParameters(language); + return params == null ? 
null : params.get(paramName); + } } Added: incubator/stanbol/branches/stanbol-nlp-processing/nlp-launcher/src/main/resources/resources/config/org.apache.stanbol.enhancer.chain.list.impl.ListChain-nlpchain.config URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/stanbol-nlp-processing/nlp-launcher/src/main/resources/resources/config/org.apache.stanbol.enhancer.chain.list.impl.ListChain-nlpchain.config?rev=1387596&view=auto ============================================================================== --- incubator/stanbol/branches/stanbol-nlp-processing/nlp-launcher/src/main/resources/resources/config/org.apache.stanbol.enhancer.chain.list.impl.ListChain-nlpchain.config (added) +++ incubator/stanbol/branches/stanbol-nlp-processing/nlp-launcher/src/main/resources/resources/config/org.apache.stanbol.enhancer.chain.list.impl.ListChain-nlpchain.config Wed Sep 19 13:43:19 2012 @@ -0,0 +1,2 @@ +stanbol.enhancer.chain.name="nlp-processing" +stanbol.enhancer.chain.list.enginelist=["langdetect","opennlp-pos","opennlp-chunker","sentiment-wordclassifier","nlp2rdf"]