Author: rwesten
Date: Tue Nov 6 15:03:46 2012
New Revision: 1406170
URL: http://svn.apache.org/viewvc?rev=1406170&view=rev
Log:
STANBOL-733: Added support for ServiceProperties to all NLP engines;
STANBOL-736: added OpenNlp to the name of the ChunkerEngine; STANBOL-735: added
OpenNlp to the name of the PosTaggingEngine
Added:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/OpenNlpChunkingEngine.java
- copied, changed from r1403231,
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/ChunkingEngine.java
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/OpenNlpPosTaggingEngine.java
- copied, changed from r1403228,
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/POSTaggingEngine.java
Removed:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/ChunkingEngine.java
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/POSTaggingEngine.java
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliAnalyzedTextLemmatizerEngine.java
stanbol/branches/stanbol-nlp-processing/enhancer/engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
stanbol/branches/stanbol-nlp-processing/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/src/main/java/org/apache/stanbol/enhancer/engines/nlp2rdf/engine/Nlp2RdfMetadataEngine.java
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/resources/OSGI-INF/metatype/metatype.properties
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/resources/OSGI-INF/metatype/metatype.properties
stanbol/branches/stanbol-nlp-processing/enhancer/engines/pom.xml
stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentSummarizationEngine.java
stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliAnalyzedTextLemmatizerEngine.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliAnalyzedTextLemmatizerEngine.java?rev=1406170&r1=1406169&r2=1406170&view=diff
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliAnalyzedTextLemmatizerEngine.java
(original)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliAnalyzedTextLemmatizerEngine.java
Tue Nov 6 15:03:46 2012
@@ -10,6 +10,7 @@ import java.util.Collections;
import java.util.Dictionary;
import java.util.EnumMap;
import java.util.EnumSet;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -28,6 +29,8 @@ import org.apache.stanbol.enhancer.engin
import org.apache.stanbol.enhancer.engines.celi.CeliMorphoFeatures;
import org.apache.stanbol.enhancer.engines.celi.utils.Utils;
import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
+import org.apache.stanbol.enhancer.nlp.NlpProcessingRole;
+import org.apache.stanbol.enhancer.nlp.NlpServiceProperties;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.Token;
import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
@@ -51,6 +54,15 @@ import org.osgi.service.component.Compon
@Property(name = CeliConstants.CELI_TEST_ACCOUNT, boolValue = false) })
public class CeliAnalyzedTextLemmatizerEngine extends
AbstractEnhancementEngine<IOException, RuntimeException> implements
EnhancementEngine, ServiceProperties {
+ private static final Map<String,Object> SERVICE_PROPERTIES;
+ static {
+ Map<String,Object> props = new HashMap<String,Object>();
+ props.put(ServiceProperties.ENHANCEMENT_ENGINE_ORDERING,
+ ServiceProperties.ORDERING_NLP_LEMMATIZE);
+ props.put(NlpServiceProperties.ENHANCEMENT_ENGINE_NLP_ROLE,
+ NlpProcessingRole.Lemmatize);
+ SERVICE_PROPERTIES = Collections.unmodifiableMap(props);
+ }
/**
* This ensures that no connections to external services are made if
Stanbol is started in offline mode as the OnlineMode service will only be
available if OfflineMode is deactivated.
@@ -65,12 +77,6 @@ public class CeliAnalyzedTextLemmatizerE
@Property(value={"it", "da", "de", "ru","ro"})
public static final String PROPERTY_SUPPORTED_LANGUAGES =
"org.apache.stanbol.enhancer.engines.celi.lemmatizer.languages";
- /**
- * The default value for the Execution of this Engine. Currently set to
- * {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION}
- */
- public static final Integer defaultOrder =
ServiceProperties.ORDERING_CONTENT_EXTRACTION;
-
private LanguageConfiguration languageConfig = new LanguageConfiguration(
PROPERTY_SUPPORTED_LANGUAGES, new String[]{"it", "da", "de",
"ru","ro"});
@@ -182,7 +188,7 @@ public class CeliAnalyzedTextLemmatizerE
@Override
public Map<String, Object> getServiceProperties() {
- return
Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING,
(Object) defaultOrder));
+ return SERVICE_PROPERTIES;
}
}
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java?rev=1406170&r1=1406169&r2=1406170&view=diff
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
(original)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
Tue Nov 6 15:03:46 2012
@@ -87,14 +87,14 @@ public class LanguageDetectionEnhancemen
public static final String MAX_SUGGESTED_PROP =
"org.apache.stanbol.enhancer.engines.langdetect.max-suggested";
/**
- * The default value for the Execution of this Engine. Currently set to
- * {@link ServiceProperties#ORDERING_PRE_PROCESSING} - 2<p>
+ * The default value for the Execution of this Engine (
+ * {@link ServiceProperties#ORDERING_NLP_LANGAUGE_DETECTION})<p>
* NOTE: this information is used by the default and weighed {@link Chain}
* implementation to determine the processing order of
* {@link EnhancementEngine}s. Other {@link Chain} implementation do not
* use this information.
*/
- public static final Integer defaultOrder = ORDERING_PRE_PROCESSING - 2;
+ public static final Integer defaultOrder =
ServiceProperties.ORDERING_NLP_LANGAUGE_DETECTION;
/**
* This contains the only MIME type directly supported by this enhancement
engine.
@@ -266,7 +266,7 @@ public class LanguageDetectionEnhancemen
}
public Map<String, Object> getServiceProperties() {
- return
Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING,
(Object) defaultOrder));
+ return Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object)
defaultOrder);
}
}
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java?rev=1406170&r1=1406169&r2=1406170&view=diff
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
(original)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
Tue Nov 6 15:03:46 2012
@@ -75,14 +75,14 @@ public class LangIdEnhancementEngine
/**
- * The default value for the Execution of this Engine. Currently set to
- * {@link ServiceProperties#ORDERING_PRE_PROCESSING} - 2<p>
+ * The default value for the Execution of this Engine (
+ * {@link ServiceProperties#ORDERING_NLP_LANGAUGE_DETECTION})<p>
* NOTE: this information is used by the default and weighed {@link Chain}
* implementation to determine the processing order of
* {@link EnhancementEngine}s. Other {@link Chain} implementation do not
* use this information.
*/
- public static final Integer defaultOrder = ORDERING_PRE_PROCESSING - 2;
+ public static final Integer defaultOrder =
ServiceProperties.ORDERING_NLP_LANGAUGE_DETECTION;
/**
* This contains the only MIME type directly supported by this enhancement
engine.
@@ -184,7 +184,7 @@ public class LangIdEnhancementEngine
}
public Map<String, Object> getServiceProperties() {
- return
Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING,
(Object) defaultOrder));
+ return Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object)
defaultOrder);
}
}
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/src/main/java/org/apache/stanbol/enhancer/engines/nlp2rdf/engine/Nlp2RdfMetadataEngine.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/src/main/java/org/apache/stanbol/enhancer/engines/nlp2rdf/engine/Nlp2RdfMetadataEngine.java?rev=1406170&r1=1406169&r2=1406170&view=diff
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/src/main/java/org/apache/stanbol/enhancer/engines/nlp2rdf/engine/Nlp2RdfMetadataEngine.java
(original)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/src/main/java/org/apache/stanbol/enhancer/engines/nlp2rdf/engine/Nlp2RdfMetadataEngine.java
Tue Nov 6 15:03:46 2012
@@ -6,9 +6,11 @@ import static org.apache.stanbol.enhance
import static
org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText;
import java.io.IOException;
+import java.util.Collections;
import java.util.Dictionary;
import java.util.EnumSet;
import java.util.Iterator;
+import java.util.Map;
import org.apache.clerezza.rdf.core.Language;
import org.apache.clerezza.rdf.core.LiteralFactory;
@@ -40,6 +42,7 @@ import org.apache.stanbol.enhancer.nlp.u
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
@@ -55,7 +58,7 @@ import org.slf4j.LoggerFactory;
@Properties(value={
@Property(name= EnhancementEngine.PROPERTY_NAME,value="nlp2rdf")
})
-public class Nlp2RdfMetadataEngine extends
AbstractEnhancementEngine<RuntimeException,RuntimeException> {
+public class Nlp2RdfMetadataEngine extends
AbstractEnhancementEngine<RuntimeException,RuntimeException> implements
ServiceProperties{
private final Logger log =
LoggerFactory.getLogger(Nlp2RdfMetadataEngine.class);
//TODO: replace this with a reald ontology
@@ -172,6 +175,12 @@ public class Nlp2RdfMetadataEngine exten
}
}
+ @Override
+ public Map<String,Object> getServiceProperties() {
+ return
Collections.singletonMap(ServiceProperties.ENHANCEMENT_ENGINE_ORDERING,
+ (Object)ServiceProperties.ORDERING_POST_PROCESSING);
+ }
+
Copied:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/OpenNlpChunkingEngine.java
(from r1403231,
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/ChunkingEngine.java)
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/OpenNlpChunkingEngine.java?p2=stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/OpenNlpChunkingEngine.java&p1=stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/ChunkingEngine.java&r1=1403231&r2=1406170&rev=1406170&view=diff
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/ChunkingEngine.java
(original)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/services/OpenNlpChunkingEngine.java
Tue Nov 6 15:03:46 2012
@@ -50,6 +50,8 @@ import org.apache.felix.scr.annotations.
import org.apache.stanbol.commons.opennlp.OpenNLP;
import
org.apache.stanbol.enhancer.engines.opennlp.chunker.model.PhraseTagSetRegistry;
import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
+import org.apache.stanbol.enhancer.nlp.NlpProcessingRole;
+import org.apache.stanbol.enhancer.nlp.NlpServiceProperties;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.AnalysedTextUtils;
import org.apache.stanbol.enhancer.nlp.model.Chunk;
@@ -66,6 +68,7 @@ import org.apache.stanbol.enhancer.nlp.u
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.osgi.framework.Constants;
@@ -93,13 +96,22 @@ import org.slf4j.LoggerFactory;
@Service
@Properties(value={
@Property(name=EnhancementEngine.PROPERTY_NAME,value="opennlp-chunker"),
- @Property(name=ChunkingEngine.CONFIG_LANGUAGES,
+ @Property(name=OpenNlpChunkingEngine.CONFIG_LANGUAGES,
value =
{"de;model=OpenNLP_1.5.1-German-Chunker-TigerCorps07.zip","*"}),
- @Property(name=ChunkingEngine.MIN_CHUNK_SCORE),
+ @Property(name=OpenNlpChunkingEngine.MIN_CHUNK_SCORE),
@Property(name=Constants.SERVICE_RANKING,intValue=-100) //give the
default instance a ranking < 0
})
-public class ChunkingEngine extends
AbstractEnhancementEngine<RuntimeException,RuntimeException> {
+public class OpenNlpChunkingEngine extends
AbstractEnhancementEngine<RuntimeException,RuntimeException> implements
ServiceProperties {
+ private static final Map<String,Object> SERVICE_PROPERTIES;
+ static {
+ Map<String,Object> props = new HashMap<String,Object>();
+ props.put(ServiceProperties.ENHANCEMENT_ENGINE_ORDERING,
+ ServiceProperties.ORDERING_NLP_CHUNK);
+ props.put(NlpServiceProperties.ENHANCEMENT_ENGINE_NLP_ROLE,
+ NlpProcessingRole.Chunking);
+ SERVICE_PROPERTIES = Collections.unmodifiableMap(props);
+ }
/**
* Language configuration. Takes a list of ISO language codes of supported
languages. Currently supported
* are the languages given as default value.
@@ -112,7 +124,7 @@ public class ChunkingEngine extends Abst
private static final String MODEL_PARAM_NAME = "model";
- private static Logger log = LoggerFactory.getLogger(ChunkingEngine.class);
+ private static Logger log =
LoggerFactory.getLogger(OpenNlpChunkingEngine.class);
private LanguageConfiguration languageConfiguration = new
LanguageConfiguration(CONFIG_LANGUAGES,
new String
[]{"de;"+MODEL_PARAM_NAME+"=OpenNLP_1.5.1-German-Chunker-TigerCorps07.zip","*"});
@@ -302,7 +314,12 @@ public class ChunkingEngine extends Abst
logChunks(at);
}
}
-
+
+ @Override
+ public Map<String,Object> getServiceProperties() {
+ return SERVICE_PROPERTIES;
+ }
+
private void logChunks(AnalysedText at){
Iterator<Span> it = at.getEnclosed(EnumSet.of(SpanTypeEnum.Sentence,
SpanTypeEnum.Chunk));
while(it.hasNext()){
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/resources/OSGI-INF/metatype/metatype.properties
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1406170&r1=1406169&r2=1406170&view=diff
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/resources/OSGI-INF/metatype/metatype.properties
(original)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/resources/OSGI-INF/metatype/metatype.properties
Tue Nov 6 15:03:46 2012
@@ -1,21 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
-# Copyright 2012, FORMCEPT [http://www.formcept.com]
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-org.apache.stanbol.enhancer.engines.opennlp.chunker.services.ChunkingEngine.name=Apache
Stanbol Enhancer Engine: Chunking / Noun Phrase Detection
+org.apache.stanbol.enhancer.engines.opennlp.chunker.services.OpenNlpChunkingEngine.name=Apache
\
+Stanbol Enhancer Engine: OpenNLP Chunking / Noun Phrase Detection
+org.apache.stanbol.enhancer.engines.opennlp.chunker.services.OpenNlpChunkingEngine.description=Enhancement
\
+Engine that detects chunks in parsed text by using the OpenNLP chunker. It
requires proper POS tags \
+to be present in the AnalyzedText contentPart.
-stanbol.enhancer.engine.name.name=Noun Phrase Detector
+stanbol.enhancer.engine.name.name=name
stanbol.enhancer.engine.name.description=The name of the enhancement engine as
\
used in the RESTful interface '/engine/<name>'
service.ranking.name=Ranking
@@ -24,4 +30,8 @@ one with the higher ranking will be used
org.apache.stanbol.enhancer.chunker.languages.name=Language configuration
org.apache.stanbol.enhancer.chunker.languages.description=Takes a list of ISO \
- language codes of supported languages. Currently supported are the languages
given as default value.
+ language codes. '*' is the Wildcard; '!{lang}' to exclude a language; \
+ '{lang};model={chunker-model-file-name}' to configure a custom OpenNLP model
\
+ for a language. Models are loaded via the Stanbol DataFileProvider service. \
+ So users can e.g. put models in the datafiles directory \
+ (defaults to '{stanbol-working-dir}/stanbol/datafiles')
Copied:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/OpenNlpPosTaggingEngine.java
(from r1403228,
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/POSTaggingEngine.java)
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/OpenNlpPosTaggingEngine.java?p2=stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/OpenNlpPosTaggingEngine.java&p1=stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/POSTaggingEngine.java&r1=1403228&r2=1406170&rev=1406170&view=diff
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/POSTaggingEngine.java
(original)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/services/OpenNlpPosTaggingEngine.java
Tue Nov 6 15:03:46 2012
@@ -16,24 +16,18 @@
package org.apache.stanbol.enhancer.engines.opennlp.pos.services;
-import static java.util.Collections.singleton;
import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.POS_ANNOTATION;
import static
org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage;
import static
org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.initAnalysedText;
-import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
import java.util.Dictionary;
import java.util.EnumSet;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTagger;
@@ -55,7 +49,8 @@ import org.apache.felix.scr.annotations.
import org.apache.felix.scr.annotations.Service;
import org.apache.stanbol.commons.opennlp.OpenNLP;
import org.apache.stanbol.enhancer.engines.opennlp.pos.model.PosTagSetRegistry;
-import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
+import org.apache.stanbol.enhancer.nlp.NlpProcessingRole;
+import org.apache.stanbol.enhancer.nlp.NlpServiceProperties;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory;
import org.apache.stanbol.enhancer.nlp.model.AnalysedTextUtils;
@@ -73,8 +68,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
-import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
-import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.osgi.framework.Constants;
import org.osgi.service.cm.ConfigurationException;
@@ -96,11 +90,20 @@ import org.slf4j.LoggerFactory;
@Service
@Properties(value={
@Property(name= EnhancementEngine.PROPERTY_NAME,value="opennlp-pos"),
- @Property(name=POSTaggingEngine.CONFIG_LANGUAGES, value =
{"*"},cardinality=Integer.MAX_VALUE),
+ @Property(name=OpenNlpPosTaggingEngine.CONFIG_LANGUAGES, value =
{"*"},cardinality=Integer.MAX_VALUE),
@Property(name=Constants.SERVICE_RANKING,intValue=-100) //give the
default instance a ranking < 0
})
-public class POSTaggingEngine extends
AbstractEnhancementEngine<RuntimeException,RuntimeException> {
+public class OpenNlpPosTaggingEngine extends
AbstractEnhancementEngine<RuntimeException,RuntimeException> implements
ServiceProperties {
+ private static final Map<String,Object> SERVICE_PROPERTIES;
+ static {
+ Map<String,Object> props = new HashMap<String,Object>();
+ props.put(ServiceProperties.ENHANCEMENT_ENGINE_ORDERING,
+ ServiceProperties.ORDERING_NLP_POS);
+ props.put(NlpServiceProperties.ENHANCEMENT_ENGINE_NLP_ROLE,
+ NlpProcessingRole.PartOfSpeachTagging);
+ SERVICE_PROPERTIES = Collections.unmodifiableMap(props);
+ }
/**
* Language configuration. Takes a list of ISO language codes of supported
languages. Currently supported
@@ -114,7 +117,7 @@ public class POSTaggingEngine extends Ab
private static final String MODEL_NAME_PARAM = "model";
- private static Logger log =
LoggerFactory.getLogger(POSTaggingEngine.class);
+ private static Logger log =
LoggerFactory.getLogger(OpenNlpPosTaggingEngine.class);
//Langauge configuration
private LanguageConfiguration languageConfig = new
LanguageConfiguration(CONFIG_LANGUAGES,new String[]{"*"});
@@ -260,6 +263,12 @@ public class POSTaggingEngine extends Ab
}
}
+ @Override
+ public Map<String,Object> getServiceProperties() {
+ return SERVICE_PROPERTIES;
+ }
+
+
private void logAnnotations(AnalysedText at){
Iterator<Span> it = at.getEnclosed(EnumSet.of(SpanTypeEnum.Sentence,
SpanTypeEnum.Token));
while(it.hasNext()){
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/resources/OSGI-INF/metatype/metatype.properties
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1406170&r1=1406169&r2=1406170&view=diff
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/resources/OSGI-INF/metatype/metatype.properties
(original)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/src/main/resources/OSGI-INF/metatype/metatype.properties
Tue Nov 6 15:03:46 2012
@@ -1,23 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
-# Copyright 2012, FORMCEPT [http://www.formcept.com]
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
#
-org.apache.stanbol.enhancer.engines.opennlp.pos.services.POSTaggingEngine.name=Apache
Stanbol Enhancer Engine: POS Tagging
+org.apache.stanbol.enhancer.engines.opennlp.pos.services.OpenNlpPosTaggingEngine.name=Apache
Stanbol Enhancer Engine: OpenNLP POS Tagging
+org.apache.stanbol.enhancer.engines.opennlp.pos.services.OpenNlpPosTaggingEngine.description=Enhancement
\
+Engine for POS tagging using OpenNLP. Processed languages and language
specific POS models can be \
+configured. This Engine can use existing Tokens and Sentence annotations. If
none are present, than \
+it tries to use OpenNLP to create those.
-stanbol.enhancer.engine.name.name=POS Tagging Engine
+stanbol.enhancer.engine.name.name=POS Tagging Enginename
stanbol.enhancer.engine.name.description=The name of the enhancement engine as
\
used in the RESTful interface '/engine/<name>'
service.ranking.name=Ranking
@@ -26,4 +32,8 @@ one with the higher ranking will be used
org.apache.stanbol.enhancer.pos.languages.name=Language configuration
org.apache.stanbol.enhancer.pos.languages.description=Takes a list of ISO \
- language codes of supported languages. Currently supported are the languages
given as default value.
+ language codes. '*' is the Wildcard; '!{lang}' to exclude a language; \
+ '{lang};model={pos-model-file-name}' to configure a custom OpenNLP model \
+ for a language. Models are loaded via the Stanbol DataFileProvider service. \
+ So users can e.g. put models in the datafiles directory \
+ (defaults to '{stanbol-working-dir}/stanbol/datafiles')
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/engines/pom.xml
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/pom.xml?rev=1406170&r1=1406169&r2=1406170&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/engines/pom.xml (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/engines/pom.xml Tue Nov 6
15:03:46 2012
@@ -60,6 +60,8 @@
<module>dbpedia-spotlight</module>
<!-- NLP engines -->
+ <module>opennlp-sentence</module>
+ <module>opennlp-token</module>
<module>opennlp-pos</module>
<module>opennlp-chunker</module>
<module>sentiment-wordclassifier</module>
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentSummarizationEngine.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentSummarizationEngine.java?rev=1406170&r1=1406169&r2=1406170&view=diff
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentSummarizationEngine.java
(original)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentSummarizationEngine.java
Tue Nov 6 15:03:46 2012
@@ -18,6 +18,8 @@ package org.apache.stanbol.enhancer.engi
import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.PHRASE_ANNOTATION;
import static
org.apache.stanbol.enhancer.nlp.NlpAnnotations.SENTIMENT_ANNOTATION;
+import static
org.apache.stanbol.enhancer.servicesapi.ServiceProperties.ENHANCEMENT_ENGINE_ORDERING;
+import static
org.apache.stanbol.enhancer.servicesapi.ServiceProperties.ORDERING_EXTRACTION_ENHANCEMENT;
import static
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.createTextEnhancement;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
@@ -26,9 +28,11 @@ import static org.apache.stanbol.enhance
import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
+import java.util.Map;
import java.util.Map.Entry;
import java.util.NavigableMap;
import java.util.TreeMap;
@@ -59,6 +63,7 @@ import org.apache.stanbol.enhancer.nlp.u
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
@@ -83,7 +88,7 @@ import org.slf4j.LoggerFactory;
@Property(name=
EnhancementEngine.PROPERTY_NAME,value=SentimentSummarizationEngine.DEFAULT_ENGINE_NAME),
@Property(name=Constants.SERVICE_RANKING,intValue=-100) //give the default
instance a ranking < 0
})
-public class SentimentSummarizationEngine extends
AbstractEnhancementEngine<RuntimeException,RuntimeException> {
+public class SentimentSummarizationEngine extends
AbstractEnhancementEngine<RuntimeException,RuntimeException> implements
ServiceProperties {
private final Logger log = LoggerFactory.getLogger(getClass());
@@ -162,6 +167,10 @@ public class SentimentSummarizationEngin
}
}
+ @Override
+ public Map<String,Object> getServiceProperties() {
+ return Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING,
(Object)ORDERING_EXTRACTION_ENHANCEMENT);
+ }
/**
*
* @param at
@@ -310,6 +319,5 @@ public class SentimentSummarizationEngin
}
}
return content.substring(beginPos, endPos);
- }
-
+ }
}
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java?rev=1406170&r1=1406169&r2=1406170&view=diff
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java
(original)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java
Tue Nov 6 15:03:46 2012
@@ -39,6 +39,8 @@ import org.apache.felix.scr.annotations.
import org.apache.felix.scr.annotations.Service;
import org.apache.stanbol.enhancer.engines.sentiment.api.SentimentClassifier;
import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
+import org.apache.stanbol.enhancer.nlp.NlpProcessingRole;
+import org.apache.stanbol.enhancer.nlp.NlpServiceProperties;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.Token;
import org.apache.stanbol.enhancer.nlp.model.annotation.Annotation;
@@ -49,6 +51,7 @@ import org.apache.stanbol.enhancer.nlp.u
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.osgi.framework.Constants;
import org.osgi.service.cm.ConfigurationException;
@@ -84,7 +87,7 @@ import org.slf4j.LoggerFactory;
doubleValue = SentimentEngine.DEFAULT_MIN_POS_CONFIDNECE),
@Property(name=Constants.SERVICE_RANKING,intValue=-100) //give the
default instance a ranking < 0
})
-public class SentimentEngine extends
AbstractEnhancementEngine<RuntimeException,RuntimeException> {
+public class SentimentEngine extends
AbstractEnhancementEngine<RuntimeException,RuntimeException> implements
ServiceProperties {
/**
* Language configuration. Takes a list of ISO language codes of supported
languages. Currently supported
@@ -121,6 +124,19 @@ public class SentimentEngine extends Ab
public static final boolean DEFAULT_PROCESS_ADJECTIVES_ONLY = false;
+ /**
+ * Service Properties used by this Engine
+ */
+ private static final Map<String,Object> SERVICE_PROPERTIES;
+ static {
+ Map<String,Object> props = new HashMap<String,Object>();
+ props.put(ServiceProperties.ENHANCEMENT_ENGINE_ORDERING,
+ ServiceProperties.ORDERING_NLP_POS - 1); //after POS tagging
+ props.put(NlpServiceProperties.ENHANCEMENT_ENGINE_NLP_ROLE,
+ NlpProcessingRole.SentimentTagging);
+ SERVICE_PROPERTIES = Collections.unmodifiableMap(props);
+ }
+
private static Logger log = LoggerFactory.getLogger(SentimentEngine.class);
@@ -324,5 +340,9 @@ public class SentimentEngine extends Ab
langaugeConfig.setDefault();
super.deactivate(ctx);
}
+ @Override
+ public Map<String,Object> getServiceProperties() {
+ return SERVICE_PROPERTIES;
+ }
}