Author: rwesten
Date: Tue Nov 6 14:56:09 2012
New Revision: 1406161
URL: http://svn.apache.org/viewvc?rev=1406161&view=rev
Log:
STANBOL-733: Added ORDERING service properties for the NLP processing steps;
Added NLP specific property for the NLP processing role of an NLP processing
EnhancementEngine.
Added:
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpProcessingRole.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpServiceProperties.java
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/ServiceProperties.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/impl/AbstractEnhancementEngine.java
Added:
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpProcessingRole.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpProcessingRole.java?rev=1406161&view=auto
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpProcessingRole.java
(added)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpProcessingRole.java
Tue Nov 6 14:56:09 2012
@@ -0,0 +1,19 @@
+package org.apache.stanbol.enhancer.nlp;
+
+/**
+ * Defines NLP processing Roles engines can take. The idea is to use those roles
+ * to ease the configuration of NLP enhancement chains. Basically users would
+ * just configure what NLP features they want to use and the NLP chain would
+ * choose the fitting Engines based on their "service.ranking" values.
+ * <p>
+ * NOTE(review): roles may also be provided as String values that are matched
+ * against the {@link #name()} of these constants (see
+ * <code>NlpEngineHelper#getNlpProcessingRole(Map)</code>), so the constant
+ * names are part of the configuration contract and must not be renamed.
+ */
+public enum NlpProcessingRole {
+
+ LanguageDetection,
+ SentenceDetection,
+ Tokenizing,
+ PartOfSpeachTagging, // sic: "Speach" is misspelled, but String values are parsed by name - do not rename
+ Chunking,
+ SentimentTagging,
+ Lemmatize
+}
Added:
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpServiceProperties.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpServiceProperties.java?rev=1406161&view=auto
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpServiceProperties.java
(added)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpServiceProperties.java
Tue Nov 6 14:56:09 2012
@@ -0,0 +1,13 @@
+package org.apache.stanbol.enhancer.nlp;
+
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+
+public interface NlpServiceProperties extends ServiceProperties{ // adds NLP specific property keys to the generic ServiceProperties
+
+ /**
+ * Property Key used by NLP engines to provide their {@link NlpProcessingRole}
+ * as part of their service properties. The value is expected to be either
+ * an {@link NlpProcessingRole} instance or a String equal to the
+ * {@link NlpProcessingRole#name()} of a role; both forms are handled by
+ * <code>NlpEngineHelper#getNlpProcessingRole(Map)</code>.
+ */
+ String ENHANCEMENT_ENGINE_NLP_ROLE =
"org.apache.stanbol.enhancer.engine.nlp.role";
+
+
+}
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java?rev=1406161&r1=1406160&r2=1406161&view=diff
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java
(original)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java
Tue Nov 6 14:56:09 2012
@@ -93,22 +93,6 @@ public enum LexicalCategory {
* categories. (http://www.ilc.cnr.it/EAGLES96/annotate/node16.html#mp
19.09.06)
*/
Unique,
-// /**
-// * A numeral is a word, functioning most typically as an adjective or
pronoun, that expresses a number,
-// * and relation to the number, such as one of the following: Quantity,
Sequence, Frequency, Fraction.
-// *
(http://www.sil.org/linguistics/GlossaryOfLinguisticTerms/WhatIsANumeral.htm
19.09.06)
-// */
-// Numeral,
-// not present in OLIA
-// /**
-// * Clitic Element covers only one aspect of the original MULTEXT-East
(and ISOcat) definitions of
-// * cliticness, i.e., that an element is a clitic
-// */
-// Clitic,
-// /**
-// * Proper nouns (also called proper names) are the names of unique
entities. (http://en.wikipedia.org/wiki/Noun 19.09.06)
-// */
-// ProperNoun,
;
static final String OLIA_NAMESPACE = "http://purl.org/olia/olia.owl#";
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java?rev=1406161&r1=1406160&r2=1406161&view=diff
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java
(original)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java
Tue Nov 6 14:56:09 2012
@@ -3,10 +3,13 @@ package org.apache.stanbol.enhancer.nlp.
import static java.util.Collections.singleton;
import java.io.IOException;
+import java.util.Dictionary;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.enhancer.nlp.NlpProcessingRole;
+import org.apache.stanbol.enhancer.nlp.NlpServiceProperties;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory;
import org.apache.stanbol.enhancer.nlp.model.AnalysedTextUtils;
@@ -14,6 +17,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.slf4j.Logger;
@@ -203,5 +207,30 @@ public final class NlpEngineHelper {
return null;
}
}
-
+ /**
+ * Parsed the {@link NlpProcessingRole} typically provided by the
+ * {@link ServiceProperties#getServiceProperties()} provided by some
+ * EnhancementEngines.<p>
+ * This supports both {@link NlpProcessingRole} as well as String values
+ * using the {@link NlpProcessingRole#name()}.
+ * @param properties the properties (typically retrieved from the
+ * {@link ServiceProperties#getServiceProperties()} method)
+ * @return the NLP processing role or <code>null</code> if not present OR
+ * an error while parsing.
+ */
+ public static NlpProcessingRole getNlpProcessingRole(Map<String,Object>
properties){
+ Object value =
properties.get(NlpServiceProperties.ENHANCEMENT_ENGINE_NLP_ROLE);
+ if(value instanceof NlpProcessingRole){
+ return (NlpProcessingRole)value;
+ } else if(value != null){
+ try {
+ return NlpProcessingRole.valueOf(value.toString());
+ } catch (IllegalArgumentException e) {
+ log.warn("Unknown NLP processing role {} -> return
null",value);
+ return null;
+ }
+ } else {
+ return null;
+ }
+ }
}
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/ServiceProperties.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/ServiceProperties.java?rev=1406161&r1=1406160&r2=1406161&view=diff
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/ServiceProperties.java
(original)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/ServiceProperties.java
Tue Nov 6 14:56:09 2012
@@ -49,7 +49,15 @@ public interface ServiceProperties {
/**
* Ordering values < {@link ServiceProperties#ORDERING_PRE_PROCESSING} and
* >= this value indicate, that an enhancement engine performs operations
- * that are only dependent on the parsed content.
+ * that are only dependent on the parsed content.<p>
+ * <b>NOTE:</b> the NLP processing specific orderings listed below are
+ * all defined within this span.
+ * @see #ORDERING_NLP_LANGAUGE_DETECTION
+ * @see #ORDERING_NLP_SENTENCE_DETECTION
+ * @see #ORDERING_NLP_TOKENIZING
+ * @see #ORDERING_NLP_POS
+ * @see #ORDERING_NLP_CHUNK
+ * @see #ORDERING_NLP_LEMMATIZE
*/
Integer ORDERING_CONTENT_EXTRACTION = 100;
@@ -76,5 +84,46 @@ public interface ServiceProperties {
* operations on existing enhancements.
*/
Integer ORDERING_POST_PROCESSING = -100;
-
+
+ /* -------
+ * NLP processing orderings (all within the ORDERING_CONTENT_EXTRACTION range).
+ * Each constant is ORDERING_CONTENT_EXTRACTION plus an offset that decreases
+ * from 90 (language detection) to 40 (lemmatizing).
+ * -------
+ */
+ /**
+ * Ordering values < {@link #ORDERING_PRE_PROCESSING} and >=
+ * {@link #ORDERING_NLP_LANGAUGE_DETECTION} are reserved for engines that
+ * detect the language of the content.<p>
+ * NOTE(review): the constant name misspells "LANGUAGE"; it is public API,
+ * so it is documented here rather than renamed.
+ */
+ Integer ORDERING_NLP_LANGAUGE_DETECTION =
ServiceProperties.ORDERING_CONTENT_EXTRACTION + 90;
+ /**
+ * Ordering values < {@link #ORDERING_NLP_LANGAUGE_DETECTION} and >=
+ * {@link #ORDERING_NLP_SENTENCE_DETECTION} are reserved for engines that
+ * extract sections within the text content
+ */
+ Integer ORDERING_NLP_SENTENCE_DETECTION =
ServiceProperties.ORDERING_CONTENT_EXTRACTION + 80;
+ /**
+ * Ordering values < {@link #ORDERING_NLP_SENTENCE_DETECTION} and >=
+ * {@link #ORDERING_NLP_TOKENIZING} are reserved for engines that tokenize
+ * the text
+ */
+ Integer ORDERING_NLP_TOKENIZING =
ServiceProperties.ORDERING_CONTENT_EXTRACTION + 70;
+ /**
+ * Ordering values < {@link #ORDERING_NLP_TOKENIZING} and >=
+ * {@link #ORDERING_NLP_POS} are reserved for engines that perform
+ * POS (Part of Speech) tagging
+ */
+ Integer ORDERING_NLP_POS = ServiceProperties.ORDERING_CONTENT_EXTRACTION +
60;
+ /**
+ * Ordering values < {@link #ORDERING_NLP_POS} and >=
+ * {@link #ORDERING_NLP_CHUNK} are reserved for engines that annotate
+ * Chunks (such as Noun Phrases) in a text.
+ */
+ Integer ORDERING_NLP_CHUNK = ServiceProperties.ORDERING_CONTENT_EXTRACTION
+ 50;
+ /**
+ * Ordering values < {@link #ORDERING_NLP_CHUNK} and >=
+ * {@link #ORDERING_NLP_LEMMATIZE} are reserved for engines that lemmatize
+ * texts.<p>
+ * TODO: maybe this should be the same as {@link #ORDERING_NLP_TOKENIZING}
+ */
+ Integer ORDERING_NLP_LEMMATIZE =
ServiceProperties.ORDERING_CONTENT_EXTRACTION + 40;
}
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/impl/AbstractEnhancementEngine.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/impl/AbstractEnhancementEngine.java?rev=1406161&r1=1406160&r2=1406161&view=diff
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/impl/AbstractEnhancementEngine.java
(original)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/impl/AbstractEnhancementEngine.java
Tue Nov 6 14:56:09 2012
@@ -93,4 +93,9 @@ public abstract class AbstractEnhancemen
public final String getName(){
return name;
}
+
+ /**
+  * Describes this engine by the simple name of its class followed by the
+  * engine name, e.g. <code>MyEngine(name=myEngineName)</code>.
+  * @return the String representation of this engine
+  */
+ @Override
+ public String toString() {
+     final String engineType = getClass().getSimpleName();
+     return String.format("%s(name=%s)", engineType, name);
+ }
}