Author: rwesten
Date: Tue Nov  6 14:56:09 2012
New Revision: 1406161

URL: http://svn.apache.org/viewvc?rev=1406161&view=rev
Log:
STANBOL-733: Added ORDERING service properties for the NLP processing steps; 
Added NLP specific property for the NLP processing role of an NLP processing 
EnhancementEngine.

Added:
    
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpProcessingRole.java
    
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpServiceProperties.java
Modified:
    
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java
    
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java
    
stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/ServiceProperties.java
    
stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/impl/AbstractEnhancementEngine.java

Added: 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpProcessingRole.java
URL: 
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpProcessingRole.java?rev=1406161&view=auto
==============================================================================
--- 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpProcessingRole.java
 (added)
+++ 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpProcessingRole.java
 Tue Nov  6 14:56:09 2012
@@ -0,0 +1,19 @@
+package org.apache.stanbol.enhancer.nlp;
+
+/**
+ * Defines NLP processing Roles engines can take. The idea is to use those 
roles
+ * to ease the configuration of NLP enhancement chains. Basically users would
+ * just configure what NLP features they want to use and the NLP chain would
+ * choose the fitting Engines based on their "service.ranking" values.
+ *
+ */
+public enum NlpProcessingRole {
+
+    LanguageDetection,
+    SentenceDetection,
+    Tokenizing,
+    PartOfSpeachTagging,
+    Chunking, 
+    SentimentTagging, 
+    Lemmatize
+}

Added: 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpServiceProperties.java
URL: 
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpServiceProperties.java?rev=1406161&view=auto
==============================================================================
--- 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpServiceProperties.java
 (added)
+++ 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpServiceProperties.java
 Tue Nov  6 14:56:09 2012
@@ -0,0 +1,13 @@
+package org.apache.stanbol.enhancer.nlp;
+
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+
+public interface NlpServiceProperties extends ServiceProperties{
+    
+    /**
+     * Property Key used by NLP engines to provide their {@link 
NlpProcessingRole}
+     */
+    String ENHANCEMENT_ENGINE_NLP_ROLE = 
"org.apache.stanbol.enhancer.engine.nlp.role";
+
+    
+}

Modified: 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java
URL: 
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java?rev=1406161&r1=1406160&r2=1406161&view=diff
==============================================================================
--- 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java
 (original)
+++ 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java
 Tue Nov  6 14:56:09 2012
@@ -93,22 +93,6 @@ public enum LexicalCategory {
      * categories. (http://www.ilc.cnr.it/EAGLES96/annotate/node16.html#mp 
19.09.06)
      */
     Unique,
-//    /**
-//     * A numeral is a word, functioning most typically as an adjective or 
pronoun,   that expresses a number, 
-//     * and relation to the number, such as one of the following: Quantity, 
Sequence, Frequency, Fraction.
-//      * 
(http://www.sil.org/linguistics/GlossaryOfLinguisticTerms/WhatIsANumeral.htm 
19.09.06)
-//     */
-//    Numeral,
-// not present in OLIA
-//    /**
-//     * Clitic Element covers only one aspect of the original MULTEXT-East 
(and ISOcat) definitions of 
-//     * cliticness, i.e., that an element is a clitic
-//     */
-//    Clitic,
-//    /**
-//     * Proper nouns (also called proper names) are the names of unique 
entities. (http://en.wikipedia.org/wiki/Noun 19.09.06)
-//     */
-//    ProperNoun,
     ;
     static final String OLIA_NAMESPACE = "http://purl.org/olia/olia.owl#";;
 

Modified: 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java
URL: 
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java?rev=1406161&r1=1406160&r2=1406161&view=diff
==============================================================================
--- 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java
 (original)
+++ 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java
 Tue Nov  6 14:56:09 2012
@@ -3,10 +3,13 @@ package org.apache.stanbol.enhancer.nlp.
 import static java.util.Collections.singleton;
 
 import java.io.IOException;
+import java.util.Dictionary;
 import java.util.Map;
 import java.util.Map.Entry;
 
 import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.enhancer.nlp.NlpProcessingRole;
+import org.apache.stanbol.enhancer.nlp.NlpServiceProperties;
 import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
 import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory;
 import org.apache.stanbol.enhancer.nlp.model.AnalysedTextUtils;
@@ -14,6 +17,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
 import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.slf4j.Logger;
@@ -203,5 +207,30 @@ public final class NlpEngineHelper {
             return null;
         }
     }
-    
+    /**
+     * Parses the {@link NlpProcessingRole} typically provided by the 
+     * {@link ServiceProperties#getServiceProperties()} provided by some
+     * EnhancementEngines.<p>
+     * This supports both {@link NlpProcessingRole} as well as String values
+     * using the {@link NlpProcessingRole#name()}.
+     * @param properties the properties (typically retrieved from the
+     * {@link ServiceProperties#getServiceProperties()} method)
+     * @return the NLP processing role or <code>null</code> if not present or
+     * if the value could not be parsed.
+     */
+    public static NlpProcessingRole getNlpProcessingRole(Map<String,Object> 
properties){
+        Object value = 
properties.get(NlpServiceProperties.ENHANCEMENT_ENGINE_NLP_ROLE);
+        if(value instanceof NlpProcessingRole){
+            return (NlpProcessingRole)value;
+        } else if(value != null){
+            try {
+                return NlpProcessingRole.valueOf(value.toString());
+            } catch (IllegalArgumentException e) {
+                log.warn("Unknown NLP processing role {} -> return 
null",value);
+                return null;
+            }
+        } else {
+            return null;
+        }
+    }
 }

Modified: 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/ServiceProperties.java
URL: 
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/ServiceProperties.java?rev=1406161&r1=1406160&r2=1406161&view=diff
==============================================================================
--- 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/ServiceProperties.java
 (original)
+++ 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/ServiceProperties.java
 Tue Nov  6 14:56:09 2012
@@ -49,7 +49,15 @@ public interface ServiceProperties {
     /**
      * Ordering values < {@link ServiceProperties#ORDERING_PRE_PROCESSING} and
      * >= this value indicate, that an enhancement engine performs operations
-     * that are only dependent on the parsed content.
+     * that are only dependent on the parsed content.<p>
+     * <b>NOTE:</b> the NLP processing specific orderings are defined
+     * within this span.
+     * @see #ORDERING_NLP_LANGAUGE_DETECTION
+     * @see #ORDERING_NLP_SENTENCE_DETECTION
+     * @see #ORDERING_NLP_TOKENIZING
+     * @see #ORDERING_NLP_POS
+     * @see #ORDERING_NLP_CHUNK
+     * @see #ORDERING_NLP_LEMMATIZE
      */
     Integer ORDERING_CONTENT_EXTRACTION = 100;
 
@@ -76,5 +84,46 @@ public interface ServiceProperties {
      * operations on existing enhancements.
      */
     Integer ORDERING_POST_PROCESSING = -100;
-
+    
+    /* -------
+     * NLP processing orderings (all within the ORDERING_CONTENT_EXTRACTION 
range)
+     * -------
+     */
+    /**
+     * Ordering values < {@link #ORDERING_PRE_PROCESSING} and >=
+     * {@link #ORDERING_NLP_LANGAUGE_DETECTION} are reserved for engines that 
detect
+     * the language of the content
+     */
+    Integer ORDERING_NLP_LANGAUGE_DETECTION = 
ServiceProperties.ORDERING_CONTENT_EXTRACTION + 90;
+    /**
+     * Ordering values < {@link #ORDERING_NLP_LANGAUGE_DETECTION} and >=
+     * {@link #ORDERING_NLP_SENTENCE_DETECTION} are reserved for engines that 
extract
+     * sections within the text content
+     */
+    Integer ORDERING_NLP_SENTENCE_DETECTION = 
ServiceProperties.ORDERING_CONTENT_EXTRACTION + 80;
+    /**
+     * Ordering values < {@link #ORDERING_NLP_SENTENCE_DETECTION} and >=
+     * {@link #ORDERING_NLP_TOKENIZING} are reserved for engines that tokenize
+     * the text
+     */
+    Integer ORDERING_NLP_TOKENIZING = 
ServiceProperties.ORDERING_CONTENT_EXTRACTION + 70;
+    /**
+     * Ordering values < {@link #ORDERING_NLP_TOKENIZING} and >=
+     * {@link #ORDERING_NLP_POS} are reserved for engines that perform
+     * POS (Part of Speech) tagging
+     */
+    Integer ORDERING_NLP_POS = ServiceProperties.ORDERING_CONTENT_EXTRACTION + 
60;
+    /**
+     * Ordering values < {@link #ORDERING_NLP_POS} and >=
+     * {@link #ORDERING_NLP_CHUNK} are reserved for engines that annotate
+     * Chunks (such as Noun Phrases) in a text.
+     */
+    Integer ORDERING_NLP_CHUNK = ServiceProperties.ORDERING_CONTENT_EXTRACTION 
+ 50;
+    /**
+     * Ordering values < {@link #ORDERING_NLP_CHUNK} and >=
+     * {@link #ORDERING_NLP_LEMMATIZE} are reserved for engines that lemmatize
+     * texts.<p>
+     * TODO: maybe this should be the same as {@link #ORDERING_NLP_TOKENIZING}
+     */
+    Integer ORDERING_NLP_LEMMATIZE = 
ServiceProperties.ORDERING_CONTENT_EXTRACTION + 40;
 }

Modified: 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/impl/AbstractEnhancementEngine.java
URL: 
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/impl/AbstractEnhancementEngine.java?rev=1406161&r1=1406160&r2=1406161&view=diff
==============================================================================
--- 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/impl/AbstractEnhancementEngine.java
 (original)
+++ 
stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/impl/AbstractEnhancementEngine.java
 Tue Nov  6 14:56:09 2012
@@ -93,4 +93,9 @@ public abstract class AbstractEnhancemen
     public final String getName(){
         return name;
     }
+    
+    @Override
+    public String toString() {
+        return String.format("%s(name=%s)", getClass().getSimpleName(),name);
+    }
 }


Reply via email to