Author: rwesten
Date: Fri Sep 21 14:20:29 2012
New Revision: 1388519

URL: http://svn.apache.org/viewvc?rev=1388519&view=rev
Log:
STANBOL-739: added stanbol.nlp to the dependencies; added a PosTagSetRegistry 
for defining the POS tag sets used by CELI.

Added:
    
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java
   (with props)
Modified:
    stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/pom.xml

Modified: stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/pom.xml
URL: 
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/pom.xml?rev=1388519&r1=1388518&r2=1388519&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/pom.xml 
(original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/pom.xml Fri 
Sep 21 14:20:29 2012
@@ -1,151 +1,155 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!-- Licensed to the Apache Software Foundation (ASF) under one or more 
contributor 
-       license agreements. See the NOTICE file distributed with this work for 
additional 
-       information regarding copyright ownership. The ASF licenses this file 
to 
-       You under the Apache License, Version 2.0 (the "License"); you may not 
use 
-       this file except in compliance with the License. You may obtain a copy 
of 
-       the License at http://www.apache.org/licenses/LICENSE-2.0 Unless 
required 
-       by applicable law or agreed to in writing, software distributed under 
the 
-       License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
CONDITIONS 
-       OF ANY KIND, either express or implied. See the License for the 
specific 
-       language governing permissions and limitations under the License. -->
+  license agreements. See the NOTICE file distributed with this work for 
additional 
+  information regarding copyright ownership. The ASF licenses this file to 
+  You under the Apache License, Version 2.0 (the "License"); you may not use 
+  this file except in compliance with the License. You may obtain a copy of 
+  the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+  by applicable law or agreed to in writing, software distributed under the 
+  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+  OF ANY KIND, either express or implied. See the License for the specific 
+  language governing permissions and limitations under the License. -->
 <project xmlns="http://maven.apache.org/POM/4.0.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd">
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd">
 
-       <modelVersion>4.0.0</modelVersion>
+  <modelVersion>4.0.0</modelVersion>
 
-       <parent>
-               <groupId>org.apache.stanbol</groupId>
-               <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
-               <version>0.10.0-incubating-SNAPSHOT</version>
-               <relativePath>../../parent</relativePath>
-       </parent>
-
-       <groupId>org.apache.stanbol</groupId>
-       <artifactId>org.apache.stanbol.enhancer.engines.celi</artifactId>
-       <packaging>bundle</packaging>
-
-       <name>Apache Stanbol Enhancer Enhancement Engine: CELI  </name>
-       <description></description>
-       <inceptionYear>2012</inceptionYear>
-
-
-       <dependencies>
-               <dependency>
-                       <groupId>org.apache.stanbol</groupId>
-                       
<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
-            <version>0.10.0-incubating-SNAPSHOT</version>
-               </dependency>
-
-               <dependency>
-                       <groupId>org.apache.stanbol</groupId>
-                       
<artifactId>org.apache.stanbol.commons.stanboltools.datafileprovider</artifactId>
-            <version>0.9.0-incubating</version>
-               </dependency>
-
-               <dependency>
-                       <groupId>org.apache.clerezza</groupId>
-                       <artifactId>rdf.core</artifactId>
-               </dependency>
-
-               <dependency>
-                       <groupId>org.apache.felix</groupId>
-                       
<artifactId>org.apache.felix.scr.annotations</artifactId>
-                       <scope>provided</scope>
-               </dependency>
-
-               <!-- generic tax -->
-        <dependency>
-            <groupId>commons-lang</groupId>
-            <artifactId>commons-lang</artifactId>
-        </dependency> 
-               <dependency>
-                       <groupId>org.apache.httpcomponents</groupId>
-                       <artifactId>httpclient-osgi</artifactId>
-               </dependency>
-        <dependency>
-            <groupId>org.slf4j</groupId>
-            <artifactId>slf4j-api</artifactId>
-        </dependency>
-        <dependency>
-               <groupId>org.apache.stanbol</groupId>
-               
<artifactId>org.apache.stanbol.commons.stanboltools.offline</artifactId>
-            <version>0.9.0-incubating</version>
-               <scope>provided</scope>
-       </dependency> 
-               
-               <!-- test -->
-        <dependency>
-            <groupId>org.apache.stanbol</groupId>
-            <artifactId>org.apache.stanbol.enhancer.test</artifactId>
-            <version>0.10.0-incubating-SNAPSHOT</version>
-            <scope>test</scope>
-        </dependency>
-               <dependency>
-            <groupId>org.apache.stanbol</groupId>
-            <artifactId>org.apache.stanbol.enhancer.core</artifactId>
-            <version>0.10.0-incubating-SNAPSHOT</version>
-            <scope>test</scope>
-        </dependency>
-           <dependency><!--  for debugging enhancements -->
-               <groupId>org.apache.clerezza</groupId>
-               <artifactId>rdf.jena.serializer</artifactId>
-               <scope>test</scope>
-           </dependency>
-        <dependency> <!-- we use log4j 1.2 -->
-            <groupId>org.slf4j</groupId>
-            <artifactId>slf4j-log4j12</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>log4j</groupId>
-            <artifactId>log4j</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
-            <scope>test</scope>
-        </dependency>
-
-               
-       </dependencies>
-
-       <build>
-               <plugins>
-                       <plugin>
-                               <groupId>org.apache.maven.plugins</groupId>
-                               <artifactId>maven-surefire-plugin</artifactId>
-                               <configuration>
-                                       <skipTests>false</skipTests>
-                               </configuration>
-                       </plugin>
-                       <plugin>
-                               <groupId>org.apache.felix</groupId>
-                               <artifactId>maven-bundle-plugin</artifactId>
-                               <extensions>true</extensions>
-                               <configuration>
-                                       <instructions>
-                                               <Private-Package>
-                                                       
org.apache.stanbol.enhancer.engines.celi.ner.impl.*, 
-                                                       
org.apache.stanbol.enhancer.engines.celi.langid.impl.*, 
-                                                       
org.apache.stanbol.enhancer.engines.celi.classification.impl.*, 
-                                                       
org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.*
-                                               </Private-Package>
-                                               <!-- 
<Embed-Dependency>true</Embed-Dependency> 
-                                               
<Embed-Transitive>true</Embed-Transitive>  -->
-                                               <Import-Package>
-                                                       org.apache.http,
-                                                       *;resolution:=optional
-                                               </Import-Package>
-                                       </instructions>
-                               </configuration>
-                       </plugin>
-                       <plugin>
-                               <groupId>org.apache.felix</groupId>
-                               <artifactId>maven-scr-plugin</artifactId>
-                       </plugin>
-               </plugins>
-       </build>
+  <parent>
+    <groupId>org.apache.stanbol</groupId>
+    <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
+    <version>0.10.0-incubating-SNAPSHOT</version>
+    <relativePath>../../parent</relativePath>
+  </parent>
+
+  <groupId>org.apache.stanbol</groupId>
+  <artifactId>org.apache.stanbol.enhancer.engines.celi</artifactId>
+  <packaging>bundle</packaging>
+
+  <name>Apache Stanbol Enhancer Enhancement Engine: CELI  </name>
+  <description></description>
+  <inceptionYear>2012</inceptionYear>
+
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+      <version>0.10.0-incubating-SNAPSHOT</version>
+    </dependency>
+    <dependency> <!-- STANBOL-739: adapt Lemmatizer Engine to use AnalyzedText 
-->
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.nlp</artifactId>
+      <version>0.10.0-incubating-SNAPSHOT</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      
<artifactId>org.apache.stanbol.commons.stanboltools.datafileprovider</artifactId>
+      <version>0.9.0-incubating</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.clerezza</groupId>
+      <artifactId>rdf.core</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.felix</groupId>
+      <artifactId>org.apache.felix.scr.annotations</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
+    <!-- generic tax -->
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.httpcomponents</groupId>
+      <artifactId>httpclient-osgi</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.commons.stanboltools.offline</artifactId>
+      <version>0.9.0-incubating</version>
+      <scope>provided</scope>
+    </dependency>
+
+    <!-- test -->
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.test</artifactId>
+      <version>0.10.0-incubating-SNAPSHOT</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+      <version>0.10.0-incubating-SNAPSHOT</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency><!-- for debugging enhancements -->
+      <groupId>org.apache.clerezza</groupId>
+      <artifactId>rdf.jena.serializer</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency> <!-- we use log4j 1.2 -->
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+
+  </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <skipTests>false</skipTests>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-bundle-plugin</artifactId>
+        <extensions>true</extensions>
+        <configuration>
+          <instructions>
+            <Private-Package>
+              org.apache.stanbol.enhancer.engines.celi.ner.impl.*,
+              org.apache.stanbol.enhancer.engines.celi.langid.impl.*,
+              org.apache.stanbol.enhancer.engines.celi.classification.impl.*,
+              org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.*
+            </Private-Package>
+            <!-- <Embed-Dependency>true</Embed-Dependency> 
<Embed-Transitive>true</Embed-Transitive> -->
+            <Import-Package>
+              org.apache.http,
+              *;resolution:=optional
+            </Import-Package>
+          </instructions>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-scr-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
 
 </project>
\ No newline at end of file

Added: 
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java
URL: 
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java?rev=1388519&view=auto
==============================================================================
--- 
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java
 (added)
+++ 
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java
 Fri Sep 21 14:20:29 2012
@@ -0,0 +1,73 @@
+package org.apache.stanbol.enhancer.engines.celi;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import 
org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine;
+import org.apache.stanbol.enhancer.nlp.TagSet;
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
+import org.apache.stanbol.enhancer.nlp.pos.PosTag;
+import org.apache.stanbol.enhancer.nlp.pos.olia.English;
+import org.apache.stanbol.enhancer.nlp.pos.olia.German;
+import org.apache.stanbol.enhancer.nlp.pos.olia.Spanish;
+
+/**
+ * {@link TagSet}s for known CELI (linguagrid.org) POS models, by language.
+ * Tag sets are registered only by the static initializer of this class.
+ *
+ * @author Rupert Westenthaler
+ */
+public final class PosTagSetRegistry {
+
+    /** Singleton; declared first so the static block below can use it. */
+    private static final PosTagSetRegistry instance = new PosTagSetRegistry();
+
+    private final Map<String, TagSet<PosTag>> models =
+            new HashMap<String, TagSet<PosTag>>();
+
+    private PosTagSetRegistry() {}
+
+    public static PosTagSetRegistry getInstance() {
+        return instance;
+    }
+
+    /**
+     * Registers the parsed tag set for each language it reports.
+     * @throws IllegalStateException if a language already has a tag set
+     */
+    private void add(TagSet<PosTag> model) {
+        for (String lang : model.getLanguages()) {
+            if (models.put(lang, model) != null) {
+                throw new IllegalStateException(
+                    "Multiple Models for Language '" + lang + "'! This is "
+                    + "an error in the static configuration of this class. "
+                    + "Please report this to the stanbol-dev mailing list!");
+            }
+        }
+    }
+
+    /**
+     * Getter for the {@link TagSet} by language.
+     * @param language the language
+     * @return the tag set or <code>null</code> if none is defined
+     */
+    public TagSet<PosTag> getTagSet(String language) {
+        return models.get(language);
+    }
+
+    /** Default CELI tag set (constructed for "it").
+     *  TODO: create correct POS TagSets for the languages supported by CELI */
+    public static final TagSet<PosTag> ITALIEN =
+            new TagSet<PosTag>("CELI Italien","it");
+    static {
+        ITALIEN.addTag(new PosTag("ADJ",LexicalCategory.Adjective));
+        ITALIEN.addTag(new PosTag("ADV",LexicalCategory.Adverb));
+        ITALIEN.addTag(new PosTag("ART",LexicalCategory.PronounOrDeterminer));
+        ITALIEN.addTag(new PosTag("CLI")); //mapping ??
+        ITALIEN.addTag(new PosTag("CONJ",LexicalCategory.Conjuction));
+        ITALIEN.addTag(new PosTag("PREP",LexicalCategory.Adposition));
+        ITALIEN.addTag(new PosTag("NF",LexicalCategory.Noun));
+        ITALIEN.addTag(new PosTag("NM",LexicalCategory.Noun));
+        ITALIEN.addTag(new PosTag("V",LexicalCategory.Verb));
+        getInstance().add(ITALIEN); // add the tag set to the registry
+    }
+}

Propchange: 
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain


Reply via email to