Author: rwesten
Date: Fri Sep 21 14:20:29 2012
New Revision: 1388519
URL: http://svn.apache.org/viewvc?rev=1388519&view=rev
Log:
STANBOL-739: added stanbol.nlp to the dependencies; added a PosTagSetRegistry
for defining the POS tag sets used by CELI.
Added:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java
(with props)
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/pom.xml
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/pom.xml
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/pom.xml?rev=1388519&r1=1388518&r2=1388519&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/pom.xml
(original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/pom.xml Fri
Sep 21 14:20:29 2012
@@ -1,151 +1,155 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Licensed to the Apache Software Foundation (ASF) under one or more
contributor
- license agreements. See the NOTICE file distributed with this work for
additional
- information regarding copyright ownership. The ASF licenses this file
to
- You under the Apache License, Version 2.0 (the "License"); you may not
use
- this file except in compliance with the License. You may obtain a copy
of
- the License at http://www.apache.org/licenses/LICENSE-2.0 Unless
required
- by applicable law or agreed to in writing, software distributed under
the
- License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
CONDITIONS
- OF ANY KIND, either express or implied. See the License for the
specific
- language governing permissions and limitations under the License. -->
+ license agreements. See the NOTICE file distributed with this work for
additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/maven-v4_0_0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
+ <modelVersion>4.0.0</modelVersion>
- <parent>
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
- <relativePath>../../parent</relativePath>
- </parent>
-
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.enhancer.engines.celi</artifactId>
- <packaging>bundle</packaging>
-
- <name>Apache Stanbol Enhancer Enhancement Engine: CELI </name>
- <description></description>
- <inceptionYear>2012</inceptionYear>
-
-
- <dependencies>
- <dependency>
- <groupId>org.apache.stanbol</groupId>
-
<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
- </dependency>
-
- <dependency>
- <groupId>org.apache.stanbol</groupId>
-
<artifactId>org.apache.stanbol.commons.stanboltools.datafileprovider</artifactId>
- <version>0.9.0-incubating</version>
- </dependency>
-
- <dependency>
- <groupId>org.apache.clerezza</groupId>
- <artifactId>rdf.core</artifactId>
- </dependency>
-
- <dependency>
- <groupId>org.apache.felix</groupId>
-
<artifactId>org.apache.felix.scr.annotations</artifactId>
- <scope>provided</scope>
- </dependency>
-
- <!-- generic tax -->
- <dependency>
- <groupId>commons-lang</groupId>
- <artifactId>commons-lang</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.httpcomponents</groupId>
- <artifactId>httpclient-osgi</artifactId>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.stanbol</groupId>
-
<artifactId>org.apache.stanbol.commons.stanboltools.offline</artifactId>
- <version>0.9.0-incubating</version>
- <scope>provided</scope>
- </dependency>
-
- <!-- test -->
- <dependency>
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.enhancer.test</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.enhancer.core</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
- <scope>test</scope>
- </dependency>
- <dependency><!-- for debugging enhancements -->
- <groupId>org.apache.clerezza</groupId>
- <artifactId>rdf.jena.serializer</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency> <!-- we use log4j 1.2 -->
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <scope>test</scope>
- </dependency>
-
-
- </dependencies>
-
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <configuration>
- <skipTests>false</skipTests>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.apache.felix</groupId>
- <artifactId>maven-bundle-plugin</artifactId>
- <extensions>true</extensions>
- <configuration>
- <instructions>
- <Private-Package>
-
org.apache.stanbol.enhancer.engines.celi.ner.impl.*,
-
org.apache.stanbol.enhancer.engines.celi.langid.impl.*,
-
org.apache.stanbol.enhancer.engines.celi.classification.impl.*,
-
org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.*
- </Private-Package>
- <!--
<Embed-Dependency>true</Embed-Dependency>
-
<Embed-Transitive>true</Embed-Transitive> -->
- <Import-Package>
- org.apache.http,
- *;resolution:=optional
- </Import-Package>
- </instructions>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.apache.felix</groupId>
- <artifactId>maven-scr-plugin</artifactId>
- </plugin>
- </plugins>
- </build>
+ <parent>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
+ <relativePath>../../parent</relativePath>
+ </parent>
+
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.engines.celi</artifactId>
+ <packaging>bundle</packaging>
+
+ <name>Apache Stanbol Enhancer Enhancement Engine: CELI </name>
+ <description></description>
+ <inceptionYear>2012</inceptionYear>
+
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
+ </dependency>
+ <dependency> <!-- STANBOL-739: adapt Lemmatizer Engine to use AnalyzedText
-->
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.nlp</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+
<artifactId>org.apache.stanbol.commons.stanboltools.datafileprovider</artifactId>
+ <version>0.9.0-incubating</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.core</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>org.apache.felix.scr.annotations</artifactId>
+ <scope>provided</scope>
+ </dependency>
+
+ <!-- generic tax -->
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpclient-osgi</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.commons.stanboltools.offline</artifactId>
+ <version>0.9.0-incubating</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <!-- test -->
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.test</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency><!-- for debugging enhancements -->
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.jena.serializer</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency> <!-- we use log4j 1.2 -->
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <skipTests>false</skipTests>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ <configuration>
+ <instructions>
+ <Private-Package>
+ org.apache.stanbol.enhancer.engines.celi.ner.impl.*,
+ org.apache.stanbol.enhancer.engines.celi.langid.impl.*,
+ org.apache.stanbol.enhancer.engines.celi.classification.impl.*,
+ org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.*
+ </Private-Package>
+ <!-- <Embed-Dependency>true</Embed-Dependency>
<Embed-Transitive>true</Embed-Transitive> -->
+ <Import-Package>
+ org.apache.http,
+ *;resolution:=optional
+ </Import-Package>
+ </instructions>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-scr-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
</project>
\ No newline at end of file
Added:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java?rev=1388519&view=auto
==============================================================================
---
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java
(added)
+++
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java
Fri Sep 21 14:20:29 2012
@@ -0,0 +1,73 @@
+package org.apache.stanbol.enhancer.engines.celi;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import
org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine;
+import org.apache.stanbol.enhancer.nlp.TagSet;
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
+import org.apache.stanbol.enhancer.nlp.pos.PosTag;
+import org.apache.stanbol.enhancer.nlp.pos.olia.English;
+import org.apache.stanbol.enhancer.nlp.pos.olia.German;
+import org.apache.stanbol.enhancer.nlp.pos.olia.Spanish;
+
+/**
+ * Registry of {@link TagSet}s for known CELI (linguagrid.org) POS models.<p>
+ * Singleton: obtain it via {@link #getInstance()} and look up tag sets by language.
+ * @author Rupert Westenthaler
+ *
+ */
+public final class PosTagSetRegistry {
+
+ private static PosTagSetRegistry instance = new PosTagSetRegistry(); // the single shared instance
+
+ private PosTagSetRegistry(){} // private: instances are only handed out by getInstance()
+
+ private final Map<String, TagSet<PosTag>> models = new
HashMap<String,TagSet<PosTag>>();
+
+ public static PosTagSetRegistry getInstance(){
+ return instance;
+ }
+
+ private void add(TagSet<PosTag> model) {
+ for(String lang : model.getLanguages()){ // register the model under each of its languages
+ if(models.put(lang, model) != null){ // non-null: a model was already registered for lang
+ throw new IllegalStateException("Multiple Models for Language
'"
+ + lang+"'! This is an error in the static confituration of
"
+ + "this class. Please report this to the stanbol-dev
mailing"
+ + "list!");
+ }
+ }
+ }
+ /**
+ * Getter for the {@link TagSet} by language. If no {@link TagSet}
+ * is available for a language this will return <code>null</code>.
+ * @param language the language
+ * @return the {@link TagSet} or <code>null</code> if none is defined
+ */
+ public TagSet<PosTag> getTagSet(String language){
+ return models.get(language);
+ }
+
+ /**
+ * TODO: create correct POS TagSets for the languages supported by CELI.
+ * For now only this single tag set, constructed with the language "it",
+ * is defined; the static initializer below fills and registers it.
+ */
+ public static final TagSet<PosTag> ITALIEN = new TagSet<PosTag>("CELI
Italien","it");
+
+ static {
+ ITALIEN.addTag(new PosTag("ADJ",LexicalCategory.Adjective));
+ ITALIEN.addTag(new PosTag("ADV",LexicalCategory.Adverb));
+ ITALIEN.addTag(new PosTag("ART",LexicalCategory.PronounOrDeterminer));
+ ITALIEN.addTag(new PosTag("CLI")); //TODO: no LexicalCategory mapped yet - mapping ??
+ ITALIEN.addTag(new PosTag("CONJ",LexicalCategory.Conjuction));
+ ITALIEN.addTag(new PosTag("PREP",LexicalCategory.Adposition));
+ ITALIEN.addTag(new PosTag("NF",LexicalCategory.Noun));
+ ITALIEN.addTag(new PosTag("NM",LexicalCategory.Noun));
+ ITALIEN.addTag(new PosTag("V",LexicalCategory.Verb));
+ //register the tag set for its languages; add(..) throws if a language is already taken
+ getInstance().add(ITALIEN);
+ }
+
+}
Propchange:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java
------------------------------------------------------------------------------
svn:mime-type = text/plain