Author: tommaso
Date: Mon Nov 21 14:41:27 2011
New Revision: 1204523
URL: http://svn.apache.org/viewvc?rev=1204523&view=rev
Log:
[CLEREZZA-658] - migrated the RegExAnnotator sample to an Apache OpenNLP-based
service
Added:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/OpenNLPNERAOService.java
- copied, changed from r1202611,
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOService.java
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateOpenNLPNERAOAE.xml
- copied, changed from r1202611,
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateRegExAOAE.xml
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/PersonNameFinder.xml
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/SentenceDetector.xml
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/Tokenizer.xml
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/TypeSystem.xml
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/en-ner-person.bin
(with props)
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/en-sent.bin
(with props)
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/en-token.bin
(with props)
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/OpenNLPNERAOServiceTest.java
- copied, changed from r1202611,
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOServiceTest.java
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/resources/ner_test_page.html
Removed:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOService.java
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateRegExAOAE.xml
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/concepts.xml
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOServiceTest.java
Modified:
incubator/clerezza/trunk/parent/uima/uima.samples/README.txt
incubator/clerezza/trunk/parent/uima/uima.samples/pom.xml
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/UIMASamplesBundleActivator.java
Modified: incubator/clerezza/trunk/parent/uima/uima.samples/README.txt
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/README.txt?rev=1204523&r1=1204522&r2=1204523&view=diff
==============================================================================
--- incubator/clerezza/trunk/parent/uima/uima.samples/README.txt (original)
+++ incubator/clerezza/trunk/parent/uima/uima.samples/README.txt Mon Nov 21
14:41:27 2011
@@ -4,6 +4,8 @@ in the Clerezza console run (in this sam
:f start mvn:org.apache.clerezza/uima.ontologies
:f start mvn:org.apache.clerezza/uima.ontologies.ao
:f start mvn:org.apache.clerezza/uima.casconsumer
+ :f start mvn:org.apache.opennlp/opennlp-tools/1.5.2-incubating
+ :f start mvn:org.apache.opennlp/opennlp-maxent/3.0.2-incubating
:f start mvn:org.apache.clerezza/uima.samples
then try sending the following HTTP POST with cURL:
Modified: incubator/clerezza/trunk/parent/uima/uima.samples/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/pom.xml?rev=1204523&r1=1204522&r2=1204523&view=diff
==============================================================================
--- incubator/clerezza/trunk/parent/uima/uima.samples/pom.xml (original)
+++ incubator/clerezza/trunk/parent/uima/uima.samples/pom.xml Mon Nov 21
14:41:27 2011
@@ -54,25 +54,21 @@
<version>2.0.1</version>
</dependency>
<dependency>
- <groupId>org.apache.uima</groupId>
- <artifactId>RegularExpressionAnnotator</artifactId>
- <version>2.3.1</version>
+ <groupId>org.apache.opennlp</groupId>
+ <artifactId>opennlp-uima</artifactId>
+ <version>1.5.2-incubating</version>
</dependency>
<dependency>
- <groupId>org.apache.xmlbeans</groupId>
- <artifactId>xmlbeans</artifactId>
- <version>2.4.0</version>
- <exclusions>
- <exclusion> <!-- licensing issues, use geronimo instead -->
- <groupId>stax</groupId>
- <artifactId>stax-api</artifactId>
- </exclusion>
- </exclusions>
+ <groupId>org.apache.opennlp</groupId>
+ <artifactId>opennlp-maxent</artifactId>
+ <version>3.0.2-incubating</version>
+ <scope>runtime</scope>
</dependency>
<dependency>
- <groupId>org.apache.geronimo.specs</groupId>
- <artifactId>geronimo-stax-api_1.0_spec</artifactId>
- <version>1.0.1</version>
+ <groupId>org.apache.opennlp</groupId>
+ <artifactId>opennlp-tools</artifactId>
+ <version>1.5.2-incubating</version>
+ <scope>runtime</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
@@ -80,6 +76,11 @@
<version>4.8.2</version>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>org.apache.clerezza.rdf.core</artifactId>
+ <version>0.12-incubating-SNAPSHOT</version>
+ </dependency>
</dependencies>
<build>
<plugins>
@@ -90,12 +91,11 @@
<configuration>
<instructions>
<Import-Package>
-
javax.xml.*;org.xml.*;org.apache.xml.*;org.apache.tools.*;com.sun.*;org.w3c.dom.*;resolution:=optional,javax.ws.rs.*;org.apache.uima.*;org.apache.clerezza.*
+
javax.ws.rs.*;org.apache.uima.*;org.apache.clerezza.*;opennlp.uima.*;opennlp.tools.*;opennlp.maxent.*
</Import-Package>
<Export-Package>org.apache.clerezza.uima.samples.*</Export-Package>
<Bundle-Activator>org.apache.clerezza.uima.samples.UIMASamplesBundleActivator</Bundle-Activator>
-
<Embed-Dependency>*;artifactId=commons-io|RegularExpressionAnnotator|xmlbeans|geronimo-stax-api_1.0_spec
- </Embed-Dependency>
+
<Embed-Dependency>*;artifactId=commons-io|opennlp-uima</Embed-Dependency>
</instructions>
</configuration>
</plugin>
Modified:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/UIMASamplesBundleActivator.java
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/UIMASamplesBundleActivator.java?rev=1204523&r1=1204522&r2=1204523&view=diff
==============================================================================
---
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/UIMASamplesBundleActivator.java
(original)
+++
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/UIMASamplesBundleActivator.java
Mon Nov 21 14:41:27 2011
@@ -18,9 +18,11 @@
*/
package org.apache.clerezza.uima.samples;
+import opennlp.uima.namefind.NameFinder;
+import opennlp.uima.sentdetect.SentenceDetector;
+import opennlp.uima.tokenize.Tokenizer;
import org.apache.clerezza.uima.utils.UIMABundleActivator;
-import
org.apache.clerezza.uima.utils.cl.AnalysisComponentsClassLoaderRepository;
-import org.apache.uima.annotator.regex.impl.RegExAnnotator;
+import org.apache.clerezza.uima.utils.cl.UIMAResourcesClassLoaderRepository;
/**
* {@link org.osgi.framework.BundleActivator} for uima.samples module
@@ -28,6 +30,8 @@ import org.apache.uima.annotator.regex.i
public class UIMASamplesBundleActivator extends UIMABundleActivator {
@Override
protected void classRegistered() {
-
AnalysisComponentsClassLoaderRepository.registerComponent(RegExAnnotator.class);
+ UIMAResourcesClassLoaderRepository.registerComponent(Tokenizer.class);
+
UIMAResourcesClassLoaderRepository.registerComponent(SentenceDetector.class);
+ UIMAResourcesClassLoaderRepository.registerComponent(NameFinder.class);
}
}
Copied:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/OpenNLPNERAOService.java
(from r1202611,
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOService.java)
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/OpenNLPNERAOService.java?p2=incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/OpenNLPNERAOService.java&p1=incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOService.java&r1=1202611&r2=1204523&rev=1204523&view=diff
==============================================================================
---
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOService.java
(original)
+++
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/OpenNLPNERAOService.java
Mon Nov 21 14:41:27 2011
@@ -37,16 +37,16 @@ import java.util.HashMap;
import java.util.Map;
/**
- * Sample REST service which uses {@link
org.apache.uima.annotator.regex.impl.RegExAnnotator} to extract named entities
+ * Sample REST service which uses {@link opennlp.uima.namefind.NameFinder} to
extract named entities
* from the text of a given URI
*/
@Component
@Service(Object.class)
@Property(name = "javax.ws.rs", boolValue = true)
@Path("/uima")
-public class RegExAnnotatorAOService {
+public class OpenNLPNERAOService {
- private static final String PATH = "/META-INF/AggregateRegExAOAE.xml";
+ private static final String PATH = "/META-INF/AggregateOpenNLPNERAOAE.xml";
private static final String OUTPUTGRAPH = "outputgraph";
@POST
Copied:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateOpenNLPNERAOAE.xml
(from r1202611,
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateRegExAOAE.xml)
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateOpenNLPNERAOAE.xml?p2=incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateOpenNLPNERAOAE.xml&p1=incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateRegExAOAE.xml&r1=1202611&r2=1204523&rev=1204523&view=diff
==============================================================================
---
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateRegExAOAE.xml
(original)
+++
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateOpenNLPNERAOAE.xml
Mon Nov 21 14:41:27 2011
@@ -24,15 +24,21 @@
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
<primitive>false</primitive>
<delegateAnalysisEngineSpecifiers>
- <delegateAnalysisEngine key="RegExAnnotator">
- <import name="RegExAnnotator"/>
+ <delegateAnalysisEngine key="Tokenizer">
+ <import location="Tokenizer.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="SentDetect">
+ <import location="SentenceDetector.xml"/>
+ </delegateAnalysisEngine>
+ <delegateAnalysisEngine key="PersonNameFinder">
+ <import location="PersonNameFinder.xml"/>
</delegateAnalysisEngine>
<delegateAnalysisEngine key="ClerezzaCASConsumerDescriptor">
<import name="ClerezzaCASConsumerDescriptor"/>
</delegateAnalysisEngine>
</delegateAnalysisEngineSpecifiers>
<analysisEngineMetaData>
- <name>AggregateRegExAOAE.xml</name>
+ <name>AggregateOpenNLPNERAOAE.xml</name>
<description/>
<version>1.0</version>
<vendor/>
@@ -55,15 +61,6 @@
<parameter>ClerezzaCASConsumerDescriptor/mappingStrategy</parameter>
</overrides>
</configurationParameter>
- <configurationParameter>
- <name>concepts</name>
- <type>String</type>
- <multiValued>true</multiValued>
- <mandatory>true</mandatory>
- <overrides>
- <parameter>RegExAnnotator/ConceptFiles</parameter>
- </overrides>
- </configurationParameter>
</configurationParameters>
<configurationParameterSettings>
<nameValuePair>
@@ -78,18 +75,12 @@
<string>ao</string>
</value>
</nameValuePair>
- <nameValuePair>
- <name>concepts</name>
- <value>
- <array>
- <string>META-INF/concepts.xml</string>
- </array>
- </value>
- </nameValuePair>
</configurationParameterSettings>
<flowConstraints>
<fixedFlow>
- <node>RegExAnnotator</node>
+ <node>Tokenizer</node>
+ <node>SentDetect</node>
+ <node>PersonNameFinder</node>
<node>ClerezzaCASConsumerDescriptor</node>
</fixedFlow>
</flowConstraints>
Added:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/PersonNameFinder.xml
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/PersonNameFinder.xml?rev=1204523&view=auto
==============================================================================
---
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/PersonNameFinder.xml
(added)
+++
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/PersonNameFinder.xml
Mon Nov 21 14:41:27 2011
@@ -0,0 +1,119 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>true</primitive>
+
<annotatorImplementationName>opennlp.uima.namefind.NameFinder</annotatorImplementationName>
+ <analysisEngineMetaData>
+ <name>Person Name Finder</name>
+ <description></description>
+ <version>1.5.1-incubating</version>
+ <vendor>Apache Software Foundation</vendor>
+ <configurationParameters>
+
+ <configurationParameter>
+ <name>opennlp.uima.SentenceType</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+
+ <configurationParameter>
+ <name>opennlp.uima.TokenType</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+
+ <configurationParameter>
+ <name>opennlp.uima.NameType</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+ </configurationParameters>
+
+ <configurationParameterSettings>
+
+ <nameValuePair>
+ <name>opennlp.uima.SentenceType</name>
+ <value>
+ <string>uima.tcas.DocumentAnnotation</string>
+ </value>
+ </nameValuePair>
+
+ <nameValuePair>
+ <name>opennlp.uima.TokenType</name>
+ <value>
+ <string>opennlp.uima.Token</string>
+ </value>
+ </nameValuePair>
+
+ <nameValuePair>
+ <name>opennlp.uima.NameType</name>
+ <value>
+ <string>opennlp.uima.Person</string>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+
+ <typeSystemDescription>
+ <imports>
+ <import location="TypeSystem.xml"/>
+ </imports>
+ </typeSystemDescription>
+
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs/>
+ <languagesSupported>
+ <language>en</language>
+ </languagesSupported>
+ </capability>
+ </capabilities>
+ </analysisEngineMetaData>
+
+ <externalResourceDependencies>
+ <externalResourceDependency>
+ <key>opennlp.uima.ModelName</key>
+
<interfaceName>opennlp.uima.namefind.TokenNameFinderModelResource</interfaceName>
+ </externalResourceDependency>
+ </externalResourceDependencies>
+
+ <resourceManagerConfiguration>
+ <externalResource>
+ <name>PersonModel</name>
+ <fileResourceSpecifier>
+ <fileUrl>file:META-INF/en-ner-person.bin</fileUrl>
+ </fileResourceSpecifier>
+
<implementationName>opennlp.uima.namefind.TokenNameFinderModelResourceImpl</implementationName>
+ </externalResource>
+
+ <externalResourceBindings>
+ <externalResourceBinding>
+ <key>opennlp.uima.ModelName</key>
+ <resourceName>PersonModel</resourceName>
+ </externalResourceBinding>
+ </externalResourceBindings>
+ </resourceManagerConfiguration>
+</analysisEngineDescription>
Added:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/SentenceDetector.xml
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/SentenceDetector.xml?rev=1204523&view=auto
==============================================================================
---
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/SentenceDetector.xml
(added)
+++
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/SentenceDetector.xml
Mon Nov 21 14:41:27 2011
@@ -0,0 +1,99 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java
+ </frameworkImplementation>
+ <primitive>true</primitive>
+
<annotatorImplementationName>opennlp.uima.sentdetect.SentenceDetector</annotatorImplementationName>
+ <analysisEngineMetaData>
+ <name>Sentence Detector</name>
+ <description></description>
+ <version>1.5.1-incubating</version>
+ <vendor>Apache Software Foundation</vendor>
+ <configurationParameters>
+ <configurationParameter>
+ <name>opennlp.uima.SentenceType</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>opennlp.uima.ContainerType</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ </configurationParameters>
+
+ <configurationParameterSettings>
+
+ <nameValuePair>
+ <name>opennlp.uima.SentenceType</name>
+ <value>
+ <string>opennlp.uima.Sentence</string>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+
+ <typeSystemDescription>
+ <imports>
+ <import location="TypeSystem.xml"/>
+ </imports>
+ </typeSystemDescription>
+
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs/>
+ <languagesSupported>
+ <language>en</language>
+ </languagesSupported>
+ </capability>
+ </capabilities>
+ </analysisEngineMetaData>
+
+ <externalResourceDependencies>
+ <externalResourceDependency>
+ <key>opennlp.uima.ModelName</key>
+
<interfaceName>opennlp.uima.sentdetect.SentenceModelResource</interfaceName>
+ </externalResourceDependency>
+ </externalResourceDependencies>
+
+ <resourceManagerConfiguration>
+ <externalResources>
+ <externalResource>
+ <name>SentenceModel</name>
+ <fileResourceSpecifier>
+ <fileUrl>file:META-INF/en-sent.bin</fileUrl>
+ </fileResourceSpecifier>
+
<implementationName>opennlp.uima.sentdetect.SentenceModelResourceImpl</implementationName>
+ </externalResource>
+ </externalResources>
+
+ <externalResourceBindings>
+ <externalResourceBinding>
+ <key>opennlp.uima.ModelName</key>
+ <resourceName>SentenceModel</resourceName>
+ </externalResourceBinding>
+ </externalResourceBindings>
+ </resourceManagerConfiguration>
+</analysisEngineDescription>
Added:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/Tokenizer.xml
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/Tokenizer.xml?rev=1204523&view=auto
==============================================================================
---
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/Tokenizer.xml
(added)
+++
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/Tokenizer.xml
Mon Nov 21 14:41:27 2011
@@ -0,0 +1,114 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>true</primitive>
+
<annotatorImplementationName>opennlp.uima.tokenize.Tokenizer</annotatorImplementationName>
+ <analysisEngineMetaData>
+ <name>Tokenizer</name>
+ <description></description>
+ <version>1.5.1-incubating</version>
+ <vendor>Apache Software Foundation</vendor>
+ <configurationParameters>
+ <configurationParameter>
+ <name>opennlp.uima.SentenceType</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+
+ <configurationParameter>
+ <name>opennlp.uima.TokenType</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+
+ <configurationParameter>
+ <name>opennlp.uima.tokenizer.IsAlphaNumericOptimization</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ </configurationParameters>
+ <configurationParameterSettings>
+ <nameValuePair>
+ <name>opennlp.uima.TokenType</name>
+ <value>
+ <string>opennlp.uima.Token</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>opennlp.uima.SentenceType</name>
+ <value>
+ <string>uima.tcas.DocumentAnnotation</string>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+
+ <typeSystemDescription>
+ <imports>
+ <import location="TypeSystem.xml"/>
+ </imports>
+ </typeSystemDescription>
+
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs/>
+ <languagesSupported>
+ <language>en</language>
+ </languagesSupported>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ </operationalProperties>
+ </analysisEngineMetaData>
+
+ <externalResourceDependencies>
+ <externalResourceDependency>
+ <key>opennlp.uima.ModelName</key>
+
<interfaceName>opennlp.uima.tokenize.TokenizerModelResource</interfaceName>
+ </externalResourceDependency>
+ </externalResourceDependencies>
+
+ <resourceManagerConfiguration>
+ <externalResources>
+ <externalResource>
+ <name>TokenModel</name>
+ <fileResourceSpecifier>
+ <fileUrl>file:META-INF/en-token.bin</fileUrl>
+ </fileResourceSpecifier>
+
<implementationName>opennlp.uima.tokenize.TokenizerModelResourceImpl</implementationName>
+ </externalResource>
+ </externalResources>
+
+ <externalResourceBindings>
+ <externalResourceBinding>
+ <key>opennlp.uima.ModelName</key>
+ <resourceName>TokenModel</resourceName>
+ </externalResourceBinding>
+ </externalResourceBindings>
+ </resourceManagerConfiguration>
+</analysisEngineDescription>
Added:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/TypeSystem.xml
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/TypeSystem.xml?rev=1204523&view=auto
==============================================================================
---
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/TypeSystem.xml
(added)
+++
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/TypeSystem.xml
Mon Nov 21 14:41:27 2011
@@ -0,0 +1,98 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<typeSystemDescription xmlns="http://uima.watson.ibm.com/resourceSpecifier">
+ <name>OpenNLP TypeSystem</name>
+ <description>
+ This is the default OpenNLP type system. All the sample
+ descriptors reference the types in this type system. To replace it with
+ a custom type system, change the mapping in the descriptors to the
+ custom types and reference the custom type system.
+ </description>
+ <version>1.5.1</version>
+ <vendor>Apache Software Foundation</vendor>
+ <types>
+ <typeDescription>
+ <name>opennlp.uima.Sentence</name>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ </typeDescription>
+
+ <typeDescription>
+ <name>opennlp.uima.Token</name>
+ <supertypeName>uima.tcas.Annotation
+ </supertypeName>
+ <features>
+ <featureDescription>
+ <name>pos</name>
+ <description>Part of speech</description>
+ <rangeTypeName>uima.cas.String</rangeTypeName>
+ </featureDescription>
+ </features>
+ </typeDescription>
+
+ <typeDescription>
+ <name>opennlp.uima.Chunk</name>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ <features>
+ <featureDescription>
+ <name>type</name>
+ <description></description>
+ <rangeTypeName>uima.cas.String</rangeTypeName>
+ </featureDescription>
+ </features>
+ </typeDescription>
+
+ <typeDescription>
+ <name>opennlp.uima.Person</name>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ </typeDescription>
+
+ <typeDescription>
+ <name>opennlp.uima.Organization</name>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ </typeDescription>
+
+ <typeDescription>
+ <name>opennlp.uima.Location</name>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ </typeDescription>
+
+ <typeDescription>
+ <name>opennlp.uima.Date</name>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ </typeDescription>
+
+ <typeDescription>
+ <name>opennlp.uima.Time</name>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ </typeDescription>
+
+ <typeDescription>
+ <name>opennlp.uima.Money</name>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ </typeDescription>
+
+ <typeDescription>
+ <name>opennlp.uima.Percentage</name>
+ <supertypeName>uima.tcas.Annotation</supertypeName>
+ </typeDescription>
+ </types>
+</typeSystemDescription>
\ No newline at end of file
Added:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/en-ner-person.bin
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/en-ner-person.bin?rev=1204523&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/en-ner-person.bin
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/en-sent.bin
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/en-sent.bin?rev=1204523&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/en-sent.bin
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/en-token.bin
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/en-token.bin?rev=1204523&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/en-token.bin
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Copied:
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/OpenNLPNERAOServiceTest.java
(from r1202611,
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOServiceTest.java)
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/OpenNLPNERAOServiceTest.java?p2=incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/OpenNLPNERAOServiceTest.java&p1=incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOServiceTest.java&r1=1202611&r2=1204523&rev=1204523&view=diff
==============================================================================
---
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOServiceTest.java
(original)
+++
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/OpenNLPNERAOServiceTest.java
Mon Nov 21 14:41:27 2011
@@ -25,18 +25,17 @@ import static org.junit.Assert.assertNot
import static org.junit.Assert.fail;
/**
- * Testcase for {@link RegExAnnotatorAOService}
+ * Testcase for {@link OpenNLPNERAOService}
*/
-public class RegExAnnotatorAOServiceTest {
+public class OpenNLPNERAOServiceTest {
@Test
public void serviceExecutionTest() {
try {
- RegExAnnotatorAOService service = new RegExAnnotatorAOService();
- Graph graph =
service.enrichUri("http://www.apache.org/foundation/sponsorship.html");
+ OpenNLPNERAOService service = new OpenNLPNERAOService();
+ Graph graph =
service.enrichUri(getClass().getResource("/ner_test_page.html").toURI().toString());
assertNotNull(graph);
} catch (Exception e) {
- e.printStackTrace();
fail(e.getLocalizedMessage());
}
}
Added:
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/resources/ner_test_page.html
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/test/resources/ner_test_page.html?rev=1204523&view=auto
==============================================================================
---
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/resources/ner_test_page.html
(added)
+++
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/resources/ner_test_page.html
Mon Nov 21 14:41:27 2011
@@ -0,0 +1,13 @@
+<html>
+<head>
+ <title>
+ Noam Chomsky quote
+ </title>
+</head>
+<body>
+Thomas Jefferson, the leading Enlightenment figure in the United States, along
with Benjamin Franklin, who took exactly
+the same view, argued that dependence will lead to "subservience and
venality", and will "suffocate[s] the germs of
+virtue". And remember, by dependence he meant wage labor, which was considered
an abomination under classical liberal
+principles.
+</body>
+</html>
\ No newline at end of file