Author: tommaso
Date: Tue Nov 8 11:04:23 2011
New Revision: 1199190
URL: http://svn.apache.org/viewvc?rev=1199190&view=rev
Log:
moved RegExAnnotatorAOService into services package, fixed the AE aggregate
descriptor, added a local copy of the concepts file, added a simple service
test, fixed import-package directive
Added:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOService.java
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/concepts.xml
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOServiceTest.java
Removed:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/RegExAnnotatorAOService.java
Modified:
incubator/clerezza/trunk/parent/uima/uima.samples/pom.xml
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateRegExAOAE.xml
Modified: incubator/clerezza/trunk/parent/uima/uima.samples/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/pom.xml?rev=1199190&r1=1199189&r2=1199190&view=diff
==============================================================================
--- incubator/clerezza/trunk/parent/uima/uima.samples/pom.xml (original)
+++ incubator/clerezza/trunk/parent/uima/uima.samples/pom.xml Tue Nov 8
11:04:23 2011
@@ -58,6 +58,12 @@
<artifactId>RegularExpressionAnnotator</artifactId>
<version>2.3.1</version>
</dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.8.2</version>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<build>
<plugins>
@@ -68,7 +74,7 @@
<configuration>
<instructions>
<Import-Package>
-
javax.ws.rs.*;javax.xml.stream.*;org.osgi.service.component.*;org.apache.uima.*;org.apache.clerezza.*;org.apache.commons.io.*
+
javax.ws.rs.*;org.apache.uima.*;org.apache.clerezza.*;org.apache.commons.io.*
</Import-Package>
<Export-Package>org.apache.clerezza.uima.samples.*</Export-Package>
<Bundle-Activator>org.apache.clerezza.uima.samples.UIMASamplesBundleActivator</Bundle-Activator>
Added:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOService.java
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOService.java?rev=1199190&view=auto
==============================================================================
---
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOService.java
(added)
+++
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOService.java
Tue Nov 8 11:04:23 2011
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.clerezza.uima.samples.services;
+
+import org.apache.clerezza.rdf.core.Graph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.access.TcManager;
+import org.apache.clerezza.uima.utils.UIMAExecutor;
+import org.apache.clerezza.uima.utils.UIMAExecutorFactory;
+import org.apache.commons.io.IOUtils;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.uima.util.XMLInputSource;
+
+import javax.ws.rs.*;
+import javax.ws.rs.core.Response;
+import java.net.URI;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Sample REST service which uses
+ * {@link org.apache.uima.annotator.regex.impl.RegExAnnotator} to extract named
+ * entities from the text of a given URI.
+ */
+@Component
+@Service(Object.class)
+@Property(name = "javax.ws.rs", boolValue = true)
+@Path("/uima")
+public class RegExAnnotatorAOService {
+
+    /** Classpath location of the aggregate analysis engine descriptor. */
+    private static final String PATH = "/META-INF/AggregateRegExAOAE.xml";
+
+    /**
+     * Key of the executor parameter that carries the requested URI.
+     * Presumably this names the graph the analysis writes to - confirm against
+     * the CAS consumer.
+     */
+    private static final String OUTPUTGRAPH = "outputgraph";
+
+    /**
+     * Downloads the content found at the given URI, runs the aggregate analysis
+     * engine described by {@link #PATH} on it, and returns the graph that
+     * {@link TcManager} holds under that same URI.
+     *
+     * NOTE(review): the URI is supplied by the caller and is opened on the
+     * server without any restriction on scheme or host; confirm this is
+     * acceptable outside of a sample.
+     *
+     * @param uriString the URI of the document to analyze, taken from the
+     *                  <code>uri</code> query parameter
+     * @return the graph obtained from the MGraph named by <code>uriString</code>
+     * @throws WebApplicationException with status 400 if no URI is given, or
+     *         with status 500 if fetching or analyzing the document fails
+     */
+    @POST
+    @Path("regex")
+    @Produces("application/rdf+xml")
+    public Graph enrichUri(@QueryParam("uri") String uriString) {
+        if (uriString == null || uriString.length() == 0) {
+            throw new WebApplicationException(
+                Response.status(Response.Status.BAD_REQUEST).entity("No URI specified").build());
+        }
+
+        UIMAExecutor executor = UIMAExecutorFactory.getInstance().createUIMAExecutor();
+        Map<String, Object> parameters = new HashMap<String, Object>();
+        parameters.put(OUTPUTGRAPH, uriString);
+        try {
+            URL url = URI.create(uriString).toURL();
+            // Fully qualified so that no additional import is required.
+            java.io.InputStream content = url.openStream();
+            try {
+                // The text is decoded with the platform default charset.
+                String text = IOUtils.toString(content);
+                executor.analyzeDocument(text, new XMLInputSource(getClass().getResource(PATH)), parameters);
+            } finally {
+                // Always release the connection, even when analysis fails.
+                IOUtils.closeQuietly(content);
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+            String message = "Failed UIMA execution on URI " + uriString + " due to \n" + e.getLocalizedMessage();
+            // Keep the original exception as the cause instead of dropping it.
+            throw new WebApplicationException(e,
+                Response.status(Response.Status.INTERNAL_SERVER_ERROR).entity(message).build());
+        }
+        return TcManager.getInstance().getMGraph(new UriRef(uriString)).getGraph();
+    }
+
+}
+
Modified:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateRegExAOAE.xml
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateRegExAOAE.xml?rev=1199190&r1=1199189&r2=1199190&view=diff
==============================================================================
---
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateRegExAOAE.xml
(original)
+++
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/AggregateRegExAOAE.xml
Tue Nov 8 11:04:23 2011
@@ -56,12 +56,12 @@
</overrides>
</configurationParameter>
<configurationParameter>
- <name>alchemykey</name>
+ <name>concepts</name>
<type>String</type>
- <multiValued>false</multiValued>
+ <multiValued>true</multiValued>
<mandatory>true</mandatory>
<overrides>
- <parameter>UrlConceptTaggingAEDescriptor/apikey</parameter>
+ <parameter>RegExAnnotator/ConceptFiles</parameter>
</overrides>
</configurationParameter>
</configurationParameters>
@@ -78,11 +78,18 @@
<string>ao</string>
</value>
</nameValuePair>
+ <nameValuePair>
+ <name>concepts</name>
+ <value>
+ <array>
+ <string>META-INF/concepts.xml</string>
+ </array>
+ </value>
+ </nameValuePair>
</configurationParameterSettings>
- >
<flowConstraints>
<fixedFlow>
- <node>UrlConceptTaggingAEDescriptor</node>
+ <node>RegExAnnotator</node>
<node>ClerezzaCASConsumerDescriptor</node>
</fixedFlow>
</flowConstraints>
Added:
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/concepts.xml
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/concepts.xml?rev=1199190&view=auto
==============================================================================
---
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/concepts.xml
(added)
+++
incubator/clerezza/trunk/parent/uima/uima.samples/src/main/resources/META-INF/concepts.xml
Tue Nov 8 11:04:23 2011
@@ -0,0 +1,118 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+-->
+<conceptSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns="http://incubator.apache.org/uima/regex"
+ xsi:schemaLocation="concept.xsd">
+
+ <concept name="emailAddressDetection">
+ <rules>
+ <rule
+
regEx="([a-zA-Z0-9!#$%*+'/=?^_\x2D`{|}~.\x26]+)@([a-zA-Z0-9._-]+[a-zA-Z]{2,4})"
+ matchStrategy="matchAll"
matchType="uima.tcas.DocumentAnnotation"/>
+ </rules>
+ <createAnnotations>
+ <annotation id="emailAnnot"
+ type="org.apache.uima.EmailAddress">
+ <begin group="0"/>
+ <end group="0"/>
+ <setFeature name="localPart" type="String"
+ normalization="ToLowerCase">
+ $1
+ </setFeature>
+ <setFeature name="domainPart" type="String"
+ normalization="ToLowerCase">
+ $2
+ </setFeature>
+ <setFeature name="normalizedEmail" type="String"
+ normalization="ToLowerCase">
+ $0
+ </setFeature>
+ </annotation>
+ </createAnnotations>
+ </concept>
+
+ <concept name="isbnNumberDetection">
+ <rules>
+ <rule regEx="(97(8|9))?-?(\d{9}|(\d|-){11})-?(\d|X)"
+ matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation"
+ confidence="1.0"/>
+ </rules>
+ <createAnnotations>
+ <annotation id="isbnNumber"
+ type="org.apache.uima.ISBNNumber"
+
validate="org.apache.uima.annotator.regex.extension.impl.ISBNNumberValidator">
+ <begin group="0"/>
+ <end group="0"/>
+ <setFeature name="confidence" type="Confidence"/>
+ </annotation>
+ </createAnnotations>
+ </concept>
+
+ <concept name="creditCardNumberDetection" processAllRules="true">
+ <rules>
+ <rule ruleId="AmericanExpress"
+ regEx="(((34|37)\d{2}[- ]?)(\d{6}[- ]?)\d{5})"
+ matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation"
+ confidence="1.0"/>
+ <rule ruleId="Visa"
+ regEx="((4\d{3}[- ]?)(\d{4}[- ]?){2}\d{4})"
matchStrategy="matchAll"
+ matchType="uima.tcas.DocumentAnnotation" confidence="1.0"/>
+ <rule ruleId="MasterCard"
+ regEx="((5[1-5]\d{2}[- ]?)(\d{4}[- ]?){2}\d{4})"
+ matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation"
+ confidence="1.0"/>
+ <rule ruleId="unknown"
+ regEx="(([1-6]\d{3}[- ])(\d{4}[- ]){2}\d{4})|([1-6]\d{13,18})|([1-6]\d{3}[- ]\d{6}[- ]\d{5})"
+ matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation"
+ confidence="1.0"/>
+ </rules>
+ <createAnnotations>
+ <annotation id="creditCardNumber"
+ type="org.apache.uima.CreditCardNumber"
+
validate="org.apache.uima.annotator.regex.extension.impl.CreditCardNumberValidator">
+ <begin group="0"/>
+ <end group="0"/>
+ <setFeature name="confidence" type="Confidence"/>
+ <setFeature name="cardType" type="RuleId"/>
+ </annotation>
+ </createAnnotations>
+ </concept>
+
+ <concept name="MoneyAmountDetection" processAllRules="true">
+ <!-- \p{Sc} -> currency symbol -->
+ <!-- (?i) -> case insensitive match -->
+ <!-- \s -> whitespace character -->
+ <rules>
+ <rule
regEx="\m{currency}(\p{Sc}\s?|(?i)USD\s?|(?i)Dollars\s?|(?i)Dollar\s?|(?i)CNY\s?|(?i)CAD\s?|(?i)GBP\s?|(?i)Pounds\s?|(?i)Pound\s?|(?i)Euros\s?|(?i)Euro\s?|(?i)Yen\s?|(?i)EUR\s?)\m{amount}(\d+(,\d\d\d)*(\.\d\d?)?)\m{amountText}(\s?(?i)million|\s?(?i)billion)?"
+ matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation"/>
+ <rule
regEx="\m{amount}(\d+(,\d\d\d)*(\.\d\d?\d?)?)\m{amountText}(\s?(?i)million|\s?(?i)billion)?\m{currency}(\s?\p{Sc}|\s?(?i)USD\b|\s?(?i)Dollars\b|\s?(?i)Dollar\b|\s?(?i)CNY\b|\s?(?i)CAD\b|\s?(?i)GBP\b|\s?(?i)Pounds\b|\s?(?i)Pound\b|\s?(?i)Euros\b|\s?(?i)Euro\b|\s?(?i)Yen\b|\s?(?i)EUR\b)"
+ matchStrategy="matchAll" matchType="uima.tcas.DocumentAnnotation"/>
+ </rules>
+ <createAnnotations>
+ <annotation type="org.apache.uima.MoneyAmount">
+ <begin group="0"/>
+ <end group="0"/>
+ <setFeature name="currency" type="String"
normalization="Trim">${currency}</setFeature>
+ <setFeature name="amount" type="Float">${amount}</setFeature>
+ <setFeature name="amountText" type="String"
normalization="Trim">${amountText}</setFeature>
+ </annotation>
+ </createAnnotations>
+ </concept>
+</conceptSet>
Added:
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOServiceTest.java
URL:
http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOServiceTest.java?rev=1199190&view=auto
==============================================================================
---
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOServiceTest.java
(added)
+++
incubator/clerezza/trunk/parent/uima/uima.samples/src/test/java/org/apache/clerezza/uima/samples/services/RegExAnnotatorAOServiceTest.java
Tue Nov 8 11:04:23 2011
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.clerezza.uima.samples.services;
+
+import org.apache.clerezza.rdf.core.Graph;
+import org.junit.Test;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.fail;
+
+/**
+ * Testcase for {@link RegExAnnotatorAOService}
+ */
+public class RegExAnnotatorAOServiceTest {
+
+    /**
+     * Runs the service on a public web page and checks that a graph is returned.
+     * The URL passed in is a remote HTTP address, so this test needs network
+     * access. Exceptions are left to propagate so that JUnit reports them with
+     * their full stack trace rather than a possibly empty failure message.
+     */
+    @Test
+    public void serviceExecutionTest() {
+        RegExAnnotatorAOService service = new RegExAnnotatorAOService();
+        Graph graph = service.enrichUri("http://www.apache.org/foundation/sponsorship.html");
+        assertNotNull(graph);
+    }
+}