Author: rwesten
Date: Tue Jan 22 06:41:12 2013
New Revision: 1436763
URL: http://svn.apache.org/viewvc?rev=1436763&view=rev
Log:
STANBOL-895: implementation of the RESTfuil Langident engine
Added:
stanbol/trunk/enhancement-engines/restful-langident/ (with props)
stanbol/trunk/enhancement-engines/restful-langident/pom.xml
stanbol/trunk/enhancement-engines/restful-langident/src/
stanbol/trunk/enhancement-engines/restful-langident/src/main/
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/enhancer/
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/enhancer/engines/
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/enhancer/engines/restful/
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/enhancer/engines/restful/langident/
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/enhancer/engines/restful/langident/impl/
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/enhancer/engines/restful/langident/impl/LangSuggestion.java
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/enhancer/engines/restful/langident/impl/RestfulLangidentEngine.java
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/enhancer/engines/restful/nlp/
stanbol/trunk/enhancement-engines/restful-langident/src/main/resources/
stanbol/trunk/enhancement-engines/restful-langident/src/main/resources/OSGI-INF/
stanbol/trunk/enhancement-engines/restful-langident/src/main/resources/OSGI-INF/metatype/
stanbol/trunk/enhancement-engines/restful-langident/src/main/resources/OSGI-INF/metatype/metatype.properties
Propchange: stanbol/trunk/enhancement-engines/restful-langident/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Jan 22 06:41:12 2013
@@ -0,0 +1,7 @@
+.project
+
+.settings
+
+.classpath
+
+target
Added: stanbol/trunk/enhancement-engines/restful-langident/pom.xml
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/restful-langident/pom.xml?rev=1436763&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/restful-langident/pom.xml (added)
+++ stanbol/trunk/enhancement-engines/restful-langident/pom.xml Tue Jan 22
06:41:12 2013
@@ -0,0 +1,112 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more
contributor
+ license agreements. See the NOTICE file distributed with this work for
additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
+ <groupId>org.apache.stanbol</groupId>
+ <version>0.10.0-SNAPSHOT</version>
+ <relativePath>../../enhancer/parent</relativePath>
+ </parent>
+
+ <groupId>org.apache.stanbol</groupId>
+
<artifactId>org.apache.stanbol.enhancer.engines.restful.langident</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ <packaging>bundle</packaging>
+
+ <name>Apache Stanbol Enhancement Engine : RESTful Language
Identification</name>
+ <description>A Stanbol engine that uses a standardized API to call
+ a RESTful service for Language Identification</description>
+
+ <inceptionYear>2013</inceptionYear>
+
+  <scm>
+    <!-- Both connections point at the directory of this module (restful-langident) -->
+    <connection>
+      scm:svn:http://svn.apache.org/repos/asf/stanbol/trunk/enhancement-engines/restful-langident/
+    </connection>
+    <developerConnection>
+      scm:svn:https://svn.apache.org/repos/asf/stanbol/trunk/enhancement-engines/restful-langident/
+    </developerConnection>
+    <url>http://stanbol.apache.org/</url>
+  </scm>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ <configuration>
+ <instructions>
+ <Import-Package>
+ org.apache.stanbol.enhancer.servicesapi; provide:=true,
+ org.apache.stanbol.enhancer.servicesapi.impl; provide:=true,
+ *
+ </Import-Package>
+ <Private-Package>
+ org.apache.stanbol.enhancer.engines.restful.langident.impl
+ </Private-Package>
+ </instructions>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <configuration>
+ <excludes>
+ <!-- AL20 License -->
+ <exclude>src/license/THIRD-PARTY.properties</exclude>
+ </excludes>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-scr-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>org.apache.felix.scr.annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpcore-osgi</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpclient-osgi</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-core-asl</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </dependency>
+ </dependencies>
+
+</project>
Added:
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/enhancer/engines/restful/langident/impl/LangSuggestion.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/enhancer/engines/restful/langident/impl/LangSuggestion.java?rev=1436763&view=auto
==============================================================================
---
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/enhancer/engines/restful/langident/impl/LangSuggestion.java
(added)
+++
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/enhancer/engines/restful/langident/impl/LangSuggestion.java
Tue Jan 22 06:41:12 2013
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.restful.langident.impl;
+
+/**
+ * A Language Suggestion: a language together with an optional probability.
+ * <p>
+ * A probability is considered available if the value is <code>&gt;= 0</code>
+ * (see {@link #hasProbability()}).
+ */
+public class LangSuggestion {
+
+    /** the suggested language */
+    protected final String lang;
+    /** the probability of the suggestion (a negative value means: not available) */
+    protected final double prob;
+
+    /**
+     * Creates a suggestion for the parsed language and probability
+     * @param lang the language
+     * @param prob the probability or a negative value if not available
+     */
+    public LangSuggestion(String lang, double prob) {
+        this.prob = prob;
+        this.lang = lang;
+    }
+
+    /**
+     * @return the suggested language
+     */
+    public String getLanguage() {
+        return this.lang;
+    }
+
+    /**
+     * @return the probability as parsed to the constructor
+     */
+    public double getProbability() {
+        return this.prob;
+    }
+
+    /**
+     * @return <code>true</code> if the probability is <code>&gt;= 0</code>
+     */
+    public boolean hasProbability(){
+        return this.prob >= 0.0d;
+    }
+
+}
Added:
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/enhancer/engines/restful/langident/impl/RestfulLangidentEngine.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/enhancer/engines/restful/langident/impl/RestfulLangidentEngine.java?rev=1436763&view=auto
==============================================================================
---
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/enhancer/engines/restful/langident/impl/RestfulLangidentEngine.java
(added)
+++
stanbol/trunk/enhancement-engines/restful-langident/src/main/java/org/apache/stanbol/enhancer/engines/restful/langident/impl/RestfulLangidentEngine.java
Tue Jan 22 06:41:12 2013
@@ -0,0 +1,445 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.restful.langident.impl;
+
+import static java.util.Collections.singleton;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.DCTERMS_LINGUISTIC_SYSTEM;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.commons.io.IOUtils;
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.ConfigurationPolicy;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpException;
+import org.apache.http.HttpHost;
+import org.apache.http.HttpRequest;
+import org.apache.http.HttpRequestInterceptor;
+import org.apache.http.HttpResponse;
+import org.apache.http.StatusLine;
+import org.apache.http.auth.AuthScope;
+import org.apache.http.auth.AuthState;
+import org.apache.http.auth.Credentials;
+import org.apache.http.auth.UsernamePasswordCredentials;
+import org.apache.http.client.ClientProtocolException;
+import org.apache.http.client.CredentialsProvider;
+import org.apache.http.client.HttpResponseException;
+import org.apache.http.client.ResponseHandler;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.client.params.ClientPNames;
+import org.apache.http.client.protocol.ClientContext;
+import org.apache.http.entity.ContentType;
+import org.apache.http.entity.InputStreamEntity;
+import org.apache.http.impl.auth.BasicScheme;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.impl.conn.PoolingClientConnectionManager;
+import org.apache.http.params.BasicHttpParams;
+import org.apache.http.params.CoreConnectionPNames;
+import org.apache.http.params.CoreProtocolPNames;
+import org.apache.http.protocol.ExecutionContext;
+import org.apache.http.protocol.HttpContext;
+import org.apache.http.util.EntityUtils;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.codehaus.jackson.JsonFactory;
+import org.codehaus.jackson.JsonNode;
+import org.codehaus.jackson.map.ObjectMapper;
+import org.codehaus.jackson.node.ArrayNode;
+import org.osgi.framework.Constants;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * An enhancement engine that uses a RESTful service for language identification of
+ * the plain text content part of processed {@link ContentItem}s.<p>
+ * The RESTful API of the remote service is standardised by
+ * <a href="https://issues.apache.org/jira/browse/STANBOL-894">STANBOL-894</a>
<p>
+ *
+ * @author Rupert Westenthaler
+ */
+
+@Component(immediate = true, metatype = true, policy =
ConfigurationPolicy.REQUIRE)
+@Service
+@Properties(value={
+ @Property(name= EnhancementEngine.PROPERTY_NAME,value="changeme"),
+ @Property(name=RestfulLangidentEngine.ANALYSIS_SERVICE_URL, value
="http://changeme"),
+ @Property(name=RestfulLangidentEngine.ANALYSIS_SERVICE_USER, value
=""),
+ @Property(name=RestfulLangidentEngine.ANALYSIS_SERVICE_PWD, value =""),
+ @Property(name=Constants.SERVICE_RANKING,intValue=0)
+})
+public class RestfulLangidentEngine extends
AbstractEnhancementEngine<IOException,RuntimeException> implements
ServiceProperties {
+
+ private static final Charset UTF8 = Charset.forName("UTF-8");
+
+ /**
+ * The URI for the remote analyses service
+ */
+ public static final String ANALYSIS_SERVICE_URL =
"enhancer.engine.restful.langident.service";
+ /**
+ * The User for the remote analyses service
+ */
+ public static final String ANALYSIS_SERVICE_USER =
"enhancer.engine.restful.langident.service.user";
+ /**
+ * The password for the remote analyses service
+ */
+ public static final String ANALYSIS_SERVICE_PWD =
"enhancer.engine.restful.langident.service.pwd";
+
+ private static final Map<String,Object> SERVICE_PROPERTIES;
+ static {
+ Map<String,Object> props = new HashMap<String,Object>();
+ //by default register with the ordering used for language detection engines
+ props.put(ServiceProperties.ENHANCEMENT_ENGINE_ORDERING,
+ ServiceProperties.ORDERING_NLP_LANGAUGE_DETECTION);
+// props.put(NlpServiceProperties.ENHANCEMENT_ENGINE_NLP_ROLE,
+// NlpProcessingRole.Tokenizing);
+ SERVICE_PROPERTIES = Collections.unmodifiableMap(props);
+ }
+
+
+ private static Logger log =
LoggerFactory.getLogger(RestfulLangidentEngine.class);
+
+ private URI serviceUrl;
+
+ private final LiteralFactory literalFactory = LiteralFactory.getInstance();
+ //JSON Parser
+ private final JsonFactory jsonFactory = new JsonFactory();
+ private final ObjectMapper objectMapper = new ObjectMapper(jsonFactory);
+ //HTTP client
+ protected DefaultHttpClient httpClient;
+ private BasicHttpParams httpParams;
+ private PoolingClientConnectionManager connectionManager;
+
+ /**
+ * Indicate if this engine can enhance supplied ContentItem, and if it
+ * suggests enhancing it synchronously or asynchronously. The
+ * {@link org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager}
can
+ * force sync/async mode if desired, it is just a suggestion from the
engine.
+ * <p/>
+ * Returns ENHANCE_ASYNC in case there is a text/plain content part and a
tagger
+ * for the language identified for the content item, CANNOT_ENHANCE
otherwise.
+ *
+ * @throws org.apache.stanbol.enhancer.servicesapi.EngineException
+ * if the introspecting process of the content item
+ * fails
+ */
+ @Override
+ public int canEnhance(ContentItem ci) throws EngineException {
+ // check if content is present
+ Map.Entry<UriRef,Blob> entry = getPlainText(this, ci, false);
+ if(entry == null || entry.getValue() == null) {
+ return CANNOT_ENHANCE;
+ }
+
+ log.trace(" > can enhance ContentItem {} by processing blob {}",ci,
entry.getKey());
+ return ENHANCE_ASYNC;
+ }
+
+    /**
+     * Sends the 'text/plain' Blob of the ContentItem to the configured RESTful
+     * Language Identification service and writes one text enhancement for
+     * every returned {@link LangSuggestion} to the metadata of the ContentItem.
+     * <p/>
+     * Every enhancement gets the suggested language as <code>DC_LANGUAGE</code>,
+     * <code>DCTERMS_LINGUISTIC_SYSTEM</code> as <code>DC_TYPE</code> and - if
+     * the suggestion has a probability - the probability as
+     * <code>ENHANCER_CONFIDENCE</code>. The metadata are modified while holding
+     * the write lock of the ContentItem.
+     *
+     * @param ci the ContentItem to enhance
+     * @throws org.apache.stanbol.enhancer.servicesapi.EngineException
+     *             if the request to the remote service fails or the response
+     *             can not be parsed
+     * @throws IllegalStateException if the ContentItem does not provide a
+     *             'text/plain' Blob
+     */
+    @Override
+    public void computeEnhancements(ContentItem ci) throws EngineException {
+        //get the plain text Blob. 'true' is parsed so that a missing Blob is
+        //reported by an IllegalStateException instead of a NullPointerException
+        Map.Entry<UriRef,Blob> textBlob = getPlainText(this, ci, true);
+        Blob blob = textBlob.getValue();
+        //send the text to the server
+        HttpPost request = new HttpPost(serviceUrl);
+        request.setEntity(new InputStreamEntity(
+            blob.getStream(), blob.getContentLength(),
+            ContentType.create(blob.getMimeType(),
+                blob.getParameter().get("charset"))));
+        //execute the request
+        List<LangSuggestion> detected;
+        try {
+            detected = httpClient.execute(request, new LangIdentResponseHandler(ci,objectMapper));
+        } catch (IOException e) { //also covers ClientProtocolException
+            throw new EngineException(this, ci, "Exception while executing Request "
+                + "on RESTful Language Identification Service at "+serviceUrl, e);
+        }
+        MGraph metadata = ci.getMetadata();
+        log.debug("Detected Languages for ContentItem {} and Blob {}", ci, textBlob.getKey());
+        ci.getLock().writeLock().lock();
+        try { //write TextAnnotations for the detected languages
+            for(LangSuggestion suggestion : detected){
+                // add a hypothesis
+                log.debug(" > {}@{}", suggestion.getLanguage(),
+                    suggestion.hasProbability() ? suggestion.getProbability() : "-,--");
+                UriRef textEnhancement = EnhancementEngineHelper.createTextEnhancement(ci, this);
+                metadata.add(new TripleImpl(textEnhancement, DC_LANGUAGE, new PlainLiteralImpl(suggestion.getLanguage())));
+                metadata.add(new TripleImpl(textEnhancement, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM));
+                if(suggestion.hasProbability()){
+                    //the confidence is only written if the service provided a probability
+                    metadata.add(new TripleImpl(textEnhancement, ENHANCER_CONFIDENCE,
+                        literalFactory.createTypedLiteral(suggestion.getProbability())));
+                }
+            }
+        } finally {
+            ci.getLock().writeLock().unlock();
+        }
+    }
+
+ protected class LangIdentResponseHandler implements
ResponseHandler<List<LangSuggestion>>{
+
+ protected final ContentItem ci;
+ protected final JsonFactory jsonFactory;
+ protected final ObjectMapper mapper;
+
+
+ protected LangIdentResponseHandler(ContentItem ci, ObjectMapper
objectMapper){
+ this.ci = ci;
+ this.mapper = objectMapper;
+ this.jsonFactory = objectMapper.getJsonFactory();
+ }
+
+ @Override
+ public List<LangSuggestion> handleResponse(HttpResponse response)
throws ClientProtocolException, IOException {
+ StatusLine statusLine = response.getStatusLine();
+ HttpEntity entity = response.getEntity();
+ if (statusLine.getStatusCode() >= 300) {
+ EntityUtils.consume(entity);
+ throw new HttpResponseException(statusLine.getStatusCode(),
+ statusLine.getReasonPhrase());
+ }
+ //parse the results
+ InputStream in = null;
+ try {
+ in = entity.getContent();
+ Charset charset = entity.getContentEncoding() != null ?
+
Charset.forName(entity.getContentEncoding().getValue()) : UTF8;
+ //parse the received data and add it to the AnalysedText of
the
+ //contentItem
+ JsonNode root =
mapper.readTree(jsonFactory.createJsonParser(new
InputStreamReader(in,charset)));
+ if(root.isArray()){
+ List<LangSuggestion> detected = new
ArrayList<LangSuggestion>(
+ ((ArrayNode)root).size());
+ for(int i=0;i<((ArrayNode)root).size();i++){
+ String lang;
+ double prob;
+ JsonNode entry = ((ArrayNode)root).get(i);
+ if(entry.isObject()){
+ JsonNode field = entry.path("lang");
+ if(field.isTextual()){
+ lang = field.getTextValue();
+ } else {
+ throw new IOException("Unable to prsed
LanguageIdent Service response! "
+ + "The field 'lang' MUST BE presnet and
have a textual value! "
+ + "(entry: "+entry+", received:
"+root+")!");
+ }
+ field = entry.path("prob");
+ if(field.isNumber()){
+ prob = field.asDouble();
+ } else {
+ prob = -1;
+ }
+ detected.add(new LangSuggestion(lang, prob));
+
+ } else {
+ throw new IOException("Unable to prsed
LanguageIdent Service response! "
+ + "All members of the root Json Array MUST BE
Json Objects "
+ + "(received: "+root+")");
+ }
+ }
+ return detected;
+ } else {
+ throw new IOException("Unable to prsed LanguageIdent
Service response! "
+ +" Root Element MUST BE an Json Array (received:
"+root+")");
+ }
+ } finally {
+ //ensure that the stream is closed
+ IOUtils.closeQuietly(in);
+ }
+ }
+ }
+
+ /**
+ * Getter for the service properties of this engine. Returns the unmodifiable
+ * SERVICE_PROPERTIES map, which holds ORDERING_NLP_LANGAUGE_DETECTION as
+ * value of the ENHANCEMENT_ENGINE_ORDERING key.
+ */
+ @Override
+ public Map<String,Object> getServiceProperties() {
+ return SERVICE_PROPERTIES;
+ }
+    /**
+     * Activates the engine: reads the service URL and the optional user and
+     * password from the component configuration and initialises the HTTP
+     * client used to call the RESTful Language Identification service.
+     *
+     * @param ce the {@link org.osgi.service.component.ComponentContext}
+     * @throws ConfigurationException if the service URL is missing, empty,
+     * not a valid URI or not absolute
+     * @throws IOException only if raised by <code>super.activate(ce)</code>
+     */
+    @Activate
+    protected void activate(ComponentContext ce) throws ConfigurationException, IOException {
+        super.activate(ce);
+        log.info("activate {} '{}'",getClass().getSimpleName(),getName());
+        @SuppressWarnings("unchecked")
+        Dictionary<String, Object> properties = ce.getProperties();
+
+        Object value = properties.get(ANALYSIS_SERVICE_URL);
+        //an empty value is treated like a missing one
+        String url = value == null ? "" : value.toString().trim();
+        if(url.isEmpty()){
+            throw new ConfigurationException(ANALYSIS_SERVICE_URL,
+                "The RESTful Language Identification Service URL is missing in the provided configuration!");
+        }
+        URI parsedUrl;
+        try {
+            parsedUrl = new URI(url);
+        } catch (URISyntaxException e) {
+            throw new ConfigurationException(ANALYSIS_SERVICE_URL,
+                "The parsed RESTful Language Identification Service URL '"+ value
+                + "'is not a valid URL!",e);
+        }
+        //requests are sent to this URI as is, so a URI without a scheme can
+        //not be used to reach the service
+        if(!parsedUrl.isAbsolute()){
+            throw new ConfigurationException(ANALYSIS_SERVICE_URL,
+                "The parsed RESTful Language Identification Service URL '"+ value
+                + "' is not an absolute URL!");
+        }
+        //only set the field after the configured value was validated
+        serviceUrl = parsedUrl;
+        log.info(" ... service: {}",serviceUrl);
+
+        String usr;
+        String pwd;
+        value = properties.get(ANALYSIS_SERVICE_USER);
+        if(value != null && !value.toString().isEmpty()){
+            usr = value.toString();
+            value = properties.get(ANALYSIS_SERVICE_PWD);
+            pwd = value == null ? null : value.toString();
+        } else { // no user set (a configured password is ignored in this case)
+            usr = null;
+            pwd = null;
+        }
+
+        //init the http client
+        httpParams = new BasicHttpParams();
+        httpParams.setParameter(CoreProtocolPNames.USER_AGENT,
+            "Apache Stanbol RESTful Language Identification Engine");
+        httpParams.setBooleanParameter(ClientPNames.HANDLE_REDIRECTS, true);
+        httpParams.setIntParameter(ClientPNames.MAX_REDIRECTS, 3);
+        httpParams.setBooleanParameter(CoreConnectionPNames.SO_KEEPALIVE, true);
+
+        connectionManager = new PoolingClientConnectionManager();
+        connectionManager.setMaxTotal(20);
+        connectionManager.setDefaultMaxPerRoute(20);
+
+        httpClient = new DefaultHttpClient(connectionManager,httpParams);
+        if(usr != null){
+            log.info(" ... setting user to {}",usr);
+            // NOTE(review): the credentials are registered for AuthScope.ANY and
+            // redirects are enabled above - confirm that sending them to hosts
+            // other than the configured service is acceptable
+            httpClient.getCredentialsProvider().setCredentials(AuthScope.ANY,
+                new UsernamePasswordCredentials(usr, pwd));
+            // And add request interceptor to have preemptive authentication
+            httpClient.addRequestInterceptor(new PreemptiveAuthInterceptor(), 0);
+        }
+    }
+
+    /**
+     * Deactivates the engine: releases the HTTP client and its parameters,
+     * shuts down the connection manager and finally calls the deactivate
+     * method of the super class.
+     *
+     * @param context the {@link org.osgi.service.component.ComponentContext}
+     */
+    @Deactivate
+    protected void deactivate(ComponentContext context) {
+        //release the client and its configuration
+        this.httpClient = null;
+        this.httpParams = null;
+        //shutdown the connection manager before releasing the reference
+        PoolingClientConnectionManager manager = this.connectionManager;
+        manager.shutdown();
+        this.connectionManager = null;
+        super.deactivate(context);
+    }
+
+    /**
+     * Getter for the 'text/plain' Blob of the parsed ContentItem
+     * @param engine the engine requesting the Blob (its name and class are
+     * used for the logged warning)
+     * @param ci the ContentItem
+     * @param exception if <code>true</code> an {@link IllegalStateException}
+     * is thrown if no 'text/plain' Blob is present. If <code>false</code> a
+     * warning is logged and <code>null</code> is returned.
+     * @return the URI and the Blob of the 'text/plain' content or
+     * <code>null</code> if not found and exception is <code>false</code>.
+     * @throws IllegalStateException if exception is <code>true</code> and the
+     * parsed {@link ContentItem} does not provide a 'text/plain' Blob.
+     */
+    public static Entry<UriRef,Blob> getPlainText(EnhancementEngine engine, ContentItem ci, boolean exception) {
+        Entry<UriRef,Blob> plainText = ContentItemHelper.getBlob(
+            ci, singleton("text/plain"));
+        if(plainText == null && exception){
+            throw new IllegalStateException("Unable to retrieve 'text/plain' ContentPart for ContentItem "
+                + ci+". As this is also checked in canEnhancer this may indicate an Bug in the "
+                + "used EnhancementJobManager!");
+        }
+        if(plainText == null){ //not found, but no exception requested
+            log.warn("The Enhancement Engine '{} (impl: {})' CAN NOT enhance "
+                + "ContentItem {} because no 'text/plain' ContentPart is "
+                + "present in this ContentItem. Users that need to enhance "
+                + "non-plain-text Content need to add an EnhancementEngine "
+                + "that supports the conversion of '{}' files to plain text "
+                + "to the current EnhancementChain!",
+                new Object[]{engine.getName(), engine.getClass().getSimpleName(),ci,ci.getMimeType()});
+        }
+        return plainText;
+    }
+
+    /**
+     * {@link HttpRequestInterceptor} used for preemptive authentication (based
+     * on the httpclient 4.0 example). If no auth scheme is set for the target
+     * it looks up the credentials for the target host and - if found -
+     * updates the auth state with a {@link BasicScheme}.
+     */
+    private static class PreemptiveAuthInterceptor implements HttpRequestInterceptor {
+
+        public void process(HttpRequest request, HttpContext context) throws HttpException, IOException {
+
+            AuthState targetAuth = (AuthState) context.getAttribute(ClientContext.TARGET_AUTH_STATE);
+            CredentialsProvider provider = (CredentialsProvider) context.getAttribute(ClientContext.CREDS_PROVIDER);
+            HttpHost host = (HttpHost) context.getAttribute(ExecutionContext.HTTP_TARGET_HOST);
+
+            if (targetAuth.getAuthScheme() != null) {
+                return; // an auth scheme is already initialized
+            }
+            // Obtain credentials matching the target host
+            Credentials credentials = provider.getCredentials(
+                new AuthScope(host.getHostName(), host.getPort()));
+            if (credentials == null) {
+                return; // nothing to authenticate with
+            }
+            // generate BasicScheme preemptively
+            targetAuth.update(new BasicScheme(), credentials);
+        }
+    }
+}
Added:
stanbol/trunk/enhancement-engines/restful-langident/src/main/resources/OSGI-INF/metatype/metatype.properties
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/restful-langident/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1436763&view=auto
==============================================================================
---
stanbol/trunk/enhancement-engines/restful-langident/src/main/resources/OSGI-INF/metatype/metatype.properties
(added)
+++
stanbol/trunk/enhancement-engines/restful-langident/src/main/resources/OSGI-INF/metatype/metatype.properties
Tue Jan 22 06:41:12 2013
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+org.apache.stanbol.enhancer.engines.restful.langident.impl.RestfulLangidentEngine.name=Apache
\
+Stanbol Enhancer Engine: RESTful Language Identification Engine
+org.apache.stanbol.enhancer.engines.restful.langident.impl.RestfulLangidentEngine.description=Enhancement
\
+Engine that uses a RESTful Language Identification service to enhance parsed
Text.
+
+
+stanbol.enhancer.engine.name.name=Name
+stanbol.enhancer.engine.name.description=The name of the enhancement engine as
\
+used in the RESTful interface '/engine/<name>'
+service.ranking.name=Ranking
+service.ranking.description=If two enhancement engines with the same name are
active the \
+one with the higher ranking will be used to process parsed content items.
+
+enhancer.engine.restful.langident.service.name=Service URL
+enhancer.engine.restful.langident.service.description=The {service-baseuri} of
\
+ the endpoint implementing the RESTful service as specified by STANBOL-892
+
+enhancer.engine.restful.langident.service.user.name=User Name
+enhancer.engine.restful.langident.service.user.description=The user name for \
+ the service (optional)
+
+# the key prefix is the value of RestfulLangidentEngine.ANALYSIS_SERVICE_PWD
+enhancer.engine.restful.langident.service.pwd.name=Password
+enhancer.engine.restful.langident.service.pwd.description=The password for \
+ the service (optional)
+