Author: rwesten
Date: Sat Jan 19 20:53:32 2013
New Revision: 1435689

URL: http://svn.apache.org/viewvc?rev=1435689&view=rev
Log:
STANBOL-893: Implementation of the RESTful NLP analysis engine

Added:
    stanbol/trunk/enhancement-engines/restful-nlp/   (with props)
    stanbol/trunk/enhancement-engines/restful-nlp/pom.xml
    stanbol/trunk/enhancement-engines/restful-nlp/src/
    stanbol/trunk/enhancement-engines/restful-nlp/src/main/
    stanbol/trunk/enhancement-engines/restful-nlp/src/main/java/
    stanbol/trunk/enhancement-engines/restful-nlp/src/main/java/org/
    stanbol/trunk/enhancement-engines/restful-nlp/src/main/java/org/apache/
    
stanbol/trunk/enhancement-engines/restful-nlp/src/main/java/org/apache/stanbol/
    
stanbol/trunk/enhancement-engines/restful-nlp/src/main/java/org/apache/stanbol/enhancer/
    
stanbol/trunk/enhancement-engines/restful-nlp/src/main/java/org/apache/stanbol/enhancer/engines/
    
stanbol/trunk/enhancement-engines/restful-nlp/src/main/java/org/apache/stanbol/enhancer/engines/restful/
    
stanbol/trunk/enhancement-engines/restful-nlp/src/main/java/org/apache/stanbol/enhancer/engines/restful/nlp/
    
stanbol/trunk/enhancement-engines/restful-nlp/src/main/java/org/apache/stanbol/enhancer/engines/restful/nlp/impl/
    
stanbol/trunk/enhancement-engines/restful-nlp/src/main/java/org/apache/stanbol/enhancer/engines/restful/nlp/impl/RestfulNlpAnalysisEngine.java
    stanbol/trunk/enhancement-engines/restful-nlp/src/main/resources/
    stanbol/trunk/enhancement-engines/restful-nlp/src/main/resources/OSGI-INF/
    
stanbol/trunk/enhancement-engines/restful-nlp/src/main/resources/OSGI-INF/metatype/
    
stanbol/trunk/enhancement-engines/restful-nlp/src/main/resources/OSGI-INF/metatype/metatype.properties

Propchange: stanbol/trunk/enhancement-engines/restful-nlp/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Sat Jan 19 20:53:32 2013
@@ -0,0 +1,7 @@
+target
+
+.classpath
+
+.settings
+
+.project

Added: stanbol/trunk/enhancement-engines/restful-nlp/pom.xml
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/restful-nlp/pom.xml?rev=1435689&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/restful-nlp/pom.xml (added)
+++ stanbol/trunk/enhancement-engines/restful-nlp/pom.xml Sat Jan 19 20:53:32 
2013
@@ -0,0 +1,115 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+  license agreements. See the NOTICE file distributed with this work for additional 
+  information regarding copyright ownership. The ASF licenses this file to 
+  You under the Apache License, Version 2.0 (the "License"); you may not use 
+  this file except in compliance with the License. You may obtain a copy of 
+  the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+  by applicable law or agreed to in writing, software distributed under the 
+  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+  OF ANY KIND, either express or implied. See the License for the specific 
+  language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
+    <groupId>org.apache.stanbol</groupId>
+    <version>0.10.0-SNAPSHOT</version>
+    <relativePath>../../enhancer/parent</relativePath>
+  </parent>
+
+  <groupId>org.apache.stanbol</groupId>
+  <artifactId>org.apache.stanbol.enhancer.engines.restful.nlp</artifactId>
+  <version>0.10.0-SNAPSHOT</version>
+  <packaging>bundle</packaging> <!-- packaged by the maven-bundle-plugin configured below -->
+
+  <name>Apache Stanbol Enhancement Engine : RESTful NLP processing</name>
+  <description>A Stanbol engine that uses a standardized API to call
+    a RESTful service for NLP processing. The service is based on JSON serialized
+    AnalysedText ContentPart. </description>
+
+  <inceptionYear>2013</inceptionYear>
+
+  <scm>
+    <connection>
+      scm:svn:http://svn.apache.org/repos/asf/stanbol/trunk/enhancement-engines/restful-nlp/
+    </connection>
+    <developerConnection>
+      scm:svn:https://svn.apache.org/repos/asf/stanbol/trunk/enhancement-engines/restful-nlp/
+    </developerConnection>
+    <url>http://stanbol.apache.org/</url>
+  </scm>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-bundle-plugin</artifactId>
+        <extensions>true</extensions>
+        <configuration>
+          <instructions>
+            <Import-Package>
+              org.apache.stanbol.enhancer.servicesapi; provide:=true,
+              org.apache.stanbol.enhancer.servicesapi.impl; provide:=true,
+              *
+            </Import-Package>
+            <Private-Package> <!-- the engine implementation is not exported -->
+              org.apache.stanbol.enhancer.engines.restful.nlp.impl
+            </Private-Package>
+          </instructions>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.rat</groupId>
+        <artifactId>apache-rat-plugin</artifactId>
+        <configuration>
+          <excludes>
+            <!-- AL20 License -->
+            <exclude>src/license/THIRD-PARTY.properties</exclude>
+          </excludes>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-scr-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+      <version>0.10.0-SNAPSHOT</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.nlp</artifactId>
+      <version>0.10.0-SNAPSHOT</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.nlp.json</artifactId>
+      <version>0.10.0-SNAPSHOT</version>
+    </dependency>
+    <dependency> <!-- no version here and below: presumably managed by the parent POM (TODO confirm) -->
+      <groupId>org.apache.felix</groupId>
+      <artifactId>org.apache.felix.scr.annotations</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.httpcomponents</groupId>
+      <artifactId>httpcore-osgi</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.httpcomponents</groupId>
+      <artifactId>httpclient-osgi</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+  </dependencies>
+
+</project>

Added: 
stanbol/trunk/enhancement-engines/restful-nlp/src/main/java/org/apache/stanbol/enhancer/engines/restful/nlp/impl/RestfulNlpAnalysisEngine.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/restful-nlp/src/main/java/org/apache/stanbol/enhancer/engines/restful/nlp/impl/RestfulNlpAnalysisEngine.java?rev=1435689&view=auto
==============================================================================
--- 
stanbol/trunk/enhancement-engines/restful-nlp/src/main/java/org/apache/stanbol/enhancer/engines/restful/nlp/impl/RestfulNlpAnalysisEngine.java
 (added)
+++ 
stanbol/trunk/enhancement-engines/restful-nlp/src/main/java/org/apache/stanbol/enhancer/engines/restful/nlp/impl/RestfulNlpAnalysisEngine.java
 Sat Jan 19 20:53:32 2013
@@ -0,0 +1,531 @@
+/*
+ * Copyright (c) 2012 Sebastian Schaffert
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package org.apache.stanbol.enhancer.engines.restful.nlp.impl;
+
+import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.NER_ANNOTATION;
+import static 
org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import javax.print.attribute.TextSyntax;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.commons.io.IOUtils;
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.ConfigurationPolicy;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.http.Header;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpException;
+import org.apache.http.HttpHeaders;
+import org.apache.http.HttpHost;
+import org.apache.http.HttpRequest;
+import org.apache.http.HttpRequestInterceptor;
+import org.apache.http.HttpResponse;
+import org.apache.http.StatusLine;
+import org.apache.http.auth.AuthScope;
+import org.apache.http.auth.AuthState;
+import org.apache.http.auth.Credentials;
+import org.apache.http.auth.UsernamePasswordCredentials;
+import org.apache.http.client.ClientProtocolException;
+import org.apache.http.client.CredentialsProvider;
+import org.apache.http.client.HttpResponseException;
+import org.apache.http.client.ResponseHandler;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.client.params.ClientPNames;
+import org.apache.http.client.protocol.ClientContext;
+import org.apache.http.entity.ContentType;
+import org.apache.http.entity.InputStreamEntity;
+import org.apache.http.impl.auth.BasicScheme;
+import org.apache.http.impl.client.BasicResponseHandler;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.impl.conn.PoolingClientConnectionManager;
+import org.apache.http.message.BasicHeader;
+import org.apache.http.params.BasicHttpParams;
+import org.apache.http.params.CoreConnectionPNames;
+import org.apache.http.params.CoreProtocolPNames;
+import org.apache.http.protocol.ExecutionContext;
+import org.apache.http.protocol.HttpContext;
+import org.apache.http.util.EntityUtils;
+import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
+import org.apache.stanbol.enhancer.nlp.json.AnalyzedTextParser;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory;
+import org.apache.stanbol.enhancer.nlp.model.Chunk;
+import org.apache.stanbol.enhancer.nlp.model.Sentence;
+import org.apache.stanbol.enhancer.nlp.model.Span;
+import org.apache.stanbol.enhancer.nlp.model.Span.SpanTypeEnum;
+import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
+import org.apache.stanbol.enhancer.nlp.utils.LanguageConfiguration;
+import org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.osgi.framework.Constants;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * An enhancement engine that uses a RESTful service for NLP processing of
+ * the plain text content part of processed {@link ContentItem}s.<p>
+ * The RESTful API of the remote service is specified by STANBOL-892.<p>
+ * 
+ * @author Rupert Westenthaler
+ */
+
+@Component(immediate = true, metatype = true,  policy = 
ConfigurationPolicy.REQUIRE)
+@Service
+@Properties(value={
+        @Property(name= EnhancementEngine.PROPERTY_NAME,value="changeme"),
+        @Property(name=RestfulNlpAnalysisEngine.CONFIG_LANGUAGES, value = 
{"*"},cardinality=Integer.MAX_VALUE),
+        @Property(name=RestfulNlpAnalysisEngine.ANALYSIS_SERVICE_URL, value ="http://changeme"),
+        @Property(name=RestfulNlpAnalysisEngine.ANALYSIS_SERVICE_USER, value 
=""),
+        @Property(name=RestfulNlpAnalysisEngine.ANALYSIS_SERVICE_PWD, value 
=""),
+        @Property(name=Constants.SERVICE_RANKING,intValue=0)
+})
+public class RestfulNlpAnalysisEngine extends 
AbstractEnhancementEngine<IOException,RuntimeException> implements 
ServiceProperties {
+
+    private static final Charset UTF8 = Charset.forName("UTF-8");
+    
+    /**
+     * The URI for the remote analyses service
+     */
+    public static final String ANALYSIS_SERVICE_URL = 
"enhancer.engine.restful.nlp.analysis.service";
+    /**
+     * The User for the remote analyses service
+     */
+    public static final String ANALYSIS_SERVICE_USER = 
"enhancer.engine.restful.nlp.analysis.service.user";
+    /**
+     * The password for the remote analyses service
+     */
+    public static final String ANALYSIS_SERVICE_PWD = 
"enhancer.engine.restful.nlp.analysis.service.pwd";
+    
+    /**
+     * Language configuration. Takes a list of ISO language codes to be 
processed
+     * by this engine. This list will be joined with the list of languages 
supported
+     * by the RESTful NLP analysis service.
+     */
+    public static final String CONFIG_LANGUAGES = 
"enhancer.engine.restful.nlp.languages";
+
+    /**
+     * The maximum size of the prefix/suffix for the selection context
+     */
+    private static final int DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE = 50;
+
+    private static final Map<String,Object> SERVICE_PROPERTIES;
+    static {
+        Map<String,Object> props = new HashMap<String,Object>();
+        //by default register as Tokenizing engine
+        props.put(ServiceProperties.ENHANCEMENT_ENGINE_ORDERING, 
+            ServiceProperties.ORDERING_NLP_TOKENIZING);
+//        props.put(NlpServiceProperties.ENHANCEMENT_ENGINE_NLP_ROLE, 
+//            NlpProcessingRole.Tokenizing);
+        SERVICE_PROPERTIES = Collections.unmodifiableMap(props);
+    }
+
+
+    private static Logger log = 
LoggerFactory.getLogger(RestfulNlpAnalysisEngine.class);
+
+    private URI analysisServiceUrl;
+    
+    //Language configuration
+    private LanguageConfiguration languageConfig = new 
LanguageConfiguration(CONFIG_LANGUAGES,new String[]{"*"});
+    
+    private final Set<String> supportedLanguages = new HashSet<String>();
+    
+    protected DefaultHttpClient httpClient;
+    private BasicHttpParams httpParams;
+    private PoolingClientConnectionManager connectionManager;
+    /**
+     * List of HttpHeaders reused for each request. This avoids to re-create 
them
+     * for every request
+     */
+    private static List<? extends Header> DEFAULT_HEADERS = Arrays.asList(
+        new BasicHeader(HttpHeaders.ACCEPT_ENCODING, UTF8.name()),
+        new BasicHeader(HttpHeaders.CONTENT_TYPE, "text/plain; 
charset="+UTF8.name()));
+    
+    @Reference
+    private AnalysedTextFactory analysedTextFactory;
+    
+    /**
+     * Used to parse {@link AnalysedText} instances from responses of the
+     * RESTful analysis service.
+     */
+    @Reference
+    private AnalyzedTextParser analyzedTextParser;
+    
+    /**
+     * Indicate if this engine can enhance supplied ContentItem, and if it
+     * suggests enhancing it synchronously or asynchronously. The
+     * {@link org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager} 
can force sync/async mode if desired, it is
+     * just a suggestion from the engine.
+     * <p/>
+     * Returns ENHANCE_ASYNC in case there is a text/plain content part and a 
tagger for the language identified for
+     * the content item, CANNOT_ENHANCE otherwise.
+     *
+     * @throws org.apache.stanbol.enhancer.servicesapi.EngineException
+     *          if the introspecting process of the content item
+     *          fails
+     */
+    @Override
+    public int canEnhance(ContentItem ci) throws EngineException {
+        // check if content is present
+        Map.Entry<UriRef,Blob> entry = NlpEngineHelper.getPlainText(this, ci, 
false);
+        if(entry == null || entry.getValue() == null) {
+            return CANNOT_ENHANCE;
+        }
+
+        String language = getLanguage(this,ci,false);
+        if(language == null) {
+            return CANNOT_ENHANCE;
+        }
+        if(!languageConfig.isLanguage(language)){
+            log.trace(" > can NOT enhance ContentItem {} because language {} 
is "
+                + "not enabled by this engines configuration",ci,language);
+            return CANNOT_ENHANCE;
+        }
+        if(!supportedLanguages.contains(language)){
+            log.trace(" > the RESTful Analysis service does not support '{}' 
(supported: {})",
+                language, supportedLanguages);
+        }
+        log.trace(" > can enhance ContentItem {} with language 
{}",ci,language);
+        return ENHANCE_ASYNC;
+    }
+
+    /**
+     * Compute enhancements for supplied ContentItem. The results of the 
process
+     * are expected to be stored in the metadata of the content item.
+     * <p/>
+     * The client (usually an {@link 
org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager}) should take 
care of
+     * persistent storage of the enhanced {@link 
org.apache.stanbol.enhancer.servicesapi.ContentItem}.
+     * <p/>
+     * This method creates a new POSContentPart using {@link 
org.apache.stanbol.enhancer.engines.pos.api.POSTaggerHelper#createContentPart} 
from a text/plain part and
+     * stores it as a new part in the content item. The metadata is not 
changed.
+     *
+     * @throws org.apache.stanbol.enhancer.servicesapi.EngineException
+     *          if the underlying process failed to work as
+     *          expected
+     */
+    @Override
+    public void computeEnhancements(ContentItem ci) throws EngineException {
+        //get the plain text Blob
+        Map.Entry<UriRef,Blob> textBlob = NlpEngineHelper.getPlainText(this, 
ci, false);
+        Blob blob = textBlob.getValue();
+        //send the text to the server
+        String language = getLanguage(this, ci, true);
+        HttpPost request = new HttpPost(analysisServiceUrl);
+        request.addHeader(HttpHeaders.CONTENT_LANGUAGE, language);
+        request.setEntity(new InputStreamEntity(
+            blob.getStream(), blob.getContentLength(),
+            ContentType.create(blob.getMimeType(), 
+                blob.getParameter().get("charset"))));
+        //execute the request
+        AnalysedText at;
+        try {
+            at = httpClient.execute(request, new AnalysisResponseHandler(ci, 
textBlob));
+        } catch (ClientProtocolException e) {
+            throw new EngineException(this, ci, "Exception while executing 
Request "
+                + "on RESTful NLP Analysis Service at "+analysisServiceUrl, e);
+        } catch (IOException e) {
+            throw new EngineException(this, ci, "Exception while executing 
Request "
+                    + "on RESTful NLP Analysis Service at 
"+analysisServiceUrl, e);
+        }
+        Iterator<Span> spans = 
at.getEnclosed(EnumSet.of(SpanTypeEnum.Sentence,SpanTypeEnum.Chunk));
+        Sentence context = null;
+        MGraph metadata = ci.getMetadata();
+        Language lang = new Language(language);
+        LiteralFactory lf = LiteralFactory.getInstance();
+        ci.getLock().writeLock().lock();
+        try { //write TextAnnotations for Named Entities
+            while(spans.hasNext()){
+                Span span = spans.next();
+                switch (span.getType()) {
+                    case Sentence:
+                        context = (Sentence)context;
+                        break;
+                    default:
+                        Value<NerTag> nerAnno = 
span.getAnnotation(NER_ANNOTATION);
+                        if(nerAnno != null){
+                            UriRef ta = 
EnhancementEngineHelper.createTextEnhancement(ci, this);
+                            //add span related data
+                            metadata.add(new TripleImpl(ta, 
ENHANCER_SELECTED_TEXT, 
+                                new PlainLiteralImpl(span.getSpan(), lang)));
+                            metadata.add(new TripleImpl(ta, ENHANCER_START, 
+                                lf.createTypedLiteral(span.getStart())));
+                            metadata.add(new TripleImpl(ta, ENHANCER_END, 
+                                lf.createTypedLiteral(span.getEnd())));
+                            metadata.add(new TripleImpl(ta, 
ENHANCER_SELECTION_CONTEXT, 
+                                new PlainLiteralImpl(context == null ?
+                                        
getDefaultSelectionContext(at.getSpan(), span.getSpan(), span.getStart()) :
+                                            context.getSpan(), lang)));
+                            //add the NER type
+                            if(nerAnno.value().getType() != null){
+                                metadata.add(new 
TripleImpl(ta,DC_TYPE,nerAnno.value().getType()));
+                            }
+                            if(nerAnno.probability() >= 0) {
+                                metadata.add(new TripleImpl(ta, 
ENHANCER_CONFIDENCE, 
+                                    
lf.createTypedLiteral(nerAnno.probability())));
+                            }
+                        }
+                        break;
+                }
+            }
+        } finally {
+            ci.getLock().writeLock().unlock();
+        }
+    }
+
+    protected class AnalysisResponseHandler implements 
ResponseHandler<AnalysedText>{
+        
+        protected final ContentItem ci;
+        protected final Entry<UriRef,Blob> textBlob;
+
+
+        protected AnalysisResponseHandler(ContentItem ci, 
Map.Entry<UriRef,Blob> textBlob){
+            this.ci = ci;
+            this.textBlob = textBlob;
+        }
+
+        @Override
+        public AnalysedText handleResponse(HttpResponse response) throws 
ClientProtocolException, IOException {
+            StatusLine statusLine = response.getStatusLine();
+            HttpEntity entity = response.getEntity();
+            if (statusLine.getStatusCode() >= 300) {
+                EntityUtils.consume(entity);
+                throw new HttpResponseException(statusLine.getStatusCode(),
+                    statusLine.getReasonPhrase());
+            }
+            //parse the results
+            InputStream in = null;
+            try {
+                in = entity.getContent();
+                Charset charset = entity.getContentEncoding() != null ? 
+                        
Charset.forName(entity.getContentEncoding().getValue()) : UTF8;
+                //parse the received data and add it to the AnalysedText of 
the 
+                //contentItem
+                return parseAnalysedText(ci, textBlob, in, charset);
+            } finally {
+                //ensure that the stream is closed
+                IOUtils.closeQuietly(in);
+            }
+        }
+    }
+    
+    /**
+     * @param ci
+     * @param entry
+     * @param in
+     * @param charset
+     * @throws EngineException
+     */
+    private AnalysedText parseAnalysedText(ContentItem ci, 
Map.Entry<UriRef,Blob> entry,
+            InputStream in,Charset charset) throws IOException {
+        AnalysedText at;
+        ci.getLock().writeLock().lock();
+        try {
+            at = analysedTextFactory.createAnalysedText(ci, entry.getValue());
+        } finally {
+            ci.getLock().writeLock().unlock();
+        }
+        analyzedTextParser.parse(in, charset, at);
+        return at;
+    }
+
+    /**
+     * Getter for the (unmodifiable) service properties of this engine. They
+     * only define the {@link ServiceProperties#ENHANCEMENT_ENGINE_ORDERING}
+     * with the value {@link ServiceProperties#ORDERING_NLP_TOKENIZING}.
+     * @return the service properties
+     */
+    @Override
+    public Map<String,Object> getServiceProperties() {
+        return SERVICE_PROPERTIES;
+    }
+    /**
+     * Activate and read the properties. Reads the language configuration, the
+     * URL and the (optional) user/password of the RESTful analysis service,
+     * initialises the HTTP client and finally requests the languages
+     * supported by the service.
+     *
+     * @param ce the {@link org.osgi.service.component.ComponentContext}
+     * @throws ConfigurationException if the service URL is missing or not valid
+     * @throws IOException if the request for the supported languages fails
+     */
+    @Activate
+    protected void activate(ComponentContext ce) throws ConfigurationException, IOException {
+        super.activate(ce);
+        log.info("activate {} '{}'",getClass().getSimpleName(),getName());
+        @SuppressWarnings("unchecked")
+        Dictionary<String, Object> properties = ce.getProperties();
+        languageConfig.setConfiguration(properties);
+
+        Object value = properties.get(ANALYSIS_SERVICE_URL);
+        if(value == null){
+            throw new ConfigurationException(ANALYSIS_SERVICE_URL, 
+                "The RESTful Analysis Service URL is missing in the provided configuration!");
+        } else {
+            try {
+                analysisServiceUrl = new URI(value.toString());
+                log.info("  ... service: {}",analysisServiceUrl);
+            } catch (URISyntaxException e) {
+                throw new ConfigurationException(ANALYSIS_SERVICE_URL, 
+                        "The parsed RESTful Analysis Service URL '"+ value
+                        + "'is not a valid URL!",e);
+            }
+        }
+        String usr;
+        String pwd;
+        value = properties.get(ANALYSIS_SERVICE_USER);
+        if(value != null && !value.toString().isEmpty()){
+            usr = value.toString();
+            //the password is only read if a user is configured
+            value = properties.get(ANALYSIS_SERVICE_PWD);
+            pwd = value == null ? null : value.toString();
+        } else { // no user set
+            usr = null;
+            pwd = null;
+        }
+        
+        //init the http client
+        httpParams = new BasicHttpParams();
+        httpParams.setParameter(CoreProtocolPNames.USER_AGENT, "Apache Stanbol RESTful NLP Analysis Engine");
+        httpParams.setBooleanParameter(ClientPNames.HANDLE_REDIRECTS, true);
+        httpParams.setIntParameter(ClientPNames.MAX_REDIRECTS, 3);
+        httpParams.setBooleanParameter(CoreConnectionPNames.SO_KEEPALIVE, true);
+
+        connectionManager = new PoolingClientConnectionManager();
+        connectionManager.setMaxTotal(20);
+        connectionManager.setDefaultMaxPerRoute(20);
+
+        httpClient = new DefaultHttpClient(connectionManager,httpParams);
+        if(usr != null){
+            log.info("  ... setting user to {}",usr);
+            httpClient.getCredentialsProvider().setCredentials(AuthScope.ANY,
+                new UsernamePasswordCredentials(usr, pwd));
+            // And add request interceptor to have preemptive authentication
+            httpClient.addRequestInterceptor(new PreemptiveAuthInterceptor(), 0);
+        }
+        //get the supported languages
+        boolean activated = false;
+        try {
+            String supported = httpClient.execute(new HttpGet(analysisServiceUrl), 
+                new BasicResponseHandler());
+            if(supported != null){
+                //the delimiters need to be a character class: every single
+                //'{', '[', '"', ',', ']', '}' (or whitespace) separates tokens
+                for(String lang : supported.split("[\\{\\[\",\\]\\}\\s]+")){
+                    if(!lang.isEmpty()){ //split returns a leading empty token
+                        supportedLanguages.add(lang);
+                    }
+                }
+            }
+            activated = true;
+        } finally {
+            if(!activated){
+                //do not leak the connection pool if the activation fails
+                connectionManager.shutdown();
+                connectionManager = null;
+                httpClient = null;
+                httpParams = null;
+            }
+        }
+        
+    }
+    
+    /**
+     * Deactivates the engine: resets the language configuration, clears the
+     * supported languages and shuts down the connection manager used by the
+     * HTTP client.
+     * @param context the {@link org.osgi.service.component.ComponentContext}
+     */
+    @Deactivate
+    protected void deactivate(ComponentContext context) {
+        languageConfig.setDefault();
+        supportedLanguages.clear();
+        //shutdown the Http Client
+        httpClient = null;
+        httpParams = null;
+        connectionManager.shutdown();
+        connectionManager = null;
+        super.deactivate(context);
+    }
+    
+    /**
+     * Extracts the selection context based on the content, selection and
+     * the start char offset of the selection
+     * @param content the content
+     * @param selection the selected text
+     * @param selectionStartPos the start char position of the selection
+     * @return the context
+     */
+    private String getDefaultSelectionContext(String content, String 
selection,int selectionStartPos){
+        //extract the selection context
+        int beginPos;
+        if(selectionStartPos <= DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE){
+            beginPos = 0;
+        } else {
+            int start = 
selectionStartPos-DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
+            beginPos = content.indexOf(' ',start);
+            if(beginPos < 0 || beginPos >= selectionStartPos){ //no words
+                beginPos = start; //begin within a word
+            }
+        }
+        int endPos;
+        
if(selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE
 >= content.length()){
+            endPos = content.length();
+        } else {
+            int start = 
selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
+            endPos = content.lastIndexOf(' ', start);
+            if(endPos <= selectionStartPos+selection.length()){
+                endPos = start; //end within a word;
+            }
+        }
+        return content.substring(beginPos, endPos);
+    }
+    
+    /**
+     * HttpRequestInterceptor for preemptive authentication, based on httpclient
+     * 4.0 example. If no auth scheme is set for the target host and the
+     * credentials provider knows credentials for it, a {@link BasicScheme}
+     * is set on the auth state before the request is sent.
+     */
+    private static class PreemptiveAuthInterceptor implements HttpRequestInterceptor {
+
+        public void process(HttpRequest request, HttpContext context) throws HttpException, IOException {
+
+            AuthState state = (AuthState) context.getAttribute(ClientContext.TARGET_AUTH_STATE);
+            CredentialsProvider provider = (CredentialsProvider) context.getAttribute(ClientContext.CREDS_PROVIDER);
+            HttpHost host = (HttpHost) context.getAttribute(ExecutionContext.HTTP_TARGET_HOST);
+
+            if (state.getAuthScheme() != null) {
+                return; // an auth scheme is already initialized
+            }
+            // look up the credentials matching the target host
+            Credentials credentials = provider.getCredentials(
+                new AuthScope(host.getHostName(), host.getPort()));
+            if (credentials == null) {
+                return; // nothing known for this host
+            }
+            // generate BasicScheme preemptively
+            state.update(new BasicScheme(), credentials);
+        }
+    }
+}

Added: 
stanbol/trunk/enhancement-engines/restful-nlp/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/restful-nlp/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1435689&view=auto
==============================================================================
--- 
stanbol/trunk/enhancement-engines/restful-nlp/src/main/resources/OSGI-INF/metatype/metatype.properties
 (added)
+++ 
stanbol/trunk/enhancement-engines/restful-nlp/src/main/resources/OSGI-INF/metatype/metatype.properties
 Sat Jan 19 20:53:32 2013
@@ -0,0 +1,49 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+#
+
+org.apache.stanbol.enhancer.engines.restful.nlp.impl.RestfulNlpAnalysisEngine.name=Apache
 \
+Stanbol Enhancer Engine: RESTful NLP Analyser Engine
+org.apache.stanbol.enhancer.engines.restful.nlp.impl.RestfulNlpAnalysisEngine.description=Enhancement
 \
+Engine that uses a RESTful NLP Analysis service to enhance parsed Text.
+
+
+stanbol.enhancer.engine.name.name=Name
+stanbol.enhancer.engine.name.description=The name of the enhancement engine as 
\
+used in the RESTful interface '/engine/<name>'
+service.ranking.name=Ranking
+service.ranking.description=If two enhancement engines with the same name are 
active the \
+one with the higher ranking will be used to process parsed content items.
+
+enhancer.engine.restful.nlp.languages.name=Language configuration
+enhancer.engine.restful.nlp.languages.description=The list of ISO language codes \
+  that are processed by this Engine. NOTE: languages will only be processed if \
+  they are enabled by this configuration AND supported by the RESTful analysis service. \
+  Syntax: '*' is the Wildcard; '!{lang}' to exclude a language;
+  
+enhancer.engine.restful.nlp.analysis.service.name=Service URL
+enhancer.engine.restful.nlp.analysis.service.description=The {service-baseuri} 
of \
+  the endpoint implementing the RESTful service as specified by STANBOL-892
+  
+enhancer.engine.restful.nlp.analysis.service.user.name=User Name
+enhancer.engine.restful.nlp.analysis.service.user.description=The user name 
for \
+  the service (optional)
+  
+# labels for the password property (RestfulNlpAnalysisEngine.ANALYSIS_SERVICE_PWD)
+enhancer.engine.restful.nlp.analysis.service.pwd.name=Password
+enhancer.engine.restful.nlp.analysis.service.pwd.description=The password for \
+  the service (optional)
+ 


Reply via email to