Author: rwesten
Date: Thu Apr 16 07:29:25 2015
New Revision: 1674010

URL: http://svn.apache.org/r1674010
Log:
implementation for STANBOL-1417 for the 0.12 branch

Modified:
    
stanbol/branches/release-0.12/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/reader/ContentItemReader.java

Modified: 
stanbol/branches/release-0.12/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/reader/ContentItemReader.java
URL: 
http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/reader/ContentItemReader.java?rev=1674010&r1=1674009&r2=1674010&view=diff
==============================================================================
--- 
stanbol/branches/release-0.12/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/reader/ContentItemReader.java
 (original)
+++ 
stanbol/branches/release-0.12/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/reader/ContentItemReader.java
 Thu Apr 16 07:29:25 2015
@@ -20,7 +20,16 @@ import static javax.ws.rs.core.MediaType
 import static 
org.apache.stanbol.enhancer.jersey.utils.RequestPropertiesHelper.REQUEST_PROPERTIES_URI;
 import static 
org.apache.stanbol.enhancer.jersey.utils.RequestPropertiesHelper.PARSED_CONTENT_URIS;
 import static 
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.randomUUID;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_CREATED;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_CREATOR;
 import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_EXTRACTED_FROM;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.DCTERMS_LINGUISTIC_SYSTEM;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENHANCEMENT;
+import static 
org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -32,10 +41,12 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
@@ -51,6 +62,7 @@ import javax.ws.rs.core.Response;
 import javax.ws.rs.ext.MessageBodyReader;
 import javax.ws.rs.ext.Provider;
 
+import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.MGraph;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
@@ -72,6 +84,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.impl.StreamSource;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
 import org.codehaus.jettison.json.JSONArray;
 import org.codehaus.jettison.json.JSONException;
 import org.codehaus.jettison.json.JSONObject;
@@ -93,6 +106,10 @@ public class ContentItemReader implement
     private HttpServletRequest request;
 
     public static final MediaType MULTIPART = 
MediaType.valueOf(MediaType.MULTIPART_FORM_DATA_TYPE.getType()+"/*");
+    /**
+     * Clerezza LiteralFactory
+     */
+    private LiteralFactory lf = LiteralFactory.getInstance();
 
     public ContentItemReader(@Context ServletContext context) {
         this.context = context;
@@ -283,12 +300,41 @@ public class ContentItemReader implement
         //finally set the language of the content if explicitly parsed in the 
request
         String contentLanguage = getContentLanguage();
         if(!StringUtils.isBlank(contentLanguage)){
-            EnhancementEngineHelper.set(contentItem.getMetadata(), 
contentItem.getUri(), 
-                DC_LANGUAGE, new PlainLiteralImpl(contentLanguage));
+            //language codes are case insensitive ... so we convert to lower 
case
+            contentLanguage = contentLanguage.toLowerCase(Locale.ROOT);
+            createParsedLanguageAnnotation(contentItem,contentLanguage);
+// previously only the dc:language property was set to the contentItem. 
However this
+// information is only used as fallback if no Language annotation is present. 
However
+// if a user explicitly parses the language he expects this language to be used
+// so this was change with STANBOL-1417
+//            EnhancementEngineHelper.set(contentItem.getMetadata(), 
contentItem.getUri(), 
+//                DC_LANGUAGE, new PlainLiteralImpl(contentLanguage));
         }
         return contentItem;
     }
     /**
+     * Creates a fise:TextAnnotation for the explicitly parsed Content-Language
+     * header. The confidence of this annotation is set <code>1.0</code> (see 
+     * <a 
href="https://issues.apache.org/jira/browse/STANBOL-1417";>STANBOL-1417</a>).
+     * @param ci the {@link ContentItem} to the the language annotation
+     * @param lang the parsed language
+     */
+    private void createParsedLanguageAnnotation(ContentItem ci, String lang){
+        MGraph m = ci.getMetadata();
+        UriRef la = new UriRef("urn:enhancement-"+ 
EnhancementEngineHelper.randomUUID());
+        //add the fise:Enhancement information
+        m.add(new TripleImpl(la, RDF_TYPE, ENHANCER_ENHANCEMENT));
+        m.add(new TripleImpl(la, RDF_TYPE, ENHANCER_TEXTANNOTATION));
+        m.add(new TripleImpl(la, ENHANCER_EXTRACTED_FROM, ci.getUri()));
+        m.add(new TripleImpl(la, DC_CREATED, lf.createTypedLiteral(new 
Date())));
+        m.add(new TripleImpl(la, DC_CREATOR, 
lf.createTypedLiteral("Content-Language Header of the request")));
+        //add fise:TextAnnotation information as expected by a Language 
annotation.
+        m.add(new TripleImpl(la, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM));
+        m.add(new TripleImpl(la, DC_LANGUAGE, new PlainLiteralImpl(lang)));
+        //we set the confidence to 1.0
+        m.add(new TripleImpl(la, ENHANCER_CONFIDENCE, 
lf.createTypedLiteral(Float.valueOf(1.0f))));
+    }
+    /**
      * tries to retrieve the ContentItem from the 'uri' query parameter of the
      * {@link #request}.
      * @return the parsed URI or <code>null</code> if none


Reply via email to