Author: rwesten
Date: Thu Apr 16 07:29:25 2015
New Revision: 1674010
URL: http://svn.apache.org/r1674010
Log:
implementation for STANBOL-1417 for the 0.12 branch
Modified:
stanbol/branches/release-0.12/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/reader/ContentItemReader.java
Modified:
stanbol/branches/release-0.12/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/reader/ContentItemReader.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/reader/ContentItemReader.java?rev=1674010&r1=1674009&r2=1674010&view=diff
==============================================================================
---
stanbol/branches/release-0.12/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/reader/ContentItemReader.java
(original)
+++
stanbol/branches/release-0.12/enhancer/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/reader/ContentItemReader.java
Thu Apr 16 07:29:25 2015
@@ -20,7 +20,16 @@ import static javax.ws.rs.core.MediaType
import static
org.apache.stanbol.enhancer.jersey.utils.RequestPropertiesHelper.REQUEST_PROPERTIES_URI;
import static
org.apache.stanbol.enhancer.jersey.utils.RequestPropertiesHelper.PARSED_CONTENT_URIS;
import static
org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.randomUUID;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_CREATED;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_CREATOR;
import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_EXTRACTED_FROM;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.DCTERMS_LINGUISTIC_SYSTEM;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENHANCEMENT;
+import static
org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
import java.io.IOException;
import java.io.InputStream;
@@ -32,10 +41,12 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
+import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
@@ -51,6 +62,7 @@ import javax.ws.rs.core.Response;
import javax.ws.rs.ext.MessageBodyReader;
import javax.ws.rs.ext.Provider;
+import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
@@ -72,6 +84,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.StreamSource;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
@@ -93,6 +106,10 @@ public class ContentItemReader implement
private HttpServletRequest request;
public static final MediaType MULTIPART =
MediaType.valueOf(MediaType.MULTIPART_FORM_DATA_TYPE.getType()+"/*");
+ /**
+ * Clerezza LiteralFactory
+ */
+ private LiteralFactory lf = LiteralFactory.getInstance();
public ContentItemReader(@Context ServletContext context) {
this.context = context;
@@ -283,12 +300,41 @@ public class ContentItemReader implement
//finally set the language of the content if explicitly parsed in the
request
String contentLanguage = getContentLanguage();
if(!StringUtils.isBlank(contentLanguage)){
- EnhancementEngineHelper.set(contentItem.getMetadata(),
contentItem.getUri(),
- DC_LANGUAGE, new PlainLiteralImpl(contentLanguage));
+ //language codes are case insensitive ... so we convert to lower
case
+ contentLanguage = contentLanguage.toLowerCase(Locale.ROOT);
+ createParsedLanguageAnnotation(contentItem,contentLanguage);
+// previously only the dc:language property was set to the contentItem.
However this
+// information is only used as fallback if no Language annotation is present.
However
+// if a user explicitly parses the language he expects this language to be used
+// so this was change with STANBOL-1417
+// EnhancementEngineHelper.set(contentItem.getMetadata(),
contentItem.getUri(),
+// DC_LANGUAGE, new PlainLiteralImpl(contentLanguage));
}
return contentItem;
}
/**
+ * Creates a fise:TextAnnotation for the explicitly parsed Content-Language
+ * header. The confidence of this annotation is set <code>1.0</code> (see
+ * <a
href="https://issues.apache.org/jira/browse/STANBOL-1417">STANBOL-1417</a>).
+ * @param ci the {@link ContentItem} to the the language annotation
+ * @param lang the parsed language
+ */
+ private void createParsedLanguageAnnotation(ContentItem ci, String lang){
+ MGraph m = ci.getMetadata();
+ UriRef la = new UriRef("urn:enhancement-"+
EnhancementEngineHelper.randomUUID());
+ //add the fise:Enhancement information
+ m.add(new TripleImpl(la, RDF_TYPE, ENHANCER_ENHANCEMENT));
+ m.add(new TripleImpl(la, RDF_TYPE, ENHANCER_TEXTANNOTATION));
+ m.add(new TripleImpl(la, ENHANCER_EXTRACTED_FROM, ci.getUri()));
+ m.add(new TripleImpl(la, DC_CREATED, lf.createTypedLiteral(new
Date())));
+ m.add(new TripleImpl(la, DC_CREATOR,
lf.createTypedLiteral("Content-Language Header of the request")));
+ //add fise:TextAnnotation information as expected by a Language
annotation.
+ m.add(new TripleImpl(la, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM));
+ m.add(new TripleImpl(la, DC_LANGUAGE, new PlainLiteralImpl(lang)));
+ //we set the confidence to 1.0
+ m.add(new TripleImpl(la, ENHANCER_CONFIDENCE,
lf.createTypedLiteral(Float.valueOf(1.0f))));
+ }
+ /**
* tries to retrieve the ContentItem from the 'uri' query parameter of the
* {@link #request}.
* @return the parsed URI or <code>null</code> if none