Modified: incubator/stanbol/trunk/enhancer/jersey/src/test/java/org/apache/stanbol/enhancer/jersey/ContentItemReaderWriterTest.java URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/jersey/src/test/java/org/apache/stanbol/enhancer/jersey/ContentItemReaderWriterTest.java?rev=1243965&r1=1243964&r2=1243965&view=diff ============================================================================== --- incubator/stanbol/trunk/enhancer/jersey/src/test/java/org/apache/stanbol/enhancer/jersey/ContentItemReaderWriterTest.java (original) +++ incubator/stanbol/trunk/enhancer/jersey/src/test/java/org/apache/stanbol/enhancer/jersey/ContentItemReaderWriterTest.java Tue Feb 14 14:49:20 2012 @@ -1,30 +1,50 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.stanbol.enhancer.jersey; +import static org.apache.stanbol.enhancer.jersey.utils.EnhancementPropertiesHelper.ENHANCEMENT_PROPERTIES_URI; +import static org.apache.stanbol.enhancer.jersey.utils.EnhancementPropertiesHelper.OUTPUT_CONTENT; +import static org.apache.stanbol.enhancer.jersey.utils.EnhancementPropertiesHelper.OUTPUT_CONTENT_PART; +import static org.apache.stanbol.enhancer.jersey.utils.EnhancementPropertiesHelper.PARSED_CONTENT_URIS; +import static org.apache.stanbol.enhancer.jersey.utils.EnhancementPropertiesHelper.RDF_FORMAT; +import static org.apache.stanbol.enhancer.jersey.utils.EnhancementPropertiesHelper.getEnhancementProperties; +import static org.apache.stanbol.enhancer.jersey.utils.EnhancementPropertiesHelper.getOutputContent; +import static org.apache.stanbol.enhancer.jersey.utils.EnhancementPropertiesHelper.getOutputContentParts; +import static org.apache.stanbol.enhancer.jersey.utils.EnhancementPropertiesHelper.getParsedContentURIs; +import static org.apache.stanbol.enhancer.servicesapi.helper.ExecutionMetadataHelper.initExecutionMetadata; +import static org.apache.stanbol.enhancer.servicesapi.helper.ExecutionMetadataHelper.initExecutionMetadataContentPart; +import static org.apache.stanbol.enhancer.servicesapi.helper.ExecutionPlanHelper.createExecutionPlan; +import static org.apache.stanbol.enhancer.servicesapi.helper.ExecutionPlanHelper.writeExecutionNode; +import static org.apache.stanbol.enhancer.servicesapi.rdf.ExecutionMetadata.CHAIN_EXECUTION; import static org.junit.Assert.*; import 
java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.charset.Charset; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.Map; import java.util.Map.Entry; +import java.util.Set; import javax.ws.rs.core.HttpHeaders; import javax.ws.rs.core.MediaType; @@ -40,6 +60,7 @@ import org.apache.clerezza.rdf.ontologie import org.apache.commons.io.IOUtils; import org.apache.stanbol.commons.indexedgraph.IndexedMGraph; import org.apache.stanbol.enhancer.jersey.reader.ContentItemReader; +import org.apache.stanbol.enhancer.jersey.utils.EnhancementPropertiesHelper; import org.apache.stanbol.enhancer.jersey.writers.ContentItemWriter; import org.apache.stanbol.enhancer.servicesapi.Blob; import org.apache.stanbol.enhancer.servicesapi.ContentItem; @@ -51,10 +72,14 @@ import org.apache.stanbol.enhancer.servi import org.apache.stanbol.enhancer.servicesapi.rdf.ExecutionMetadata; import org.junit.BeforeClass; import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.sun.jersey.core.util.StringKeyIgnoreCaseMultivaluedMap; public class ContentItemReaderWriterTest { + + private static final Logger log = LoggerFactory.getLogger(ContentItemReaderWriterTest.class); private static ContentItem contentItem; private static ContentItemWriter ciWriter; @@ -74,10 +99,17 @@ public class ContentItemReaderWriterTest "This is a ContentItem to Mime Multipart test!", "text/plain")); contentItem.getMetadata().add(new TripleImpl( new UriRef("urn:test"), RDF.type, new UriRef("urn:types:Document"))); - MGraph em =ExecutionMetadataHelper.initExecutionMetadataContentPart(contentItem); - NonLiteral ep = ExecutionPlanHelper.createExecutionPlan(em, "testChain"); - ExecutionPlanHelper.writeExecutionNode(em, ep, "testEngine", true, null); - ExecutionMetadataHelper.initExecutionMetadata(em, 
em, contentItem.getUri(), "testChain", false); + //mark the main content as parsed and also that all + //contents and contentparts should be included + Map<String,Object> properties = getEnhancementProperties(contentItem); + properties.put(PARSED_CONTENT_URIS, Collections.singleton(contentItem.getPartUri(0).getUnicodeString())); + properties.put(OUTPUT_CONTENT, Collections.singleton("*/*")); + properties.put(OUTPUT_CONTENT_PART, Collections.singleton("*")); + properties.put(RDF_FORMAT, "application/rdf+xml"); + MGraph em = initExecutionMetadataContentPart(contentItem); + NonLiteral ep = createExecutionPlan(em, "testChain"); + writeExecutionNode(em, ep, "testEngine", true, null); + initExecutionMetadata(em, em, contentItem.getUri(), "testChain", false); ciWriter = new ContentItemWriter(null); ciReader = new ContentItemReader(null); } @@ -108,6 +140,7 @@ public class ContentItemReaderWriterTest assertEquals(contentType.getParameters().get("charset"),"UTF-8"); //check the serialised multipart MIME String multipartMime = new String(out.toByteArray(),Charset.forName(contentType.getParameters().get("charset"))); + log.info("Multipart MIME content:\n{}\n",multipartMime); String[] tests = new String[]{ "--"+contentType.getParameters().get("boundary"), "Content-Disposition: form-data; name=\"metadata\"; filename=\"urn:test\"", @@ -126,7 +159,10 @@ public class ContentItemReaderWriterTest "This is a ContentItem to Mime Multipart test!", "--contentParts--", "--"+contentType.getParameters().get("boundary"), - "Content-Disposition: form-data; name=\"http://stanbol.apache.org/ontology/enhancer/executionMetadata#ChainExecution\"", + "Content-Disposition: form-data; name=\""+ENHANCEMENT_PROPERTIES_URI.getUnicodeString()+"\"", + "Content-Type: application/json; charset=UTF-8", + "--"+contentType.getParameters().get("boundary"), + "Content-Disposition: form-data; name=\""+CHAIN_EXECUTION.getUnicodeString()+"\"", "Content-Type: application/rdf+xml; charset=UTF-8", "<rdf:type 
rdf:resource=\"http://stanbol.apache.org/ontology/enhancer/executionplan#ExecutionNode\"/>", "--"+contentType.getParameters().get("boundary")+"--" @@ -142,9 +178,8 @@ public class ContentItemReaderWriterTest public void testReader() throws Exception { ByteArrayOutputStream out = new ByteArrayOutputStream(); MediaType contentType = serializeContentItem(out); - ContentItemReader cir = new ContentItemReader(null); ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); - ContentItem ci = cir.readFrom(ContentItem.class, null, null, contentType, null, in); + ContentItem ci = ciReader.readFrom(ContentItem.class, null, null, contentType, null, in); //assert ID assertEquals(contentItem.getUri(), ci.getUri()); //assert metadata @@ -159,9 +194,11 @@ public class ContentItemReaderWriterTest assertEquals(content, readContent); Iterator<Entry<UriRef,Blob>> contentItemBlobsIt = ContentItemHelper.getContentParts(contentItem, Blob.class).entrySet().iterator(); Iterator<Entry<UriRef,Blob>> ciBlobsIt = ContentItemHelper.getContentParts(ci, Blob.class).entrySet().iterator(); + Set<String> expectedParsedContentIds = new HashSet<String>(); //later used to validate enhancementMetadata while(contentItemBlobsIt.hasNext() && ciBlobsIt.hasNext()){ Entry<UriRef,Blob> contentItemBlobPart = contentItemBlobsIt.next(); Entry<UriRef,Blob> ciBlobPart = ciBlobsIt.next(); + expectedParsedContentIds.add(ciBlobPart.getKey().getUnicodeString()); assertEquals(contentItemBlobPart.getKey(), ciBlobPart.getKey()); String partContentType = contentItemBlobPart.getValue().getMimeType(); String readPartContentType = ciBlobPart.getValue().getMimeType(); @@ -170,11 +207,22 @@ public class ContentItemReaderWriterTest String readPartContent = IOUtils.toString(ciBlobPart.getValue().getStream(), "UTF-8"); assertEquals(partContent, readPartContent); } + //validate ExecutionMetadata MGraph executionMetadata = contentItem.getPart(ExecutionMetadata.CHAIN_EXECUTION, MGraph.class); MGraph 
readExecutionMetadata = ci.getPart(ExecutionMetadata.CHAIN_EXECUTION, MGraph.class); assertNotNull(executionMetadata); assertNotNull(readExecutionMetadata); assertEquals(executionMetadata.size(), readExecutionMetadata.size()); + //validate EnhancementProperties + Map<String,Object> properties = getEnhancementProperties(ci); + //the parsed value MUST BE overridden by the two content parts parsed + assertEquals(expectedParsedContentIds, getParsedContentURIs(properties)); + Collection<String> outputContent = getOutputContent(properties); + assertEquals(1, outputContent.size()); + assertEquals(outputContent.iterator().next(), "*/*"); + Collection<String> outputContentPart = Collections.singleton("*"); + assertEquals(1, outputContentPart.size()); + assertEquals(outputContentPart.iterator().next(), "*"); } }
Modified: incubator/stanbol/trunk/integration-tests/pom.xml URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/integration-tests/pom.xml?rev=1243965&r1=1243964&r2=1243965&view=diff ============================================================================== --- incubator/stanbol/trunk/integration-tests/pom.xml (original) +++ incubator/stanbol/trunk/integration-tests/pom.xml Tue Feb 14 14:49:20 2012 @@ -81,6 +81,14 @@ <artifactId>commons-io</artifactId> </dependency> <dependency> + <groupId>org.apache.httpcomponents</groupId> + <artifactId>httpcore-osgi</artifactId> + </dependency> + <dependency> + <groupId>org.apache.httpcomponents</groupId> + <artifactId>httpmime</artifactId> + </dependency> + <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> </dependency> @@ -96,6 +104,15 @@ <groupId>org.codehaus.jettison</groupId> <artifactId>jettison</artifactId> </dependency> + <!-- Used to access constants such as Ontology URIs --> + <dependency> + <groupId>org.apache.stanbol</groupId> + <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId> + </dependency> + <dependency> + <groupId>org.apache.clerezza</groupId> + <artifactId>rdf.ontologies</artifactId> + </dependency> </dependencies> <build> Modified: incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/commons/httpqueryheaders/it/HttpQueryHeaderPostTest.java URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/commons/httpqueryheaders/it/HttpQueryHeaderPostTest.java?rev=1243965&r1=1243964&r2=1243965&view=diff ============================================================================== --- incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/commons/httpqueryheaders/it/HttpQueryHeaderPostTest.java (original) +++ incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/commons/httpqueryheaders/it/HttpQueryHeaderPostTest.java Tue Feb 14 14:49:20 2012 @@ -64,7 +64,7 @@ 
public class HttpQueryHeaderPostTest ext "header_Accept",""))) //override the parse Accept Header .withHeader("Accept","text/turtle") //set Accept to turtle (overridden) .withContent("John Smith was born in London. But since ten years he " + - "lives now in Paris.") + "works for the Smith Coorperation and lives in Paris.") ) .assertStatus(200) //check for JSON-LD (the default content type Added: incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/ContentTranformationTest.java URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/ContentTranformationTest.java?rev=1243965&view=auto ============================================================================== --- incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/ContentTranformationTest.java (added) +++ incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/ContentTranformationTest.java Tue Feb 14 14:49:20 2012 @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.stanbol.enhancer.it; + +import static org.apache.stanbol.enhancer.it.MultipartContentItemTestUtils.getHTMLContent; + +import java.io.IOException; + +import org.junit.Test; +/** + * This tests RESTful API extensions to the Stanbol Enhancer as described by + * STANBOL-481 + */ +public class ContentTranformationTest extends EnhancerTestBase { + + public static final String[] TEXT_CONTENT = new String[]{ + "Stanbol Content Transformation", + "The multipart content API of Apache Stanbol allows to directly " + + "request transformed content by adding the \"omitMetadata=true\" " + + "query parameter and setting the \"Accept\" header to the target" + + "content type.", + "This feature can be used with any enhancement chain that " + + "incudles an Engine that provides the required content transcoding" + + "functionality. However because extracted metadata are omitted by" + + "such requests it is best used with enhancement chains that only" + + "contains such engines." + }; + + + public ContentTranformationTest() { + //for now use the language chain to test transforming + super(getChainEndpoint("language"),"metaxa","langid"); + } + + @Test + public void testHtml2PlainText() throws IOException { + executor.execute( + builder.buildPostRequest(getEndpoint()+"?omitMetadata=true") + .withHeader("Accept","text/plain") + .withContent(getHTMLContent(TEXT_CONTENT)) + ) + .assertStatus(200) + .assertContentType("text/plain") + .assertContentContains(TEXT_CONTENT); + + } +} Propchange: incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/ContentTranformationTest.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/MultipartContentItemTestUtils.java URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/MultipartContentItemTestUtils.java?rev=1243965&view=auto ============================================================================== --- incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/MultipartContentItemTestUtils.java (added) +++ incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/MultipartContentItemTestUtils.java Tue Feb 14 14:49:20 2012 @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.stanbol.enhancer.it; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.http.NameValuePair; +import org.apache.http.client.utils.URLEncodedUtils; +import org.apache.http.message.BasicNameValuePair; + +public class MultipartContentItemTestUtils { + + public static String getHTMLContent(String...content){ + if(content == null || content.length<2){ + throw new IllegalArgumentException("The parsed content MUST have at lest two elements"); + } + StringBuilder c = new StringBuilder(); + c.append("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" " + + "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n"); + c.append("<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" " + + "lang=\"en\" dir=\"ltr\">\n"); + c.append("<head>\n"); + c.append("<meta http-equiv=\"Content-Type\" content=\"text/html; " + + "charset=utf-8\" />\n"); + c.append("<title>").append(content[0]).append("</title>\n"); + c.append(" <meta http-equiv=\"Content-Type\" content=\"text/html; " + + "charset=utf-8\" />\n"); + c.append("<style type=\"text/css\">"); + c.append("#headbox {\n"); + c.append(" background: none repeat scroll 0 0 white;\n"); + c.append(" border-bottom: 3px solid black;\n"); + c.append(" width: 100%;\n"); + c.append("}\n"); + c.append("</style>\n"); + c.append("</head>\n"); + c.append("<body>\n"); + c.append("<div class=\"content\">\n"); + c.append("<h2>").append(content[0]).append("</h2>\n"); + for(int i=1;i<content.length;i++){ + c.append("<p>").append(content[i]).append("</p>\n"); + } + c.append("</div>\n"); + c.append("</body>\n"); + c.append("</html>\n"); + return c.toString(); + } + + + /** + * Build a path from the supplied path and + * query parameters. + * + * @param queryParameters an even number of Strings, each pair + * of values represents the key and value of a query parameter. + * Keys and values are encoded by this method. + */ + public static String buildPathWithParams(String path, String... 
queryParameters) { + final StringBuilder sb = new StringBuilder(); + if (queryParameters == null || queryParameters.length == 0) { + sb.append(path); + } else if (queryParameters.length % 2 != 0) { + throw new IllegalArgumentException("Invalid number of queryParameters arguments (" + + queryParameters.length + "), must be even"); + } else { + final List<NameValuePair> p = new ArrayList<NameValuePair>(); + for (int i = 0; i < queryParameters.length; i += 2) { + p.add(new BasicNameValuePair(queryParameters[i], queryParameters[i + 1])); + } + sb.append(path); + sb.append("?"); + sb.append(URLEncodedUtils.format(p, "UTF-8")); + } + + return sb.toString(); + } +} Propchange: incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/MultipartContentItemTestUtils.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/MultipartRequestTest.java URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/MultipartRequestTest.java?rev=1243965&view=auto ============================================================================== --- incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/MultipartRequestTest.java (added) +++ incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/MultipartRequestTest.java Tue Feb 14 14:49:20 2012 @@ -0,0 +1,598 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.stanbol.enhancer.it; + +import static org.apache.stanbol.enhancer.it.MultipartContentItemTestUtils.buildPathWithParams; +import static org.apache.stanbol.enhancer.it.MultipartContentItemTestUtils.getHTMLContent; +import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.DBPEDIA_ORGANISATION; +import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.DBPEDIA_PERSON; +import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.DBPEDIA_PLACE; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.Charset; + +import org.apache.clerezza.rdf.core.BNode; +import org.apache.clerezza.rdf.core.MGraph; +import org.apache.clerezza.rdf.core.Resource; +import org.apache.clerezza.rdf.core.UriRef; +import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl; +import org.apache.clerezza.rdf.core.impl.SimpleMGraph; +import org.apache.clerezza.rdf.core.impl.TripleImpl; +import org.apache.clerezza.rdf.core.serializedform.Serializer; +import org.apache.clerezza.rdf.core.serializedform.SupportedFormat; +import org.apache.clerezza.rdf.jena.serializer.JenaSerializerProvider; +import org.apache.clerezza.rdf.ontologies.RDF; +import org.apache.http.entity.mime.FormBodyPart; +import org.apache.http.entity.mime.HttpMultipart; +import org.apache.http.entity.mime.MIME; +import org.apache.http.entity.mime.MultipartEntity; +import org.apache.http.entity.mime.content.AbstractContentBody; +import 
org.apache.http.entity.mime.content.ContentBody; +import org.apache.http.entity.mime.content.ContentDescriptor; +import org.apache.http.entity.mime.content.StringBody; +import org.apache.stanbol.enhancer.servicesapi.ContentItem; +import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine; +import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper; +import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses; +import org.apache.stanbol.enhancer.servicesapi.rdf.Properties; +import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This tests RESTful API extensions to the Stanbol Enhancer as described by + * STANBOL-481 + */ +public class MultipartRequestTest extends EnhancerTestBase { + private static final Charset UTF8 = Charset.forName("UTF-8"); + private static Serializer serializer = new Serializer(); + static { + serializer.bindSerializingProvider(new JenaSerializerProvider()); + } + + + private static final Logger log = LoggerFactory.getLogger(MultipartRequestTest.class); + + private final String TEXT_CONTENT = "The Apache Stanbol Enhancer.\n" + + "The Stanbol enhancer can detect famous cities such as Paris and " + + "people such as Bob Marley."; + private final String[] TEXT_CONTENT_LINES = TEXT_CONTENT.split("\n"); + private final String HTML_CONTENT = getHTMLContent(TEXT_CONTENT_LINES); + private final String[] HTML_CONTENT_LINES = HTML_CONTENT.split("\n"); + public MultipartRequestTest() { + super(); //use the default endpoint + } + + @Test + public void testIllegalRdfFormat() throws IOException { + String[] params = new String []{ + "outputContent","*/*", + "rdfFormat","notAvalidMimeFormat"}; + executor.execute( + builder.buildPostRequest(buildPathWithParams(getEndpoint(), params)) + .withHeader("Accept","multipart/from-data") + .withHeader("Content-Type", "text/html; charset=UTF-8") + 
.withContent(HTML_CONTENT) + ) + .assertStatus(400); //BAD_REQUEST + } + @Test + public void testIllegalOutputContent() throws IOException { + String[] params = new String []{ + "outputContent","notAvalidMimeFormat"}; + executor.execute( + builder.buildPostRequest(buildPathWithParams(getEndpoint(), params)) + .withHeader("Accept","multipart/from-data") + .withHeader("Content-Type", "text/html; charset=UTF-8") + .withContent(HTML_CONTENT) + ) + .assertStatus(400); //BAD_REQUEST + } + @Test + public void testDefaultContentTypes() throws IOException { + //'*/*', 'text/plain' and 'application/octet-stream' where considered as + // Indicators that the default RDF serialisation format for the metadata + // should be used. + //This is basically testing backward compatibility + + String[] jsonLDMetadataTests = new String[]{ + "\"@subject\": \"http://dbpedia.org/resource/Paris\",", + "\"@subject\": \"http://dbpedia.org/resource/Bob_Marley\",", + "\"dc:creator\": \"org.apache.stanbol.enhancer.engines.entitytagging.impl.NamedEntityTaggingEngine\"," + }; + String[] params = new String []{ + "outputContent","text/plain" + }; + executor.execute( + builder.buildPostRequest(buildPathWithParams(getEndpoint(), params)) + .withHeader("Accept","text/plain") //must be multipart/from-data + .withHeader("Content-Type", "text/html; charset=UTF-8") + .withContent(HTML_CONTENT) + ) + .assertStatus(200) //metadata as JSONLD + .assertContentRegexp(jsonLDMetadataTests); + + params = new String []{ + "outputContent","application/octet-stream"}; //omitMetadata=false + executor.execute( + builder.buildPostRequest(buildPathWithParams(getEndpoint(), params)) + .withHeader("Accept","text/plain") //must be multipart/from-data + .withHeader("Content-Type", "text/html; charset=UTF-8") + .withContent(HTML_CONTENT) + ) + .assertStatus(200) //metadata as JSONLD + .assertContentRegexp(jsonLDMetadataTests); + + params = new String []{ + "outputContent","application/octet-stream"}; //omitMetadata=false + 
executor.execute( + builder.buildPostRequest(buildPathWithParams(getEndpoint(), params)) + .withHeader("Accept","text/plain") //must be multipart/from-data + .withHeader("Content-Type", "text/html; charset=UTF-8") + .withContent(HTML_CONTENT) + ) + .assertStatus(200) //metadata as JSONLD + .assertContentRegexp(jsonLDMetadataTests); + } + + @Test + public void testOutputMetadataAndAllContent() throws IOException { + String[] params = new String []{ + "outputContent","*/*", + "rdfFormat","text/rdf+nt"}; + String content = executor.execute( + builder.buildPostRequest(buildPathWithParams(getEndpoint(), params)) + .withHeader("Accept","multipart/from-data") + .withHeader("Content-Type", "text/html; charset=UTF-8") + .withContent(HTML_CONTENT) + ) + .assertStatus(200) + .assertContentContains( + "--contentItem", + "--contentItem--", + "Content-Disposition: form-data; name=\"content\"", + "Content-Type: multipart/alternate; boundary=contentParts; charset=UTF-8", + "Content-Type: text/plain; charset=UTF-8", + "Content-Type: text/html", + "--contentParts", + "--contentParts--") + .assertContentContains(TEXT_CONTENT_LINES) + .assertContentContains(HTML_CONTENT_LINES) //line by line the HTML content + .assertContentRegexp( + "Content-Disposition: form-data; name=\"metadata\"; filename=.*", + "Content-Disposition: form-data; name=\"urn:metaxa:plain-text:.*", + "Content-Disposition: form-data; name=\"urn:content-item-sha1-.*", + //and the expected enhancements in the metadata + "http://purl.org/dc/terms/creator.*LangIdEnhancementEngine", + "http://purl.org/dc/terms/language.*en", + "http://fise.iks-project.eu/ontology/entity-label.*Paris", + "http://purl.org/dc/terms/creator.*org.apache.stanbol.enhancer.engines.opennlp.*EngineCore", + "http://fise.iks-project.eu/ontology/entity-label.*Bob Marley") + .getContent(); + log.debug("Content:\n{}\n",content); + + } + @Test + public void testOutputAllContentOmitMetadata() throws IOException { + String[] params = new String []{ + 
"outputContent","*/*", + "omitMetadata","true", + "rdfFormat","text/rdf+nt"}; + String content = executor.execute( + builder.buildPostRequest(buildPathWithParams(getEndpoint(), params)) + .withHeader("Accept","multipart/from-data") + .withHeader("Content-Type", "text/html; charset=UTF-8") + .withContent(HTML_CONTENT) + ) + .assertStatus(200) + .assertContentContains( + "--contentItem", + "--contentItem--", + "Content-Disposition: form-data; name=\"content\"", + "Content-Type: multipart/alternate; boundary=contentParts; charset=UTF-8", + "Content-Type: text/plain; charset=UTF-8", + "Content-Type: text/html", + "--contentParts", + "--contentParts--") + .assertContentContains(TEXT_CONTENT_LINES) + .assertContentContains(HTML_CONTENT_LINES) //line by line the HTML content + .assertContentRegexp( //MUST contain + "Content-Disposition: form-data; name=\"urn:metaxa:plain-text:.*", + "Content-Disposition: form-data; name=\"urn:content-item-sha1-.*") + .assertContentRegexp(false, //MUST NOT contain + "Content-Disposition: form-data; name=\"metadata\"; filename=.*", + //and the expected enhancements in the metadata + "http://purl.org/dc/terms/creator.*LangIdEnhancementEngine", + "http://purl.org/dc/terms/language.*en", + "http://fise.iks-project.eu/ontology/entity-label.*Paris", + "http://purl.org/dc/terms/creator.*org.apache.stanbol.enhancer.engines.opennlp.*EngineCore", + "http://fise.iks-project.eu/ontology/entity-label.*Bob Marley") + .getContent(); + log.debug("Content:\n{}\n",content); + + } + + @Test + public void testOutputPlainTextContent() throws IOException { + String[] params = new String []{ + "outputContent","text/plain", + "rdfFormat","text/rdf+nt"}; + String content = executor.execute( + builder.buildPostRequest(buildPathWithParams(getEndpoint(), params)) + .withHeader("Accept","multipart/from-data") + .withHeader("Content-Type", "text/html; charset=UTF-8") + .withContent(HTML_CONTENT) + ) + .assertStatus(200) + .assertContentContains( + "--contentItem", + 
"--contentItem--", + "Content-Disposition: form-data; name=\"content\"", + "Content-Type: multipart/alternate; boundary=contentParts; charset=UTF-8", + "Content-Type: text/plain; charset=UTF-8", + "--contentParts", + "--contentParts--") + .assertContentContains(TEXT_CONTENT_LINES) + .assertContentRegexp( + "Content-Disposition: form-data; name=\"metadata\"; filename=.*", + "Content-Disposition: form-data; name=\"urn:metaxa:plain-text:.*", + //and the expected enhancements in the metadata + "http://purl.org/dc/terms/creator.*LangIdEnhancementEngine", + "http://purl.org/dc/terms/language.*en", + "http://fise.iks-project.eu/ontology/entity-label.*Paris", + "http://purl.org/dc/terms/creator.*org.apache.stanbol.enhancer.engines.opennlp.*EngineCore", + "http://fise.iks-project.eu/ontology/entity-label.*Bob Marley") + .getContent(); + log.debug("Content:\n{}\n",content); + + } + @Test + public void testOutputContentOtherThanParsed() throws IOException { + //metadata and text content + String[] params = new String []{ + "outputContent","*/*", + "omitParsed","true", + "rdfFormat","text/rdf+nt"}; + String content = executor.execute( + builder.buildPostRequest(buildPathWithParams(getEndpoint(), params)) + .withHeader("Accept","multipart/from-data") + .withHeader("Content-Type", "text/html; charset=UTF-8") + .withContent(HTML_CONTENT) + ) + .assertStatus(200) + .assertContentContains( + "--contentItem", + "--contentItem--", + "Content-Disposition: form-data; name=\"content\"", + "Content-Type: multipart/alternate; boundary=contentParts; charset=UTF-8", + "Content-Type: text/plain; charset=UTF-8", + "--contentParts", + "--contentParts--") + .assertContentContains(TEXT_CONTENT_LINES) + .assertContentRegexp( + "Content-Disposition: form-data; name=\"metadata\"; filename=.*", + "Content-Disposition: form-data; name=\"urn:metaxa:plain-text:.*", + //and the expected enhancements in the metadata + "http://purl.org/dc/terms/creator.*LangIdEnhancementEngine", + 
"http://purl.org/dc/terms/language.*en", + "http://fise.iks-project.eu/ontology/entity-label.*Paris", + "http://purl.org/dc/terms/creator.*org.apache.stanbol.enhancer.engines.opennlp.*EngineCore", + "http://fise.iks-project.eu/ontology/entity-label.*Bob Marley") + .getContent(); + log.debug("Content:\n{}\n",content); + } + @Test + public void testOutputContentPart() throws IOException { + String[] params = new String []{ + "outputContentPart","http://stanbol.apache.org/ontology/enhancer/executionMetadata#ChainExecution", + "omitMetadata","true", + "rdfFormat","application/rdf+xml"}; + String content = executor.execute( + builder.buildPostRequest(buildPathWithParams(getEndpoint(), params)) + .withHeader("Accept","multipart/from-data") + .withHeader("Content-Type", "text/plain; charset=UTF-8") + .withContent(TEXT_CONTENT) + ) + .assertStatus(200) + .assertContentContains( + "--contentItem", + "--contentItem--", + "Content-Disposition: form-data; name=\"http://stanbol.apache.org/ontology/enhancer/executionMetadata#ChainExecution\"", + "Content-Type: application/rdf+xml; charset=UTF-8", + "<rdf:type rdf:resource=\"http://stanbol.apache.org/ontology/enhancer/executionplan#ExecutionPlan\"/>", + "<rdf:type rdf:resource=\"http://stanbol.apache.org/ontology/enhancer/executionplan#ExecutionNode\"/>", + "<rdf:type rdf:resource=\"http://stanbol.apache.org/ontology/enhancer/executionMetadata#EngineExecution\"/>", + "<rdf:type rdf:resource=\"http://stanbol.apache.org/ontology/enhancer/executionMetadata#ChainExecution\"/>") + .getContent(); + log.debug("Content:\n{}\n",content); + } + /** + * This uploads the HTML as well as the plain text version of an content. + * This allows it CMS to parse already available alternate content versions + * in a single request. Stanbol can than still use the original content + * (e.g. to extract metadata) but other engines that require the alternate + * version (e.g. 
plain text version) of an document will directly use the + * parsed version .<p> + * This UnitTest ensures this by adding a "secret" extension the to plain + * text version and than checks if the two entities mentioned in that + * part are included in the extracted entities. + * @throws IOException + */ + @Test + public void testUploadMultipleContents() throws IOException { + //It is a secret, that Berlin is the capital of Germany + String extraTextConent = TEXT_CONTENT + + "\nIt is a secret, that the city of Berlin is the capital of Germany since 1990."; + + //The multipart entity for the contentItem + MultipartEntity contentItem = new MultipartEntity(null, null ,UTF8); + //The multipart/alternate mime part for the parsed content versions + HttpMultipart content = new HttpMultipart("alternate", UTF8 ,"contentParts"); + //add the content part to the contentItem + contentItem.addPart( + "content", //the name MUST BE "content" + new MultipartContentBody(content)); + //now add the content (ordering is important, because the first + //part will be assumed the original document and all following are + //assumed alternate - transformed - versions + content.addBodyPart(new FormBodyPart( + "http://www.example.com/test.html", //the id of the content + new StringBody(HTML_CONTENT, "text/html", UTF8))); + content.addBodyPart(new FormBodyPart( + "http://www.example.com/test.txt", + new StringBody(extraTextConent, "text/plain", UTF8))); + + String receivedContent = executor.execute( + builder.buildPostRequest(getEndpoint()) + .withHeader("Accept","text/rdf+nt") + .withEntity(contentItem) + ) + .assertStatus(200) + .assertContentRegexp( + //and the expected enhancements in the metadata + "http://purl.org/dc/terms/creator.*LangIdEnhancementEngine", + "http://purl.org/dc/terms/language.*en", + "http://fise.iks-project.eu/ontology/entity-label.*Paris", + "http://purl.org/dc/terms/creator.*org.apache.stanbol.enhancer.engines.opennlp.*EngineCore", + 
"http://fise.iks-project.eu/ontology/entity-label.*Bob Marley", + //check also for expeted entities extracted from the secret Text part! + "http://fise.iks-project.eu/ontology/entity-label.*Berlin", + "http://fise.iks-project.eu/ontology/entity-label.*Germany") + .getContent(); + log.debug("Content:\n{}\n",receivedContent); + + } + + + @Test + public void testContentBeforeMetadata() throws IOException{ + final UriRef contentItemId = new UriRef("http://www.example.com/test.html"); + String rdfContentType = SupportedFormat.RDF_XML; + String rdfContent = getDummyRdfMetadata(contentItemId, rdfContentType); + MultipartEntity contentItem = new MultipartEntity(null, null ,UTF8); + //first the content -> illegal + contentItem.addPart( + "content", //the name MUST BE "content" + new StringBody(HTML_CONTENT,"text/html",UTF8)); + //after that the metadata + contentItem.addPart( + "metadata", //the name MUST BE "metadata" + new StringBody(rdfContent,rdfContentType,UTF8)); + + String receivedContent = executor.execute( + builder.buildPostRequest(getEndpoint()) + .withHeader("Accept","text/rdf+nt") + .withEntity(contentItem) + ) + .assertStatus(400) //BAD request + .getContent(); + //check also the error message + Assert.assertTrue(receivedContent.contains( + "The Multipart MIME part with the 'metadata' MUST BE before the " + + "MIME part containing the 'content'")); + } + @Test + public void testMissingContent() throws IOException{ + final UriRef contentItemId = new UriRef("http://www.example.com/test.html"); + String rdfContentType = SupportedFormat.RDF_XML; + String rdfContent = getDummyRdfMetadata(contentItemId, rdfContentType); + MultipartEntity contentItem = new MultipartEntity(null, null ,UTF8); + //after that the metadata + contentItem.addPart( + "metadata", //the name MUST BE "metadata" + new StringBody(rdfContent,rdfContentType,UTF8)); + + String receivedContent = executor.execute( + builder.buildPostRequest(getEndpoint()) + .withHeader("Accept","text/rdf+nt") + 
.withEntity(contentItem) + ) + .assertStatus(400) //BAD request + .getContent(); + //check also the error message + Assert.assertTrue(receivedContent.contains( + "The parsed multipart content item does not contain any content.")); + } + + /** + * @param contentItemId + * @param rdfContentType + * @return + */ + private String getDummyRdfMetadata(final UriRef contentItemId, String rdfContentType) { + MGraph metadata = new SimpleMGraph(); + metadata.add(new TripleImpl(new BNode(), Properties.ENHANCER_EXTRACTED_FROM, contentItemId)); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + serializer.serialize(out, metadata, rdfContentType); + String rdfContent = new String(out.toByteArray(),UTF8); + return rdfContent; + } + + /** + * Stanbol also supports to upload pre-existing metadata with the content. + * This UnitTest uses an example that parsed TextAnnotations for free text + * tags provided by users that are than linked to Entities in DBPedia + * @throws IOException + */ + @Test + public void testUploadWithMetadata() throws IOException { + //create the metadata + Resource user = new PlainLiteralImpl("Rupert Westenthaler"); + final UriRef contentItemId = new UriRef("http://www.example.com/test.html"); + MGraph metadata = new SimpleMGraph(); + addTagAsTextAnnotation(metadata, contentItemId, + "Germany",DBPEDIA_PLACE, user); + addTagAsTextAnnotation(metadata, contentItemId, + "Europe",DBPEDIA_PLACE, user); + addTagAsTextAnnotation(metadata, contentItemId, + "NATO",DBPEDIA_ORGANISATION, user); + addTagAsTextAnnotation(metadata, contentItemId, + "Silvio Berlusconi",DBPEDIA_PERSON, user); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + serializer.serialize(out, metadata, SupportedFormat.RDF_XML); + String rdfContent = new String(out.toByteArray(),UTF8); + + //The multipart entity for the contentItem + MultipartEntity contentItem = new MultipartEntity(null, null ,UTF8); + //the "metadata" MUST BE the first element + /* + * NOTE: We need here to 
override the getFilename, because this MUST + * BE the URI of the ContentItem. This is important, because the + * Metadata do contain triples about that ContentItem and therefore + * it MUST BE assured that the URI of the ContentItem created by + * the Stanbol Enhancer is the same of as the URI used in the + * Metadata! + */ + contentItem.addPart( + "metadata", //the name MUST BE "metadata" + new StringBody(rdfContent,SupportedFormat.RDF_XML,UTF8){ + @Override + public String getFilename() { //The filename MUST BE the + return contentItemId.getUnicodeString(); //uri of the ContentItem + } + }); + //Add the Content + /* + * NOTE: If we only parse a single content than we can also directly + * add it with the name "content". This means that the useage of + * a "multipart/alternate" container is in such cases optional. + */ + contentItem.addPart( + "content", //the name MUST BE "content" + new StringBody(HTML_CONTENT,"text/html",UTF8)); + + //send the request + String receivedContent = executor.execute( + builder.buildPostRequest(getEndpoint()) + .withHeader("Accept","text/rdf+nt") + .withEntity(contentItem) + ) + .assertStatus(200) + .assertContentRegexp( + //and the expected enhancements based on the parsed content + "http://purl.org/dc/terms/creator.*LangIdEnhancementEngine", + "http://purl.org/dc/terms/language.*en", + "http://fise.iks-project.eu/ontology/entity-label.*Paris", + "http://purl.org/dc/terms/creator.*org.apache.stanbol.enhancer.engines.opennlp.*EngineCore", + "http://fise.iks-project.eu/ontology/entity-label.*Bob Marley", + //additional enhancements based on parsed metadata + "http://fise.iks-project.eu/ontology/entity-reference.*http://dbpedia.org/resource/Germany.*", + "http://fise.iks-project.eu/ontology/entity-reference.*http://dbpedia.org/resource/NATO.*", + "http://fise.iks-project.eu/ontology/entity-reference.*http://dbpedia.org/resource/Silvio_Berlusconi.*", + 
"http://fise.iks-project.eu/ontology/entity-reference.*http://dbpedia.org/resource/Europe.*") + .getContent(); + log.debug("Content:\n{}\n",receivedContent); + } + /** + * Utility that creates an {@link TechnicalClasses#ENHANCER_TEXTANNOTATION TextAnnotation} + * for the parsed contentItem, free text tag an user. + * @param graph the grpah to add the information + * @param contentItem the {@link ContentItem#getUri() uri} of the {@link ContentItem} + * @param tag the free text tag for the document + * @param tagType the type of the tag. Typically Stanbol supports: <ul> + * <li>{@link OntologicalClasses#DBPEDIA_PERSON} + * <li>{@link OntologicalClasses#DBPEDIA_ORGANISATION} + * <li>{@link OntologicalClasses#DBPEDIA_PLACE} + * </ul> + * But specific {@link EnhancementEngine}s might also process other types + * or even TextAnnotations without an type + * @param user the user that created the tag + * @return the uri of the created annotation + */ + private static final UriRef addTagAsTextAnnotation(MGraph graph, UriRef contentItem, + String tag, UriRef tagType, Resource user){ + UriRef ta = new UriRef("urn:user-annotation:"+EnhancementEngineHelper.randomUUID()); + graph.add(new TripleImpl(ta, RDF.type, TechnicalClasses.ENHANCER_TEXTANNOTATION)); + graph.add(new TripleImpl(ta, Properties.ENHANCER_EXTRACTED_FROM,contentItem)); + if(tagType != null){ + graph.add(new TripleImpl(ta, Properties.DC_TYPE, tagType)); + } + graph.add(new TripleImpl(ta, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(tag))); + graph.add(new TripleImpl(ta, RDF.type, TechnicalClasses.ENHANCER_ENHANCEMENT)); + if(user != null){ + graph.add(new TripleImpl(ta, Properties.DC_CREATOR,user)); + } + return ta; + } + /** + * Supports sending multipart mime as {@link ContentBody}. 
* All values are taken from the wrapped {@link HttpMultipart}.
     * TODO: maybe move such utilities to an own Multipart ContentItem
     * utility module
     * @author Rupert Westenthaler
     *
     */
    private static class MultipartContentBody extends AbstractContentBody implements ContentBody,ContentDescriptor {

        /** The multipart this body delegates to. */
        private HttpMultipart multipart;

        /**
         * Creates a body for the parsed multipart. The mime type parsed to the
         * super class is built from the sub type and the boundary of the
         * multipart.
         * @param multipart the multipart to send as body
         */
        public MultipartContentBody(HttpMultipart multipart) {
            super(mimeTypeOf(multipart));
            this.multipart = multipart;
        }

        /** Formats the "multipart/{subType}; boundary={boundary}" mime type. */
        private static String mimeTypeOf(HttpMultipart parts) {
            String subType = parts.getSubType();
            String boundary = parts.getBoundary();
            return String.format("multipart/%s; boundary=%s", subType, boundary);
        }

        /** @return always <code>null</code>, no filename is provided */
        @Override
        public String getFilename() {
            return null;
        }

        /** @return the string representation of the charset of the multipart */
        @Override
        public String getCharset() {
            return multipart.getCharset().toString();
        }

        /** @return always {@link MIME#ENC_8BIT} */
        @Override
        public String getTransferEncoding() {
            return MIME.ENC_8BIT;
        }

        /** @return the total length as reported by the multipart */
        @Override
        public long getContentLength() {
            return multipart.getTotalLength();
        }

        /** Writes the multipart to the parsed stream. */
        @Override
        public void writeTo(OutputStream out) throws IOException {
            multipart.writeTo(out);
        }

    }
}
Propchange: incubator/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/MultipartRequestTest.java ------------------------------------------------------------------------------ svn:mime-type = text/plain