Author: tallison
Date: Fri Dec 19 03:12:38 2014
New Revision: 1646616
URL: http://svn.apache.org/r1646616
Log:
TIKA-1497: add JSON and XMP output to tika-server's /meta
Added:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java
Modified:
tika/trunk/tika-server/pom.xml
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/RecursiveMetadataResource.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
Modified: tika/trunk/tika-server/pom.xml
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/pom.xml?rev=1646616&r1=1646615&r2=1646616&view=diff
==============================================================================
--- tika/trunk/tika-server/pom.xml (original)
+++ tika/trunk/tika-server/pom.xml Fri Dec 19 03:12:38 2014
@@ -44,6 +44,11 @@
<version>${project.version}</version>
</dependency>
<dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>tika-xmp</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
<groupId>net.sf.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>2.0</version>
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java?rev=1646616&r1=1646615&r2=1646616&view=diff
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
(original)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
Fri Dec 19 03:12:38 2014
@@ -17,85 +17,65 @@
package org.apache.tika.server;
-import java.io.IOException;
import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.util.ArrayList;
-import java.util.Arrays;
import javax.ws.rs.Consumes;
import javax.ws.rs.PUT;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
-import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.MultivaluedMap;
-import javax.ws.rs.core.StreamingOutput;
+import javax.ws.rs.core.Response;
import javax.ws.rs.core.UriInfo;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.cxf.jaxrs.ext.multipart.Attachment;
import org.apache.tika.config.TikaConfig;
+
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.xml.sax.helpers.DefaultHandler;
-import au.com.bytecode.opencsv.CSVWriter;
@Path("/meta")
public class MetadataResource {
private static final Log logger = LogFactory.getLog(MetadataResource.class);
-
+
private TikaConfig tikaConfig;
+
public MetadataResource(TikaConfig tikaConfig) {
- this.tikaConfig = tikaConfig;
+ this.tikaConfig = tikaConfig;
}
-
+
@PUT
@Consumes("multipart/form-data")
- @Produces("text/csv")
+ @Produces({"text/csv", "application/json", "application/rdf+xml"})
@Path("form")
- public StreamingOutput getMetadataFromMultipart(Attachment att, @Context
UriInfo info) throws Exception {
- return produceMetadata(att.getObject(InputStream.class),
att.getHeaders(), info);
+ public Response getMetadataFromMultipart(Attachment att, @Context UriInfo
info) throws Exception {
+ return Response.ok(
+ parseMetadata(att.getObject(InputStream.class), att.getHeaders(),
info)).build();
}
-
+
@PUT
- @Produces("text/csv")
- public StreamingOutput getMetadata(InputStream is, @Context HttpHeaders
httpHeaders, @Context UriInfo info) throws Exception {
- return produceMetadata(is, httpHeaders.getRequestHeaders(), info);
+ @Produces({"text/csv", "application/json", "application/rdf+xml"})
+ public Response getMetadata(InputStream is, @Context HttpHeaders
httpHeaders, @Context UriInfo info) throws Exception {
+ return Response.ok(
+ parseMetadata(is, httpHeaders.getRequestHeaders(), info)).build();
}
-
- private StreamingOutput produceMetadata(InputStream is,
MultivaluedMap<String, String> httpHeaders, UriInfo info) throws Exception {
+
+ private Metadata parseMetadata(InputStream is,
+ MultivaluedMap<String, String> httpHeaders,
UriInfo info) throws Exception {
final Metadata metadata = new Metadata();
final ParseContext context = new ParseContext();
AutoDetectParser parser = TikaResource.createParser(tikaConfig);
TikaResource.fillMetadata(parser, metadata, context, httpHeaders);
+ TikaResource.fillParseContext(context, httpHeaders);
TikaResource.logRequest(logger, info, metadata);
parser.parse(is, new DefaultHandler(), metadata, context);
-
- return new StreamingOutput() {
- public void write(OutputStream outputStream) throws IOException,
WebApplicationException {
- metadataToCsv(metadata, outputStream);
- }
- };
- }
-
- public static void metadataToCsv(Metadata metadata, OutputStream
outputStream) throws IOException {
- CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream,
"UTF-8"));
-
- for (String name : metadata.names()) {
- String[] values = metadata.getValues(name);
- ArrayList<String> list = new ArrayList<String>(values.length+1);
- list.add(name);
- list.addAll(Arrays.asList(values));
- writer.writeNext(list.toArray(values));
- }
-
- writer.close();
+ return metadata;
}
}
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/RecursiveMetadataResource.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/RecursiveMetadataResource.java?rev=1646616&r1=1646615&r2=1646616&view=diff
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/RecursiveMetadataResource.java
(original)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/RecursiveMetadataResource.java
Fri Dec 19 03:12:38 2014
@@ -75,9 +75,10 @@ public class RecursiveMetadataResource {
RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser,
new BasicContentHandlerFactory(type, -1));
TikaResource.fillMetadata(parser, metadata, context, httpHeaders);
+ TikaResource.fillParseContext(context, httpHeaders);
TikaResource.logRequest(logger, info, metadata);
wrapper.parse(is, new DefaultHandler(), metadata, context);
return new MetadataList(wrapper.getMetadata());
}
-}
\ No newline at end of file
+}
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java?rev=1646616&r1=1646615&r2=1646616&view=diff
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
(original)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
Fri Dec 19 03:12:38 2014
@@ -118,6 +118,7 @@ public class TikaServerCli {
providers.add(new CSVMessageBodyWriter());
providers.add(new MetadataListMessageBodyWriter());
providers.add(new JSONMessageBodyWriter());
+ providers.add(new XMPMessageBodyWriter());
providers.add(new TikaExceptionMapper());
if (logFilter != null) {
providers.add(logFilter);
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java?rev=1646616&r1=1646615&r2=1646616&view=diff
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
(original)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
Fri Dec 19 03:12:38 2014
@@ -21,7 +21,10 @@ import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.OutputStream;
import java.io.OutputStreamWriter;
+import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
@@ -35,6 +38,7 @@ import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.UriInfo;
+import au.com.bytecode.opencsv.CSVWriter;
import org.apache.commons.lang.mutable.MutableInt;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -142,7 +146,7 @@ public class UnpackerResource {
files.put(TEXT_FILENAME, text.toByteArray());
ByteArrayOutputStream metaStream = new ByteArrayOutputStream();
- MetadataResource.metadataToCsv(metadata, metaStream);
+ metadataToCsv(metadata, metaStream);
files.put(META_FILENAME, metaStream.toByteArray());
}
@@ -150,6 +154,20 @@ public class UnpackerResource {
return files;
}
+ public static void metadataToCsv(Metadata metadata, OutputStream
outputStream) throws IOException {
+ CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream,
"UTF-8"));
+
+ for (String name : metadata.names()) {
+ String[] values = metadata.getValues(name);
+ ArrayList<String> list = new ArrayList<String>(values.length+1);
+ list.add(name);
+ list.addAll(Arrays.asList(values));
+ writer.writeNext(list.toArray(values));
+ }
+
+ writer.close();
+ }
+
private class MyEmbeddedDocumentExtractor implements
EmbeddedDocumentExtractor {
private final MutableInt count;
private final Map<String, byte[]> zout;
Added:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java?rev=1646616&view=auto
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java
(added)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java
Fri Dec 19 03:12:38 2014
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.xmp.XMPMetadata;
+
+import javax.ws.rs.Produces;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.MultivaluedMap;
+import javax.ws.rs.ext.MessageBodyWriter;
+import javax.ws.rs.ext.Provider;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Type;
+
+@Provider
+@Produces("application/rdf+xml")
+public class XMPMessageBodyWriter implements MessageBodyWriter<Metadata> {
+
+ private static MediaType RDF_XML =
MediaType.valueOf("application/rdf+xml");
+
+ public boolean isWriteable(Class<?> type, Type genericType, Annotation[]
annotations, MediaType mediaType) {
+ return mediaType.equals(RDF_XML) &&
Metadata.class.isAssignableFrom(type);
+ }
+
+ public long getSize(Metadata data, Class<?> type, Type genericType,
Annotation[] annotations, MediaType mediaType) {
+ return -1;
+ }
+
+ @Override
+ public void writeTo(Metadata metadata, Class<?> type, Type genericType,
Annotation[] annotations,
+ MediaType mediaType, MultivaluedMap<String, Object> httpHeaders,
OutputStream entityStream) throws IOException,
+ WebApplicationException {
+ try {
+ Writer writer = new OutputStreamWriter(entityStream, "UTF-8");
+ XMPMetadata xmp = new XMPMetadata(metadata);
+ writer.write(xmp.toString());
+ writer.flush();
+ } catch (TikaException e) {
+ throw new IOException(e);
+ }
+ entityStream.flush();
+ }
+}
Modified:
tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java?rev=1646616&r1=1646615&r2=1646616&view=diff
==============================================================================
---
tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
(original)
+++
tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
Fri Dec 19 03:12:38 2014
@@ -23,14 +23,19 @@ import static org.junit.Assert.assertNot
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import javax.ws.rs.core.Response;
+import org.apache.cxf.helpers.IOUtils;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.serialization.JsonMetadata;
import org.junit.Test;
import au.com.bytecode.opencsv.CSVReader;
@@ -46,7 +51,13 @@ public class MetadataResourceTest extend
}
@Override
- protected void setUpProviders(JAXRSServerFactoryBean sf) {}
+ protected void setUpProviders(JAXRSServerFactoryBean sf) {
+ List<Object> providers = new ArrayList<Object>();
+ providers.add(new JSONMessageBodyWriter());
+ providers.add(new CSVMessageBodyWriter());
+ providers.add(new XMPMessageBodyWriter());
+ sf.setProviders(providers);
+ }
@Test
public void testSimpleWord() throws Exception {
@@ -111,4 +122,35 @@ public class MetadataResourceTest extend
assertNotNull(metadata.get("Author"));
assertEquals("pavel", metadata.get("Author"));
}
+
+ @Test
+ public void testJSON() throws Exception {
+ Response response = WebClient
+ .create(endPoint + META_PATH)
+ .type("application/msword")
+ .accept("application/json")
+ .put(ClassLoader
+ .getSystemResourceAsStream(TikaResourceTest.TEST_DOC));
+
+ Reader reader = new InputStreamReader((InputStream)
response.getEntity(), "UTF-8");
+
+ Metadata metadata = JsonMetadata.fromJson(reader);
+ assertNotNull(metadata.get("Author"));
+ assertEquals("Maxim Valyanskiy", metadata.get("Author"));
+ }
+
+ @Test
+ public void testXMP() throws Exception {
+ Response response = WebClient
+ .create(endPoint + META_PATH)
+ .type("application/msword")
+ .accept("application/rdf+xml")
+ .put(ClassLoader
+ .getSystemResourceAsStream(TikaResourceTest.TEST_DOC));
+
+ String result =
IOUtils.readStringFromStream((InputStream)response.getEntity());
+ assertContains("<rdf:li>Maxim Valyanskiy</rdf:li>", result);
+ }
+
}
+