Author: tallison
Date: Fri Dec 19 03:12:38 2014
New Revision: 1646616

URL: http://svn.apache.org/r1646616
Log:
TIKA-1497: add JSON and XMP output to tika-server's /meta

Added:
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java
Modified:
    tika/trunk/tika-server/pom.xml
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/RecursiveMetadataResource.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
    tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
    tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java

Modified: tika/trunk/tika-server/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/pom.xml?rev=1646616&r1=1646615&r2=1646616&view=diff
==============================================================================
--- tika/trunk/tika-server/pom.xml (original)
+++ tika/trunk/tika-server/pom.xml Fri Dec 19 03:12:38 2014
@@ -44,6 +44,11 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-xmp</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
       <groupId>net.sf.opencsv</groupId>
       <artifactId>opencsv</artifactId>
       <version>2.0</version>

Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java?rev=1646616&r1=1646615&r2=1646616&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java Fri Dec 19 03:12:38 2014
@@ -17,85 +17,65 @@
 
 package org.apache.tika.server;
 
-import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.util.ArrayList;
-import java.util.Arrays;
 
 import javax.ws.rs.Consumes;
 import javax.ws.rs.PUT;
 import javax.ws.rs.Path;
 import javax.ws.rs.Produces;
-import javax.ws.rs.WebApplicationException;
 import javax.ws.rs.core.Context;
 import javax.ws.rs.core.HttpHeaders;
 import javax.ws.rs.core.MultivaluedMap;
-import javax.ws.rs.core.StreamingOutput;
+import javax.ws.rs.core.Response;
 import javax.ws.rs.core.UriInfo;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.cxf.jaxrs.ext.multipart.Attachment;
 import org.apache.tika.config.TikaConfig;
+
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
 import org.xml.sax.helpers.DefaultHandler;
 
-import au.com.bytecode.opencsv.CSVWriter;
 
 @Path("/meta")
 public class MetadataResource {
   private static final Log logger = LogFactory.getLog(MetadataResource.class);
-  
+
   private TikaConfig tikaConfig;
+
   public MetadataResource(TikaConfig tikaConfig) {
-      this.tikaConfig = tikaConfig;
+    this.tikaConfig = tikaConfig;
   }
-  
+
   @PUT
   @Consumes("multipart/form-data")
-  @Produces("text/csv")
+  @Produces({"text/csv", "application/json", "application/rdf+xml"})
   @Path("form")
-  public StreamingOutput getMetadataFromMultipart(Attachment att, @Context 
UriInfo info) throws Exception {
-         return produceMetadata(att.getObject(InputStream.class), 
att.getHeaders(), info);
+  public Response getMetadataFromMultipart(Attachment att, @Context UriInfo 
info) throws Exception {
+    return Response.ok(
+            parseMetadata(att.getObject(InputStream.class), att.getHeaders(), 
info)).build();
   }
-  
+
   @PUT
-  @Produces("text/csv")
-  public StreamingOutput getMetadata(InputStream is, @Context HttpHeaders 
httpHeaders, @Context UriInfo info) throws Exception {
-         return produceMetadata(is, httpHeaders.getRequestHeaders(), info);
+  @Produces({"text/csv", "application/json", "application/rdf+xml"})
+  public Response getMetadata(InputStream is, @Context HttpHeaders 
httpHeaders, @Context UriInfo info) throws Exception {
+    return Response.ok(
+            parseMetadata(is, httpHeaders.getRequestHeaders(), info)).build();
   }
-  
-  private StreamingOutput produceMetadata(InputStream is, 
MultivaluedMap<String, String> httpHeaders, UriInfo info) throws Exception {
+
+  private Metadata parseMetadata(InputStream is,
+                                 MultivaluedMap<String, String> httpHeaders, 
UriInfo info) throws Exception {
     final Metadata metadata = new Metadata();
     final ParseContext context = new ParseContext();
     AutoDetectParser parser = TikaResource.createParser(tikaConfig);
     TikaResource.fillMetadata(parser, metadata, context, httpHeaders);
+    TikaResource.fillParseContext(context, httpHeaders);
     TikaResource.logRequest(logger, info, metadata);
 
     parser.parse(is, new DefaultHandler(), metadata, context);
-
-    return new StreamingOutput() {
-      public void write(OutputStream outputStream) throws IOException, 
WebApplicationException {
-        metadataToCsv(metadata, outputStream);
-      }
-    };
-  }
-
-  public static void metadataToCsv(Metadata metadata, OutputStream 
outputStream) throws IOException {
-    CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream, 
"UTF-8"));
-
-    for (String name : metadata.names()) {
-      String[] values = metadata.getValues(name);
-      ArrayList<String> list = new ArrayList<String>(values.length+1);
-      list.add(name);
-      list.addAll(Arrays.asList(values));
-      writer.writeNext(list.toArray(values));
-    }
-
-    writer.close();
+    return metadata;
   }
 }

Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/RecursiveMetadataResource.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/RecursiveMetadataResource.java?rev=1646616&r1=1646615&r2=1646616&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/RecursiveMetadataResource.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/RecursiveMetadataResource.java Fri Dec 19 03:12:38 2014
@@ -75,9 +75,10 @@ public class RecursiveMetadataResource {
     RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser,
             new BasicContentHandlerFactory(type, -1));
     TikaResource.fillMetadata(parser, metadata, context, httpHeaders);
+    TikaResource.fillParseContext(context, httpHeaders);
     TikaResource.logRequest(logger, info, metadata);
 
     wrapper.parse(is, new DefaultHandler(), metadata, context);
     return new MetadataList(wrapper.getMetadata());
   }
-}
\ No newline at end of file
+}

Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java?rev=1646616&r1=1646615&r2=1646616&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java Fri Dec 19 03:12:38 2014
@@ -118,6 +118,7 @@ public class TikaServerCli {
       providers.add(new CSVMessageBodyWriter());
       providers.add(new MetadataListMessageBodyWriter());
       providers.add(new JSONMessageBodyWriter());
+      providers.add(new XMPMessageBodyWriter());
       providers.add(new TikaExceptionMapper());
       if (logFilter != null) {
          providers.add(logFilter);

Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java?rev=1646616&r1=1646615&r2=1646616&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java (original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java Fri Dec 19 03:12:38 2014
@@ -21,7 +21,10 @@ import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.OutputStream;
 import java.io.OutputStreamWriter;
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
@@ -35,6 +38,7 @@ import javax.ws.rs.core.HttpHeaders;
 import javax.ws.rs.core.Response;
 import javax.ws.rs.core.UriInfo;
 
+import au.com.bytecode.opencsv.CSVWriter;
 import org.apache.commons.lang.mutable.MutableInt;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -142,7 +146,7 @@ public class UnpackerResource {
       files.put(TEXT_FILENAME, text.toByteArray());
 
       ByteArrayOutputStream metaStream = new ByteArrayOutputStream();
-      MetadataResource.metadataToCsv(metadata, metaStream);
+      metadataToCsv(metadata, metaStream);
 
       files.put(META_FILENAME, metaStream.toByteArray());
     }
@@ -150,6 +154,20 @@ public class UnpackerResource {
     return files;
   }
 
+  public static void metadataToCsv(Metadata metadata, OutputStream 
outputStream) throws IOException {
+    CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream, 
"UTF-8"));
+
+    for (String name : metadata.names()) {
+      String[] values = metadata.getValues(name);
+      ArrayList<String> list = new ArrayList<String>(values.length+1);
+      list.add(name);
+      list.addAll(Arrays.asList(values));
+      writer.writeNext(list.toArray(values));
+    }
+
+    writer.close();
+  }
+
   private class MyEmbeddedDocumentExtractor implements 
EmbeddedDocumentExtractor {
     private final MutableInt count;
     private final Map<String, byte[]> zout;

Added: tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java?rev=1646616&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java (added)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java Fri Dec 19 03:12:38 2014
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.server;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.xmp.XMPMetadata;
+
+import javax.ws.rs.Produces;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.MultivaluedMap;
+import javax.ws.rs.ext.MessageBodyWriter;
+import javax.ws.rs.ext.Provider;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Type;
+
+@Provider
+@Produces("application/rdf+xml")
+public class XMPMessageBodyWriter implements MessageBodyWriter<Metadata> {
+
+    private static MediaType RDF_XML = 
MediaType.valueOf("application/rdf+xml");
+
+    public boolean isWriteable(Class<?> type, Type genericType, Annotation[] 
annotations, MediaType mediaType) {
+        return mediaType.equals(RDF_XML) && 
Metadata.class.isAssignableFrom(type);
+    }
+
+    public long getSize(Metadata data, Class<?> type, Type genericType, 
Annotation[] annotations, MediaType mediaType) {
+        return -1;
+    }
+
+    @Override
+    public void writeTo(Metadata metadata, Class<?> type, Type genericType, 
Annotation[] annotations,
+          MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, 
OutputStream entityStream) throws IOException,
+        WebApplicationException {
+            try {
+                Writer writer = new OutputStreamWriter(entityStream, "UTF-8");
+                XMPMetadata xmp = new XMPMetadata(metadata);
+                writer.write(xmp.toString());
+                writer.flush();
+            } catch (TikaException e) {
+                throw new IOException(e);
+            }
+            entityStream.flush();
+    }
+}

Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java?rev=1646616&r1=1646615&r2=1646616&view=diff
==============================================================================
--- tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java (original)
+++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java Fri Dec 19 03:12:38 2014
@@ -23,14 +23,19 @@ import static org.junit.Assert.assertNot
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 import javax.ws.rs.core.Response;
 
+import org.apache.cxf.helpers.IOUtils;
 import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
 import org.apache.cxf.jaxrs.client.WebClient;
 import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.serialization.JsonMetadata;
 import org.junit.Test;
 
 import au.com.bytecode.opencsv.CSVReader;
@@ -46,7 +51,13 @@ public class MetadataResourceTest extend
     }
 
     @Override
-    protected void setUpProviders(JAXRSServerFactoryBean sf) {}
+    protected void setUpProviders(JAXRSServerFactoryBean sf) {
+        List<Object> providers = new ArrayList<Object>();
+        providers.add(new JSONMessageBodyWriter());
+        providers.add(new CSVMessageBodyWriter());
+        providers.add(new XMPMessageBodyWriter());
+        sf.setProviders(providers);
+    }
 
     @Test
     public void testSimpleWord() throws Exception {
@@ -111,4 +122,35 @@ public class MetadataResourceTest extend
         assertNotNull(metadata.get("Author"));
         assertEquals("pavel", metadata.get("Author"));
     }
+
+    @Test
+    public void testJSON() throws Exception {
+        Response response = WebClient
+                .create(endPoint + META_PATH)
+                .type("application/msword")
+                .accept("application/json")
+                .put(ClassLoader
+                        .getSystemResourceAsStream(TikaResourceTest.TEST_DOC));
+
+        Reader reader = new InputStreamReader((InputStream) 
response.getEntity(), "UTF-8");
+
+        Metadata metadata = JsonMetadata.fromJson(reader);
+        assertNotNull(metadata.get("Author"));
+        assertEquals("Maxim Valyanskiy", metadata.get("Author"));
+    }
+
+    @Test
+    public void testXMP() throws Exception {
+        Response response = WebClient
+                .create(endPoint + META_PATH)
+                .type("application/msword")
+                .accept("application/rdf+xml")
+                .put(ClassLoader
+                        .getSystemResourceAsStream(TikaResourceTest.TEST_DOC));
+
+        String result = 
IOUtils.readStringFromStream((InputStream)response.getEntity());
+        assertContains("<rdf:li>Maxim Valyanskiy</rdf:li>", result);
+    }
+
 }
+


Reply via email to