Author: maxcom
Date: Tue Aug 2 11:45:50 2011
New Revision: 1153097
URL: http://svn.apache.org/viewvc?rev=1153097&view=rev
Log:
TIKA-593: Update tika-server
1) Synchronize codebase with our production code. This fixes a few bugs and
results in cleaner code
2) Update Jersey dependency to latest version (1.0.3 -> 1.7), port code to
JAX-RS 1.1
3) Replace the Grizzly web server with Jetty, which is available from the
Maven Central repository.
Tika-server still requires java.net's Jersey artifact; I'm going to try to
replace Jersey with
Apache (Incubator) Wink.
Added:
tika/trunk/tika-server/src/main/resources/tikaserver-version.properties
Modified:
tika/trunk/tika-server/pom.xml
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/PartExtractor.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaExceptionMapper.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipOutput.java
tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipUtils.java
tika/trunk/tika-server/src/main/resources/commons-logging.properties
tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
Modified: tika/trunk/tika-server/pom.xml
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/pom.xml?rev=1153097&r1=1153096&r2=1153097&view=diff
==============================================================================
--- tika/trunk/tika-server/pom.xml (original)
+++ tika/trunk/tika-server/pom.xml Tue Aug 2 11:45:50 2011
@@ -1,3 +1,20 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/maven-v4_0_0.xsd">
<!--
@@ -41,33 +58,38 @@
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-server</artifactId>
- <version>1.0.3.1</version>
+ <version>1.7</version>
</dependency>
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-core</artifactId>
- <version>1.0.3.1</version>
+ <version>1.7</version>
</dependency>
<dependency>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-client</artifactId>
- <version>1.0.3.1</version>
- </dependency>
- <dependency>
<groupId>javax.ws.rs</groupId>
<artifactId>jsr311-api</artifactId>
- <version>1.0</version>
+ <version>1.1</version>
</dependency>
<dependency>
- <groupId>com.sun.jersey.test.framework</groupId>
- <artifactId>jersey-test-framework</artifactId>
- <version>1.0.3.1</version>
+ <groupId>com.sun.jersey.jersey-test-framework</groupId>
+ <artifactId>jersey-test-framework-grizzly</artifactId>
+ <version>1.7</version>
<scope>test</scope>
</dependency>
<dependency>
- <groupId>com.sun.grizzly</groupId>
- <artifactId>grizzly-servlet-webserver</artifactId>
- <version>1.9.8</version>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-server</artifactId>
+ <version>8.0.0.M3</version>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-servlet</artifactId>
+ <version>8.0.0.M3</version>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-jdk14</artifactId>
+ <version>1.6.1</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
@@ -155,8 +177,8 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
+ <version>2.6</version>
<configuration>
- <version>2.6</version>
<redirectTestOutputToFile>true</redirectTestOutputToFile>
<argLine>-da -XX:+HeapDumpOnOutOfMemoryError
-Xmx512m</argLine>
<!-- <argLine>-agentlib:jprofilerti=port=8849
-Xbootclasspath/a:/arc/opt/jprofiler5/bin/agent.jar</argLine> -->
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java?rev=1153097&r1=1153096&r2=1153097&view=diff
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
(original)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/MetadataResource.java
Tue Aug 2 11:45:50 2011
@@ -14,17 +14,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.server;
import au.com.bytecode.opencsv.CSVWriter;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.detect.Detector;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.mime.MimeTypeException;
import org.xml.sax.helpers.DefaultHandler;
import javax.ws.rs.PUT;
@@ -34,30 +31,27 @@ import javax.ws.rs.WebApplicationExcepti
import javax.ws.rs.core.Context;
import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.StreamingOutput;
+import javax.ws.rs.core.UriInfo;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.List;
-@Path("/meta")
+@Path("/meta{id:(/.*)?}")
public class MetadataResource {
- private static final String CONTENT_LENGTH = "Content-Length";
- private static final String FILE_NNAME = "File-Name";
- private static final String RESOURCE_NAME = "resourceName";
+ private static final Log logger = LogFactory.getLog(MetadataResource.class);
@PUT
@Produces("text/csv")
- public StreamingOutput getMetadata( InputStream is, @Context HttpHeaders
httpHeaders ) throws Exception {
- final Detector detector = new HeaderTrustingDetectorFactory
().createDetector( httpHeaders );
- final AutoDetectParser parser = new AutoDetectParser(detector);
- final ParseContext context = new ParseContext();
- context.set(Parser.class, parser);
+ public StreamingOutput getMetadata(InputStream is, @Context HttpHeaders
httpHeaders, @Context UriInfo info) throws Exception {
final Metadata metadata = new Metadata();
- parser.parse( is, new DefaultHandler(), metadata, context );
- fillMetadata ( httpHeaders, metadata );
+ AutoDetectParser parser = TikaResource.createParser();
+ TikaResource.fillMetadata(parser, metadata, httpHeaders);
+ TikaResource.logRequest(logger, info, metadata);
+
+ parser.parse(is, new DefaultHandler(), metadata);
return new StreamingOutput() {
public void write(OutputStream outputStream) throws IOException,
WebApplicationException {
@@ -73,26 +67,4 @@ public class MetadataResource {
}
};
}
-
- private void fillMetadata ( HttpHeaders httpHeaders, Metadata metadata ) {
- final List < String > fileName = httpHeaders.getRequestHeader(FILE_NNAME),
cl = httpHeaders.getRequestHeader(CONTENT_LENGTH);
- if ( cl != null && !cl.isEmpty() )
- metadata.set( CONTENT_LENGTH, cl.get(0) );
-
- if ( fileName != null && !fileName.isEmpty() )
- metadata.set( RESOURCE_NAME, fileName.get(0) );
- }
-
- private static class HeaderTrustingDetectorFactory {
- public Detector createDetector( HttpHeaders httpHeaders ) throws
IOException, MimeTypeException {
- final javax.ws.rs.core.MediaType mediaType = httpHeaders.getMediaType();
- if (mediaType == null ||
mediaType.equals(javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE ))
- return (new TikaConfig()).getMimeRepository();
- else return new Detector() {
- public MediaType detect(InputStream inputStream, Metadata metadata)
throws IOException {
- return MediaType.parse( mediaType.toString() );
- }
- };
- }
- }
}
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/PartExtractor.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/PartExtractor.java?rev=1153097&r1=1153096&r2=1153097&view=diff
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/PartExtractor.java
(original)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/PartExtractor.java
Tue Aug 2 11:45:50 2011
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.server;
import java.io.IOException;
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaExceptionMapper.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaExceptionMapper.java?rev=1153097&r1=1153096&r2=1153097&view=diff
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaExceptionMapper.java
(original)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaExceptionMapper.java
Tue Aug 2 11:45:50 2011
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.server;
import org.apache.tika.exception.TikaException;
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java?rev=1153097&r1=1153096&r2=1153097&view=diff
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
(original)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java
Tue Aug 2 11:45:50 2011
@@ -14,38 +14,48 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.server;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.html.HtmlParser;
import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.WriteOutContentHandler;
+import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import javax.ws.rs.*;
-import javax.ws.rs.core.Context;
-import javax.ws.rs.core.HttpHeaders;
-import javax.ws.rs.core.Response;
-import javax.ws.rs.core.StreamingOutput;
+import javax.ws.rs.core.*;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.util.List;
+import java.util.Map;
import java.util.Set;
-@Path("/tika")
+@Path("/tika{id:(/.*)?}")
public class TikaResource {
public static final String GREETING = "This is Tika Server. Please PUT\n";
private final Log logger = LogFactory.getLog(TikaResource.class);
+ static {
+ ExtractorFactory.setAllThreadsPreferEventExtractors(true);
+ }
+
@SuppressWarnings({"SameReturnValue"})
@GET
@Produces("text/plain")
@@ -56,7 +66,11 @@ public class TikaResource {
public static AutoDetectParser createParser() {
final AutoDetectParser parser = new AutoDetectParser();
- parser.setFallback(new AbstractParser() {
+ Map<MediaType,Parser> parsers = parser.getParsers();
+ parsers.put(MediaType.APPLICATION_XML, new HtmlParser());
+ parser.setParsers(parsers);
+
+ parser.setFallback(new Parser() {
public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
return parser.getSupportedTypes(parseContext);
}
@@ -64,15 +78,31 @@ public class TikaResource {
public void parse(InputStream inputStream, ContentHandler
contentHandler, Metadata metadata, ParseContext parseContext) {
throw new
WebApplicationException(Response.Status.UNSUPPORTED_MEDIA_TYPE);
}
+
+ public void parse(InputStream inputStream, ContentHandler
contentHandler, Metadata metadata) {
+ throw new
WebApplicationException(Response.Status.UNSUPPORTED_MEDIA_TYPE);
+ }
});
return parser;
}
public static void fillMetadata(AutoDetectParser parser, Metadata metadata,
HttpHeaders httpHeaders) {
+ List<String> fileName = httpHeaders.getRequestHeader("File-Name");
+ if (fileName!=null && !fileName.isEmpty()) {
+ metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName.get(0));
+ }
+
javax.ws.rs.core.MediaType mediaType = httpHeaders.getMediaType();
+ if (mediaType!=null && "xml".equals(mediaType.getSubtype()) ) {
+ mediaType = null;
+ }
+
+ if (mediaType !=null &&
mediaType.equals(javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE)) {
+ mediaType = null;
+ }
- if (mediaType !=null &&
!mediaType.equals(javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE)) {
+ if (mediaType !=null) {
metadata.add(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE,
mediaType.toString());
final Detector detector = parser.getDetector();
@@ -94,18 +124,41 @@ public class TikaResource {
@PUT
@Consumes("*/*")
@Produces("text/plain")
- public StreamingOutput getText(final InputStream is, @Context HttpHeaders
httpHeaders) {
+ public StreamingOutput getText(final InputStream is, @Context HttpHeaders
httpHeaders, @Context final UriInfo info) {
final AutoDetectParser parser = createParser();
final Metadata metadata = new Metadata();
fillMetadata(parser, metadata, httpHeaders);
+ logRequest(logger, info, metadata);
+
return new StreamingOutput() {
public void write(OutputStream outputStream) throws IOException,
WebApplicationException {
- BodyContentHandler body = new BodyContentHandler(outputStream);
+ BodyContentHandler body = new BodyContentHandler(new
WriteOutContentHandler(outputStream) {
+ @Override
+ public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
+ super.startElement(uri, localName, qName, attributes);
+
+ if ("img".equals(localName) && attributes.getValue("alt")!=null) {
+ String nfo = "[image: "+attributes.getValue("alt")+ ']';
+
+ characters(nfo.toCharArray(), 0, nfo.length());
+ }
+
+ if ("a".equals(localName) && attributes.getValue("name")!=null) {
+ String nfo = "[bookmark: "+attributes.getValue("name")+ ']';
+
+ characters(nfo.toCharArray(), 0, nfo.length());
+ }
+ }
+ });
+
+ TikaInputStream tis = TikaInputStream.get(is);
try {
- parser.parse(is, body, metadata);
+ tis.getFile();
+
+ parser.parse(tis, body, metadata);
} catch (SAXException e) {
throw new WebApplicationException(e);
} catch (TikaException e) {
@@ -125,11 +178,31 @@ public class TikaResource {
throw new WebApplicationException(Response.status(422).build());
}
- logger.warn("Text extraction failed", e);
+ logger.warn(String.format(
+ "%s: Text extraction failed",
+ info.getPath()
+ ), e);
throw new
WebApplicationException(Response.Status.INTERNAL_SERVER_ERROR);
+ } finally {
+ tis.close();
}
}
};
}
+
+ public static void logRequest(Log logger, UriInfo info, Metadata metadata) {
+ if (metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE)==null)
{
+ logger.info(String.format(
+ "%s (autodetecting type)",
+ info.getPath()
+ ));
+ } else {
+ logger.info(String.format(
+ "%s (%s)",
+ info.getPath(),
+ metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE)
+ ));
+ }
+ }
}
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java?rev=1153097&r1=1153096&r2=1153097&view=diff
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
(original)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java
Tue Aug 2 11:45:50 2011
@@ -14,42 +14,45 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.server;
-import com.sun.grizzly.http.SelectorThread;
-import com.sun.jersey.api.container.grizzly.GrizzlyWebContainerFactory;
+import com.sun.jersey.api.core.PackagesResourceConfig;
+import com.sun.jersey.spi.container.servlet.ServletContainer;
import org.apache.commons.cli.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.eclipse.jetty.server.Server;
+import org.eclipse.jetty.servlet.ServletContextHandler;
+import org.eclipse.jetty.servlet.ServletHolder;
-import javax.ws.rs.core.UriBuilder;
-import java.net.URI;
-import java.util.HashMap;
-import java.util.Map;
+import java.io.IOException;
+import java.util.Properties;
public class TikaServerCli {
private static final Log logger = LogFactory.getLog(TikaServerCli.class);
-
public static final int DEFAULT_PORT = 9998;
private static Options getOptions() {
Options options = new Options();
options.addOption("p", "port", true, "listen port (default =
"+DEFAULT_PORT+ ')');
-
options.addOption("h", "help", false, "this help message");
return options;
}
public static void main(String[] args) {
+ Properties properties = new Properties();
try {
- TikaServerCli cli = new TikaServerCli();
-
- Map<String, String> params = new HashMap<String, String>();
+
properties.load(ClassLoader.getSystemResourceAsStream("tikaserver-version.properties"));
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
- params.put("com.sun.jersey.config.property.packages",
"org.apache.tika.server");
+ logger.info("Starting Tikaserver
"+properties.getProperty("tikaserver.version"));
- Options options = cli.getOptions();
+ try {
+ Options options = getOptions();
CommandLineParser cliParser = new GnuParser();
CommandLine line = cliParser.parse(options, args);
@@ -59,18 +62,24 @@ public class TikaServerCli {
if (line.hasOption("port")) {
port = Integer.valueOf(line.getOptionValue("port"));
}
-
if (line.hasOption("help")) {
HelpFormatter helpFormatter = new HelpFormatter();
helpFormatter.printHelp("tikaserver", options);
System.exit(-1);
}
- String baseUri = "http://localhost/";
- URI buildUri = UriBuilder.fromUri(baseUri).port(port).build();
- SelectorThread threadSelector =
GrizzlyWebContainerFactory.create(buildUri, params);
+ Server server = new Server(port);
+ ServletContextHandler context = new
ServletContextHandler(ServletContextHandler.NO_SESSIONS);
+ context.setContextPath("/");
+ server.setHandler(context);
+
+ context.addServlet(new ServletHolder(new ServletContainer(new
PackagesResourceConfig("org.apache.tika.server"))), "/*");
+
+ server.start();
+
+ logger.info("Started");
- logger.info("Started at " + buildUri);
+ server.join();
} catch (Exception ex) {
logger.fatal("Can't start", ex);
System.exit(-1);
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java?rev=1153097&r1=1153096&r2=1153097&view=diff
==============================================================================
---
tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
(original)
+++
tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java
Tue Aug 2 11:45:50 2011
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.server;
import org.apache.commons.lang.mutable.MutableInt;
@@ -24,13 +25,13 @@ import org.apache.poi.poifs.filesystem.O
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.IOUtils;
import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMetadataKeys;
-import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeTypeException;
+import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -40,15 +41,15 @@ import javax.ws.rs.PUT;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
-import javax.ws.rs.core.Context;
-import javax.ws.rs.core.HttpHeaders;
-import javax.ws.rs.core.Response;
-import javax.ws.rs.core.StreamingOutput;
-import java.io.*;
+import javax.ws.rs.core.*;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
import java.util.Collections;
import java.util.zip.ZipOutputStream;
-@Path("/unpacker")
+@Path("/unpacker{id:(/.*)?}")
public class UnpackerResource {
private static final Log logger = LogFactory.getLog(UnpackerResource.class);
@@ -62,25 +63,15 @@ public class UnpackerResource {
@Produces("application/zip")
public StreamingOutput getText(
InputStream is,
- @Context HttpHeaders httpHeaders
+ @Context HttpHeaders httpHeaders,
+ @Context UriInfo info
) throws Exception {
- if (!is.markSupported()) {
- is = new BufferedInputStream(is);
- }
-
- Parser parser;
+ Metadata metadata = new Metadata();
- javax.ws.rs.core.MediaType mediaType = httpHeaders.getMediaType();
- if (mediaType !=null &&
!mediaType.equals(javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE)) {
- parser = tikaConfig.getParser(new
MediaType(httpHeaders.getMediaType().getType(),
httpHeaders.getMediaType().getSubtype()));
- } else {
- MediaType type = tikaConfig.getMimeRepository().detect(is, new
Metadata());
- parser = tikaConfig.getParser(type);
- }
+ AutoDetectParser parser = TikaResource.createParser();
- if (parser==null) {
- throw new
WebApplicationException(Response.Status.UNSUPPORTED_MEDIA_TYPE);
- }
+ TikaResource.fillMetadata(parser, metadata, httpHeaders);
+ TikaResource.logRequest(logger, info, metadata);
ContentHandler ch = new DefaultHandler();
@@ -91,9 +82,16 @@ public class UnpackerResource {
pc.set(EmbeddedDocumentExtractor.class, new
MyEmbeddedDocumentExtractor(count, zout));
- parser.parse(is, ch, new Metadata(), pc);
+ try {
+ parser.parse(is, ch, metadata, pc);
+ } catch (TikaException ex) {
+ logger.warn(String.format(
+ "%s: Unpacker failed",
+ info.getPath()
+ ), ex);
+ }
- if (count.intValue()==0) {
+ if (count.intValue() == 0) {
throw new WebApplicationException(Response.Status.NO_CONTENT);
}
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipOutput.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipOutput.java?rev=1153097&r1=1153096&r2=1153097&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipOutput.java
(original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipOutput.java
Tue Aug 2 11:45:50 2011
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.server;
import javax.ws.rs.WebApplicationException;
Modified:
tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipUtils.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipUtils.java?rev=1153097&r1=1153096&r2=1153097&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipUtils.java
(original)
+++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipUtils.java
Tue Aug 2 11:45:50 2011
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.server;
import java.io.IOException;
Modified: tika/trunk/tika-server/src/main/resources/commons-logging.properties
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/resources/commons-logging.properties?rev=1153097&r1=1153096&r2=1153097&view=diff
==============================================================================
--- tika/trunk/tika-server/src/main/resources/commons-logging.properties
(original)
+++ tika/trunk/tika-server/src/main/resources/commons-logging.properties Tue
Aug 2 11:45:50 2011
@@ -1,3 +1,4 @@
+#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
@@ -12,4 +13,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+#
org.apache.commons.logging.Log=org.apache.commons.logging.impl.Jdk14Logger
Added: tika/trunk/tika-server/src/main/resources/tikaserver-version.properties
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/resources/tikaserver-version.properties?rev=1153097&view=auto
==============================================================================
--- tika/trunk/tika-server/src/main/resources/tikaserver-version.properties
(added)
+++ tika/trunk/tika-server/src/main/resources/tikaserver-version.properties Tue
Aug 2 11:45:50 2011
@@ -0,0 +1,18 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+tikaserver.version=${project.version}
Modified:
tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java?rev=1153097&r1=1153096&r2=1153097&view=diff
==============================================================================
---
tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
(original)
+++
tika/trunk/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
Tue Aug 2 11:45:50 2011
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.server;
import au.com.bytecode.opencsv.CSVReader;
@@ -36,7 +37,7 @@ public class MetadataResourceTest extend
@Test
public void testSimpleWord() throws Exception {
Reader reader =
- webResource.path(META_PATH)
+ resource().path(META_PATH)
.type("application/msword")
.put(Reader.class,
ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC));
Modified:
tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java?rev=1153097&r1=1153096&r2=1153097&view=diff
==============================================================================
---
tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
(original)
+++
tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
Tue Aug 2 11:45:50 2011
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.server;
import com.sun.jersey.api.client.ClientResponse;
@@ -38,14 +39,14 @@ public class TikaResourceTest extends Je
*/
@Test
public void testHelloWorld() {
- String responseMsg = webResource.path(TIKA_PATH).get(String.class);
+ String responseMsg = resource().path(TIKA_PATH).get(String.class);
assertEquals(TikaResource.GREETING, responseMsg);
}
@Test
public void testSimpleWord() {
String responseMsg =
- webResource.path(TIKA_PATH)
+ resource().path(TIKA_PATH)
.type("application/msword")
.put(String.class,
ClassLoader.getSystemResourceAsStream(TEST_DOC));
@@ -54,7 +55,7 @@ public class TikaResourceTest extends Je
@Test
public void testApplicationWadl() {
- String serviceWadl = webResource.path("application.wadl").
+ String serviceWadl = resource().path("application.wadl").
accept(MediaTypes.WADL).get(String.class);
assertTrue(serviceWadl.length() > 0);
@@ -63,7 +64,7 @@ public class TikaResourceTest extends Je
@Test
public void testPasswordXLS() throws Exception {
ClientResponse cr =
- webResource
+ resource()
.path(TIKA_PATH)
.type("application/vnd.ms-excel")
.put(ClientResponse.class,
ClassLoader.getSystemResourceAsStream("password.xls"));
Modified:
tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java?rev=1153097&r1=1153096&r2=1153097&view=diff
==============================================================================
---
tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
(original)
+++
tika/trunk/tika-server/src/test/java/org/apache/tika/server/UnpackerResourceTest.java
Tue Aug 2 11:45:50 2011
@@ -14,9 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.server;
-import com.sun.jersey.api.client.ClientResponse;
import com.sun.jersey.test.framework.JerseyTest;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.tika.io.IOUtils;
@@ -29,8 +29,6 @@ import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
public class UnpackerResourceTest extends JerseyTest {
private static final String UNPACKER_PATH = "/unpacker";
@@ -71,7 +69,7 @@ public class UnpackerResourceTest extend
@Test
public void testDocWAV() throws Exception {
InputStream is =
- webResource
+ resource()
.path(UNPACKER_PATH)
.type(APPLICATION_MSWORD)
.put(InputStream.class,
ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
@@ -87,7 +85,7 @@ public class UnpackerResourceTest extend
@Test
public void testDocPicture() throws Exception {
InputStream is =
- webResource
+ resource()
.path(UNPACKER_PATH)
.type(APPLICATION_MSWORD)
.put(InputStream.class,
ClassLoader.getSystemResourceAsStream(TEST_DOC_WAV));
@@ -102,7 +100,7 @@ public class UnpackerResourceTest extend
@Test
public void testDocPictureNoOle() throws Exception {
InputStream is =
- webResource
+ resource()
.path(UNPACKER_PATH)
.type(APPLICATION_MSWORD)
.put(InputStream.class,
ClassLoader.getSystemResourceAsStream("2pic.doc"));
@@ -117,7 +115,7 @@ public class UnpackerResourceTest extend
@Test
public void testImageDOCX() throws Exception {
InputStream is =
- webResource
+ resource()
.path(UNPACKER_PATH)
.put(InputStream.class,
ClassLoader.getSystemResourceAsStream(TEST_DOCX_IMAGE));
@@ -133,7 +131,7 @@ public class UnpackerResourceTest extend
public void testExeDOCX() throws Exception {
String TEST_DOCX_EXE = "2exe.docx";
InputStream is =
- webResource
+ resource()
.path(UNPACKER_PATH)
.put(InputStream.class,
ClassLoader.getSystemResourceAsStream(TEST_DOCX_EXE));
@@ -163,7 +161,7 @@ public class UnpackerResourceTest extend
@Test
public void testImageXSL() throws Exception {
InputStream is =
- webResource
+ resource()
.path(UNPACKER_PATH)
.put(InputStream.class,
ClassLoader.getSystemResourceAsStream("pic.xls"));