Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java?rev=1661200&r1=1661199&r2=1661200&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaResource.java Fri Feb 20 19:29:42 2015 @@ -75,288 +75,170 @@ import org.xml.sax.SAXException; @Path("/tika") public class TikaResource { - public static final String GREETING = "This is Tika Server. Please PUT\n"; - public static final String X_TIKA_OCR_HEADER_PREFIX = "X-Tika-OCR"; - public static final String X_TIKA_PDF_HEADER_PREFIX = "X-Tika-PDF"; - - - private final Log logger = LogFactory.getLog(TikaResource.class); - - private TikaConfig tikaConfig; - public TikaResource(TikaConfig tikaConfig) { - this.tikaConfig = tikaConfig; - } - - static { - ExtractorFactory.setAllThreadsPreferEventExtractors(true); - } - - @GET - @Produces("text/plain") - public String getMessage() { - return GREETING; - } - - @SuppressWarnings("serial") - public static AutoDetectParser createParser(TikaConfig tikaConfig) { - final AutoDetectParser parser = new AutoDetectParser(tikaConfig); - - Map<MediaType,Parser> parsers = parser.getParsers(); - parsers.put(MediaType.APPLICATION_XML, new HtmlParser()); - parser.setParsers(parsers); - - parser.setFallback(new Parser() { - public Set<MediaType> getSupportedTypes(ParseContext parseContext) { - return parser.getSupportedTypes(parseContext); - } - - public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) { - throw new WebApplicationException(Response.Status.UNSUPPORTED_MEDIA_TYPE); - } - }); - - return parser; - } - - public static String detectFilename(MultivaluedMap<String, String> httpHeaders) { - - String disposition = httpHeaders.getFirst("Content-Disposition"); - if (disposition != null) { - try { - ContentDisposition c = new ContentDisposition(disposition); - - // only support "attachment" dispositions - if ("attachment".equals(c.getDisposition())) { - String fn = c.getParameter("filename"); - if (fn != null) { - return fn; - } - } - } catch (ParseException e) { - // not a valid content-disposition field - } - } - - // this really should not be used, since it's not an official field - return httpHeaders.getFirst("File-Name"); - } - - public static void fillParseContext(ParseContext parseContext, MultivaluedMap<String, String> httpHeaders) { - TesseractOCRConfig ocrConfig = new TesseractOCRConfig(); - PDFParserConfig pdfParserConfig = new PDFParserConfig(); - for (String key : httpHeaders.keySet()) { - if (StringUtils.startsWith(key, X_TIKA_OCR_HEADER_PREFIX)) { - processHeaderConfig(httpHeaders, ocrConfig, key, X_TIKA_OCR_HEADER_PREFIX); - } else if (StringUtils.startsWith(key, X_TIKA_PDF_HEADER_PREFIX)) { - processHeaderConfig(httpHeaders, pdfParserConfig, key, X_TIKA_PDF_HEADER_PREFIX); - } - } - parseContext.set(TesseractOCRConfig.class, ocrConfig); - parseContext.set(PDFParserConfig.class, pdfParserConfig); - } - - /** - * Utility method to set a property on a class via reflection. - * - * @param httpHeaders the HTTP headers set. - * @param object the <code>Object</code> to set the property on. - * @param key the key of the HTTP Header. - * @param prefix the name of the HTTP Header prefix used to find property. - * @throws WebApplicationException thrown when field cannot be found. - */ - private static void processHeaderConfig(MultivaluedMap<String, String> httpHeaders, Object object, String key, String prefix) { - try { - String property = StringUtils.removeStart(key, prefix); - Field field = object.getClass().getDeclaredField(StringUtils.uncapitalize(property)); - field.setAccessible(true); - if (field.getType() == String.class) { - field.set(object, httpHeaders.getFirst(key)); - } else if (field.getType() == int.class) { - field.setInt(object, Integer.parseInt(httpHeaders.getFirst(key))); - } else if (field.getType() == double.class) { - field.setDouble(object, Double.parseDouble(httpHeaders.getFirst(key))); - } else if (field.getType() == boolean.class) { - field.setBoolean(object, Boolean.parseBoolean(httpHeaders.getFirst(key))); - } - } catch (Throwable ex) { - throw new WebApplicationException(String.format(Locale.ROOT, - "%s is an invalid %s header", key, X_TIKA_OCR_HEADER_PREFIX)); - } - } - - @SuppressWarnings("serial") -public static void fillMetadata(AutoDetectParser parser, Metadata metadata, ParseContext context, MultivaluedMap<String, String> httpHeaders) { - String fileName = detectFilename(httpHeaders); - if (fileName != null) { - metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName); - } - - String contentTypeHeader = httpHeaders.getFirst(HttpHeaders.CONTENT_TYPE); - javax.ws.rs.core.MediaType mediaType = contentTypeHeader == null ? null - : javax.ws.rs.core.MediaType.valueOf(contentTypeHeader); - if (mediaType!=null && "xml".equals(mediaType.getSubtype()) ) { - mediaType = null; - } - - if (mediaType !=null && mediaType.equals(javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE)) { - mediaType = null; - } - - if (mediaType !=null) { - metadata.add(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE, mediaType.toString()); - - final Detector detector = parser.getDetector(); - - parser.setDetector(new Detector() { - public MediaType detect(InputStream inputStream, Metadata metadata) throws IOException { - String ct = metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE); - - if (ct!=null) { - return MediaType.parse(ct); - } else { - return detector.detect(inputStream, metadata); - } - } - }); + public static final String GREETING = "This is Tika Server. Please PUT\n"; + public static final String X_TIKA_OCR_HEADER_PREFIX = "X-Tika-OCR"; + public static final String X_TIKA_PDF_HEADER_PREFIX = "X-Tika-PDF"; + + + private final Log logger = LogFactory.getLog(TikaResource.class); + + private TikaConfig tikaConfig; + + public TikaResource(TikaConfig tikaConfig) { + this.tikaConfig = tikaConfig; + } + + static { + ExtractorFactory.setAllThreadsPreferEventExtractors(true); } - - final String password = httpHeaders.getFirst("Password"); - if (password != null) { - context.set(PasswordProvider.class, new PasswordProvider() { - @Override - public String getPassword(Metadata metadata) { - return password; + + @SuppressWarnings("serial") + public static AutoDetectParser createParser(TikaConfig tikaConfig) { + final AutoDetectParser parser = new AutoDetectParser(tikaConfig); + + Map<MediaType, Parser> parsers = parser.getParsers(); + parsers.put(MediaType.APPLICATION_XML, new HtmlParser()); + parser.setParsers(parsers); + + parser.setFallback(new Parser() { + public Set<MediaType> getSupportedTypes(ParseContext parseContext) { + return parser.getSupportedTypes(parseContext); + } + + public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) { + throw new WebApplicationException(Response.Status.UNSUPPORTED_MEDIA_TYPE); } }); + + return parser; } - } - @POST - @Consumes("multipart/form-data") - @Produces("text/plain") - @Path("form") - public StreamingOutput getTextFromMultipart(Attachment att, @Context final UriInfo info) { - return produceText(att.getObject(InputStream.class), att.getHeaders(), info); - } - - @PUT - @Consumes("*/*") - @Produces("text/plain") - public StreamingOutput getText(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) { - return produceText(is, httpHeaders.getRequestHeaders(), info); - } - public StreamingOutput produceText(final InputStream is, MultivaluedMap<String, String> httpHeaders, final UriInfo info) { - final AutoDetectParser parser = createParser(tikaConfig); - final Metadata metadata = new Metadata(); - final ParseContext context = new ParseContext(); - - fillMetadata(parser, metadata, context, httpHeaders); - fillParseContext(context, httpHeaders); - - logRequest(logger, info, metadata); - - return new StreamingOutput() { - public void write(OutputStream outputStream) throws IOException, WebApplicationException { - Writer writer = new OutputStreamWriter(outputStream, IOUtils.UTF_8); + public static String detectFilename(MultivaluedMap<String, String> httpHeaders) { - BodyContentHandler body = new BodyContentHandler(new RichTextContentHandler(writer)); + String disposition = httpHeaders.getFirst("Content-Disposition"); + if (disposition != null) { + try { + ContentDisposition c = new ContentDisposition(disposition); + + // only support "attachment" dispositions + if ("attachment".equals(c.getDisposition())) { + String fn = c.getParameter("filename"); + if (fn != null) { + return fn; + } + } + } catch (ParseException e) { + // not a valid content-disposition field + } + } - TikaInputStream tis = TikaInputStream.get(is); + // this really should not be used, since it's not an official field + return httpHeaders.getFirst("File-Name"); + } - try { - parse(parser, logger, info.getPath(), tis, body, metadata, context); - } finally { - tis.close(); + public static void fillParseContext(ParseContext parseContext, MultivaluedMap<String, String> httpHeaders) { + TesseractOCRConfig ocrConfig = new TesseractOCRConfig(); + PDFParserConfig pdfParserConfig = new PDFParserConfig(); + for (String key : httpHeaders.keySet()) { + if (StringUtils.startsWith(key, X_TIKA_OCR_HEADER_PREFIX)) { + processHeaderConfig(httpHeaders, ocrConfig, key, X_TIKA_OCR_HEADER_PREFIX); + } else if (StringUtils.startsWith(key, X_TIKA_PDF_HEADER_PREFIX)) { + processHeaderConfig(httpHeaders, pdfParserConfig, key, X_TIKA_PDF_HEADER_PREFIX); + } } - } - }; - } - - @POST - @Consumes("multipart/form-data") - @Produces("text/html") - @Path("form") - public StreamingOutput getHTMLFromMultipart(Attachment att, @Context final UriInfo info) { - return produceOutput(att.getObject(InputStream.class), att.getHeaders(), info, "html"); - } - - @PUT - @Consumes("*/*") - @Produces("text/html") - public StreamingOutput getHTML(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) { - return produceOutput(is, httpHeaders.getRequestHeaders(), info, "html"); - } - - @POST - @Consumes("multipart/form-data") - @Produces("text/xml") - @Path("form") - public StreamingOutput getXMLFromMultipart(Attachment att, @Context final UriInfo info) { - return produceOutput(att.getObject(InputStream.class), att.getHeaders(), info, "xml"); - } - - @PUT - @Consumes("*/*") - @Produces("text/xml") - public StreamingOutput getXML(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) { - return produceOutput(is, httpHeaders.getRequestHeaders(), info, "xml"); - } - - private StreamingOutput produceOutput(final InputStream is, final MultivaluedMap<String, String> httpHeaders, - final UriInfo info, final String format) { - final AutoDetectParser parser = createParser(tikaConfig); - final Metadata metadata = new Metadata(); - final ParseContext context = new ParseContext(); - - fillMetadata(parser, metadata, context, httpHeaders); - fillParseContext(context, httpHeaders); - - - logRequest(logger, info, metadata); - - return new StreamingOutput() { - public void write(OutputStream outputStream) - throws IOException, WebApplicationException { - Writer writer = new OutputStreamWriter(outputStream, IOUtils.UTF_8); - ContentHandler content; + parseContext.set(TesseractOCRConfig.class, ocrConfig); + parseContext.set(PDFParserConfig.class, pdfParserConfig); + } + /** + * Utility method to set a property on a class via reflection. + * + * @param httpHeaders the HTTP headers set. + * @param object the <code>Object</code> to set the property on. + * @param key the key of the HTTP Header. + * @param prefix the name of the HTTP Header prefix used to find property. + * @throws WebApplicationException thrown when field cannot be found. + */ + private static void processHeaderConfig(MultivaluedMap<String, String> httpHeaders, Object object, String key, String prefix) { try { - SAXTransformerFactory factory = (SAXTransformerFactory)SAXTransformerFactory.newInstance( ); - TransformerHandler handler = factory.newTransformerHandler( ); - handler.getTransformer().setOutputProperty(OutputKeys.METHOD, format); - handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes"); - handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, IOUtils.UTF_8.name()); - handler.setResult(new StreamResult(writer)); - content = new ExpandedTitleContentHandler( handler ); + String property = StringUtils.removeStart(key, prefix); + Field field = object.getClass().getDeclaredField(StringUtils.uncapitalize(property)); + field.setAccessible(true); + if (field.getType() == String.class) { + field.set(object, httpHeaders.getFirst(key)); + } else if (field.getType() == int.class) { + field.setInt(object, Integer.parseInt(httpHeaders.getFirst(key))); + } else if (field.getType() == double.class) { + field.setDouble(object, Double.parseDouble(httpHeaders.getFirst(key))); + } else if (field.getType() == boolean.class) { + field.setBoolean(object, Boolean.parseBoolean(httpHeaders.getFirst(key))); + } + } catch (Throwable ex) { + throw new WebApplicationException(String.format(Locale.ROOT, + "%s is an invalid %s header", key, X_TIKA_OCR_HEADER_PREFIX)); } - catch ( TransformerConfigurationException e ) { - throw new WebApplicationException( e ); + } + + @SuppressWarnings("serial") + public static void fillMetadata(AutoDetectParser parser, Metadata metadata, ParseContext context, MultivaluedMap<String, String> httpHeaders) { + String fileName = detectFilename(httpHeaders); + if (fileName != null) { + metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName); } - TikaInputStream tis = TikaInputStream.get(is); + String contentTypeHeader = httpHeaders.getFirst(HttpHeaders.CONTENT_TYPE); + javax.ws.rs.core.MediaType mediaType = contentTypeHeader == null ? null + : javax.ws.rs.core.MediaType.valueOf(contentTypeHeader); + if (mediaType != null && "xml".equals(mediaType.getSubtype())) { + mediaType = null; + } - try { - parse(parser, logger, info.getPath(), tis, content, metadata, context); - } finally { - tis.close(); + if (mediaType != null && mediaType.equals(javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE)) { + mediaType = null; + } + + if (mediaType != null) { + metadata.add(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE, mediaType.toString()); + + final Detector detector = parser.getDetector(); + + parser.setDetector(new Detector() { + public MediaType detect(InputStream inputStream, Metadata metadata) throws IOException { + String ct = metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE); + + if (ct != null) { + return MediaType.parse(ct); + } else { + return detector.detect(inputStream, metadata); + } + } + }); + } + + final String password = httpHeaders.getFirst("Password"); + if (password != null) { + context.set(PasswordProvider.class, new PasswordProvider() { + @Override + public String getPassword(Metadata metadata) { + return password; + } + }); } - } - }; - } + } public static void parse(Parser parser, Log logger, String path, InputStream inputStream, ContentHandler handler, Metadata metadata, ParseContext parseContext) throws IOException { try { parser.parse(inputStream, handler, metadata, parseContext); } catch (SAXException e) { - throw new TikaServerParseException(e); + throw new TikaServerParseException(e); } catch (EncryptedDocumentException e) { logger.warn(String.format( - Locale.ROOT, - "%s: Encrypted document", - path - ), e); - throw new TikaServerParseException(e); + Locale.ROOT, + "%s: Encrypted document", + path + ), e); + throw new TikaServerParseException(e); } catch (Exception e) { logger.warn(String.format( Locale.ROOT, @@ -367,21 +249,139 @@ public static void fillMetadata(AutoDete } } + public static void logRequest(Log logger, UriInfo info, Metadata metadata) { + if (metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE) == null) { + logger.info(String.format( + Locale.ROOT, + "%s (autodetecting type)", + info.getPath() + )); + } else { + logger.info(String.format( + Locale.ROOT, + "%s (%s)", + info.getPath(), + metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE) + )); + } + } + + @GET + @Produces("text/plain") + public String getMessage() { + return GREETING; + } + + @POST + @Consumes("multipart/form-data") + @Produces("text/plain") + @Path("form") + public StreamingOutput getTextFromMultipart(Attachment att, @Context final UriInfo info) { + return produceText(att.getObject(InputStream.class), att.getHeaders(), info); + } + + @PUT + @Consumes("*/*") + @Produces("text/plain") + public StreamingOutput getText(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) { + return produceText(is, httpHeaders.getRequestHeaders(), info); + } + + public StreamingOutput produceText(final InputStream is, MultivaluedMap<String, String> httpHeaders, final UriInfo info) { + final AutoDetectParser parser = createParser(tikaConfig); + final Metadata metadata = new Metadata(); + final ParseContext context = new ParseContext(); + + fillMetadata(parser, metadata, context, httpHeaders); + fillParseContext(context, httpHeaders); + + logRequest(logger, info, metadata); - public static void logRequest(Log logger, UriInfo info, Metadata metadata) { - if (metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE)==null) { - logger.info(String.format( - Locale.ROOT, - "%s (autodetecting type)", - info.getPath() - )); - } else { - logger.info(String.format( - Locale.ROOT, - "%s (%s)", - info.getPath(), - metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE) - )); + return new StreamingOutput() { + public void write(OutputStream outputStream) throws IOException, WebApplicationException { + Writer writer = new OutputStreamWriter(outputStream, IOUtils.UTF_8); + + BodyContentHandler body = new BodyContentHandler(new RichTextContentHandler(writer)); + + TikaInputStream tis = TikaInputStream.get(is); + + try { + parse(parser, logger, info.getPath(), tis, body, metadata, context); + } finally { + tis.close(); + } + } + }; + } + + @POST + @Consumes("multipart/form-data") + @Produces("text/html") + @Path("form") + public StreamingOutput getHTMLFromMultipart(Attachment att, @Context final UriInfo info) { + return produceOutput(att.getObject(InputStream.class), att.getHeaders(), info, "html"); + } + + @PUT + @Consumes("*/*") + @Produces("text/html") + public StreamingOutput getHTML(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) { + return produceOutput(is, httpHeaders.getRequestHeaders(), info, "html"); + } + + @POST + @Consumes("multipart/form-data") + @Produces("text/xml") + @Path("form") + public StreamingOutput getXMLFromMultipart(Attachment att, @Context final UriInfo info) { + return produceOutput(att.getObject(InputStream.class), att.getHeaders(), info, "xml"); + } + + @PUT + @Consumes("*/*") + @Produces("text/xml") + public StreamingOutput getXML(final InputStream is, @Context HttpHeaders httpHeaders, @Context final UriInfo info) { + return produceOutput(is, httpHeaders.getRequestHeaders(), info, "xml"); + } + + private StreamingOutput produceOutput(final InputStream is, final MultivaluedMap<String, String> httpHeaders, + final UriInfo info, final String format) { + final AutoDetectParser parser = createParser(tikaConfig); + final Metadata metadata = new Metadata(); + final ParseContext context = new ParseContext(); + + fillMetadata(parser, metadata, context, httpHeaders); + fillParseContext(context, httpHeaders); + + + logRequest(logger, info, metadata); + + return new StreamingOutput() { + public void write(OutputStream outputStream) + throws IOException, WebApplicationException { + Writer writer = new OutputStreamWriter(outputStream, IOUtils.UTF_8); + ContentHandler content; + + try { + SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance(); + TransformerHandler handler = factory.newTransformerHandler(); + handler.getTransformer().setOutputProperty(OutputKeys.METHOD, format); + handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes"); + handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, IOUtils.UTF_8.name()); + handler.setResult(new StreamResult(writer)); + content = new ExpandedTitleContentHandler(handler); + } catch (TransformerConfigurationException e) { + throw new WebApplicationException(e); + } + + TikaInputStream tis = TikaInputStream.get(is); + + try { + parse(parser, logger, info.getPath(), tis, content, metadata, context); + } finally { + tis.close(); + } + } + }; } - } }
Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java?rev=1661200&r1=1661199&r2=1661200&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java Fri Feb 20 19:29:42 2015 @@ -39,111 +39,111 @@ import org.apache.tika.Tika; import org.apache.tika.config.TikaConfig; public class TikaServerCli { - private static final Log logger = LogFactory.getLog(TikaServerCli.class); - public static final int DEFAULT_PORT = 9998; - public static final String DEFAULT_HOST = "localhost"; - public static final Set<String> LOG_LEVELS = - new HashSet<String>(Arrays.asList("debug", "info")); - - private static Options getOptions() { - Options options = new Options(); - options.addOption("h", "host", true, "host name (default = " + DEFAULT_HOST + ')'); - options.addOption("p", "port", true, "listen port (default = " + DEFAULT_PORT + ')'); - options.addOption("l", "log", true, "request URI log level ('debug' or 'info')"); - options.addOption("s", "includeStack", false, "whether or not to return a stack trace\nif there is an exception during 'parse'"); - options.addOption("?", "help", false, "this help message"); - - return options; - } - - public static void main(String[] args) { - - logger.info("Starting " + new Tika().toString() + " server"); - - try { - Options options = getOptions(); - - CommandLineParser cliParser = new GnuParser(); - CommandLine line = cliParser.parse(options, args); - - if (line.hasOption("help")) { - HelpFormatter helpFormatter = new HelpFormatter(); - helpFormatter.printHelp("tikaserver", options); - System.exit(-1); - } - - String host = DEFAULT_HOST; - - if (line.hasOption("host")) { - host = line.getOptionValue("host"); - } - - int port = DEFAULT_PORT; - - if (line.hasOption("port")) { - port = Integer.valueOf(line.getOptionValue("port")); - } - - boolean returnStackTrace = false; - if (line.hasOption("includeStack")) { - returnStackTrace = true; - } - - TikaLoggingFilter logFilter = null; - if (line.hasOption("log")) { - String logLevel = line.getOptionValue("log"); - if (LOG_LEVELS.contains(logLevel)) { - boolean isInfoLevel = "info".equals(logLevel); - logFilter = new TikaLoggingFilter(isInfoLevel); - } else { - logger.info("Unsupported request URI log level: " + logLevel); + public static final int DEFAULT_PORT = 9998; + public static final String DEFAULT_HOST = "localhost"; + public static final Set<String> LOG_LEVELS = + new HashSet<String>(Arrays.asList("debug", "info")); + private static final Log logger = LogFactory.getLog(TikaServerCli.class); + + private static Options getOptions() { + Options options = new Options(); + options.addOption("h", "host", true, "host name (default = " + DEFAULT_HOST + ')'); + options.addOption("p", "port", true, "listen port (default = " + DEFAULT_PORT + ')'); + options.addOption("l", "log", true, "request URI log level ('debug' or 'info')"); + options.addOption("s", "includeStack", false, "whether or not to return a stack trace\nif there is an exception during 'parse'"); + options.addOption("?", "help", false, "this help message"); + + return options; + } + + public static void main(String[] args) { + + logger.info("Starting " + new Tika().toString() + " server"); + + try { + Options options = getOptions(); + + CommandLineParser cliParser = new GnuParser(); + CommandLine line = cliParser.parse(options, args); + + if (line.hasOption("help")) { + HelpFormatter helpFormatter = new HelpFormatter(); + helpFormatter.printHelp("tikaserver", options); + System.exit(-1); + } + + String host = DEFAULT_HOST; + + if (line.hasOption("host")) { + host = line.getOptionValue("host"); + } + + int port = DEFAULT_PORT; + + if (line.hasOption("port")) { + port = Integer.valueOf(line.getOptionValue("port")); + } + + boolean returnStackTrace = false; + if (line.hasOption("includeStack")) { + returnStackTrace = true; + } + + TikaLoggingFilter logFilter = null; + if (line.hasOption("log")) { + String logLevel = line.getOptionValue("log"); + if (LOG_LEVELS.contains(logLevel)) { + boolean isInfoLevel = "info".equals(logLevel); + logFilter = new TikaLoggingFilter(isInfoLevel); + } else { + logger.info("Unsupported request URI log level: " + logLevel); + } + } + // The Tika Configuration to use throughout + TikaConfig tika = TikaConfig.getDefaultConfig(); + + JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean(); + + List<ResourceProvider> rCoreProviders = new ArrayList<ResourceProvider>(); + rCoreProviders.add(new SingletonResourceProvider(new MetadataResource(tika))); + rCoreProviders.add(new SingletonResourceProvider(new RecursiveMetadataResource(tika))); + rCoreProviders.add(new SingletonResourceProvider(new DetectorResource(tika))); + rCoreProviders.add(new SingletonResourceProvider(new TikaResource(tika))); + rCoreProviders.add(new SingletonResourceProvider(new UnpackerResource(tika))); + rCoreProviders.add(new SingletonResourceProvider(new TikaMimeTypes(tika))); + rCoreProviders.add(new SingletonResourceProvider(new TikaDetectors(tika))); + rCoreProviders.add(new SingletonResourceProvider(new TikaParsers(tika))); + rCoreProviders.add(new SingletonResourceProvider(new TikaVersion(tika))); + List<ResourceProvider> rAllProviders = new ArrayList<ResourceProvider>(rCoreProviders); + rAllProviders.add(new SingletonResourceProvider(new TikaWelcome(tika, rCoreProviders))); + sf.setResourceProviders(rAllProviders); + + List<Object> providers = new ArrayList<Object>(); + providers.add(new TarWriter()); + providers.add(new ZipWriter()); + providers.add(new CSVMessageBodyWriter()); + providers.add(new MetadataListMessageBodyWriter()); + providers.add(new JSONMessageBodyWriter()); + providers.add(new XMPMessageBodyWriter()); + providers.add(new TextMessageBodyWriter()); + providers.add(new TikaServerParseExceptionMapper(returnStackTrace)); + if (logFilter != null) { + providers.add(logFilter); + } + sf.setProviders(providers); + + sf.setAddress("http://" + host + ":" + port + "/"); + BindingFactoryManager manager = sf.getBus().getExtension( + BindingFactoryManager.class); + JAXRSBindingFactory factory = new JAXRSBindingFactory(); + factory.setBus(sf.getBus()); + manager.registerBindingFactory(JAXRSBindingFactory.JAXRS_BINDING_ID, + factory); + sf.create(); + logger.info("Started"); + } catch (Exception ex) { + logger.fatal("Can't start", ex); + System.exit(-1); } - } - // The Tika Configuration to use throughout - TikaConfig tika = TikaConfig.getDefaultConfig(); - - JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean(); - - List<ResourceProvider> rCoreProviders = new ArrayList<ResourceProvider>(); - rCoreProviders.add(new SingletonResourceProvider(new MetadataResource(tika))); - rCoreProviders.add(new SingletonResourceProvider(new RecursiveMetadataResource(tika))); - rCoreProviders.add(new SingletonResourceProvider(new DetectorResource(tika))); - rCoreProviders.add(new SingletonResourceProvider(new TikaResource(tika))); - rCoreProviders.add(new SingletonResourceProvider(new UnpackerResource(tika))); - rCoreProviders.add(new SingletonResourceProvider(new TikaMimeTypes(tika))); - rCoreProviders.add(new SingletonResourceProvider(new TikaDetectors(tika))); - rCoreProviders.add(new SingletonResourceProvider(new TikaParsers(tika))); - rCoreProviders.add(new SingletonResourceProvider(new TikaVersion(tika))); - List<ResourceProvider> rAllProviders = new ArrayList<ResourceProvider>(rCoreProviders); - rAllProviders.add(new SingletonResourceProvider(new TikaWelcome(tika, rCoreProviders))); - sf.setResourceProviders(rAllProviders); - - List<Object> providers = new ArrayList<Object>(); - providers.add(new TarWriter()); - providers.add(new ZipWriter()); - providers.add(new CSVMessageBodyWriter()); - providers.add(new MetadataListMessageBodyWriter()); - providers.add(new JSONMessageBodyWriter()); - providers.add(new XMPMessageBodyWriter()); - providers.add(new TextMessageBodyWriter()); - providers.add(new TikaServerParseExceptionMapper(returnStackTrace)); - if (logFilter != null) { - providers.add(logFilter); - } - sf.setProviders(providers); - - sf.setAddress("http://" + host + ":" + port + "/"); - BindingFactoryManager manager = sf.getBus().getExtension( - BindingFactoryManager.class); - JAXRSBindingFactory factory = new JAXRSBindingFactory(); - factory.setBus(sf.getBus()); - manager.registerBindingFactory(JAXRSBindingFactory.JAXRS_BINDING_ID, - factory); - sf.create(); - logger.info("Started"); - } catch (Exception ex) { - logger.fatal("Can't start", ex); - System.exit(-1); } - } } Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java?rev=1661200&r1=1661199&r2=1661200&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaServerParseExceptionMapper.java Fri Feb 20 19:29:42 2015 @@ -31,6 +31,7 @@ import org.apache.tika.exception.TikaExc @Provider public class TikaServerParseExceptionMapper implements ExceptionMapper<TikaServerParseException> { + private final boolean returnStack; public TikaServerParseExceptionMapper(boolean returnStack) { @@ -51,12 +52,12 @@ public class TikaServerParseExceptionMap //unsupported media type Throwable causeOfCause = cause.getCause(); if (causeOfCause instanceof WebApplicationException) { - return ((WebApplicationException)causeOfCause).getResponse(); + return ((WebApplicationException) causeOfCause).getResponse(); } return buildResponse(cause, 422); } else if (cause instanceof IllegalStateException) { return buildResponse(cause, 422); - } else if(cause instanceof OldWordFileFormatException) { + } else if (cause instanceof OldWordFileFormatException) { return buildResponse(cause, 422); } else if (cause instanceof WebApplicationException) { return ((WebApplicationException) e.getCause()).getResponse(); Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaVersion.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaVersion.java?rev=1661200&r1=1661199&r2=1661200&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaVersion.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaVersion.java Fri Feb 20 19:29:42 2015 @@ -23,16 +23,17 @@ import javax.ws.rs.Produces; import org.apache.tika.Tika; import org.apache.tika.config.TikaConfig; -@Path("/version") -public class TikaVersion { - private Tika tika; - public TikaVersion(TikaConfig tika) { - this.tika = new Tika(tika); - } - - @GET - @Produces("text/plain") - public String getVersion() { - return tika.toString(); - } -} +@Path("/version") +public class TikaVersion { + private Tika tika; + + public TikaVersion(TikaConfig tika) { + this.tika = new Tika(tika); + } + + @GET + @Produces("text/plain") + public String getVersion() { + return tika.toString(); + } +} Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaWelcome.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaWelcome.java?rev=1661200&r1=1661199&r2=1661200&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaWelcome.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/TikaWelcome.java Fri Feb 20 19:29:42 2015 @@ -13,12 +13,21 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. - */ -package org.apache.tika.server; - -import java.lang.annotation.Annotation; -import java.lang.reflect.Method; -import java.util.ArrayList; + */ +package org.apache.tika.server; + +import javax.ws.rs.DELETE; +import javax.ws.rs.GET; +import javax.ws.rs.HEAD; +import javax.ws.rs.OPTIONS; +import javax.ws.rs.POST; +import javax.ws.rs.PUT; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; + +import java.lang.annotation.Annotation; +import java.lang.reflect.Method; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; @@ -26,83 +35,74 @@ import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import javax.ws.rs.DELETE; -import javax.ws.rs.GET; -import javax.ws.rs.HEAD; -import javax.ws.rs.OPTIONS; -import javax.ws.rs.POST; -import javax.ws.rs.PUT; -import javax.ws.rs.Path; -import javax.ws.rs.Produces; - -import org.apache.cxf.jaxrs.lifecycle.ResourceProvider; -import org.apache.tika.Tika; -import org.apache.tika.config.TikaConfig; -import sun.misc.Regexp; - -/** - * <p>Provides a basic welcome to the Apache Tika Server.</p> +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.cxf.jaxrs.lifecycle.ResourceProvider; +import org.apache.tika.Tika; +import org.apache.tika.config.TikaConfig; + +/** + * <p>Provides a basic welcome to the Apache Tika Server.</p> */ -@Path("/") -public class TikaWelcome { - private static final String DOCS_URL = "https://wiki.apache.org/tika/TikaJAXRS"; - - private static final Map<Class<? extends Annotation>, String> HTTP_METHODS = - new HashMap<Class<? extends Annotation>, String>(); - static { - HTTP_METHODS.put(DELETE.class , "DELETE"); - HTTP_METHODS.put(GET.class, "GET"); - HTTP_METHODS.put(HEAD.class, "HEAD"); - HTTP_METHODS.put(OPTIONS.class, "OPTIONS"); - HTTP_METHODS.put(POST.class, "POST"); - HTTP_METHODS.put(PUT.class, "PUT"); - } - - private Tika tika; - private HTMLHelper html; - private List<Class<?>> endpoints = new LinkedList<Class<?>>(); - - public TikaWelcome(TikaConfig tika, List<ResourceProvider> rCoreProviders) { - this.tika = new Tika(tika); - this.html = new HTMLHelper(); +@Path("/") +public class TikaWelcome { + private static final String DOCS_URL = "https://wiki.apache.org/tika/TikaJAXRS"; + + private static final Map<Class<? extends Annotation>, String> HTTP_METHODS = + new HashMap<Class<? extends Annotation>, String>(); + + static { + HTTP_METHODS.put(DELETE.class, "DELETE"); + HTTP_METHODS.put(GET.class, "GET"); + HTTP_METHODS.put(HEAD.class, "HEAD"); + HTTP_METHODS.put(OPTIONS.class, "OPTIONS"); + HTTP_METHODS.put(POST.class, "POST"); + HTTP_METHODS.put(PUT.class, "PUT"); + } + + private Tika tika; + private HTMLHelper html; + private List<Class<?>> endpoints = new LinkedList<Class<?>>(); + + public TikaWelcome(TikaConfig tika, List<ResourceProvider> rCoreProviders) { + this.tika = new Tika(tika); + this.html = new HTMLHelper(); for (ResourceProvider rp : rCoreProviders) { - this.endpoints.add(rp.getResourceClass()); - } - } - - protected List<Endpoint> identifyEndpoints() { - List<Endpoint> found = new ArrayList<Endpoint>(); - for (Class<?> endpoint : endpoints) { + this.endpoints.add(rp.getResourceClass()); + } + } + + protected List<Endpoint> identifyEndpoints() { + List<Endpoint> found = new ArrayList<Endpoint>(); + for (Class<?> endpoint : endpoints) { Path p = endpoint.getAnnotation(Path.class); String basePath = null; if (p != null) basePath = p.value(); for (Method m : endpoint.getMethods()) { - String httpMethod = null; - String methodPath = null; - String[] produces = null; - - for (Annotation a : m.getAnnotations()) { - for (Class<? extends Annotation> httpMethAnn : HTTP_METHODS.keySet()) { - if (httpMethAnn.isInstance(a)) { + String httpMethod = null; + String methodPath = null; + String[] produces = null; + + for (Annotation a : m.getAnnotations()) { + for (Class<? extends Annotation> httpMethAnn : HTTP_METHODS.keySet()) { + if (httpMethAnn.isInstance(a)) { httpMethod = HTTP_METHODS.get(httpMethAnn); - } - } - if (a instanceof Path) { - methodPath = ((Path)a).value(); - } - if (a instanceof Produces) { - produces = ((Produces)a).value(); - } - } - - if (httpMethod != null) { - String mPath = basePath; - if (mPath == null) { + } + } + if (a instanceof Path) { + methodPath = ((Path) a).value(); + } + if (a instanceof Produces) { + produces = ((Produces) a).value(); + } + } + + if (httpMethod != null) { + String mPath = basePath; + if (mPath == null) { mPath = ""; } if (methodPath != null) { @@ -124,21 +124,21 @@ public class TikaWelcome { } return res; } - }); - return found; - } - - @GET - @Produces("text/html") - public String getWelcomeHTML() { + }); + return found; + } + + @GET + @Produces("text/html") + public String getWelcomeHTML() { StringBuffer h = new StringBuffer(); - String tikaVersion = tika.toString(); - - html.generateHeader(h, "Welcome to the " + tikaVersion + " Server"); - - h.append("<p>For endpoints, please see <a href=\""); - h.append(DOCS_URL); - h.append("\">"); + String tikaVersion = tika.toString(); + + html.generateHeader(h, "Welcome to the " + tikaVersion + " Server"); + + h.append("<p>For endpoints, please see <a href=\""); + h.append(DOCS_URL); + h.append("\">"); h.append(DOCS_URL); h.append("</a>"); @@ -184,19 +184,19 @@ public class TikaWelcome { } @GET - @Produces("text/plain") - public String getWelcomePlain() { - StringBuffer text = new StringBuffer(); - - text.append(tika.toString()); - text.append("\n"); - text.append("For endpoints, please see "); - text.append(DOCS_URL); - text.append("\n\n"); - - for (Endpoint e : identifyEndpoints()) { - text.append(e.httpMethod); - text.append(" "); + @Produces("text/plain") + public String getWelcomePlain() { + StringBuffer text = new StringBuffer(); + + text.append(tika.toString()); + text.append("\n"); + text.append("For endpoints, please see "); + text.append(DOCS_URL); + text.append("\n\n"); + + for (Endpoint e : identifyEndpoints()) { + text.append(e.httpMethod); + text.append(" "); text.append(e.path); text.append("\n"); for (String produces : e.produces) { @@ -205,19 +205,20 @@ public class TikaWelcome { text.append("\n"); } } - - return text.toString(); - } - - protected class Endpoint { - public final String className; - public final String methodName; - public final String path; - public final String httpMethod; - public final List<String> produces; - protected Endpoint(Class<?> endpoint, Method method, String path, - String httpMethod, String[] produces) { - this.className = endpoint.getCanonicalName(); + + return text.toString(); + } + + protected class Endpoint { + public final String className; + public final String methodName; + public final String path; + public final String httpMethod; + public final List<String> produces; + + protected Endpoint(Class<?> endpoint, Method method, String path, + String httpMethod, String[] produces) { + this.className = endpoint.getCanonicalName(); this.methodName = method.getName(); this.path = path; this.httpMethod = httpMethod; Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java?rev=1661200&r1=1661199&r2=1661200&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/UnpackerResource.java Fri Feb 20 19:29:42 2015 @@ -65,199 +65,200 @@ import org.xml.sax.helpers.DefaultHandle @Path("/unpack") public class UnpackerResource { - private static final Log logger = LogFactory.getLog(UnpackerResource.class); - public static final String TEXT_FILENAME = "__TEXT__"; - private static final String META_FILENAME = "__METADATA__"; + public static final String TEXT_FILENAME = "__TEXT__"; + private static final Log logger = LogFactory.getLog(UnpackerResource.class); + private static final String META_FILENAME = "__METADATA__"; - private TikaConfig tikaConfig; - public UnpackerResource(TikaConfig tikaConfig) { - this.tikaConfig = tikaConfig; - } + private TikaConfig tikaConfig; - @Path("/{id:(/.*)?}") - @PUT - @Produces({"application/zip", "application/x-tar"}) - public Map<String, byte[]> unpack( - InputStream is, - @Context HttpHeaders httpHeaders, - @Context UriInfo info - ) throws Exception { - return process(is, httpHeaders, info, false); - } - - @Path("/all{id:(/.*)?}") - @PUT - @Produces({"application/zip", "application/x-tar"}) - public Map<String, byte[]> unpackAll( - InputStream is, - @Context HttpHeaders httpHeaders, - @Context UriInfo info - ) throws Exception { - return process(is, httpHeaders, info, true); - } - - private Map<String, byte[]> process( - InputStream is, - @Context HttpHeaders httpHeaders, - @Context UriInfo info, - boolean saveAll - ) throws Exception { - Metadata metadata = new Metadata(); - ParseContext pc = new ParseContext(); - - AutoDetectParser parser = TikaResource.createParser(tikaConfig); - - TikaResource.fillMetadata(parser, metadata, pc, httpHeaders.getRequestHeaders()); - TikaResource.logRequest(logger, info, metadata); - - ContentHandler ch; - ByteArrayOutputStream text = new ByteArrayOutputStream(); - - if (saveAll) { - ch = new BodyContentHandler(new RichTextContentHandler(new OutputStreamWriter(text, org.apache.tika.io.IOUtils.UTF_8))); - } else { - ch = new DefaultHandler(); + public UnpackerResource(TikaConfig tikaConfig) { + this.tikaConfig = tikaConfig; } - Map<String, byte[]> files = new HashMap<String, byte[]>(); - MutableInt count = new MutableInt(); + public static void metadataToCsv(Metadata metadata, OutputStream outputStream) throws IOException { + CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream, org.apache.tika.io.IOUtils.UTF_8)); - pc.set(EmbeddedDocumentExtractor.class, new MyEmbeddedDocumentExtractor(count, files)); - TikaResource.parse(parser, logger, info.getPath(), is, ch, metadata, pc); + for (String name : metadata.names()) { + String[] values = metadata.getValues(name); + ArrayList<String> list = new ArrayList<String>(values.length + 1); + list.add(name); + list.addAll(Arrays.asList(values)); + writer.writeNext(list.toArray(values)); + } - if (count.intValue() == 0 && !saveAll) { - throw new WebApplicationException(Response.Status.NO_CONTENT); + writer.close(); } - if (saveAll) { - files.put(TEXT_FILENAME, text.toByteArray()); + @Path("/{id:(/.*)?}") + @PUT + @Produces({"application/zip", "application/x-tar"}) + public Map<String, byte[]> unpack( + InputStream is, + @Context HttpHeaders httpHeaders, + @Context UriInfo info + ) throws Exception { + return process(is, httpHeaders, info, false); + } + + @Path("/all{id:(/.*)?}") + @PUT + @Produces({"application/zip", "application/x-tar"}) + public Map<String, byte[]> unpackAll( + InputStream is, + @Context HttpHeaders httpHeaders, + @Context UriInfo info + ) throws Exception { + return process(is, httpHeaders, info, true); + } + + private Map<String, byte[]> process( + InputStream is, + @Context HttpHeaders httpHeaders, + @Context UriInfo info, + boolean saveAll + ) throws Exception { + Metadata metadata = new Metadata(); + ParseContext pc = new ParseContext(); + + AutoDetectParser parser = TikaResource.createParser(tikaConfig); - ByteArrayOutputStream metaStream = new ByteArrayOutputStream(); - metadataToCsv(metadata, metaStream); + TikaResource.fillMetadata(parser, metadata, pc, httpHeaders.getRequestHeaders()); + TikaResource.logRequest(logger, info, metadata); - files.put(META_FILENAME, metaStream.toByteArray()); - } - - return files; - } + ContentHandler ch; + ByteArrayOutputStream text = new ByteArrayOutputStream(); - public static void metadataToCsv(Metadata metadata, OutputStream outputStream) throws IOException { - CSVWriter writer = new CSVWriter(new OutputStreamWriter(outputStream, org.apache.tika.io.IOUtils.UTF_8)); - - for (String name : metadata.names()) { - String[] values = metadata.getValues(name); - ArrayList<String> list = new ArrayList<String>(values.length+1); - list.add(name); - list.addAll(Arrays.asList(values)); - writer.writeNext(list.toArray(values)); - } + if (saveAll) { + ch = new BodyContentHandler(new RichTextContentHandler(new OutputStreamWriter(text, org.apache.tika.io.IOUtils.UTF_8))); + } else { + ch = new DefaultHandler(); + } - writer.close(); - } + Map<String, byte[]> files = new HashMap<String, byte[]>(); + MutableInt count = new MutableInt(); - private class MyEmbeddedDocumentExtractor implements EmbeddedDocumentExtractor { - private final MutableInt count; - private final Map<String, byte[]> zout; + pc.set(EmbeddedDocumentExtractor.class, new MyEmbeddedDocumentExtractor(count, files)); + TikaResource.parse(parser, logger, info.getPath(), is, ch, metadata, pc); - MyEmbeddedDocumentExtractor(MutableInt count, Map<String, byte[]> zout) { - this.count = count; - this.zout = zout; - } + if (count.intValue() == 0 && !saveAll) { + throw new WebApplicationException(Response.Status.NO_CONTENT); + } - public boolean shouldParseEmbedded(Metadata metadata) { - return true; - } + if (saveAll) { + files.put(TEXT_FILENAME, text.toByteArray()); - public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, boolean b) throws SAXException, IOException { - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - IOUtils.copy(inputStream, bos); - byte[] data = bos.toByteArray(); + ByteArrayOutputStream metaStream = new ByteArrayOutputStream(); + metadataToCsv(metadata, metaStream); - String name = metadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY); - String contentType = metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE); + files.put(META_FILENAME, metaStream.toByteArray()); + } - if (name == null) { - name = Integer.toString(count.intValue()); - } + return files; + } - if (!name.contains(".") && contentType!=null) { - try { - String ext = tikaConfig.getMimeRepository().forName(contentType).getExtension(); + private class MyEmbeddedDocumentExtractor implements EmbeddedDocumentExtractor { + private final MutableInt count; + private final Map<String, byte[]> zout; - if (ext!=null) { - name += ext; - } - } catch (MimeTypeException e) { - logger.warn("Unexpected MimeTypeException", e); + MyEmbeddedDocumentExtractor(MutableInt count, Map<String, byte[]> zout) { + this.count = count; + this.zout = zout; } - } - if ("application/vnd.openxmlformats-officedocument.oleObject".equals(contentType)) { - POIFSFileSystem poifs = new POIFSFileSystem(new ByteArrayInputStream(data)); - OfficeParser.POIFSDocumentType type = OfficeParser.POIFSDocumentType.detectType(poifs); - - if (type == OfficeParser.POIFSDocumentType.OLE10_NATIVE) { - try { - Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(poifs); - if (ole.getDataSize()>0) { - String label = ole.getLabel(); + public boolean shouldParseEmbedded(Metadata metadata) { + return true; + } - if (label.startsWith("ole-")) { - label = Integer.toString(count.intValue()) + '-' + label; - } + public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, boolean b) throws SAXException, IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + IOUtils.copy(inputStream, bos); + byte[] data = bos.toByteArray(); - name = label; + String name = metadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY); + String contentType = metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE); - data = ole.getDataBuffer(); + if (name == null) { + name = Integer.toString(count.intValue()); } - } catch (Ole10NativeException ex) { - logger.warn("Skipping invalid part", ex); - } - } else { - name += '.' + type.getExtension(); - } - } - final String finalName = name; + if (!name.contains(".") && contentType != null) { + try { + String ext = tikaConfig.getMimeRepository().forName(contentType).getExtension(); + + if (ext != null) { + name += ext; + } + } catch (MimeTypeException e) { + logger.warn("Unexpected MimeTypeException", e); + } + } - if (data.length > 0) { - zout.put(finalName, data); + if ("application/vnd.openxmlformats-officedocument.oleObject".equals(contentType)) { + POIFSFileSystem poifs = new POIFSFileSystem(new ByteArrayInputStream(data)); + OfficeParser.POIFSDocumentType type = OfficeParser.POIFSDocumentType.detectType(poifs); + + if (type == OfficeParser.POIFSDocumentType.OLE10_NATIVE) { + try { + Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(poifs); + if (ole.getDataSize() > 0) { + String label = ole.getLabel(); + + if (label.startsWith("ole-")) { + label = Integer.toString(count.intValue()) + '-' + label; + } + + name = label; + + data = ole.getDataBuffer(); + } + } catch (Ole10NativeException ex) { + logger.warn("Skipping invalid part", ex); + } + } else { + name += '.' + type.getExtension(); + } + } - count.increment(); - } else { - if (inputStream instanceof TikaInputStream) { - TikaInputStream tin = (TikaInputStream) inputStream; + final String finalName = name; - if (tin.getOpenContainer()!=null && tin.getOpenContainer() instanceof DirectoryEntry) { - POIFSFileSystem fs = new POIFSFileSystem(); - copy((DirectoryEntry) tin.getOpenContainer(), fs.getRoot()); - ByteArrayOutputStream bos2 = new ByteArrayOutputStream(); - fs.writeFilesystem(bos2); - bos2.close(); + if (data.length > 0) { + zout.put(finalName, data); - zout.put(finalName, bos2.toByteArray()); - } + count.increment(); + } else { + if (inputStream instanceof TikaInputStream) { + TikaInputStream tin = (TikaInputStream) inputStream; + + if (tin.getOpenContainer() != null && tin.getOpenContainer() instanceof DirectoryEntry) { + POIFSFileSystem fs = new POIFSFileSystem(); + copy((DirectoryEntry) tin.getOpenContainer(), fs.getRoot()); + ByteArrayOutputStream bos2 = new ByteArrayOutputStream(); + fs.writeFilesystem(bos2); + bos2.close(); + + zout.put(finalName, bos2.toByteArray()); + } + } + } } - } - } - protected void copy(DirectoryEntry sourceDir, DirectoryEntry destDir) - throws IOException { - for (Entry entry : sourceDir) { - if (entry instanceof DirectoryEntry) { - // Need to recurse - DirectoryEntry newDir = destDir.createDirectory(entry.getName()); - copy((DirectoryEntry) entry, newDir); - } else { - // Copy entry - InputStream contents = new DocumentInputStream((DocumentEntry) entry); - try { - destDir.createDocument(entry.getName(), contents); - } finally { - contents.close(); - } + protected void copy(DirectoryEntry sourceDir, DirectoryEntry destDir) + throws IOException { + for (Entry entry : sourceDir) { + if (entry instanceof DirectoryEntry) { + // Need to recurse + DirectoryEntry newDir = destDir.createDirectory(entry.getName()); + copy((DirectoryEntry) entry, newDir); + } else { + // Copy entry + InputStream contents = new DocumentInputStream((DocumentEntry) entry); + try { + destDir.createDocument(entry.getName(), contents); + } finally { + contents.close(); + } + } + } } - } } - } } Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java?rev=1661200&r1=1661199&r2=1661200&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/XMPMessageBodyWriter.java Fri Feb 20 19:29:42 2015 @@ -49,19 +49,19 @@ public class XMPMessageBodyWriter implem public long getSize(Metadata data, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { return -1; } - - @Override - public void writeTo(Metadata metadata, Class<?> type, Type genericType, Annotation[] annotations, - MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, - WebApplicationException { - try { - Writer writer = new OutputStreamWriter(entityStream, IOUtils.UTF_8); - XMPMetadata xmp = new XMPMetadata(metadata); - writer.write(xmp.toString()); - writer.flush(); - } catch (TikaException e) { - throw new IOException(e); - } - entityStream.flush(); - } -} + + @Override + public void writeTo(Metadata metadata, Class<?> type, Type genericType, Annotation[] annotations, + MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, + WebApplicationException { + try { + Writer writer = new OutputStreamWriter(entityStream, IOUtils.UTF_8); + XMPMetadata xmp = new XMPMetadata(metadata); + writer.write(xmp.toString()); + writer.flush(); + } catch (TikaException e) { + throw new IOException(e); + } + entityStream.flush(); + } +} Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java?rev=1661200&r1=1661199&r2=1661200&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/ZipWriter.java Fri Feb 20 19:29:42 2015 @@ -14,72 +14,73 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -package org.apache.tika.server; - -import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; -import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; - -import javax.ws.rs.Produces; -import javax.ws.rs.WebApplicationException; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.MultivaluedMap; -import javax.ws.rs.ext.MessageBodyWriter; -import javax.ws.rs.ext.Provider; -import java.io.IOException; -import java.io.OutputStream; -import java.lang.annotation.Annotation; + +package org.apache.tika.server; + +import javax.ws.rs.Produces; +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.MultivaluedMap; +import javax.ws.rs.ext.MessageBodyWriter; +import javax.ws.rs.ext.Provider; + +import java.io.IOException; +import java.io.OutputStream; +import java.lang.annotation.Annotation; import java.lang.reflect.Type; import java.util.Map; import java.util.UUID; import java.util.zip.CRC32; -import java.util.zip.ZipEntry; -import java.util.zip.ZipException; -import java.util.zip.ZipOutputStream; - -@Provider -@Produces("application/zip") -public class ZipWriter implements MessageBodyWriter<Map<String, byte[]>> { - private static void zipStoreBuffer(ZipArchiveOutputStream zip, String name, byte[] dataBuffer) throws IOException { - ZipEntry zipEntry = new ZipEntry(name!=null?name: UUID.randomUUID().toString()); - zipEntry.setMethod(ZipOutputStream.STORED); - - zipEntry.setSize(dataBuffer.length); - CRC32 crc32 = new CRC32(); - crc32.update(dataBuffer); - zipEntry.setCrc(crc32.getValue()); - - try { - zip.putArchiveEntry(new ZipArchiveEntry(zipEntry)); - } catch (ZipException ex) { - if (name!=null) { - zipStoreBuffer(zip, "x-"+name, dataBuffer); - return; - } - } - - zip.write(dataBuffer); - - zip.closeArchiveEntry(); - } - - public boolean isWriteable(Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return Map.class.isAssignableFrom(type); - } - - public long getSize(Map<String, byte[]> stringMap, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return -1; - } - - public void writeTo(Map<String, byte[]> parts, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException { - ZipArchiveOutputStream zip = new ZipArchiveOutputStream(entityStream); - - zip.setMethod(ZipArchiveOutputStream.STORED); - - for (Map.Entry<String, byte[]> entry : parts.entrySet()) { - zipStoreBuffer(zip, entry.getKey(), entry.getValue()); - } - - zip.close(); - } -} +import java.util.zip.ZipEntry; +import java.util.zip.ZipException; +import java.util.zip.ZipOutputStream; + +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; + +@Provider +@Produces("application/zip") +public class ZipWriter implements MessageBodyWriter<Map<String, byte[]>> { + private static void zipStoreBuffer(ZipArchiveOutputStream zip, String name, byte[] dataBuffer) throws IOException { + ZipEntry zipEntry = new ZipEntry(name != null ? name : UUID.randomUUID().toString()); + zipEntry.setMethod(ZipOutputStream.STORED); + + zipEntry.setSize(dataBuffer.length); + CRC32 crc32 = new CRC32(); + crc32.update(dataBuffer); + zipEntry.setCrc(crc32.getValue()); + + try { + zip.putArchiveEntry(new ZipArchiveEntry(zipEntry)); + } catch (ZipException ex) { + if (name != null) { + zipStoreBuffer(zip, "x-" + name, dataBuffer); + return; + } + } + + zip.write(dataBuffer); + + zip.closeArchiveEntry(); + } + + public boolean isWriteable(Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { + return Map.class.isAssignableFrom(type); + } + + public long getSize(Map<String, byte[]> stringMap, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { + return -1; + } + + public void writeTo(Map<String, byte[]> parts, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException { + ZipArchiveOutputStream zip = new ZipArchiveOutputStream(entityStream); + + zip.setMethod(ZipArchiveOutputStream.STORED); + + for (Map.Entry<String, byte[]> entry : parts.entrySet()) { + zipStoreBuffer(zip, entry.getKey(), entry.getValue()); + } + + zip.close(); + } +} Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java?rev=1661200&r1=1661199&r2=1661200&view=diff ============================================================================== --- tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java (original) +++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java Fri Feb 20 19:29:42 2015 @@ -42,115 +42,21 @@ import org.apache.cxf.jaxrs.JAXRSServerF import org.apache.tika.config.TikaConfig; import org.apache.tika.io.IOUtils; import org.junit.After; -import org.junit.Before; - -public abstract class CXFTestBase { - protected static final String endPoint = - "http://localhost:" + TikaServerCli.DEFAULT_PORT; - protected Server server; - protected TikaConfig tika; - - @Before - public void setUp() { - this.tika = TikaConfig.getDefaultConfig(); - - JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean(); - setUpResources(sf); - setUpProviders(sf); - sf.setAddress(endPoint + "/"); - - BindingFactoryManager manager = sf.getBus().getExtension( - BindingFactoryManager.class - ); - - JAXRSBindingFactory factory = new JAXRSBindingFactory(); - factory.setBus(sf.getBus()); - - manager.registerBindingFactory( - JAXRSBindingFactory.JAXRS_BINDING_ID, - factory - ); - - server = sf.create(); - } - - /** - * Have the test do {@link JAXRSServerFactoryBean#setResourceClasses(Class...)} - * and {@link JAXRSServerFactoryBean#setResourceProvider(Class, org.apache.cxf.jaxrs.lifecycle.ResourceProvider)} - */ - protected abstract void setUpResources(JAXRSServerFactoryBean sf); - /** - * Have the test do {@link JAXRSServerFactoryBean#setProviders(java.util.List)}, if needed - */ - protected abstract void setUpProviders(JAXRSServerFactoryBean sf); - - @After - public void tearDown() throws Exception { - server.stop(); - server.destroy(); - } - - public static void assertContains(String needle, String haystack) { - assertTrue(needle + " not found in:\n" + haystack, haystack.contains(needle)); - } - public static void assertNotFound(String needle, String haystack) { - assertFalse(needle + " unexpectedly found in:\n" + haystack, haystack.contains(needle)); - } - - protected String getStringFromInputStream(InputStream in) throws Exception { - return IOUtils.toString(in); - } - - protected Map<String, String> readZipArchive(InputStream inputStream) throws IOException { - Map<String, String> data = new HashMap<String, String>(); - File tempFile = writeTemporaryArchiveFile(inputStream, "zip"); - ZipFile zip = new ZipFile(tempFile); - Enumeration<ZipArchiveEntry> entries = zip.getEntries(); - while (entries.hasMoreElements()) { - ZipArchiveEntry entry = entries.nextElement(); - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - IOUtils.copy(zip.getInputStream(entry), bos); - data.put(entry.getName(), DigestUtils.md5Hex(bos.toByteArray())); - } - - zip.close(); - tempFile.delete(); - return data; - } - - protected String readArchiveText(InputStream inputStream) throws IOException { - File tempFile = writeTemporaryArchiveFile(inputStream, "zip"); - ZipFile zip = new ZipFile(tempFile); - zip.getEntry(UnpackerResource.TEXT_FILENAME); - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - IOUtils.copy(zip.getInputStream(zip.getEntry(UnpackerResource.TEXT_FILENAME)), bos); - - zip.close(); - tempFile.delete(); - return bos.toString(IOUtils.UTF_8.name()); - } - - protected Map<String, String> readArchiveFromStream(ArchiveInputStream zip) throws IOException { - Map<String, String> data = new HashMap<String, String>(); - while (true) { - ArchiveEntry entry = zip.getNextEntry(); - if (entry == null) { - break; - } - - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - IOUtils.copy(zip, bos); - data.put(entry.getName(), DigestUtils.md5Hex(bos.toByteArray())); - } - - return data; - } - - private File writeTemporaryArchiveFile(InputStream inputStream, String archiveType) throws IOException { - File tempFile = File.createTempFile("tmp-", "." + archiveType); - IOUtils.copy(inputStream, new FileOutputStream(tempFile)); - return tempFile; - } +import org.junit.Before; + +public abstract class CXFTestBase { + protected static final String endPoint = + "http://localhost:" + TikaServerCli.DEFAULT_PORT; + protected Server server; + protected TikaConfig tika; + + public static void assertContains(String needle, String haystack) { + assertTrue(needle + " not found in:\n" + haystack, haystack.contains(needle)); + } + + public static void assertNotFound(String needle, String haystack) { + assertFalse(needle + " unexpectedly found in:\n" + haystack, haystack.contains(needle)); + } protected static InputStream copy(InputStream in, int remaining) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); @@ -166,4 +72,100 @@ public abstract class CXFTestBase { return new ByteArrayInputStream(out.toByteArray()); } + @Before + public void setUp() { + this.tika = TikaConfig.getDefaultConfig(); + + JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean(); + setUpResources(sf); + setUpProviders(sf); + sf.setAddress(endPoint + "/"); + + BindingFactoryManager manager = sf.getBus().getExtension( + BindingFactoryManager.class + ); + + JAXRSBindingFactory factory = new JAXRSBindingFactory(); + factory.setBus(sf.getBus()); + + manager.registerBindingFactory( + JAXRSBindingFactory.JAXRS_BINDING_ID, + factory + ); + + server = sf.create(); + } + + /** + * Have the test do {@link JAXRSServerFactoryBean#setResourceClasses(Class...)} + * and {@link JAXRSServerFactoryBean#setResourceProvider(Class, org.apache.cxf.jaxrs.lifecycle.ResourceProvider)} + */ + protected abstract void setUpResources(JAXRSServerFactoryBean sf); + + /** + * Have the test do {@link JAXRSServerFactoryBean#setProviders(java.util.List)}, if needed + */ + protected abstract void setUpProviders(JAXRSServerFactoryBean sf); + + @After + public void tearDown() throws Exception { + server.stop(); + server.destroy(); + } + + protected String getStringFromInputStream(InputStream in) throws Exception { + return IOUtils.toString(in); + } + + protected Map<String, String> readZipArchive(InputStream inputStream) throws IOException { + Map<String, String> data = new HashMap<String, String>(); + File tempFile = writeTemporaryArchiveFile(inputStream, "zip"); + ZipFile zip = new ZipFile(tempFile); + Enumeration<ZipArchiveEntry> entries = zip.getEntries(); + while (entries.hasMoreElements()) { + ZipArchiveEntry entry = entries.nextElement(); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + IOUtils.copy(zip.getInputStream(entry), bos); + data.put(entry.getName(), DigestUtils.md5Hex(bos.toByteArray())); + } + + zip.close(); + tempFile.delete(); + return data; + } + + protected String readArchiveText(InputStream inputStream) throws IOException { + File tempFile = writeTemporaryArchiveFile(inputStream, "zip"); + ZipFile zip = new ZipFile(tempFile); + zip.getEntry(UnpackerResource.TEXT_FILENAME); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + IOUtils.copy(zip.getInputStream(zip.getEntry(UnpackerResource.TEXT_FILENAME)), bos); + + zip.close(); + tempFile.delete(); + return bos.toString(IOUtils.UTF_8.name()); + } + + protected Map<String, String> readArchiveFromStream(ArchiveInputStream zip) throws IOException { + Map<String, String> data = new HashMap<String, String>(); + while (true) { + ArchiveEntry entry = zip.getNextEntry(); + if (entry == null) { + break; + } + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + IOUtils.copy(zip, bos); + data.put(entry.getName(), DigestUtils.md5Hex(bos.toByteArray())); + } + + return data; + } + + private File writeTemporaryArchiveFile(InputStream inputStream, String archiveType) throws IOException { + File tempFile = File.createTempFile("tmp-", "." + archiveType); + IOUtils.copy(inputStream, new FileOutputStream(tempFile)); + return tempFile; + } + } Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java?rev=1661200&r1=1661199&r2=1661200&view=diff ============================================================================== --- tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java (original) +++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java Fri Feb 20 19:29:42 2015 @@ -17,88 +17,88 @@ package org.apache.tika.server; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; - -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; - -import javax.ws.rs.core.Response; - -import org.apache.cxf.jaxrs.JAXRSServerFactoryBean; -import org.apache.cxf.jaxrs.client.WebClient; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import javax.ws.rs.core.Response; + +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.cxf.jaxrs.JAXRSServerFactoryBean; +import org.apache.cxf.jaxrs.client.WebClient; import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider; import org.junit.Test; - -public class DetectorResourceTest extends CXFTestBase { - - private static final String DETECT_PATH = "/detect"; - private static final String DETECT_STREAM_PATH = DETECT_PATH + "/stream"; - private static final String FOO_CSV = "foo.csv"; - private static final String CDEC_CSV_NO_EXT = "CDEC_WEATHER_2010_03_02"; - - @Override - protected void setUpResources(JAXRSServerFactoryBean sf) { - sf.setResourceClasses(DetectorResource.class); - sf.setResourceProvider(DetectorResource.class, - new SingletonResourceProvider(new DetectorResource(tika))); - - } - - @Override - protected void setUpProviders(JAXRSServerFactoryBean sf) { - List<Object> providers = new ArrayList<Object>(); - providers.add(new TarWriter()); - providers.add(new ZipWriter()); - providers.add(new TikaServerParseExceptionMapper(false)); - sf.setProviders(providers); - } - - @Test - public void testDetectCsvWithExt() throws Exception { - String url = endPoint + DETECT_STREAM_PATH; - Response response = WebClient - .create(endPoint + DETECT_STREAM_PATH) - .type("text/csv") - .accept("*/*") - .header("Content-Disposition", - "attachment; filename=" + FOO_CSV) - .put(ClassLoader.getSystemResourceAsStream(FOO_CSV)); - assertNotNull(response); - String readMime = getStringFromInputStream((InputStream) response - .getEntity()); - assertEquals("text/csv", readMime); - - } - - @Test - public void testDetectCsvNoExt() throws Exception { - String url = endPoint + DETECT_STREAM_PATH; - Response response = WebClient - .create(endPoint + DETECT_STREAM_PATH) - .type("text/csv") - .accept("*/*") - .header("Content-Disposition", - "attachment; filename=" + CDEC_CSV_NO_EXT) - .put(ClassLoader.getSystemResourceAsStream(CDEC_CSV_NO_EXT)); - assertNotNull(response); - String readMime = getStringFromInputStream((InputStream) response - .getEntity()); - assertEquals("text/plain", readMime); - - // now trick it by adding .csv to the end - response = WebClient - .create(endPoint + DETECT_STREAM_PATH) - .type("text/csv") - .accept("*/*") - .header("Content-Disposition", - "attachment; filename=" + CDEC_CSV_NO_EXT + ".csv") - .put(ClassLoader.getSystemResourceAsStream(CDEC_CSV_NO_EXT)); - assertNotNull(response); - readMime = getStringFromInputStream((InputStream) response.getEntity()); - assertEquals("text/csv", readMime); - - } -} +public class DetectorResourceTest extends CXFTestBase { + + private static final String DETECT_PATH = "/detect"; + private static final String DETECT_STREAM_PATH = DETECT_PATH + "/stream"; + private static final String FOO_CSV = "foo.csv"; + private static final String CDEC_CSV_NO_EXT = "CDEC_WEATHER_2010_03_02"; + + @Override + protected void setUpResources(JAXRSServerFactoryBean sf) { + sf.setResourceClasses(DetectorResource.class); + sf.setResourceProvider(DetectorResource.class, + new SingletonResourceProvider(new DetectorResource(tika))); + + } + + @Override + protected void setUpProviders(JAXRSServerFactoryBean sf) { + List<Object> providers = new ArrayList<Object>(); + providers.add(new TarWriter()); + providers.add(new ZipWriter()); + providers.add(new TikaServerParseExceptionMapper(false)); + sf.setProviders(providers); + + } + + @Test + public void testDetectCsvWithExt() throws Exception { + String url = endPoint + DETECT_STREAM_PATH; + Response response = WebClient + .create(endPoint + DETECT_STREAM_PATH) + .type("text/csv") + .accept("*/*") + .header("Content-Disposition", + "attachment; filename=" + FOO_CSV) + .put(ClassLoader.getSystemResourceAsStream(FOO_CSV)); + assertNotNull(response); + String readMime = getStringFromInputStream((InputStream) response + .getEntity()); + assertEquals("text/csv", readMime); + + } + + @Test + public void testDetectCsvNoExt() throws Exception { + String url = endPoint + DETECT_STREAM_PATH; + Response response = WebClient + .create(endPoint + DETECT_STREAM_PATH) + .type("text/csv") + .accept("*/*") + .header("Content-Disposition", + "attachment; filename=" + CDEC_CSV_NO_EXT) + .put(ClassLoader.getSystemResourceAsStream(CDEC_CSV_NO_EXT)); + assertNotNull(response); + String readMime = getStringFromInputStream((InputStream) response + .getEntity()); + assertEquals("text/plain", readMime); + + // now trick it by adding .csv to the end + response = WebClient + .create(endPoint + DETECT_STREAM_PATH) + .type("text/csv") + .accept("*/*") + .header("Content-Disposition", + "attachment; filename=" + CDEC_CSV_NO_EXT + ".csv") + .put(ClassLoader.getSystemResourceAsStream(CDEC_CSV_NO_EXT)); + assertNotNull(response); + readMime = getStringFromInputStream((InputStream) response.getEntity()); + assertEquals("text/csv", readMime); + + } +}