Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaMimeTypes.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaMimeTypes.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaMimeTypes.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaMimeTypes.java Wed May 13 13:49:36 2015 @@ -1,177 +1,177 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.server.resource; - -import javax.ws.rs.GET; -import javax.ws.rs.Path; -import javax.ws.rs.Produces; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.SortedMap; -import java.util.TreeMap; - -import org.apache.tika.config.TikaConfig; -import org.apache.tika.mime.MediaType; -import org.apache.tika.mime.MediaTypeRegistry; -import org.apache.tika.parser.CompositeParser; -import org.apache.tika.parser.Parser; -import org.apache.tika.server.HTMLHelper; -import org.eclipse.jetty.util.ajax.JSON; - -/** - * <p>Provides details of all the mimetypes known to Apache Tika, - * similar to <em>--list-supported-types</em> with the Tika CLI. - */ -@Path("/mime-types") -public class TikaMimeTypes { - private TikaConfig tika; - private HTMLHelper html; - - public TikaMimeTypes(TikaConfig tika) { - this.tika = tika; - this.html = new HTMLHelper(); - } - - @GET - @Produces("text/html") - public String getMimeTypesHTML() { - StringBuffer h = new StringBuffer(); - html.generateHeader(h, "Apache Tika Supported Mime Types"); - - // Get our types - List<MediaTypeDetails> types = getMediaTypes(); - - // Get the first type in each section - SortedMap<String, String> firstType = new TreeMap<String, String>(); - for (MediaTypeDetails type : types) { - if (!firstType.containsKey(type.type.getType())) { - firstType.put(type.type.getType(), type.type.toString()); - } - } - h.append("<ul>"); - for (String section : firstType.keySet()) { - h.append("<li><a href=\"#").append(firstType.get(section)).append("\">").append(section).append("</a></li>\n"); - } - h.append("</ul>"); - - // Output all of them - for (MediaTypeDetails type : types) { - h.append("<a name=\"").append(type.type).append("\"></a>\n"); - h.append("<h2>").append(type.type).append("</h2>\n"); - - for (MediaType alias : type.aliases) { - h.append("<div>Alias: ").append(alias).append("</div>\n"); - } - if (type.supertype != null) { - h.append("<div>Super Type: <a href=\"#").append(type.supertype).append("\">").append(type.supertype).append("</a></div>\n"); - } - - if (type.parser != null) { - h.append("<div>Parser: ").append(type.parser).append("</div>\n"); - } - } - - html.generateFooter(h); - return h.toString(); - } - - @GET - @Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON) - public String getMimeTypesJSON() { - Map<String, Object> details = new HashMap<String, Object>(); - - for (MediaTypeDetails type : getMediaTypes()) { - Map<String, Object> typeDets = new HashMap<String, Object>(); - - typeDets.put("alias", type.aliases); - if (type.supertype != null) { - typeDets.put("supertype", type.supertype); - } - if (type.parser != null) { - typeDets.put("parser", type.parser); - } - - details.put(type.type.toString(), typeDets); - } - - return JSON.toString(details); - } - - @GET - @Produces("text/plain") - public String getMimeTypesPlain() { - StringBuffer text = new StringBuffer(); - - for (MediaTypeDetails type : getMediaTypes()) { - text.append(type.type.toString()); - text.append("\n"); - - for (MediaType alias : type.aliases) { - text.append(" alias: ").append(alias).append("\n"); - } - if (type.supertype != null) { - text.append(" supertype: ").append(type.supertype.toString()).append("\n"); - } - - if (type.parser != null) { - text.append(" parser: ").append(type.parser).append("\n"); - } - } - - return text.toString(); - } - - protected List<MediaTypeDetails> getMediaTypes() { - MediaTypeRegistry registry = tika.getMediaTypeRegistry(); - Map<MediaType, Parser> parsers = ((CompositeParser) tika.getParser()).getParsers(); - List<MediaTypeDetails> types = - new ArrayList<TikaMimeTypes.MediaTypeDetails>(registry.getTypes().size()); - - for (MediaType type : registry.getTypes()) { - MediaTypeDetails details = new MediaTypeDetails(); - details.type = type; - details.aliases = registry.getAliases(type).toArray(new MediaType[0]); - - MediaType supertype = registry.getSupertype(type); - if (supertype != null && !MediaType.OCTET_STREAM.equals(supertype)) { - details.supertype = supertype; - } - - Parser p = parsers.get(type); - if (p != null) { - if (p instanceof CompositeParser) { - p = ((CompositeParser) p).getParsers().get(type); - } - details.parser = p.getClass().getName(); - } - - types.add(details); - } - - return types; - } - - private static class MediaTypeDetails { - private MediaType type; - private MediaType[] aliases; - private MediaType supertype; - private String parser; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.server.resource; + +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.apache.tika.config.TikaConfig; +import org.apache.tika.mime.MediaType; +import org.apache.tika.mime.MediaTypeRegistry; +import org.apache.tika.parser.CompositeParser; +import org.apache.tika.parser.Parser; +import org.apache.tika.server.HTMLHelper; +import org.eclipse.jetty.util.ajax.JSON; + +/** + * <p>Provides details of all the mimetypes known to Apache Tika, + * similar to <em>--list-supported-types</em> with the Tika CLI. + */ +@Path("/mime-types") +public class TikaMimeTypes { + private TikaConfig tika; + private HTMLHelper html; + + public TikaMimeTypes(TikaConfig tika) { + this.tika = tika; + this.html = new HTMLHelper(); + } + + @GET + @Produces("text/html") + public String getMimeTypesHTML() { + StringBuffer h = new StringBuffer(); + html.generateHeader(h, "Apache Tika Supported Mime Types"); + + // Get our types + List<MediaTypeDetails> types = getMediaTypes(); + + // Get the first type in each section + SortedMap<String, String> firstType = new TreeMap<String, String>(); + for (MediaTypeDetails type : types) { + if (!firstType.containsKey(type.type.getType())) { + firstType.put(type.type.getType(), type.type.toString()); + } + } + h.append("<ul>"); + for (String section : firstType.keySet()) { + h.append("<li><a href=\"#").append(firstType.get(section)).append("\">").append(section).append("</a></li>\n"); + } + h.append("</ul>"); + + // Output all of them + for (MediaTypeDetails type : types) { + h.append("<a name=\"").append(type.type).append("\"></a>\n"); + h.append("<h2>").append(type.type).append("</h2>\n"); + + for (MediaType alias : type.aliases) { + h.append("<div>Alias: ").append(alias).append("</div>\n"); + } + if (type.supertype != null) { + h.append("<div>Super Type: <a href=\"#").append(type.supertype).append("\">").append(type.supertype).append("</a></div>\n"); + } + + if (type.parser != null) { + h.append("<div>Parser: ").append(type.parser).append("</div>\n"); + } + } + + html.generateFooter(h); + return h.toString(); + } + + @GET + @Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON) + public String getMimeTypesJSON() { + Map<String, Object> details = new HashMap<String, Object>(); + + for (MediaTypeDetails type : getMediaTypes()) { + Map<String, Object> typeDets = new HashMap<String, Object>(); + + typeDets.put("alias", type.aliases); + if (type.supertype != null) { + typeDets.put("supertype", type.supertype); + } + if (type.parser != null) { + typeDets.put("parser", type.parser); + } + + details.put(type.type.toString(), typeDets); + } + + return JSON.toString(details); + } + + @GET + @Produces("text/plain") + public String getMimeTypesPlain() { + StringBuffer text = new StringBuffer(); + + for (MediaTypeDetails type : getMediaTypes()) { + text.append(type.type.toString()); + text.append("\n"); + + for (MediaType alias : type.aliases) { + text.append(" alias: ").append(alias).append("\n"); + } + if (type.supertype != null) { + text.append(" supertype: ").append(type.supertype.toString()).append("\n"); + } + + if (type.parser != null) { + text.append(" parser: ").append(type.parser).append("\n"); + } + } + + return text.toString(); + } + + protected List<MediaTypeDetails> getMediaTypes() { + MediaTypeRegistry registry = tika.getMediaTypeRegistry(); + Map<MediaType, Parser> parsers = ((CompositeParser) tika.getParser()).getParsers(); + List<MediaTypeDetails> types = + new ArrayList<TikaMimeTypes.MediaTypeDetails>(registry.getTypes().size()); + + for (MediaType type : registry.getTypes()) { + MediaTypeDetails details = new MediaTypeDetails(); + details.type = type; + details.aliases = registry.getAliases(type).toArray(new MediaType[0]); + + MediaType supertype = registry.getSupertype(type); + if (supertype != null && !MediaType.OCTET_STREAM.equals(supertype)) { + details.supertype = supertype; + } + + Parser p = parsers.get(type); + if (p != null) { + if (p instanceof CompositeParser) { + p = ((CompositeParser) p).getParsers().get(type); + } + details.parser = p.getClass().getName(); + } + + types.add(details); + } + + return types; + } + + private static class MediaTypeDetails { + private MediaType type; + private MediaType[] aliases; + private MediaType supertype; + private String parser; + } +}
Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaParsers.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaParsers.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaParsers.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaParsers.java Wed May 13 13:49:36 2015 @@ -1,235 +1,235 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.server.resource; - -import javax.ws.rs.GET; -import javax.ws.rs.Path; -import javax.ws.rs.Produces; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.tika.config.TikaConfig; -import org.apache.tika.mime.MediaType; -import org.apache.tika.parser.CompositeParser; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.ParserDecorator; -import org.apache.tika.server.HTMLHelper; -import org.eclipse.jetty.util.ajax.JSON; - -/** - * <p>Provides details of all the {@link Parser}s registered with - * Apache Tika, similar to <em>--list-parsers</em> and - * <em>--list-parser-details</em> within the Tika CLI. - */ -@Path("/parsers") -public class TikaParsers { - private static final ParseContext EMPTY_PC = new ParseContext(); - private TikaConfig tika; - private HTMLHelper html; - - public TikaParsers(TikaConfig tika) { - this.tika = tika; - this.html = new HTMLHelper(); - } - - @GET - @Path("/details") - @Produces("text/html") - public String getParserDetailsHTML() { - return getParsersHTML(true); - } - - @GET - @Produces("text/html") - public String getParsersHTML() { - return getParsersHTML(false); - } - - protected String getParsersHTML(boolean withMimeTypes) { - ParserDetails p = new ParserDetails(tika.getParser()); - - StringBuffer h = new StringBuffer(); - html.generateHeader(h, "Parsers available to Apache Tika"); - parserAsHTML(p, withMimeTypes, h, 2); - html.generateFooter(h); - return h.toString(); - } - - private void parserAsHTML(ParserDetails p, boolean withMimeTypes, StringBuffer html, int level) { - html.append("<h"); - html.append(level); - html.append(">"); - html.append(p.shortName); - html.append("</h"); - html.append(level); - html.append(">"); - html.append("<p>Class: "); - html.append(p.className); - html.append("</p>"); - if (p.isDecorated) { - html.append("<p>Decorated Parser</p>"); - } - if (p.isComposite) { - html.append("<p>Composite Parser</p>"); - for (Parser cp : p.childParsers) { - parserAsHTML(new ParserDetails(cp), withMimeTypes, html, level + 1); - } - } else if (withMimeTypes) { - html.append("<p>Mime Types:"); - html.append("<ul>"); - for (MediaType mt : p.supportedTypes) { - html.append("<li>"); - html.append(mt.toString()); - html.append("</li>"); - } - html.append("</ul>"); - html.append("</p>"); - } - } - - @GET - @Path("/details") - @Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON) - public String getParserDetailsJSON() { - return getParsersJSON(true); - } - - @GET - @Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON) - public String getParsersJSON() { - return getParsersJSON(false); - } - - protected String getParsersJSON(boolean withMimeTypes) { - Map<String, Object> details = new HashMap<String, Object>(); - parserAsMap(new ParserDetails(tika.getParser()), withMimeTypes, details); - return JSON.toString(details); - } - - private void parserAsMap(ParserDetails p, boolean withMimeTypes, Map<String, Object> details) { - details.put("name", p.className); - details.put("composite", p.isComposite); - details.put("decorated", p.isDecorated); - - if (p.isComposite) { - List<Map<String, Object>> c = new ArrayList<Map<String, Object>>(); - for (Parser cp : p.childParsers) { - Map<String, Object> cdet = new HashMap<String, Object>(); - parserAsMap(new ParserDetails(cp), withMimeTypes, cdet); - c.add(cdet); - } - details.put("children", c); - } else if (withMimeTypes) { - List<String> mts = new ArrayList<String>(p.supportedTypes.size()); - for (MediaType mt : p.supportedTypes) { - mts.add(mt.toString()); - } - details.put("supportedTypes", mts); - } - } - - @GET - @Path("/details") - @Produces("text/plain") - public String getParserDetailssPlain() { - return getParsersPlain(true); - } - - @GET - @Produces("text/plain") - public String getParsersPlain() { - return getParsersPlain(false); - } - - protected String getParsersPlain(boolean withMimeTypes) { - StringBuffer text = new StringBuffer(); - renderParser(new ParserDetails(tika.getParser()), withMimeTypes, text, ""); - return text.toString(); - } - - private void renderParser(ParserDetails p, boolean withMimeTypes, StringBuffer text, String indent) { - String nextIndent = indent + " "; - - text.append(indent); - text.append(p.className); - if (p.isDecorated) { - text.append(" (Decorated Parser)"); - } - if (p.isComposite) { - text.append(" (Composite Parser):\n"); - - for (Parser cp : p.childParsers) { - renderParser(new ParserDetails(cp), withMimeTypes, text, nextIndent); - } - } else { - text.append("\n"); - if (withMimeTypes) { - for (MediaType mt : p.supportedTypes) { - text.append(nextIndent); - text.append("Supports: "); - text.append(mt.toString()); - text.append("\n"); - } - } - } - } - - private static class ParserDetails { - private String className; - private String shortName; - private boolean isComposite; - private boolean isDecorated; - private Set<MediaType> supportedTypes; - private List<Parser> childParsers; - - private ParserDetails(Parser p) { - if (p instanceof ParserDecorator) { - isDecorated = true; - p = ((ParserDecorator) p).getWrappedParser(); - } - - className = p.getClass().getName(); - shortName = className.substring(className.lastIndexOf('.') + 1); - - if (p instanceof CompositeParser) { - isComposite = true; - supportedTypes = Collections.emptySet(); - - // Get the unique set of child parsers - Set<Parser> children = new HashSet<Parser>( - ((CompositeParser) p).getParsers(EMPTY_PC).values()); - // Sort it by class name - childParsers = new ArrayList<Parser>(children); - Collections.sort(childParsers, new Comparator<Parser>() { @Override - public int compare(Parser p1, Parser p2) { - return p1.getClass().getName().compareTo(p2.getClass().getName()); - } - }); - } else { - supportedTypes = p.getSupportedTypes(EMPTY_PC); - } - } - } +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.server.resource; + +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.tika.config.TikaConfig; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.CompositeParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.ParserDecorator; +import org.apache.tika.server.HTMLHelper; +import org.eclipse.jetty.util.ajax.JSON; + +/** + * <p>Provides details of all the {@link Parser}s registered with + * Apache Tika, similar to <em>--list-parsers</em> and + * <em>--list-parser-details</em> within the Tika CLI. + */ +@Path("/parsers") +public class TikaParsers { + private static final ParseContext EMPTY_PC = new ParseContext(); + private TikaConfig tika; + private HTMLHelper html; + + public TikaParsers(TikaConfig tika) { + this.tika = tika; + this.html = new HTMLHelper(); + } + + @GET + @Path("/details") + @Produces("text/html") + public String getParserDetailsHTML() { + return getParsersHTML(true); + } + + @GET + @Produces("text/html") + public String getParsersHTML() { + return getParsersHTML(false); + } + + protected String getParsersHTML(boolean withMimeTypes) { + ParserDetails p = new ParserDetails(tika.getParser()); + + StringBuffer h = new StringBuffer(); + html.generateHeader(h, "Parsers available to Apache Tika"); + parserAsHTML(p, withMimeTypes, h, 2); + html.generateFooter(h); + return h.toString(); + } + + private void parserAsHTML(ParserDetails p, boolean withMimeTypes, StringBuffer html, int level) { + html.append("<h"); + html.append(level); + html.append(">"); + html.append(p.shortName); + html.append("</h"); + html.append(level); + html.append(">"); + html.append("<p>Class: "); + html.append(p.className); + html.append("</p>"); + if (p.isDecorated) { + html.append("<p>Decorated Parser</p>"); + } + if (p.isComposite) { + html.append("<p>Composite Parser</p>"); + for (Parser cp : p.childParsers) { + parserAsHTML(new ParserDetails(cp), withMimeTypes, html, level + 1); + } + } else if (withMimeTypes) { + html.append("<p>Mime Types:"); + html.append("<ul>"); + for (MediaType mt : p.supportedTypes) { + html.append("<li>"); + html.append(mt.toString()); + html.append("</li>"); + } + html.append("</ul>"); + html.append("</p>"); + } + } + + @GET + @Path("/details") + @Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON) + public String getParserDetailsJSON() { + return getParsersJSON(true); + } + + @GET + @Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON) + public String getParsersJSON() { + return getParsersJSON(false); + } + + protected String getParsersJSON(boolean withMimeTypes) { + Map<String, Object> details = new HashMap<String, Object>(); + parserAsMap(new ParserDetails(tika.getParser()), withMimeTypes, details); + return JSON.toString(details); + } + + private void parserAsMap(ParserDetails p, boolean withMimeTypes, Map<String, Object> details) { + details.put("name", p.className); + details.put("composite", p.isComposite); + details.put("decorated", p.isDecorated); + + if (p.isComposite) { + List<Map<String, Object>> c = new ArrayList<Map<String, Object>>(); + for (Parser cp : p.childParsers) { + Map<String, Object> cdet = new HashMap<String, Object>(); + parserAsMap(new ParserDetails(cp), withMimeTypes, cdet); + c.add(cdet); + } + details.put("children", c); + } else if (withMimeTypes) { + List<String> mts = new ArrayList<String>(p.supportedTypes.size()); + for (MediaType mt : p.supportedTypes) { + mts.add(mt.toString()); + } + details.put("supportedTypes", mts); + } + } + + @GET + @Path("/details") + @Produces("text/plain") + public String getParserDetailssPlain() { + return getParsersPlain(true); + } + + @GET + @Produces("text/plain") + public String getParsersPlain() { + return getParsersPlain(false); + } + + protected String getParsersPlain(boolean withMimeTypes) { + StringBuffer text = new StringBuffer(); + renderParser(new ParserDetails(tika.getParser()), withMimeTypes, text, ""); + return text.toString(); + } + + private void renderParser(ParserDetails p, boolean withMimeTypes, StringBuffer text, String indent) { + String nextIndent = indent + " "; + + text.append(indent); + text.append(p.className); + if (p.isDecorated) { + text.append(" (Decorated Parser)"); + } + if (p.isComposite) { + text.append(" (Composite Parser):\n"); + + for (Parser cp : p.childParsers) { + renderParser(new ParserDetails(cp), withMimeTypes, text, nextIndent); + } + } else { + text.append("\n"); + if (withMimeTypes) { + for (MediaType mt : p.supportedTypes) { + text.append(nextIndent); + text.append("Supports: "); + text.append(mt.toString()); + text.append("\n"); + } + } + } + } + + private static class ParserDetails { + private String className; + private String shortName; + private boolean isComposite; + private boolean isDecorated; + private Set<MediaType> supportedTypes; + private List<Parser> childParsers; + + private ParserDetails(Parser p) { + if (p instanceof ParserDecorator) { + isDecorated = true; + p = ((ParserDecorator) p).getWrappedParser(); + } + + className = p.getClass().getName(); + shortName = className.substring(className.lastIndexOf('.') + 1); + + if (p instanceof CompositeParser) { + isComposite = true; + supportedTypes = Collections.emptySet(); + + // Get the unique set of child parsers + Set<Parser> children = new HashSet<Parser>( + ((CompositeParser) p).getParsers(EMPTY_PC).values()); + // Sort it by class name + childParsers = new ArrayList<Parser>(children); + Collections.sort(childParsers, new Comparator<Parser>() { @Override + public int compare(Parser p1, Parser p2) { + return p1.getClass().getName().compareTo(p2.getClass().getName()); + } + }); + } else { + supportedTypes = p.getSupportedTypes(EMPTY_PC); + } + } + } } \ No newline at end of file Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/writer/TarWriter.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/writer/TarWriter.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/writer/TarWriter.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/writer/TarWriter.java Wed May 13 13:49:36 2015 @@ -1,68 +1,68 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.server.writer; - -import javax.ws.rs.Produces; -import javax.ws.rs.WebApplicationException; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.MultivaluedMap; -import javax.ws.rs.ext.MessageBodyWriter; -import javax.ws.rs.ext.Provider; - -import java.io.IOException; -import java.io.OutputStream; -import java.lang.annotation.Annotation; -import java.lang.reflect.Type; -import java.util.Map; - -import org.apache.commons.compress.archivers.tar.TarArchiveEntry; -import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; - -@Provider -@Produces("application/x-tar") -public class TarWriter implements MessageBodyWriter<Map<String, byte[]>> { - private static void tarStoreBuffer(TarArchiveOutputStream zip, String name, byte[] dataBuffer) throws IOException { - TarArchiveEntry entry = new TarArchiveEntry(name); - - entry.setSize(dataBuffer.length); - - zip.putArchiveEntry(entry); - - zip.write(dataBuffer); - - zip.closeArchiveEntry(); - } - - public boolean isWriteable(Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return Map.class.isAssignableFrom(type); - } - - public long getSize(Map<String, byte[]> stringMap, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return -1; - } - - public void writeTo(Map<String, byte[]> parts, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException { - TarArchiveOutputStream zip = new TarArchiveOutputStream(entityStream); - - for (Map.Entry<String, byte[]> entry : parts.entrySet()) { - tarStoreBuffer(zip, entry.getKey(), entry.getValue()); - } - - zip.close(); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.server.writer; + +import javax.ws.rs.Produces; +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.MultivaluedMap; +import javax.ws.rs.ext.MessageBodyWriter; +import javax.ws.rs.ext.Provider; + +import java.io.IOException; +import java.io.OutputStream; +import java.lang.annotation.Annotation; +import java.lang.reflect.Type; +import java.util.Map; + +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; + +@Provider +@Produces("application/x-tar") +public class TarWriter implements MessageBodyWriter<Map<String, byte[]>> { + private static void tarStoreBuffer(TarArchiveOutputStream zip, String name, byte[] dataBuffer) throws IOException { + TarArchiveEntry entry = new TarArchiveEntry(name); + + entry.setSize(dataBuffer.length); + + zip.putArchiveEntry(entry); + + zip.write(dataBuffer); + + zip.closeArchiveEntry(); + } + + public boolean isWriteable(Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { + return Map.class.isAssignableFrom(type); + } + + public long getSize(Map<String, byte[]> stringMap, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { + return -1; + } + + public void writeTo(Map<String, byte[]> parts, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException { + TarArchiveOutputStream zip = new TarArchiveOutputStream(entityStream); + + for (Map.Entry<String, byte[]> entry : parts.entrySet()) { + tarStoreBuffer(zip, entry.getKey(), entry.getValue()); + } + + zip.close(); + } +} Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/writer/TextMessageBodyWriter.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/writer/TextMessageBodyWriter.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/writer/TextMessageBodyWriter.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/writer/TextMessageBodyWriter.java Wed May 13 13:49:36 2015 @@ -1,75 +1,75 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.server.writer; - -import javax.ws.rs.Produces; -import javax.ws.rs.WebApplicationException; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.MultivaluedMap; -import javax.ws.rs.ext.MessageBodyWriter; -import javax.ws.rs.ext.Provider; - -import java.io.IOException; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.lang.annotation.Annotation; -import java.lang.reflect.Type; - -import org.apache.tika.io.IOUtils; -import org.apache.tika.metadata.Metadata; - -/** - * Returns simple text string for a particular metadata value. - * This assumes that the metadata object only has one key; - * if there is more than one key or no keys, this will throw a webapp exception. - * <p/> - * This will choose the first value returned for the one key. - */ -@Provider -@Produces(MediaType.TEXT_PLAIN) -public class TextMessageBodyWriter implements MessageBodyWriter<Metadata> { - - public boolean isWriteable(Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return mediaType.equals(MediaType.TEXT_PLAIN_TYPE) && Metadata.class.isAssignableFrom(type); - } - - public long getSize(Metadata data, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return -1; - } - - @Override - @SuppressWarnings("resource") - public void writeTo(Metadata metadata, Class<?> type, Type genericType, Annotation[] annotations, - MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, - WebApplicationException { - - if (metadata.names().length != 1) { - throw new WebApplicationException("Metadata object must only have one entry!"); - } - Writer writer = new OutputStreamWriter(entityStream, IOUtils.UTF_8); - - for (String name : metadata.names()) { - writer.write(metadata.get(name)); - } - - // Don't close, just flush the stream - writer.flush(); - } -} - +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.server.writer; + +import javax.ws.rs.Produces; +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.MultivaluedMap; +import javax.ws.rs.ext.MessageBodyWriter; +import javax.ws.rs.ext.Provider; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.lang.annotation.Annotation; +import java.lang.reflect.Type; + +import org.apache.tika.io.IOUtils; +import org.apache.tika.metadata.Metadata; + +/** + * Returns simple text string for a particular metadata value. + * This assumes that the metadata object only has one key; + * if there is more than one key or no keys, this will throw a webapp exception. + * <p/> + * This will choose the first value returned for the one key. + */ +@Provider +@Produces(MediaType.TEXT_PLAIN) +public class TextMessageBodyWriter implements MessageBodyWriter<Metadata> { + + public boolean isWriteable(Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { + return mediaType.equals(MediaType.TEXT_PLAIN_TYPE) && Metadata.class.isAssignableFrom(type); + } + + public long getSize(Metadata data, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { + return -1; + } + + @Override + @SuppressWarnings("resource") + public void writeTo(Metadata metadata, Class<?> type, Type genericType, Annotation[] annotations, + MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, + WebApplicationException { + + if (metadata.names().length != 1) { + throw new WebApplicationException("Metadata object must only have one entry!"); + } + Writer writer = new OutputStreamWriter(entityStream, IOUtils.UTF_8); + + for (String name : metadata.names()) { + writer.write(metadata.get(name)); + } + + // Don't close, just flush the stream + writer.flush(); + } +} + Modified: tika/trunk/tika-server/src/main/java/org/apache/tika/server/writer/ZipWriter.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/writer/ZipWriter.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-server/src/main/java/org/apache/tika/server/writer/ZipWriter.java (original) +++ tika/trunk/tika-server/src/main/java/org/apache/tika/server/writer/ZipWriter.java Wed May 13 13:49:36 2015 @@ -1,86 +1,86 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.server.writer; - -import javax.ws.rs.Produces; -import javax.ws.rs.WebApplicationException; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.MultivaluedMap; -import javax.ws.rs.ext.MessageBodyWriter; -import javax.ws.rs.ext.Provider; - -import java.io.IOException; -import java.io.OutputStream; -import java.lang.annotation.Annotation; -import java.lang.reflect.Type; -import java.util.Map; -import java.util.UUID; -import java.util.zip.CRC32; -import java.util.zip.ZipEntry; -import java.util.zip.ZipException; -import java.util.zip.ZipOutputStream; - -import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; -import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; - -@Provider -@Produces("application/zip") -public class ZipWriter implements MessageBodyWriter<Map<String, byte[]>> { - private static void zipStoreBuffer(ZipArchiveOutputStream zip, String name, byte[] dataBuffer) throws IOException { - ZipEntry zipEntry = new ZipEntry(name != null ? name : UUID.randomUUID().toString()); - zipEntry.setMethod(ZipOutputStream.STORED); - - zipEntry.setSize(dataBuffer.length); - CRC32 crc32 = new CRC32(); - crc32.update(dataBuffer); - zipEntry.setCrc(crc32.getValue()); - - try { - zip.putArchiveEntry(new ZipArchiveEntry(zipEntry)); - } catch (ZipException ex) { - if (name != null) { - zipStoreBuffer(zip, "x-" + name, dataBuffer); - return; - } - } - - zip.write(dataBuffer); - - zip.closeArchiveEntry(); - } - - public boolean isWriteable(Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return Map.class.isAssignableFrom(type); - } - - public long getSize(Map<String, byte[]> stringMap, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { - return -1; - } - - public void writeTo(Map<String, byte[]> parts, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException { - ZipArchiveOutputStream zip = new ZipArchiveOutputStream(entityStream); - - zip.setMethod(ZipArchiveOutputStream.STORED); - - for (Map.Entry<String, byte[]> entry : parts.entrySet()) { - zipStoreBuffer(zip, entry.getKey(), entry.getValue()); - } - - zip.close(); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.server.writer; + +import javax.ws.rs.Produces; +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.MultivaluedMap; +import javax.ws.rs.ext.MessageBodyWriter; +import javax.ws.rs.ext.Provider; + +import java.io.IOException; +import java.io.OutputStream; +import java.lang.annotation.Annotation; +import java.lang.reflect.Type; +import java.util.Map; +import java.util.UUID; +import java.util.zip.CRC32; +import java.util.zip.ZipEntry; +import java.util.zip.ZipException; +import java.util.zip.ZipOutputStream; + +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; + +@Provider +@Produces("application/zip") +public class ZipWriter implements MessageBodyWriter<Map<String, byte[]>> { + private static void zipStoreBuffer(ZipArchiveOutputStream zip, String name, byte[] dataBuffer) throws IOException { + ZipEntry zipEntry = new ZipEntry(name != null ? name : UUID.randomUUID().toString()); + zipEntry.setMethod(ZipOutputStream.STORED); + + zipEntry.setSize(dataBuffer.length); + CRC32 crc32 = new CRC32(); + crc32.update(dataBuffer); + zipEntry.setCrc(crc32.getValue()); + + try { + zip.putArchiveEntry(new ZipArchiveEntry(zipEntry)); + } catch (ZipException ex) { + if (name != null) { + zipStoreBuffer(zip, "x-" + name, dataBuffer); + return; + } + } + + zip.write(dataBuffer); + + zip.closeArchiveEntry(); + } + + public boolean isWriteable(Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { + return Map.class.isAssignableFrom(type); + } + + public long getSize(Map<String, byte[]> stringMap, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType) { + return -1; + } + + public void writeTo(Map<String, byte[]> parts, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException { + ZipArchiveOutputStream zip = new ZipArchiveOutputStream(entityStream); + + zip.setMethod(ZipArchiveOutputStream.STORED); + + for (Map.Entry<String, byte[]> entry : parts.entrySet()) { + zipStoreBuffer(zip, entry.getKey(), entry.getValue()); + } + + zip.close(); + } +} Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java (original) +++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/CXFTestBase.java Wed May 13 13:49:36 2015 @@ -1,172 +1,172 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.server; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.Map; - -import org.apache.commons.codec.digest.DigestUtils; -import org.apache.commons.compress.archivers.ArchiveEntry; -import org.apache.commons.compress.archivers.ArchiveInputStream; -import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; -import org.apache.commons.compress.archivers.zip.ZipFile; -import org.apache.cxf.binding.BindingFactoryManager; -import org.apache.cxf.endpoint.Server; -import org.apache.cxf.jaxrs.JAXRSBindingFactory; -import org.apache.cxf.jaxrs.JAXRSServerFactoryBean; -import org.apache.tika.config.TikaConfig; -import org.apache.tika.io.IOUtils; -import org.apache.tika.server.resource.UnpackerResource; -import org.junit.After; -import org.junit.Before; - -public abstract class CXFTestBase { - protected static final String endPoint = - "http://localhost:" + TikaServerCli.DEFAULT_PORT; - protected Server server; - protected TikaConfig tika; - - public static void assertContains(String needle, String haystack) { - assertTrue(needle + " not found in:\n" + haystack, haystack.contains(needle)); - } - - public static void assertNotFound(String needle, String haystack) { - assertFalse(needle + " unexpectedly found in:\n" + haystack, haystack.contains(needle)); - } - - protected static InputStream copy(InputStream in, int remaining) throws IOException { - ByteArrayOutputStream out = new ByteArrayOutputStream(); - while (remaining > 0) { - byte[] bytes = new byte[remaining]; - int n = in.read(bytes); - if (n <= 0) { - break; - } - out.write(bytes, 0, n); - remaining -= n; - } - return new ByteArrayInputStream(out.toByteArray()); - } - - @Before - public void setUp() { - this.tika = TikaConfig.getDefaultConfig(); - - JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean(); - setUpResources(sf); - setUpProviders(sf); - sf.setAddress(endPoint + "/"); - - BindingFactoryManager manager = sf.getBus().getExtension( - BindingFactoryManager.class - ); - - JAXRSBindingFactory factory = new JAXRSBindingFactory(); - factory.setBus(sf.getBus()); - - manager.registerBindingFactory( - JAXRSBindingFactory.JAXRS_BINDING_ID, - factory - ); - - server = sf.create(); - } - - /** - * Have the test do {@link JAXRSServerFactoryBean#setResourceClasses(Class...)} - * and {@link JAXRSServerFactoryBean#setResourceProvider(Class, org.apache.cxf.jaxrs.lifecycle.ResourceProvider)} - */ - protected abstract void setUpResources(JAXRSServerFactoryBean sf); - - /** - * Have the test do {@link JAXRSServerFactoryBean#setProviders(java.util.List)}, if needed - */ - protected abstract void setUpProviders(JAXRSServerFactoryBean sf); - - @After - public void tearDown() throws Exception { - server.stop(); - server.destroy(); - } - - protected String getStringFromInputStream(InputStream in) throws Exception { - return IOUtils.toString(in); - } - - protected Map<String, String> readZipArchive(InputStream inputStream) throws IOException { - Map<String, String> data = new HashMap<String, String>(); - File tempFile = writeTemporaryArchiveFile(inputStream, "zip"); - ZipFile zip = new ZipFile(tempFile); - Enumeration<ZipArchiveEntry> entries = zip.getEntries(); - while (entries.hasMoreElements()) { - ZipArchiveEntry entry = entries.nextElement(); - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - IOUtils.copy(zip.getInputStream(entry), bos); - data.put(entry.getName(), DigestUtils.md5Hex(bos.toByteArray())); - } - - zip.close(); - tempFile.delete(); - return data; - } - - protected String readArchiveText(InputStream inputStream) throws IOException { - File tempFile = writeTemporaryArchiveFile(inputStream, "zip"); - ZipFile zip = new ZipFile(tempFile); - zip.getEntry(UnpackerResource.TEXT_FILENAME); - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - IOUtils.copy(zip.getInputStream(zip.getEntry(UnpackerResource.TEXT_FILENAME)), bos); - - zip.close(); - tempFile.delete(); - return bos.toString(IOUtils.UTF_8.name()); - } - - protected Map<String, String> readArchiveFromStream(ArchiveInputStream zip) throws IOException { - Map<String, String> data = new HashMap<String, String>(); - while (true) { - ArchiveEntry entry = zip.getNextEntry(); - if (entry == null) { - break; - } - - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - IOUtils.copy(zip, bos); - data.put(entry.getName(), DigestUtils.md5Hex(bos.toByteArray())); - } - - return data; - } - - private File writeTemporaryArchiveFile(InputStream inputStream, String archiveType) throws IOException { - File tempFile = File.createTempFile("tmp-", "." + archiveType); - IOUtils.copy(inputStream, new FileOutputStream(tempFile)); - return tempFile; - } - -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.server; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.ArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipFile; +import org.apache.cxf.binding.BindingFactoryManager; +import org.apache.cxf.endpoint.Server; +import org.apache.cxf.jaxrs.JAXRSBindingFactory; +import org.apache.cxf.jaxrs.JAXRSServerFactoryBean; +import org.apache.tika.config.TikaConfig; +import org.apache.tika.io.IOUtils; +import org.apache.tika.server.resource.UnpackerResource; +import org.junit.After; +import org.junit.Before; + +public abstract class CXFTestBase { + protected static final String endPoint = + "http://localhost:" + TikaServerCli.DEFAULT_PORT; + protected Server server; + protected TikaConfig tika; + + public static void assertContains(String needle, String haystack) { + assertTrue(needle + " not found in:\n" + haystack, haystack.contains(needle)); + } + + public static void assertNotFound(String needle, String haystack) { + assertFalse(needle + " unexpectedly found in:\n" + haystack, haystack.contains(needle)); + } + + protected static InputStream copy(InputStream in, int remaining) throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + while (remaining > 0) { + byte[] bytes = new byte[remaining]; + int n = in.read(bytes); + if (n <= 0) { + break; + } + out.write(bytes, 0, n); + remaining -= n; + } + return new ByteArrayInputStream(out.toByteArray()); + } + + @Before + public void setUp() { + this.tika = TikaConfig.getDefaultConfig(); + + JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean(); + setUpResources(sf); + setUpProviders(sf); + sf.setAddress(endPoint + "/"); + + BindingFactoryManager manager = sf.getBus().getExtension( + BindingFactoryManager.class + ); + + JAXRSBindingFactory factory = new JAXRSBindingFactory(); + factory.setBus(sf.getBus()); + + manager.registerBindingFactory( + JAXRSBindingFactory.JAXRS_BINDING_ID, + factory + ); + + server = sf.create(); + } + + /** + * Have the test do {@link JAXRSServerFactoryBean#setResourceClasses(Class...)} + * and {@link JAXRSServerFactoryBean#setResourceProvider(Class, org.apache.cxf.jaxrs.lifecycle.ResourceProvider)} + */ + protected abstract void setUpResources(JAXRSServerFactoryBean sf); + + /** + * Have the test do {@link JAXRSServerFactoryBean#setProviders(java.util.List)}, if needed + */ + protected abstract void setUpProviders(JAXRSServerFactoryBean sf); + + @After + public void tearDown() throws Exception { + server.stop(); + server.destroy(); + } + + protected String getStringFromInputStream(InputStream in) throws Exception { + return IOUtils.toString(in); + } + + protected Map<String, String> readZipArchive(InputStream inputStream) throws IOException { + Map<String, String> data = new HashMap<String, String>(); + File tempFile = writeTemporaryArchiveFile(inputStream, "zip"); + ZipFile zip = new ZipFile(tempFile); + Enumeration<ZipArchiveEntry> entries = zip.getEntries(); + while (entries.hasMoreElements()) { + ZipArchiveEntry entry = entries.nextElement(); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + IOUtils.copy(zip.getInputStream(entry), bos); + data.put(entry.getName(), DigestUtils.md5Hex(bos.toByteArray())); + } + + zip.close(); + tempFile.delete(); + return data; + } + + protected String readArchiveText(InputStream inputStream) throws IOException { + File tempFile = writeTemporaryArchiveFile(inputStream, "zip"); + ZipFile zip = new ZipFile(tempFile); + zip.getEntry(UnpackerResource.TEXT_FILENAME); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + IOUtils.copy(zip.getInputStream(zip.getEntry(UnpackerResource.TEXT_FILENAME)), bos); + + zip.close(); + tempFile.delete(); + return bos.toString(IOUtils.UTF_8.name()); + } + + protected Map<String, String> readArchiveFromStream(ArchiveInputStream zip) throws IOException { + Map<String, String> data = new HashMap<String, String>(); + while (true) { + ArchiveEntry entry = zip.getNextEntry(); + if (entry == null) { + break; + } + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + IOUtils.copy(zip, bos); + data.put(entry.getName(), DigestUtils.md5Hex(bos.toByteArray())); + } + + return data; + } + + private File writeTemporaryArchiveFile(InputStream inputStream, String archiveType) throws IOException { + File tempFile = File.createTempFile("tmp-", "." + archiveType); + IOUtils.copy(inputStream, new FileOutputStream(tempFile)); + return tempFile; + } + +} Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java (original) +++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/DetectorResourceTest.java Wed May 13 13:49:36 2015 @@ -1,107 +1,107 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.server; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; - -import javax.ws.rs.core.Response; - -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; - -import org.apache.cxf.jaxrs.JAXRSServerFactoryBean; -import org.apache.cxf.jaxrs.client.WebClient; -import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider; -import org.apache.tika.server.resource.DetectorResource; -import org.apache.tika.server.writer.TarWriter; -import org.apache.tika.server.writer.ZipWriter; -import org.junit.Test; - -public class DetectorResourceTest extends CXFTestBase { - - private static final String DETECT_PATH = "/detect"; - private static final String DETECT_STREAM_PATH = DETECT_PATH + "/stream"; - private static final String FOO_CSV = "foo.csv"; - private static final String CDEC_CSV_NO_EXT = "CDEC_WEATHER_2010_03_02"; - - @Override - protected void setUpResources(JAXRSServerFactoryBean sf) { - sf.setResourceClasses(DetectorResource.class); - sf.setResourceProvider(DetectorResource.class, - new SingletonResourceProvider(new DetectorResource(tika))); - - } - - @Override - protected void setUpProviders(JAXRSServerFactoryBean sf) { - List<Object> providers = new ArrayList<Object>(); - providers.add(new TarWriter()); - providers.add(new ZipWriter()); - providers.add(new TikaServerParseExceptionMapper(false)); - sf.setProviders(providers); - - } - - @Test - public void testDetectCsvWithExt() throws Exception { - String url = endPoint + DETECT_STREAM_PATH; - Response response = WebClient - .create(endPoint + DETECT_STREAM_PATH) - .type("text/csv") - .accept("*/*") - .header("Content-Disposition", - "attachment; filename=" + FOO_CSV) - .put(ClassLoader.getSystemResourceAsStream(FOO_CSV)); - assertNotNull(response); - String readMime = getStringFromInputStream((InputStream) response - .getEntity()); - assertEquals("text/csv", readMime); - - } - - @Test - public void testDetectCsvNoExt() throws Exception { - String url = endPoint + DETECT_STREAM_PATH; - Response response = WebClient - .create(endPoint + DETECT_STREAM_PATH) - .type("text/csv") - .accept("*/*") - .header("Content-Disposition", - "attachment; filename=" + CDEC_CSV_NO_EXT) - .put(ClassLoader.getSystemResourceAsStream(CDEC_CSV_NO_EXT)); - assertNotNull(response); - String readMime = getStringFromInputStream((InputStream) response - .getEntity()); - assertEquals("text/plain", readMime); - - // now trick it by adding .csv to the end - response = WebClient - .create(endPoint + DETECT_STREAM_PATH) - .type("text/csv") - .accept("*/*") - .header("Content-Disposition", - "attachment; filename=" + CDEC_CSV_NO_EXT + ".csv") - .put(ClassLoader.getSystemResourceAsStream(CDEC_CSV_NO_EXT)); - assertNotNull(response); - readMime = getStringFromInputStream((InputStream) response.getEntity()); - assertEquals("text/csv", readMime); - - } -} +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.server; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import javax.ws.rs.core.Response; + +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.cxf.jaxrs.JAXRSServerFactoryBean; +import org.apache.cxf.jaxrs.client.WebClient; +import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider; +import org.apache.tika.server.resource.DetectorResource; +import org.apache.tika.server.writer.TarWriter; +import org.apache.tika.server.writer.ZipWriter; +import org.junit.Test; + +public class DetectorResourceTest extends CXFTestBase { + + private static final String DETECT_PATH = "/detect"; + private static final String DETECT_STREAM_PATH = DETECT_PATH + "/stream"; + private static final String FOO_CSV = "foo.csv"; + private static final String CDEC_CSV_NO_EXT = "CDEC_WEATHER_2010_03_02"; + + @Override + protected void setUpResources(JAXRSServerFactoryBean sf) { + sf.setResourceClasses(DetectorResource.class); + sf.setResourceProvider(DetectorResource.class, + new SingletonResourceProvider(new DetectorResource(tika))); + + } + + @Override + protected void setUpProviders(JAXRSServerFactoryBean sf) { + List<Object> providers = new ArrayList<Object>(); + providers.add(new TarWriter()); + providers.add(new ZipWriter()); + providers.add(new TikaServerParseExceptionMapper(false)); + sf.setProviders(providers); + + } + + @Test + public void testDetectCsvWithExt() throws Exception { + String url = endPoint + DETECT_STREAM_PATH; + Response response = WebClient + .create(endPoint + DETECT_STREAM_PATH) + .type("text/csv") + .accept("*/*") + .header("Content-Disposition", + "attachment; filename=" + FOO_CSV) + .put(ClassLoader.getSystemResourceAsStream(FOO_CSV)); + assertNotNull(response); + String readMime = getStringFromInputStream((InputStream) response + .getEntity()); + assertEquals("text/csv", readMime); + + } + + @Test + public void testDetectCsvNoExt() throws Exception { + String url = endPoint + DETECT_STREAM_PATH; + Response response = WebClient + .create(endPoint + DETECT_STREAM_PATH) + .type("text/csv") + .accept("*/*") + .header("Content-Disposition", + "attachment; filename=" + CDEC_CSV_NO_EXT) + .put(ClassLoader.getSystemResourceAsStream(CDEC_CSV_NO_EXT)); + assertNotNull(response); + String readMime = getStringFromInputStream((InputStream) response + .getEntity()); + assertEquals("text/plain", readMime); + + // now trick it by adding .csv to the end + response = WebClient + .create(endPoint + DETECT_STREAM_PATH) + .type("text/csv") + .accept("*/*") + .header("Content-Disposition", + "attachment; filename=" + CDEC_CSV_NO_EXT + ".csv") + .put(ClassLoader.getSystemResourceAsStream(CDEC_CSV_NO_EXT)); + assertNotNull(response); + readMime = getStringFromInputStream((InputStream) response.getEntity()); + assertEquals("text/csv", readMime); + + } +} Modified: tika/trunk/tika-server/src/test/java/org/apache/tika/server/LanguageResourceTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/LanguageResourceTest.java?rev=1679211&r1=1679210&r2=1679211&view=diff ============================================================================== --- tika/trunk/tika-server/src/test/java/org/apache/tika/server/LanguageResourceTest.java (original) +++ tika/trunk/tika-server/src/test/java/org/apache/tika/server/LanguageResourceTest.java Wed May 13 13:49:36 2015 @@ -1,109 +1,109 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.tika.server; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; - -import javax.ws.rs.core.Response; - -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; - -import org.apache.cxf.jaxrs.JAXRSServerFactoryBean; -import org.apache.cxf.jaxrs.client.WebClient; -import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider; -import org.apache.tika.server.resource.LanguageResource; -import org.apache.tika.server.writer.TarWriter; -import org.apache.tika.server.writer.ZipWriter; -import org.junit.Test; - -public class LanguageResourceTest extends CXFTestBase { - - private static final String LANG_PATH = "/language"; - private static final String LANG_STREAM_PATH = LANG_PATH + "/stream"; - private static final String LANG_STRING_PATH = LANG_PATH + "/string"; - private static final String ENGLISH_STRING = "This is English!"; - private static final String FRENCH_STRING = "comme çi comme ça"; - - @Override - protected void setUpResources(JAXRSServerFactoryBean sf) { - sf.setResourceClasses(LanguageResource.class); - sf.setResourceProvider(LanguageResource.class, - new SingletonResourceProvider(new LanguageResource(tika))); - - } - - @Override - protected void setUpProviders(JAXRSServerFactoryBean sf) { - List<Object> providers = new ArrayList<Object>(); - providers.add(new TarWriter()); - providers.add(new ZipWriter()); - providers.add(new TikaServerParseExceptionMapper(false)); - sf.setProviders(providers); - - } - - @Test - public void testDetectEnglishString() throws Exception { - String url = endPoint + LANG_STRING_PATH; - Response response = WebClient.create(url).type("text/plain") - .accept("text/plain").put(ENGLISH_STRING); - assertNotNull(response); - String readLang = getStringFromInputStream((InputStream) response - .getEntity()); - assertEquals("en", readLang); - } - - @Test - public void testDetectFrenchString() throws Exception { - String url = endPoint + LANG_STRING_PATH; - Response response = WebClient.create(url).type("text/plain") - .accept("text/plain").put(FRENCH_STRING); - assertNotNull(response); - String readLang = getStringFromInputStream((InputStream) response - .getEntity()); - assertEquals("fr", readLang); - } - - @Test - public void testDetectEnglishFile() throws Exception { - String url = endPoint + LANG_STREAM_PATH; - Response response = WebClient.create(url).type("text/plain") - .accept("text/plain") - .put(ClassLoader.getSystemResourceAsStream("english.txt")); - assertNotNull(response); - String readLang = getStringFromInputStream((InputStream) response - .getEntity()); - assertEquals("en", readLang); - } - - @Test - public void testDetectFrenchFile() throws Exception { - String url = endPoint + LANG_STREAM_PATH; - Response response = WebClient.create(url).type("text/plain") - .accept("text/plain") - .put(ClassLoader.getSystemResourceAsStream("french.txt")); - assertNotNull(response); - String readLang = getStringFromInputStream((InputStream) response - .getEntity()); - assertEquals("fr", readLang); - } - -} +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.server; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import javax.ws.rs.core.Response; + +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.cxf.jaxrs.JAXRSServerFactoryBean; +import org.apache.cxf.jaxrs.client.WebClient; +import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider; +import org.apache.tika.server.resource.LanguageResource; +import org.apache.tika.server.writer.TarWriter; +import org.apache.tika.server.writer.ZipWriter; +import org.junit.Test; + +public class LanguageResourceTest extends CXFTestBase { + + private static final String LANG_PATH = "/language"; + private static final String LANG_STREAM_PATH = LANG_PATH + "/stream"; + private static final String LANG_STRING_PATH = LANG_PATH + "/string"; + private static final String ENGLISH_STRING = "This is English!"; + private static final String FRENCH_STRING = "comme çi comme ça"; + + @Override + protected void setUpResources(JAXRSServerFactoryBean sf) { + sf.setResourceClasses(LanguageResource.class); + sf.setResourceProvider(LanguageResource.class, + new SingletonResourceProvider(new LanguageResource(tika))); + + } + + @Override + protected void setUpProviders(JAXRSServerFactoryBean sf) { + List<Object> providers = new ArrayList<Object>(); + providers.add(new TarWriter()); + providers.add(new ZipWriter()); + providers.add(new TikaServerParseExceptionMapper(false)); + sf.setProviders(providers); + + } + + @Test + public void testDetectEnglishString() throws Exception { + String url = endPoint + LANG_STRING_PATH; + Response response = WebClient.create(url).type("text/plain") + .accept("text/plain").put(ENGLISH_STRING); + assertNotNull(response); + String readLang = getStringFromInputStream((InputStream) response + .getEntity()); + assertEquals("en", readLang); + } + + @Test + public void testDetectFrenchString() throws Exception { + String url = endPoint + LANG_STRING_PATH; + Response response = WebClient.create(url).type("text/plain") + .accept("text/plain").put(FRENCH_STRING); + assertNotNull(response); + String readLang = getStringFromInputStream((InputStream) response + .getEntity()); + assertEquals("fr", readLang); + } + + @Test + public void testDetectEnglishFile() throws Exception { + String url = endPoint + LANG_STREAM_PATH; + Response response = WebClient.create(url).type("text/plain") + .accept("text/plain") + .put(ClassLoader.getSystemResourceAsStream("english.txt")); + assertNotNull(response); + String readLang = getStringFromInputStream((InputStream) response + .getEntity()); + assertEquals("en", readLang); + } + + @Test + public void testDetectFrenchFile() throws Exception { + String url = endPoint + LANG_STREAM_PATH; + Response response = WebClient.create(url).type("text/plain") + .accept("text/plain") + .put(ClassLoader.getSystemResourceAsStream("french.txt")); + assertNotNull(response); + String readLang = getStringFromInputStream((InputStream) response + .getEntity()); + assertEquals("fr", readLang); + } + +}
