Repository: any23
Updated Branches:
  refs/heads/master 2b91bbf42 -> 7c6868860
ANY23-314 Service fails to return extraction in case of extraction error Project: http://git-wip-us.apache.org/repos/asf/any23/repo Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/d0e627a9 Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/d0e627a9 Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/d0e627a9 Branch: refs/heads/master Commit: d0e627a957c6ba5ec59ff40ba5a73cf7e52dd1d4 Parents: bcd87e5 Author: Lewis John McGibbney <[email protected]> Authored: Tue Dec 12 13:51:48 2017 -0800 Committer: Lewis John McGibbney <[email protected]> Committed: Tue Dec 12 13:51:48 2017 -0800 ---------------------------------------------------------------------- .../any23/writer/WriterFactoryRegistry.java | 66 ++++++++++---------- .../apache/any23/servlet/RedirectServlet.java | 2 + .../java/org/apache/any23/servlet/Servlet.java | 44 ++++++++----- .../org/apache/any23/servlet/WebResponder.java | 41 ++++++++---- .../servlet/conneg/ContentTypeNegotiator.java | 13 ++-- .../any23/servlet/conneg/MediaRangeSpec.java | 29 +++++---- service/src/main/resources/form.html | 2 +- 7 files changed, 115 insertions(+), 82 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/any23/blob/d0e627a9/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java ---------------------------------------------------------------------- diff --git a/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java b/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java index 454d866..cbe5f9a 100644 --- a/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java +++ b/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java @@ -49,22 +49,46 @@ public class WriterFactoryRegistry { * List of registered writers. */ private final List<WriterFactory> writers = - new ArrayList<WriterFactory>(); + new ArrayList<>(); /** * MIME Type to {@link FormatWriter} class. 
*/ private final Map<String,List<WriterFactory>> mimeToWriter = - new HashMap<String, List<WriterFactory>>(); + new HashMap<>(); /** * Identifier to {@link FormatWriter} class. */ private final Map<String,WriterFactory> idToWriter = - new HashMap<String, WriterFactory>(); + new HashMap<>(); - private List<String> identifiers = new ArrayList<String>(); + private List<String> identifiers = new ArrayList<>(); + public WriterFactoryRegistry() { + ServiceLoader<WriterFactory> serviceLoader = java.util.ServiceLoader.load(WriterFactory.class, this.getClass().getClassLoader()); + + Iterator<WriterFactory> iterator = serviceLoader.iterator(); + + // use while(true) loop so that we can isolate all service loader errors from .next and .hasNext to a single service + while(true) + { + try + { + if(!iterator.hasNext()) + break; + + WriterFactory factory = iterator.next(); + + this.register(factory); + } + catch(ServiceConfigurationError error) + { + LOG.error("Found error loading a WriterFactory", error); + } + } + } + /** * Reads the identifier specified for the given {@link FormatWriter}. * @@ -88,37 +112,13 @@ public class WriterFactoryRegistry { /** * @return the {@link WriterFactoryRegistry} singleton instance. 
*/ - public synchronized static WriterFactoryRegistry getInstance() { + public static synchronized WriterFactoryRegistry getInstance() { if(instance == null) { instance = new WriterFactoryRegistry(); } return instance; } - public WriterFactoryRegistry() { - ServiceLoader<WriterFactory> serviceLoader = java.util.ServiceLoader.load(WriterFactory.class, this.getClass().getClassLoader()); - - Iterator<WriterFactory> iterator = serviceLoader.iterator(); - - // use while(true) loop so that we can isolate all service loader errors from .next and .hasNext to a single service - while(true) - { - try - { - if(!iterator.hasNext()) - break; - - WriterFactory factory = iterator.next(); - - this.register(factory); - } - catch(ServiceConfigurationError error) - { - LOG.error("Found error loading a WriterFactory", error); - } - } - } - /** * Registers a new {@link WriterFactory} to the registry. * @@ -127,7 +127,8 @@ public class WriterFactoryRegistry { * or empty strings or if the identifier has been already defined. */ public synchronized void register(WriterFactory writerClass) { - if(writerClass == null) throw new NullPointerException("writerClass cannot be null."); + if(writerClass == null) + throw new NullPointerException("writerClass cannot be null."); final String id = writerClass.getIdentifier(); final String mimeType = writerClass.getMimeType(); if(id == null || id.trim().length() == 0) { @@ -143,7 +144,7 @@ public class WriterFactoryRegistry { identifiers.add(writerClass.getIdentifier()); List<WriterFactory> writerClasses = mimeToWriter.get(mimeType); if(writerClasses == null) { - writerClasses = new ArrayList<WriterFactory>(); + writerClasses = new ArrayList<>(); mimeToWriter.put(mimeType, writerClasses); } writerClasses.add(writerClass); @@ -199,8 +200,7 @@ public class WriterFactoryRegistry { * @return a list of matching writers or an empty list. 
*/ public synchronized Collection<WriterFactory> getWritersByMimeType(String mimeType) { - final List<WriterFactory> writerClasses = mimeToWriter.get(mimeType); - return writerClasses; + return mimeToWriter.get(mimeType); } /** http://git-wip-us.apache.org/repos/asf/any23/blob/d0e627a9/service/src/main/java/org/apache/any23/servlet/RedirectServlet.java ---------------------------------------------------------------------- diff --git a/service/src/main/java/org/apache/any23/servlet/RedirectServlet.java b/service/src/main/java/org/apache/any23/servlet/RedirectServlet.java index 454c782..ede383d 100644 --- a/service/src/main/java/org/apache/any23/servlet/RedirectServlet.java +++ b/service/src/main/java/org/apache/any23/servlet/RedirectServlet.java @@ -31,11 +31,13 @@ import java.io.IOException; */ public class RedirectServlet extends HttpServlet { + @Override protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { doGet(request, response); } + @Override protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { // Show /resources/form.html for GET requests to the app's root http://git-wip-us.apache.org/repos/asf/any23/blob/d0e627a9/service/src/main/java/org/apache/any23/servlet/Servlet.java ---------------------------------------------------------------------- diff --git a/service/src/main/java/org/apache/any23/servlet/Servlet.java b/service/src/main/java/org/apache/any23/servlet/Servlet.java index b60ad5f..b93662e 100644 --- a/service/src/main/java/org/apache/any23/servlet/Servlet.java +++ b/service/src/main/java/org/apache/any23/servlet/Servlet.java @@ -56,6 +56,9 @@ public class Servlet extends HttpServlet { private static final long serialVersionUID = 8207685628715421336L; + private final static Pattern schemeAndSingleSlashRegex = + Pattern.compile("^[a-zA-Z][a-zA-Z0-9.+-]*:/[^/]"); + // RFC 3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / 
"." ) private final static Pattern schemeRegex = Pattern.compile("^[a-zA-Z][a-zA-Z0-9.+-]*:"); @@ -157,7 +160,8 @@ public class Servlet extends HttpServlet { } private String getFormatFromRequest(HttpServletRequest request) { - if (request.getPathInfo() == null) return "best"; + if (request.getPathInfo() == null) + return "best"; String[] args = request.getPathInfo().split("/", 3); if (args.length < 2 || "".equals(args[1])) { if (request.getParameter("format") == null) { @@ -170,7 +174,8 @@ public class Servlet extends HttpServlet { } private String getInputIRIFromRequest(HttpServletRequest request) { - if (request.getPathInfo() == null) return null; + if (request.getPathInfo() == null) + return null; String[] args = request.getPathInfo().split("/", 3); if (args.length < 3) { if (request.getParameter("uri") != null) { @@ -202,20 +207,21 @@ public class Servlet extends HttpServlet { return schemeRegex.matcher(uri).find(); } - private final static Pattern schemeAndSingleSlashRegex = - Pattern.compile("^[a-zA-Z][a-zA-Z0-9.+-]*:/[^/]"); - private boolean hasOnlySingleSlashAfterScheme(String uri) { return schemeAndSingleSlashRegex.matcher(uri).find(); } private String getContentTypeHeader(HttpServletRequest req) { - if (req.getHeader("Content-Type") == null) return null; - if ("".equals(req.getHeader("Content-Type"))) return null; - String contentType = req.getHeader("Content-Type"); + String cType = "Content-Type"; + if (req.getHeader(cType) == null) + return null; + if ("".equals(req.getHeader(cType))) + return null; + String contentType = req.getHeader(cType); // strip off parameters such as ";charset=UTF-8" - int index = contentType.indexOf(";"); - if (index == -1) return contentType; + int index = contentType.indexOf(';'); + if (index == -1) + return contentType; return contentType.substring(0, index); } @@ -251,14 +257,18 @@ public class Servlet extends HttpServlet { } private ValidationMode getValidationMode(HttpServletRequest request) { - final String PARAMETER 
= "validation-mode"; - final String validationMode = request.getParameter(PARAMETER); - if (validationMode == null) return ValidationMode.None; - if ("none".equalsIgnoreCase(validationMode)) return ValidationMode.None; - if ("validate".equalsIgnoreCase(validationMode)) return ValidationMode.Validate; - if ("validate-fix".equalsIgnoreCase(validationMode)) return ValidationMode.ValidateAndFix; + final String parameter = "validation-mode"; + final String validationMode = request.getParameter(parameter); + if (validationMode == null) + return ValidationMode.None; + if ("none".equalsIgnoreCase(validationMode)) + return ValidationMode.None; + if ("validate".equalsIgnoreCase(validationMode)) + return ValidationMode.Validate; + if ("validate-fix".equalsIgnoreCase(validationMode)) + return ValidationMode.ValidateAndFix; throw new IllegalArgumentException( - String.format("Invalid value '%s' for '%s' parameter.", validationMode, PARAMETER) + String.format("Invalid value '%s' for '%s' parameter.", validationMode, parameter) ); } http://git-wip-us.apache.org/repos/asf/any23/blob/d0e627a9/service/src/main/java/org/apache/any23/servlet/WebResponder.java ---------------------------------------------------------------------- diff --git a/service/src/main/java/org/apache/any23/servlet/WebResponder.java b/service/src/main/java/org/apache/any23/servlet/WebResponder.java index 658b0e7..3101e09 100644 --- a/service/src/main/java/org/apache/any23/servlet/WebResponder.java +++ b/service/src/main/java/org/apache/any23/servlet/WebResponder.java @@ -33,6 +33,7 @@ import org.apache.any23.writer.CountingTripleHandler; import org.apache.any23.writer.FormatWriter; import org.apache.any23.writer.ReportingTripleHandler; import org.apache.any23.writer.TripleHandler; +import org.apache.any23.writer.TripleHandlerException; import org.apache.any23.writer.WriterFactory; import org.apache.any23.writer.WriterFactoryRegistry; import sun.security.validator.ValidatorException; @@ -94,7 +95,7 @@ class 
WebResponder { this.any23servlet = any23servlet; this.response = response; this.runner = new Any23(); - runner.setHTTPUserAgent("Any23-Servlet"); + runner.setHTTPUserAgent("Apache Any23 Servlet http://any23.org/"); } protected Any23 getRunner() { @@ -107,9 +108,11 @@ class WebResponder { String format, boolean report, boolean annotate ) throws IOException { - if (in == null) return; - if (!initRdfWriter(format, report, annotate)) return; - final ExtractionReport er; + if (in == null) + return; + if (!initRdfWriter(format, report, annotate)) + return; + ExtractionReport er = null; try { er = runner.extract(eps, in, rdfWriter); rdfWriter.close(); @@ -135,9 +138,20 @@ class WebResponder { sendError(502, "Could not fetch input.", ioe, null, report); return; } catch (ExtractionException e) { - // Extraction error. - any23servlet.log("Could not parse input", e); - sendError(502, "Could not parse input.", e, null, report); + if (rdfWriter != null) { + try { + rdfWriter.close(); + } catch (TripleHandlerException the) { + throw new RuntimeException("Error while closing TripleHandler", the); + } + } + + // Extraction error. Although there is a critical error we still wish + // to return accurate, partial extraction results to the user + String extractionError = "Failed to fully parse input. 
The extraction result, at the bottom " + + "of this response, if any, will contain extractions only up until the extraction error."; + any23servlet.log(extractionError, e); + sendError(502, extractionError, e, er, report); return; } catch (Exception e) { any23servlet.log("Internal error", e); @@ -207,7 +221,8 @@ class WebResponder { for(Extractor<?> extractor : er.getMatchingExtractors()) { final String name = extractor.getDescription().getExtractorName(); final Collection<IssueReport.Issue> extractorIssues = er.getExtractorIssues(name); - if(extractorIssues.isEmpty()) continue; + if(extractorIssues.isEmpty()) + continue; ps.println( String.format("<extractorIssues extractor=\"%s\">", name)); for(IssueReport.Issue issue : er.getExtractorIssues(name)) { ps.println( @@ -232,7 +247,7 @@ class WebResponder { // Human readable error message. if(msg != null) { - ps.printf("<message>%s</message>\n", msg); + ps.printf("<message>%s</message>%n", msg); } else { ps.print("<message/>\n"); } @@ -278,7 +293,9 @@ class WebResponder { throws IOException { response.setStatus(code); response.setContentType("text/plain"); - final PrintStream ps = new PrintStream(response.getOutputStream()); + final ServletOutputStream sos = response.getOutputStream(); + final PrintStream ps = new PrintStream(sos); + final byte[] data = byteOutStream.toByteArray(); if (report) { try { printHeader(ps); @@ -292,6 +309,7 @@ class WebResponder { ps.println("================================================================"); e.printStackTrace(ps); ps.println("================================================================"); + printData(data, ps); } } } @@ -343,8 +361,7 @@ class WebResponder { } else { return null; } - final WriterFactory writer = writerRegistry.getWriterByIdentifier(finalFormat); - return writer; + return writerRegistry.getWriterByIdentifier(finalFormat); } } 
http://git-wip-us.apache.org/repos/asf/any23/blob/d0e627a9/service/src/main/java/org/apache/any23/servlet/conneg/ContentTypeNegotiator.java ---------------------------------------------------------------------- diff --git a/service/src/main/java/org/apache/any23/servlet/conneg/ContentTypeNegotiator.java b/service/src/main/java/org/apache/any23/servlet/conneg/ContentTypeNegotiator.java index b74951f..c0e1de3 100644 --- a/service/src/main/java/org/apache/any23/servlet/conneg/ContentTypeNegotiator.java +++ b/service/src/main/java/org/apache/any23/servlet/conneg/ContentTypeNegotiator.java @@ -29,11 +29,11 @@ import java.util.regex.Pattern; */ public class ContentTypeNegotiator { - private List<VariantSpec> variantSpecs = new ArrayList<VariantSpec>(); + private List<VariantSpec> variantSpecs = new ArrayList<>(); private List<MediaRangeSpec> defaultAcceptRanges = Collections.singletonList(MediaRangeSpec.parseRange("*/*")); - private Collection<AcceptHeaderOverride> userAgentOverrides = new ArrayList<AcceptHeaderOverride>(); + private Collection<AcceptHeaderOverride> userAgentOverrides = new ArrayList<>(); protected ContentTypeNegotiator(){} @@ -123,7 +123,7 @@ public class ContentTypeNegotiator { protected class VariantSpec { private MediaRangeSpec type; - private List<MediaRangeSpec> aliases = new ArrayList<MediaRangeSpec>(); + private List<MediaRangeSpec> aliases = new ArrayList<>(); private boolean isDefault = false; public VariantSpec(String mediaType) { @@ -182,7 +182,8 @@ public class ContentTypeNegotiator { } private void evaluateVariantAlias(MediaRangeSpec variant, MediaRangeSpec isAliasFor) { - if (variant.getBestMatch(ranges) == null) return; + if (variant.getBestMatch(ranges) == null) + return; double q = variant.getBestMatch(ranges).getQuality(); if (q * variant.getQuality() > bestMatchingQuality) { bestMatchingVariant = isAliasFor; @@ -215,10 +216,6 @@ public class ContentTypeNegotiator { this.replacement = replacement; } - boolean matches(String 
acceptHeader) { - return matches(acceptHeader, null); - } - boolean matches(String acceptHeader, String userAgentHeader) { return (userAgentPattern == null || userAgentPattern.matcher(userAgentHeader).find()) http://git-wip-us.apache.org/repos/asf/any23/blob/d0e627a9/service/src/main/java/org/apache/any23/servlet/conneg/MediaRangeSpec.java ---------------------------------------------------------------------- diff --git a/service/src/main/java/org/apache/any23/servlet/conneg/MediaRangeSpec.java b/service/src/main/java/org/apache/any23/servlet/conneg/MediaRangeSpec.java index db7b543..b71e3ee 100644 --- a/service/src/main/java/org/apache/any23/servlet/conneg/MediaRangeSpec.java +++ b/service/src/main/java/org/apache/any23/servlet/conneg/MediaRangeSpec.java @@ -110,8 +110,8 @@ public class MediaRangeSpec { if ("*".equals(type) && !"*".equals(subtype)) { return null; } - List<String> parameterNames = new ArrayList<String>(); - List<String> parameterValues = new ArrayList<String>(); + List<String> parameterNames = new ArrayList<>(); + List<String> parameterValues = new ArrayList<>(); while (m.find()) { String name = m.group(1).toLowerCase(); String value = (m.group(3) == null) ? 
m.group(2) : unescape(m.group(3)); @@ -136,7 +136,7 @@ public class MediaRangeSpec { * @return A List of MediaRangeSpecs */ public static List<MediaRangeSpec> parseAccept(String s) { - List<MediaRangeSpec> result = new ArrayList<MediaRangeSpec>(); + List<MediaRangeSpec> result = new ArrayList<>(); Matcher m = mediaRangePattern.matcher(s); while (m.find()) { result.add(parseRange(m.group())); @@ -175,7 +175,7 @@ public class MediaRangeSpec { result.append(";"); result.append(parameterNames.get(i)); result.append("="); - String value = (String) parameterValues.get(i); + String value = parameterValues.get(i); if (tokenPattern.matcher(value).matches()) { result.append(value); } else { @@ -205,7 +205,7 @@ public class MediaRangeSpec { public String getParameter(String parameterName) { for (int i = 0; i < parameterNames.size(); i++) { - if (parameterNames.get(i).equals(parameterName.toLowerCase())) { + if (parameterNames.get(i).equalsIgnoreCase(parameterName)) { return parameterValues.get(i); } } @@ -225,16 +225,22 @@ public class MediaRangeSpec { } public int getPrecedence(MediaRangeSpec range) { - if (range.isWildcardType()) return 1; - if (!range.type.equals(type)) return 0; - if (range.isWildcardSubtype()) return 2; - if (!range.subtype.equals(subtype)) return 0; - if (range.getParameterNames().isEmpty()) return 3; + if (range.isWildcardType()) + return 1; + if (!range.type.equals(type)) + return 0; + if (range.isWildcardSubtype()) + return 2; + if (!range.subtype.equals(subtype)) + return 0; + if (range.getParameterNames().isEmpty()) + return 3; int result = 3; for (int i = 0; i < range.getParameterNames().size(); i++) { String name = range.getParameterNames().get(i); String value = range.getParameter(name); - if (!value.equals(getParameter(name))) return 0; + if (!value.equals(getParameter(name))) + return 0; result++; } return result; @@ -254,6 +260,7 @@ public class MediaRangeSpec { return result; } + @Override public String toString() { return mediaType + ";q=" 
+ quality; } http://git-wip-us.apache.org/repos/asf/any23/blob/d0e627a9/service/src/main/resources/form.html ---------------------------------------------------------------------- diff --git a/service/src/main/resources/form.html b/service/src/main/resources/form.html index e11a019..374d017 100644 --- a/service/src/main/resources/form.html +++ b/service/src/main/resources/form.html @@ -394,7 +394,7 @@ Content-Length: 174 <footer class="footer"> <div class="container-fluid"> - Copyright © ${project.inceptionYear}-2014 The <a href="http://www.apache.org/">Apache Software Foundation</a>. All Rights Reserved.<br/> Apache Any23, Apache, the Apache feather logo, and the Apache Any23 project logos are trademarks of The Apache Software Foundation. All other marks mentioned may be trademarks or registered trademarks of their respective owners. + Copyright © ${project.inceptionYear}-2018 The <a href="http://www.apache.org/">Apache Software Foundation</a>. All Rights Reserved.<br/> Apache Any23, Apache, the Apache feather logo, and the Apache Any23 project logos are trademarks of The Apache Software Foundation. All other marks mentioned may be trademarks or registered trademarks of their respective owners. </div> </footer>
