Repository: any23
Updated Branches:
  refs/heads/master 2b91bbf42 -> 7c6868860


ANY23-314 Service fails to return extraction in case of extraction error


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/d0e627a9
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/d0e627a9
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/d0e627a9

Branch: refs/heads/master
Commit: d0e627a957c6ba5ec59ff40ba5a73cf7e52dd1d4
Parents: bcd87e5
Author: Lewis John McGibbney <[email protected]>
Authored: Tue Dec 12 13:51:48 2017 -0800
Committer: Lewis John McGibbney <[email protected]>
Committed: Tue Dec 12 13:51:48 2017 -0800

----------------------------------------------------------------------
 .../any23/writer/WriterFactoryRegistry.java     | 66 ++++++++++----------
 .../apache/any23/servlet/RedirectServlet.java   |  2 +
 .../java/org/apache/any23/servlet/Servlet.java  | 44 ++++++++-----
 .../org/apache/any23/servlet/WebResponder.java  | 41 ++++++++----
 .../servlet/conneg/ContentTypeNegotiator.java   | 13 ++--
 .../any23/servlet/conneg/MediaRangeSpec.java    | 29 +++++----
 service/src/main/resources/form.html            |  2 +-
 7 files changed, 115 insertions(+), 82 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/d0e627a9/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java
----------------------------------------------------------------------
diff --git 
a/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java 
b/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java
index 454d866..cbe5f9a 100644
--- a/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java
+++ b/api/src/main/java/org/apache/any23/writer/WriterFactoryRegistry.java
@@ -49,22 +49,46 @@ public class WriterFactoryRegistry {
      * List of registered writers.
      */
     private final List<WriterFactory> writers =
-            new ArrayList<WriterFactory>();
+            new ArrayList<>();
 
     /**
      * MIME Type to {@link FormatWriter} class.
      */
     private final Map<String,List<WriterFactory>> mimeToWriter =
-            new HashMap<String, List<WriterFactory>>();
+            new HashMap<>();
 
     /**
      * Identifier to {@link FormatWriter} class.
      */
     private final Map<String,WriterFactory> idToWriter =
-            new HashMap<String, WriterFactory>();
+            new HashMap<>();
 
-    private List<String> identifiers = new ArrayList<String>();
+    private List<String> identifiers = new ArrayList<>();
 
+    public WriterFactoryRegistry() {
+      ServiceLoader<WriterFactory> serviceLoader = 
java.util.ServiceLoader.load(WriterFactory.class, 
this.getClass().getClassLoader());
+      
+      Iterator<WriterFactory> iterator = serviceLoader.iterator();
+      
+      // use while(true) loop so that we can isolate all service loader errors 
from .next and .hasNext to a single service
+      while(true)
+      {
+          try
+          {
+              if(!iterator.hasNext())
+                  break;
+              
+              WriterFactory factory = iterator.next();
+              
+              this.register(factory);
+          }
+          catch(ServiceConfigurationError error)
+          {
+              LOG.error("Found error loading a WriterFactory", error);
+          }
+      }
+    }
+    
     /**
      * Reads the identifier specified for the given {@link FormatWriter}.
      *
@@ -88,37 +112,13 @@ public class WriterFactoryRegistry {
     /**
      * @return the {@link WriterFactoryRegistry} singleton instance.
      */
-    public synchronized static WriterFactoryRegistry getInstance() {
+    public static synchronized WriterFactoryRegistry getInstance() {
         if(instance == null) {
             instance = new WriterFactoryRegistry();
         }
         return instance;
     }
 
-    public WriterFactoryRegistry() {
-        ServiceLoader<WriterFactory> serviceLoader = 
java.util.ServiceLoader.load(WriterFactory.class, 
this.getClass().getClassLoader());
-        
-        Iterator<WriterFactory> iterator = serviceLoader.iterator();
-        
-        // use while(true) loop so that we can isolate all service loader 
errors from .next and .hasNext to a single service
-        while(true)
-        {
-            try
-            {
-                if(!iterator.hasNext())
-                    break;
-                
-                WriterFactory factory = iterator.next();
-                
-                this.register(factory);
-            }
-            catch(ServiceConfigurationError error)
-            {
-                LOG.error("Found error loading a WriterFactory", error);
-            }
-        }
-    }
-
     /**
      * Registers a new {@link WriterFactory} to the registry.
      *
@@ -127,7 +127,8 @@ public class WriterFactoryRegistry {
      *                                  or empty strings or if the identifier 
has been already defined.
      */
     public synchronized void register(WriterFactory writerClass) {
-        if(writerClass == null) throw new NullPointerException("writerClass 
cannot be null.");
+        if(writerClass == null)
+            throw new NullPointerException("writerClass cannot be null.");
         final String id       = writerClass.getIdentifier();
         final String mimeType = writerClass.getMimeType();
         if(id == null || id.trim().length() == 0) {
@@ -143,7 +144,7 @@ public class WriterFactoryRegistry {
         identifiers.add(writerClass.getIdentifier());
         List<WriterFactory> writerClasses = mimeToWriter.get(mimeType);
         if(writerClasses == null) {
-            writerClasses = new ArrayList<WriterFactory>();
+            writerClasses = new ArrayList<>();
             mimeToWriter.put(mimeType, writerClasses);
         }
         writerClasses.add(writerClass);
@@ -199,8 +200,7 @@ public class WriterFactoryRegistry {
      * @return a list of matching writers or an empty list.
      */
     public synchronized Collection<WriterFactory> getWritersByMimeType(String 
mimeType) {
-        final List<WriterFactory> writerClasses = mimeToWriter.get(mimeType);
-        return writerClasses;
+        return mimeToWriter.get(mimeType);
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/any23/blob/d0e627a9/service/src/main/java/org/apache/any23/servlet/RedirectServlet.java
----------------------------------------------------------------------
diff --git 
a/service/src/main/java/org/apache/any23/servlet/RedirectServlet.java 
b/service/src/main/java/org/apache/any23/servlet/RedirectServlet.java
index 454c782..ede383d 100644
--- a/service/src/main/java/org/apache/any23/servlet/RedirectServlet.java
+++ b/service/src/main/java/org/apache/any23/servlet/RedirectServlet.java
@@ -31,11 +31,13 @@ import java.io.IOException;
  */
 public class RedirectServlet extends HttpServlet {
     
+    @Override
     protected void doPost(HttpServletRequest request, HttpServletResponse 
response)
     throws ServletException, IOException {
         doGet(request, response);
     }
 
+    @Override
     protected void doGet(HttpServletRequest request, HttpServletResponse 
response)
     throws ServletException, IOException {
         // Show /resources/form.html for GET requests to the app's root

http://git-wip-us.apache.org/repos/asf/any23/blob/d0e627a9/service/src/main/java/org/apache/any23/servlet/Servlet.java
----------------------------------------------------------------------
diff --git a/service/src/main/java/org/apache/any23/servlet/Servlet.java 
b/service/src/main/java/org/apache/any23/servlet/Servlet.java
index b60ad5f..b93662e 100644
--- a/service/src/main/java/org/apache/any23/servlet/Servlet.java
+++ b/service/src/main/java/org/apache/any23/servlet/Servlet.java
@@ -56,6 +56,9 @@ public class Servlet extends HttpServlet {
 
     private static final long serialVersionUID = 8207685628715421336L;
 
+    private final static Pattern schemeAndSingleSlashRegex =
+            Pattern.compile("^[a-zA-Z][a-zA-Z0-9.+-]*:/[^/]");
+
     // RFC 3986: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
     private final static Pattern schemeRegex =
             Pattern.compile("^[a-zA-Z][a-zA-Z0-9.+-]*:");
@@ -157,7 +160,8 @@ public class Servlet extends HttpServlet {
     }
 
     private String getFormatFromRequest(HttpServletRequest request) {
-        if (request.getPathInfo() == null) return "best";
+        if (request.getPathInfo() == null)
+            return "best";
         String[] args = request.getPathInfo().split("/", 3);
         if (args.length < 2 || "".equals(args[1])) {
             if (request.getParameter("format") == null) {
@@ -170,7 +174,8 @@ public class Servlet extends HttpServlet {
     }
 
     private String getInputIRIFromRequest(HttpServletRequest request) {
-        if (request.getPathInfo() == null) return null;
+        if (request.getPathInfo() == null)
+            return null;
         String[] args = request.getPathInfo().split("/", 3);
         if (args.length < 3) {
             if (request.getParameter("uri") != null) {
@@ -202,20 +207,21 @@ public class Servlet extends HttpServlet {
         return schemeRegex.matcher(uri).find();
     }
 
-    private final static Pattern schemeAndSingleSlashRegex =
-            Pattern.compile("^[a-zA-Z][a-zA-Z0-9.+-]*:/[^/]");
-
     private boolean hasOnlySingleSlashAfterScheme(String uri) {
         return schemeAndSingleSlashRegex.matcher(uri).find();
     }
 
     private String getContentTypeHeader(HttpServletRequest req) {
-        if (req.getHeader("Content-Type") == null) return null;
-        if ("".equals(req.getHeader("Content-Type"))) return null;
-        String contentType = req.getHeader("Content-Type");
+        String cType = "Content-Type";
+        if (req.getHeader(cType) == null)
+            return null;
+        if ("".equals(req.getHeader(cType)))
+            return null;
+        String contentType = req.getHeader(cType);
         // strip off parameters such as ";charset=UTF-8"
-        int index = contentType.indexOf(";");
-        if (index == -1) return contentType;
+        int index = contentType.indexOf(';');
+        if (index == -1)
+            return contentType;
         return contentType.substring(0, index);
     }
 
@@ -251,14 +257,18 @@ public class Servlet extends HttpServlet {
     }
 
     private ValidationMode getValidationMode(HttpServletRequest request) {
-        final String PARAMETER = "validation-mode";
-        final String validationMode = request.getParameter(PARAMETER);
-        if (validationMode == null) return ValidationMode.None;
-        if ("none".equalsIgnoreCase(validationMode)) return 
ValidationMode.None;
-        if ("validate".equalsIgnoreCase(validationMode)) return 
ValidationMode.Validate;
-        if ("validate-fix".equalsIgnoreCase(validationMode)) return 
ValidationMode.ValidateAndFix;
+        final String parameter = "validation-mode";
+        final String validationMode = request.getParameter(parameter);
+        if (validationMode == null)
+            return ValidationMode.None;
+        if ("none".equalsIgnoreCase(validationMode))
+            return ValidationMode.None;
+        if ("validate".equalsIgnoreCase(validationMode))
+            return ValidationMode.Validate;
+        if ("validate-fix".equalsIgnoreCase(validationMode))
+            return ValidationMode.ValidateAndFix;
         throw new IllegalArgumentException(
-                String.format("Invalid value '%s' for '%s' parameter.", 
validationMode, PARAMETER)
+                String.format("Invalid value '%s' for '%s' parameter.", 
validationMode, parameter)
         );
     }
 

http://git-wip-us.apache.org/repos/asf/any23/blob/d0e627a9/service/src/main/java/org/apache/any23/servlet/WebResponder.java
----------------------------------------------------------------------
diff --git a/service/src/main/java/org/apache/any23/servlet/WebResponder.java 
b/service/src/main/java/org/apache/any23/servlet/WebResponder.java
index 658b0e7..3101e09 100644
--- a/service/src/main/java/org/apache/any23/servlet/WebResponder.java
+++ b/service/src/main/java/org/apache/any23/servlet/WebResponder.java
@@ -33,6 +33,7 @@ import org.apache.any23.writer.CountingTripleHandler;
 import org.apache.any23.writer.FormatWriter;
 import org.apache.any23.writer.ReportingTripleHandler;
 import org.apache.any23.writer.TripleHandler;
+import org.apache.any23.writer.TripleHandlerException;
 import org.apache.any23.writer.WriterFactory;
 import org.apache.any23.writer.WriterFactoryRegistry;
 import sun.security.validator.ValidatorException;
@@ -94,7 +95,7 @@ class WebResponder {
         this.any23servlet = any23servlet;
         this.response = response;
         this.runner = new Any23();
-        runner.setHTTPUserAgent("Any23-Servlet");
+        runner.setHTTPUserAgent("Apache Any23 Servlet http://any23.org/");
     }
 
     protected Any23 getRunner() {
@@ -107,9 +108,11 @@ class WebResponder {
             String format,
             boolean report, boolean annotate
     ) throws IOException {
-        if (in == null) return;
-        if (!initRdfWriter(format, report, annotate)) return;
-        final ExtractionReport er;
+        if (in == null)
+          return;
+        if (!initRdfWriter(format, report, annotate))
+          return;
+        ExtractionReport er = null;
         try {
             er = runner.extract(eps, in, rdfWriter);
             rdfWriter.close();
@@ -135,9 +138,20 @@ class WebResponder {
             sendError(502, "Could not fetch input.", ioe, null, report);
             return;
         } catch (ExtractionException e) {
-            // Extraction error.
-            any23servlet.log("Could not parse input", e);
-            sendError(502, "Could not parse input.", e, null, report);
+            if (rdfWriter != null) {
+                try {
+                    rdfWriter.close();
+                } catch (TripleHandlerException the) {
+                    throw new RuntimeException("Error while closing 
TripleHandler", the);
+                }
+            }
+
+            // Extraction error. Although there is a critical error we still 
wish 
+            // to return accurate, partial extraction results to the user
+            String extractionError = "Failed to fully parse input. The 
extraction result, at the bottom "
+                    + "of this response, if any, will contain extractions only 
up until the extraction error.";
+            any23servlet.log(extractionError, e);
+            sendError(502, extractionError, e, er, report);
             return;
         } catch (Exception e) {
             any23servlet.log("Internal error", e);
@@ -207,7 +221,8 @@ class WebResponder {
         for(Extractor<?> extractor : er.getMatchingExtractors()) {
             final String name = extractor.getDescription().getExtractorName();
             final Collection<IssueReport.Issue> extractorIssues = 
er.getExtractorIssues(name);
-            if(extractorIssues.isEmpty()) continue;
+            if(extractorIssues.isEmpty())
+                continue;
             ps.println( String.format("<extractorIssues extractor=\"%s\">", 
name));
             for(IssueReport.Issue issue : er.getExtractorIssues(name)) {
                 ps.println(
@@ -232,7 +247,7 @@ class WebResponder {
 
         // Human readable error message.
         if(msg != null) {
-            ps.printf("<message>%s</message>\n", msg);
+            ps.printf("<message>%s</message>%n", msg);
         } else {
             ps.print("<message/>\n");
         }
@@ -278,7 +293,9 @@ class WebResponder {
     throws IOException {
         response.setStatus(code);
         response.setContentType("text/plain");
-        final PrintStream ps = new PrintStream(response.getOutputStream());
+        final ServletOutputStream sos = response.getOutputStream();
+        final PrintStream ps = new PrintStream(sos);
+        final byte[] data = byteOutStream.toByteArray();
         if (report) {
             try {
                 printHeader(ps);
@@ -292,6 +309,7 @@ class WebResponder {
                 
ps.println("================================================================");
                 e.printStackTrace(ps);
                 
ps.println("================================================================");
+                printData(data, ps);
             }
         }
     }
@@ -343,8 +361,7 @@ class WebResponder {
         } else {
             return null;
         }
-        final WriterFactory writer = 
writerRegistry.getWriterByIdentifier(finalFormat);
-        return writer;
+        return writerRegistry.getWriterByIdentifier(finalFormat);
     }
 
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/d0e627a9/service/src/main/java/org/apache/any23/servlet/conneg/ContentTypeNegotiator.java
----------------------------------------------------------------------
diff --git 
a/service/src/main/java/org/apache/any23/servlet/conneg/ContentTypeNegotiator.java
 
b/service/src/main/java/org/apache/any23/servlet/conneg/ContentTypeNegotiator.java
index b74951f..c0e1de3 100644
--- 
a/service/src/main/java/org/apache/any23/servlet/conneg/ContentTypeNegotiator.java
+++ 
b/service/src/main/java/org/apache/any23/servlet/conneg/ContentTypeNegotiator.java
@@ -29,11 +29,11 @@ import java.util.regex.Pattern;
  */
 public class ContentTypeNegotiator {
 
-    private List<VariantSpec> variantSpecs = new ArrayList<VariantSpec>();
+    private List<VariantSpec> variantSpecs = new ArrayList<>();
 
     private List<MediaRangeSpec> defaultAcceptRanges = 
Collections.singletonList(MediaRangeSpec.parseRange("*/*"));
     
-    private Collection<AcceptHeaderOverride> userAgentOverrides = new 
ArrayList<AcceptHeaderOverride>();
+    private Collection<AcceptHeaderOverride> userAgentOverrides = new 
ArrayList<>();
 
     protected ContentTypeNegotiator(){}
 
@@ -123,7 +123,7 @@ public class ContentTypeNegotiator {
     protected class VariantSpec {
 
         private MediaRangeSpec type;
-        private List<MediaRangeSpec> aliases = new ArrayList<MediaRangeSpec>();
+        private List<MediaRangeSpec> aliases = new ArrayList<>();
         private boolean isDefault = false;
 
         public VariantSpec(String mediaType) {
@@ -182,7 +182,8 @@ public class ContentTypeNegotiator {
         }
 
         private void evaluateVariantAlias(MediaRangeSpec variant, 
MediaRangeSpec isAliasFor) {
-            if (variant.getBestMatch(ranges) == null) return;
+            if (variant.getBestMatch(ranges) == null)
+              return;
             double q = variant.getBestMatch(ranges).getQuality();
             if (q * variant.getQuality() > bestMatchingQuality) {
                 bestMatchingVariant = isAliasFor;
@@ -215,10 +216,6 @@ public class ContentTypeNegotiator {
             this.replacement = replacement;
         }
 
-        boolean matches(String acceptHeader) {
-            return matches(acceptHeader, null);
-        }
-
         boolean matches(String acceptHeader, String userAgentHeader) {
             return (userAgentPattern == null
                     || userAgentPattern.matcher(userAgentHeader).find())

http://git-wip-us.apache.org/repos/asf/any23/blob/d0e627a9/service/src/main/java/org/apache/any23/servlet/conneg/MediaRangeSpec.java
----------------------------------------------------------------------
diff --git 
a/service/src/main/java/org/apache/any23/servlet/conneg/MediaRangeSpec.java 
b/service/src/main/java/org/apache/any23/servlet/conneg/MediaRangeSpec.java
index db7b543..b71e3ee 100644
--- a/service/src/main/java/org/apache/any23/servlet/conneg/MediaRangeSpec.java
+++ b/service/src/main/java/org/apache/any23/servlet/conneg/MediaRangeSpec.java
@@ -110,8 +110,8 @@ public class MediaRangeSpec {
         if ("*".equals(type) && !"*".equals(subtype)) {
             return null;
         }
-        List<String> parameterNames = new ArrayList<String>();
-        List<String> parameterValues = new ArrayList<String>();
+        List<String> parameterNames = new ArrayList<>();
+        List<String> parameterValues = new ArrayList<>();
         while (m.find()) {
             String name = m.group(1).toLowerCase();
             String value = (m.group(3) == null) ? m.group(2) : 
unescape(m.group(3));
@@ -136,7 +136,7 @@ public class MediaRangeSpec {
      * @return A List of MediaRangeSpecs
      */
     public static List<MediaRangeSpec> parseAccept(String s) {
-        List<MediaRangeSpec> result = new ArrayList<MediaRangeSpec>();
+        List<MediaRangeSpec> result = new ArrayList<>();
         Matcher m = mediaRangePattern.matcher(s);
         while (m.find()) {
             result.add(parseRange(m.group()));
@@ -175,7 +175,7 @@ public class MediaRangeSpec {
             result.append(";");
             result.append(parameterNames.get(i));
             result.append("=");
-            String value = (String) parameterValues.get(i);
+            String value = parameterValues.get(i);
             if (tokenPattern.matcher(value).matches()) {
                 result.append(value);
             } else {
@@ -205,7 +205,7 @@ public class MediaRangeSpec {
 
     public String getParameter(String parameterName) {
         for (int i = 0; i < parameterNames.size(); i++) {
-            if (parameterNames.get(i).equals(parameterName.toLowerCase())) {
+            if (parameterNames.get(i).equalsIgnoreCase(parameterName)) {
                 return parameterValues.get(i);
             }
         }
@@ -225,16 +225,22 @@ public class MediaRangeSpec {
     }
 
     public int getPrecedence(MediaRangeSpec range) {
-        if (range.isWildcardType()) return 1;
-        if (!range.type.equals(type)) return 0;
-        if (range.isWildcardSubtype()) return 2;
-        if (!range.subtype.equals(subtype)) return 0;
-        if (range.getParameterNames().isEmpty()) return 3;
+        if (range.isWildcardType())
+          return 1;
+        if (!range.type.equals(type))
+          return 0;
+        if (range.isWildcardSubtype())
+          return 2;
+        if (!range.subtype.equals(subtype))
+          return 0;
+        if (range.getParameterNames().isEmpty())
+          return 3;
         int result = 3;
         for (int i = 0; i < range.getParameterNames().size(); i++) {
             String name  = range.getParameterNames().get(i);
             String value = range.getParameter(name);
-            if (!value.equals(getParameter(name))) return 0;
+            if (!value.equals(getParameter(name)))
+              return 0;
             result++;
         }
         return result;
@@ -254,6 +260,7 @@ public class MediaRangeSpec {
         return result;
     }
 
+    @Override
     public String toString() {
         return mediaType + ";q=" + quality;
     }

http://git-wip-us.apache.org/repos/asf/any23/blob/d0e627a9/service/src/main/resources/form.html
----------------------------------------------------------------------
diff --git a/service/src/main/resources/form.html 
b/service/src/main/resources/form.html
index e11a019..374d017 100644
--- a/service/src/main/resources/form.html
+++ b/service/src/main/resources/form.html
@@ -394,7 +394,7 @@ Content-Length: 174
 
   <footer class="footer">
   <div class="container-fluid">
-  Copyright &copy; ${project.inceptionYear}-2014 The <a 
href="http://www.apache.org/">Apache Software Foundation</a>. All Rights 
Reserved.<br/> Apache Any23, Apache, the Apache feather logo, and the Apache 
Any23 project logos are trademarks of The Apache Software Foundation. All other 
marks mentioned may be trademarks or registered trademarks of their respective 
owners.
+  Copyright &copy; ${project.inceptionYear}-2018 The <a 
href="http://www.apache.org/">Apache Software Foundation</a>. All Rights 
Reserved.<br/> Apache Any23, Apache, the Apache feather logo, and the Apache 
Any23 project logos are trademarks of The Apache Software Foundation. All other 
marks mentioned may be trademarks or registered trademarks of their respective 
owners.
   </div>
   </footer>
 

Reply via email to