Repository: any23
Updated Branches:
  refs/heads/master 6a5471916 -> 6a399c7f5


ANY23-395 fail early if not enough memory for OpenIE


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/6a399c7f
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/6a399c7f
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/6a399c7f

Branch: refs/heads/master
Commit: 6a399c7f5d6c8d41b66683f1368459c44a0afcbe
Parents: 6a54719
Author: Hans <[email protected]>
Authored: Tue Oct 30 18:47:47 2018 -0500
Committer: Hans <[email protected]>
Committed: Tue Oct 30 18:47:47 2018 -0500

----------------------------------------------------------------------
 .../extractor/openie/OpenIEExtractor.java       | 25 +++++++++++++-------
 1 file changed, 16 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/6a399c7f/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java
----------------------------------------------------------------------
diff --git a/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java b/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java
index 9b62626..3992388 100644
--- a/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java
+++ b/plugins/openie/src/main/java/org/apache/any23/plugin/extractor/openie/OpenIEExtractor.java
@@ -24,8 +24,6 @@ import javax.xml.transform.TransformerFactoryConfigurationError;
 
 import org.apache.any23.extractor.Extractor;
 import org.apache.any23.extractor.IssueReport;
-import org.apache.any23.configuration.Configuration;
-import org.apache.any23.configuration.DefaultConfiguration;
 import org.apache.any23.extractor.ExtractionContext;
 import org.apache.any23.extractor.ExtractorDescription;
 import org.apache.any23.plugin.Author;
@@ -86,6 +84,21 @@ public class OpenIEExtractor implements Extractor.TagSoupDOMExtractor {
             ExtractionContext context, Document in, ExtractionResult out)
                     throws IOException, ExtractionException {
 
+        Runtime runtime = Runtime.getRuntime();
+        long maxMemory = runtime.maxMemory();
+        //free up as much memory as possible before performing this calculation
+        runtime.gc();
+        long usedMemory = Math.max(0L, runtime.totalMemory() - 
runtime.freeMemory());
+        long availableMemory = maxMemory - usedMemory;
+        if (availableMemory < 4294967296L) {
+            out.notifyIssue(IssueReport.IssueLevel.FATAL,
+                    "Not enough heap space available to perform OpenIE 
extraction: "
+                            + (availableMemory/1048576L) + "/" + (maxMemory / 
1048576L)
+                            + " MB. Requires 4096 MB.", -1, -1);
+            LOG.error("Increase JVM heap size when running OpenIE extractor. 
max=" + maxMemory + "; available=" + availableMemory);
+            return;
+        }
+
         IRI documentIRI = context.getDocumentIRI();
         RDFUtils.iri(documentIRI.toString() + "root");
         out.writeNamespace(RDF.PREFIX, RDF.NAMESPACE);
@@ -105,13 +118,7 @@ public class OpenIEExtractor implements Extractor.TagSoupDOMExtractor {
             LOG.error("Encountered error during OpenIE extraction.", e);
         } catch (TikaException e) {
             LOG.error("Encountered error whilst parsing InputStream with 
Tika.", e);
-        } catch (OutOfMemoryError e) {
-          //let the gc do its thing
-          openIE = null;
-          out.notifyIssue(IssueReport.IssueLevel.FATAL, "Not enough memory 
available to perform OpenIE extraction.", -1, -1);
-          LOG.error("Encountered OutOfMemoryError... increase JVM heap when 
running OpenIEExtractor.", e);
-          return;
-      }
+        }
 
         List<Instance> listExtractions = 
JavaConversions.seqAsJavaList(extractions);
         // for each extraction instance we can obtain a number of extraction 
elements

Reply via email to