This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_3x by this push:
     new 3fc3476f9 TIKA-4385 : fix : GDAL process output read in another thread (#2126)
3fc3476f9 is described below

commit 3fc3476f997d559713fa823c42716141545c5283
Author: Leszek Sliwko <[email protected]>
AuthorDate: Sun Feb 16 15:38:49 2025 +0000

    TIKA-4385 : fix : GDAL process output read in another thread (#2126)
    
    * TIKA-4385 : fix : GDAL process output read in another thread
    
    Co-authored-by: Tilman Hausherr <[email protected]>
    (cherry picked from commit 05500bbd9a2840934652e5b647e0a85506a0325b)
---
 .../org/apache/tika/parser/gdal/GDALParser.java    | 90 +++++++++++-----------
 1 file changed, 45 insertions(+), 45 deletions(-)

diff --git a/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/gdal/GDALParser.java b/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
index e82f9b6b7..1ba196942 100644
--- a/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
+++ b/tika-parsers/tika-parsers-extended/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
@@ -19,12 +19,10 @@ package org.apache.tika.parser.gdal;
 
 //JDK imports
 
-import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.apache.tika.parser.external.ExternalParser.INPUT_FILE_TOKEN;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.InputStreamReader;
 import java.io.Reader;
 import java.io.StringReader;
 import java.util.Arrays;
@@ -42,15 +40,20 @@ import org.slf4j.LoggerFactory;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
+import org.apache.tika.config.Field;
+import org.apache.tika.config.TikaTaskTimeout;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.ExternalProcess;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.external.ExternalParser;
 import org.apache.tika.sax.XHTMLContentHandler;
+import org.apache.tika.utils.FileProcessResult;
+import org.apache.tika.utils.ProcessUtils;
 
 //Tika imports
 //SAX imports
@@ -76,6 +79,8 @@ public class GDALParser implements Parser {
     private static final long serialVersionUID = -3869130527323941401L;
     private static final Logger LOG = LoggerFactory.getLogger(GDALParser.class);
 
+    public static final long DEFAULT_TIMEOUT_MS = 60000;
+
     private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(new HashSet<>(
             Arrays.asList(MediaType.application("x-netcdf"), MediaType.application("vrt"),
                     MediaType.image("geotiff"), MediaType.image("nitf"),
@@ -140,6 +145,12 @@ public class GDALParser implements Parser {
 
     private String command;
 
+    private int maxStdErr = 100000;
+
+    private int maxStdOut = 100000;
+
+    private long timeoutMs = DEFAULT_TIMEOUT_MS;
+
     public GDALParser() {
         setCommand("gdalinfo ${INPUT}");
     }
@@ -184,8 +195,23 @@ public class GDALParser implements Parser {
         TemporaryResources tmp = new TemporaryResources();
         TikaInputStream tis = TikaInputStream.get(stream, tmp, metadata);
 
-        String runCommand = processCommand(tis);
-        String output = execCommand(new String[]{runCommand});
+        String[] runCommand = processCommand(tis).split("\\s+", -1);
+
+        long localTimeoutMillis = TikaTaskTimeout.getTimeoutMillis(context, timeoutMs);
+        FileProcessResult result = ProcessUtils.execute(new ProcessBuilder(runCommand),
+                localTimeoutMillis, maxStdOut, maxStdErr);
+
+        metadata.set(ExternalProcess.IS_TIMEOUT, result.isTimeout());
+        metadata.set(ExternalProcess.EXIT_VALUE, result.getExitValue());
+        metadata.set(ExternalProcess.STD_OUT_LENGTH, result.getStdoutLength());
+        metadata.set(ExternalProcess.STD_OUT_IS_TRUNCATED, result.isStdoutTruncated());
+        metadata.set(ExternalProcess.STD_ERR_LENGTH, result.getStderrLength());
+        metadata.set(ExternalProcess.STD_ERR_IS_TRUNCATED, result.isStderrTruncated());
+
+        metadata.set(ExternalProcess.STD_OUT, result.getStdout());
+        metadata.set(ExternalProcess.STD_ERR, result.getStderr());
+
+        String output = result.getStdout();
 
         // now extract the actual metadata params
         // from the GDAL output in the content stream
@@ -290,47 +316,6 @@ public class GDALParser implements Parser {
         }
     }
 
-    private String execCommand(String[] cmd) throws IOException {
-        // Execute
-        Process process;
-        String output = null;
-        if (cmd.length == 1) {
-            process = Runtime.getRuntime().exec(cmd[0]);
-        } else {
-            process = Runtime.getRuntime().exec(cmd);
-        }
-
-        try {
-            InputStream out = process.getInputStream();
-
-            try {
-                output = extractOutput(out);
-            } catch (Exception e) {
-                LOG.warn("Exception extracting output", e);
-                output = "";
-            }
-
-        } finally {
-            try {
-                process.waitFor();
-            } catch (InterruptedException ignore) {
-            }
-        }
-        return output;
-
-    }
-
-    private String extractOutput(InputStream stream) throws SAXException, IOException {
-        StringBuilder sb = new StringBuilder();
-        try (Reader reader = new InputStreamReader(stream, UTF_8)) {
-            char[] buffer = new char[1024];
-            for (int n = reader.read(buffer); n != -1; n = reader.read(buffer)) {
-                sb.append(buffer, 0, n);
-            }
-        }
-        return sb.toString();
-    }
-
     private void processOutput(ContentHandler handler, Metadata metadata, String output)
             throws SAXException, IOException {
         XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
@@ -349,4 +334,19 @@ public class GDALParser implements Parser {
 
     }
 
+    @Field
+    public void setTimeoutMs(long timeoutMs) {
+        this.timeoutMs = timeoutMs;
+    }
+
+    @Field
+    public void setMaxStdErr(int maxStdErr) {
+        this.maxStdErr = maxStdErr;
+    }
+
+    @Field
+    public void setMaxStdOut(int maxStdOut) {
+        this.maxStdOut = maxStdOut;
+    }
+
 }

Reply via email to