This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/tika.git
commit 4ea421a7aef5d360ae2011c31d961c3dfbe6bfb9 Author: TALLISON <talli...@apache.org> AuthorDate: Tue Nov 13 21:34:44 2018 -0500 TIKA-2782 -- confirm child streams are redirected. Add workaround (shameless hack) if logger writes before streams are redirected. --- .../org/apache/tika/server/ServerTimeouts.java | 10 ++ .../java/org/apache/tika/server/TikaServerCli.java | 24 +++-- .../org/apache/tika/server/TikaServerWatchDog.java | 29 +++++- .../tika/server/TikaServerIntegrationTest.java | 103 ++++++++++++++++++++- .../src/test/resources/logging/log4j_child.xml | 29 ++++++ .../src/test/resources/mock/testStdOutErr.xml | 75 +++++++++++++++ 6 files changed, 260 insertions(+), 10 deletions(-) diff --git a/tika-server/src/main/java/org/apache/tika/server/ServerTimeouts.java b/tika-server/src/main/java/org/apache/tika/server/ServerTimeouts.java index b85d89c..34df411 100644 --- a/tika-server/src/main/java/org/apache/tika/server/ServerTimeouts.java +++ b/tika-server/src/main/java/org/apache/tika/server/ServerTimeouts.java @@ -52,6 +52,8 @@ public class ServerTimeouts { */ public static final long DEFAULT_TASK_TIMEOUT_MILLIS = 120000; + private int maxRestarts = -1; + private long taskTimeoutMillis = DEFAULT_TASK_TIMEOUT_MILLIS; private long pingTimeoutMillis = DEFAULT_PING_TIMEOUT_MILLIS; @@ -103,4 +105,12 @@ public class ServerTimeouts { public void setPingPulseMillis(long pingPulseMillis) { this.pingPulseMillis = pingPulseMillis; } + + public int getMaxRestarts() { + return maxRestarts; + } + + public void setMaxRestarts(int maxRestarts) { + this.maxRestarts = maxRestarts; + } } diff --git a/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java b/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java index d63a515..0cdce50 100644 --- a/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java +++ b/tika-server/src/main/java/org/apache/tika/server/TikaServerCli.java @@ -18,6 +18,8 @@ package org.apache.tika.server; import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -84,7 +86,7 @@ public class TikaServerCli { private static final List<String> ONLY_IN_SPAWN_CHILD_MODE = Arrays.asList(new String[] { "taskTimeoutMillis", "taskPulseMillis", - "pingTimeoutMillis", "pingPulseMillis", "maxFiles", "javaHome"}); + "pingTimeoutMillis", "pingPulseMillis", "maxFiles", "javaHome", "maxRestarts"}); private static Options getOptions() { Options options = new Options(); @@ -104,7 +106,7 @@ public class TikaServerCli { options.addOption("taskPulseMillis", true, "Only in spawn child mode: how often to check if a task has timed out."); options.addOption("pingTimeoutMillis", true, "Only in spawn child mode: how long to wait to wait for a ping and/or ping response."); options.addOption("pingPulseMillis", true, "Only in spawn child mode: how often to check if a ping has timed out."); - + options.addOption("maxRestarts", true, "Only in spawn child mode: how many times to restart child process, default is -1 (always restart)"); options.addOption("maxFiles", true, "Only in spawn child mode: shutdown server after this many files -- use only in 'spawnChild' mode"); options.addOption("javaHome", true, "Override system property JAVA_HOME for calling java for the child process"); options.addOption("child", false, "this process is a child process -- EXPERT -- " + @@ -257,18 +259,24 @@ public class TikaServerCli { ServerStatus serverStatus = new ServerStatus(); //if this is a child process if (line.hasOption("child")) { + //redirect!!! + PrintStream out = System.out; + InputStream in = System.in; + System.setIn(new ByteArrayInputStream(new byte[0])); + System.setOut(System.err); + long maxFiles = DEFAULT_MAX_FILES; if (line.hasOption("maxFiles")) { maxFiles = Long.parseLong(line.getOptionValue("maxFiles")); } ServerTimeouts serverTimeouts = configureServerTimeouts(line); + Thread serverThread = - new Thread(new ServerStatusWatcher(serverStatus, System.in, - System.out, maxFiles, serverTimeouts)); + new Thread(new ServerStatusWatcher(serverStatus, in, + out, maxFiles, serverTimeouts)); + serverThread.start(); - System.setIn(new ByteArrayInputStream(new byte[0])); - System.setOut(System.err); } TikaResource.init(tika, digester, inputStreamFactory, serverStatus); JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean(); @@ -347,6 +355,10 @@ public class TikaServerCli { Long.parseLong(line.getOptionValue("pingPulseMillis"))); } + if (line.hasOption("maxRestarts")) { + serverTimeouts.setMaxRestarts(Integer.parseInt(line.getOptionValue("maxRestarts"))); + } + return serverTimeouts; } diff --git a/tika-server/src/main/java/org/apache/tika/server/TikaServerWatchDog.java b/tika-server/src/main/java/org/apache/tika/server/TikaServerWatchDog.java index 964577c..5bf9bf3 100644 --- a/tika-server/src/main/java/org/apache/tika/server/TikaServerWatchDog.java +++ b/tika-server/src/main/java/org/apache/tika/server/TikaServerWatchDog.java @@ -21,6 +21,7 @@ import org.apache.tika.utils.ProcessUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -49,9 +50,10 @@ public class TikaServerWatchDog { private volatile CHILD_STATUS childStatus = CHILD_STATUS.INITIALIZING; private volatile Instant lastPing = null; private ChildProcess childProcess = null; - int restarts = 0; + public void execute(String[] args, ServerTimeouts serverTimeouts) throws Exception { + LOG.info("server watch dog is starting up"); //if the child thread is in stop-the-world mode, and isn't //responding to the ping, this thread checks to make sure //that the parent ping is sent and received often enough @@ -92,6 +94,7 @@ public class TikaServerWatchDog { try { childProcess = new ChildProcess(args); setChildStatus(CHILD_STATUS.RUNNING); + int restarts = 0; while (true) { if (!childProcess.ping()) { @@ -102,6 +105,11 @@ public class TikaServerWatchDog { childProcess = new ChildProcess(args); LOG.info("Successfully restarted child process -- {} restarts so far)", ++restarts); setChildStatus(CHILD_STATUS.RUNNING); + restarts++; + if (serverTimeouts.getMaxRestarts() > -1 && restarts >= serverTimeouts.getMaxRestarts()) { + LOG.warn("hit max restarts: "+restarts+". Stopping now"); + break; + } } Thread.sleep(serverTimeouts.getPingPulseMillis()); } @@ -194,7 +202,22 @@ public class TikaServerWatchDog { this.fromChild = new DataInputStream(process.getInputStream()); this.toChild = new DataOutputStream(process.getOutputStream()); - byte status = fromChild.readByte(); + //if logger's debug=true, there can be a bunch of stuff that + //was written to the process's inputstream _before_ + //we did the redirect. + //These bytes need to be read from fromChild before the child has actually + //started...allow 64,000 bytes...completely arbitrary. + //this is admittedly hacky...If the logger writes 0, we'd + //interpret that as "OPERATING"...need to figure out + //better way to siphon statically written bytes before + //we do the redirect of streams. + int maxStartBytes = 64000; + int status = fromChild.readByte(); + int read = 0; + while (status > -1 && read < maxStartBytes && status != ServerStatus.STATUS.OPERATING.getByte()) { + status = fromChild.readByte(); + read++; + } if (status != ServerStatus.STATUS.OPERATING.getByte()) { try { ServerStatus.STATUS currStatus = ServerStatus.STATUS.lookup(status); @@ -205,7 +228,7 @@ public class TikaServerWatchDog { } int len = process.getInputStream().available(); byte[] msg = new byte[len+1]; - msg[0] = status; + msg[0] = (byte)status; process.getInputStream().read(msg, 1, len); throw new IOException( diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaServerIntegrationTest.java b/tika-server/src/test/java/org/apache/tika/server/TikaServerIntegrationTest.java index 7003ab6..0d8a128 100644 --- a/tika-server/src/test/java/org/apache/tika/server/TikaServerIntegrationTest.java +++ b/tika-server/src/test/java/org/apache/tika/server/TikaServerIntegrationTest.java @@ -22,7 +22,9 @@ import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.OfficeOpenXMLExtended; import org.apache.tika.metadata.serialization.JsonMetadataList; import org.junit.After; +import org.junit.AfterClass; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Ignore; import org.junit.Test; @@ -30,6 +32,9 @@ import javax.ws.rs.core.Response; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; import java.security.Permission; import java.time.Duration; import java.time.Instant; @@ -46,6 +51,7 @@ public class TikaServerIntegrationTest extends TikaTest { private static final String TEST_SYSTEM_EXIT = "mock/system_exit.xml"; private static final String TEST_HEAVY_HANG = "mock/heavy_hang_30000.xml"; private static final String TEST_HEAVY_HANG_SHORT = "mock/heavy_hang_100.xml"; + private static final String TEST_STDOUT_STDERR = "mock/testStdOutErr.xml"; private static final String META_PATH = "/rmeta"; //running into conflicts on 9998 with the CXFTestBase tests @@ -56,6 +62,7 @@ public class TikaServerIntegrationTest extends TikaTest { "http://localhost:" + INTEGRATION_TEST_PORT; private SecurityManager existingSecurityManager = null; + private static Path LOG_FILE; private static class MyExitException extends RuntimeException { private final int status; @@ -67,6 +74,12 @@ public class TikaServerIntegrationTest extends TikaTest { return status; } } + @BeforeClass + public static void staticSetup() throws Exception { + LOG_FILE = Files.createTempFile("tika-server-integration", ".xml"); + Files.copy(TikaServerIntegrationTest.class.getResourceAsStream("/logging/log4j_child.xml"), LOG_FILE, StandardCopyOption.REPLACE_EXISTING); + } + @Before public void setUp() throws Exception { SecurityManager existingSecurityManager = System.getSecurityManager(); @@ -87,8 +100,13 @@ public class TikaServerIntegrationTest extends TikaTest { }); } + @AfterClass + public static void staticTearDown() throws Exception { + Files.delete(LOG_FILE); + } + @After - public void tearDown() { + public void tearDown() throws Exception { System.setSecurityManager(existingSecurityManager); } @@ -332,6 +350,89 @@ public class TikaServerIntegrationTest extends TikaTest { assertEquals(-1, i.get()); } + + @Test + public void testStdErrOutBasic() throws Exception { + final AtomicInteger i = new AtomicInteger(); + Thread serverThread = new Thread() { + @Override + public void run() { + TikaServerCli.main( + new String[]{ + "-spawnChild", + "-p", INTEGRATION_TEST_PORT, + "-taskTimeoutMillis", "10000", "-taskPulseMillis", "500", + "-pingPulseMillis", "100" + }); + } + }; + serverThread.start(); + awaitServerStartup(); + + Response response = WebClient + .create(endPoint + META_PATH) + .accept("application/json") + .put(ClassLoader + .getSystemResourceAsStream(TEST_STDOUT_STDERR)); + Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8); + List<Metadata> metadataList = JsonMetadataList.fromJson(reader); + assertEquals(1, metadataList.size()); + assertContains("quick brown fox", metadataList.get(0).get("X-TIKA:content")); + + response = WebClient + .create(endPoint + META_PATH) + .accept("application/json") + .put(ClassLoader + .getSystemResourceAsStream(TEST_RECURSIVE_DOC)); + reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8); + metadataList = JsonMetadataList.fromJson(reader); + assertEquals(12, metadataList.size()); + assertEquals("Microsoft Office Word", metadataList.get(0).get(OfficeOpenXMLExtended.APPLICATION)); + assertContains("plundered our seas", metadataList.get(6).get("X-TIKA:content")); + serverThread.interrupt(); + } + + @Test + public void testStdErrOutLogging() throws Exception { + final AtomicInteger i = new AtomicInteger(); + Thread serverThread = new Thread() { + @Override + public void run() { + TikaServerCli.main( + new String[]{ + "-spawnChild", + "-p", INTEGRATION_TEST_PORT, + "-taskTimeoutMillis", "10000", "-taskPulseMillis", "500", + "-pingPulseMillis", "100", "-maxRestarts", "0", + "-JDlog4j.configuration=file:"+ LOG_FILE.toAbsolutePath() + }); + } + }; + serverThread.start(); + awaitServerStartup(); + + Response response = WebClient + .create(endPoint + META_PATH) + .accept("application/json") + .put(ClassLoader + .getSystemResourceAsStream(TEST_STDOUT_STDERR)); + Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8); + List<Metadata> metadataList = JsonMetadataList.fromJson(reader); + assertEquals(1, metadataList.size()); + assertContains("quick brown fox", metadataList.get(0).get("X-TIKA:content")); + + response = WebClient + .create(endPoint + META_PATH) + .accept("application/json") + .put(ClassLoader + .getSystemResourceAsStream(TEST_RECURSIVE_DOC)); + reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8); + metadataList = JsonMetadataList.fromJson(reader); + assertEquals(12, metadataList.size()); + assertEquals("Microsoft Office Word", metadataList.get(0).get(OfficeOpenXMLExtended.APPLICATION)); + assertContains("plundered our seas", metadataList.get(6).get("X-TIKA:content")); + serverThread.interrupt(); + } private void awaitServerStartup() throws Exception { Instant started = Instant.now(); diff --git a/tika-server/src/test/resources/logging/log4j_child.xml b/tika-server/src/test/resources/logging/log4j_child.xml new file mode 100644 index 0000000..7391faf --- /dev/null +++ b/tika-server/src/test/resources/logging/log4j_child.xml @@ -0,0 +1,29 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd"> +<log4j:configuration debug="true"> + <appender name="stdout" class="org.apache.log4j.ConsoleAppender"> + <param name="Target" value="System.out"/> + <layout class="org.apache.log4j.PatternLayout"> + <!-- Pattern to output the caller's file name and line number --> + <!--<param name="ConversionPattern" value="%5p [%t] (%F:%L) - %m%n"/>--> + <param name="ConversionPattern" value="%m%n"/> + </layout> + </appender> + <appender name="stderr" class="org.apache.log4j.ConsoleAppender"> + <param name="Target" value="System.err"/> + <layout class="org.apache.log4j.PatternLayout"> + <!-- Pattern to output the caller's file name and line number --> + <!--<param name="ConversionPattern" value="%5p [%t] (%F:%L) - %m%n"/>--> + <param name="ConversionPattern" value="%m%n"/> + </layout> + </appender> + <logger name="org.apache.tika" additivity="true"> + <level value="debug"/> + <appender-ref ref="stdout"/> + </logger> + <logger name="org.apache.tika" additivity="true"> + <level value="debug"/> + <appender-ref ref="stderr"/> + </logger> + +</log4j:configuration> \ No newline at end of file diff --git a/tika-server/src/test/resources/mock/testStdOutErr.xml b/tika-server/src/test/resources/mock/testStdOutErr.xml new file mode 100644 index 0000000..899faef --- /dev/null +++ b/tika-server/src/test/resources/mock/testStdOutErr.xml @@ -0,0 +1,75 @@ +<?xml version="1.0" encoding="UTF-8" ?> + +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +<mock> + + + <!-- action can be "add" or "set" --> + <metadata action="add" name="dc:creator">Nikolai Lobachevsky</metadata> + <!-- element is the name of the sax event to write, p=paragraph + if the element is not specified, the default is <p> --> + <hang millis="100" heavy="false" /> + <write element="p">hello world! </write> + <!-- write something to System.out --> + <print_out>writing to System.out</print_out> + <hang millis="100" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="100" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="100" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="100" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="100" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="10" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="80" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="70" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="60" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="200" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="10" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="300" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="101" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="110" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="20" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="200" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="300" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="100" heavy="false" /> + <print_out>writing to System.out</print_out> + <hang millis="500" heavy="false" /> + <!-- write something to System.err --> + <print_err>writing to System.err</print_err> + + <write element="p">Even after the calls to Sys.out/err...hello world! the quick brown fox jumped over the lazy dog</write> + +</mock> \ No newline at end of file