This is an automated email from the ASF dual-hosted git repository.
wenweihuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git
The following commit(s) were added to refs/heads/master by this push:
new eca3e64e04 [INLONG-10889][Agent] When the oom is detected, the process exits (#10891)
eca3e64e04 is described below
commit eca3e64e04cf3910669a008acaf0dc75a8b8f5bc
Author: justinwwhuang <[email protected]>
AuthorDate: Wed Aug 28 16:19:09 2024 +0800
[INLONG-10889][Agent] When the oom is detected, the process exits (#10891)
* [INLONG-10889][Agent] When the oom is detected, the process exits
* Update inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileUtils.java
Co-authored-by: AloysZhang <[email protected]>
* Update inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileUtils.java
Co-authored-by: AloysZhang <[email protected]>
* [INLONG-10889][Agent] When the oom is detected, the process exits
* [INLONG-10889][Agent] When the oom is detected, the process exits
* [INLONG-10889][Agent] When the oom is detected, the process exits
* [INLONG-10889][Agent] When the oom is detected, the process exits
* [INLONG-10889][Agent] When the oom is detected, the process exits
* [INLONG-10889][Agent] When the oom is detected, the process exits
* [INLONG-10889][Agent] When the oom is detected, the process exits
---------
Co-authored-by: AloysZhang <[email protected]>
---
.../org/apache/inlong/agent/utils/ThreadUtils.java | 4 +-
.../apache/inlong/agent/utils/file/FileUtils.java | 4 +-
inlong-agent/agent-installer/bin/installer.sh | 10 ++--
.../agent/plugin/instance/CommonInstance.java | 1 +
.../plugin/sinks/filecollect/SenderManager.java | 8 +--
.../inlong/agent/plugin/sources/LogFileSource.java | 3 ++
.../agent/plugin/sources/file/AbstractSource.java | 2 +
.../inlong/agent/plugin/task/AbstractTask.java | 2 +
inlong-agent/bin/agent-env.sh | 13 +++--
inlong-agent/bin/oom.sh | 63 ++++++++++++++++++++++
10 files changed, 92 insertions(+), 18 deletions(-)
diff --git
a/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/ThreadUtils.java
b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/ThreadUtils.java
index 42270ad765..e438712ab8 100644
---
a/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/ThreadUtils.java
+++
b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/ThreadUtils.java
@@ -35,7 +35,7 @@ public class ThreadUtils {
}
private static void handleOOM(Thread t, Throwable e) {
- if (ExceptionUtils.indexOfThrowable(e,
java.lang.OutOfMemoryError.class) != -1) {
+ if (ExceptionUtils.indexOfThrowable(e, OutOfMemoryError.class) != -1) {
LOGGER.error("Agent exit caused by {} OutOfMemory: ", t.getName(),
e);
forceShutDown();
}
@@ -43,7 +43,7 @@ public class ThreadUtils {
private static void forceShutDown() {
try {
- Runtime.getRuntime().exit(-1);
+ Runtime.getRuntime().halt(-1);
} catch (Throwable e) {
LOGGER.error("exit failed, just halt, exception: ", e);
Runtime.getRuntime().halt(-2);
diff --git
a/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileUtils.java
b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileUtils.java
index b141bad43a..f5fcbabdec 100644
---
a/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileUtils.java
+++
b/inlong-agent/agent-common/src/main/java/org/apache/inlong/agent/utils/file/FileUtils.java
@@ -58,7 +58,7 @@ public class FileUtils {
creationTime = Files.readAttributes(Paths.get(fileName),
BasicFileAttributes.class).creationTime()
.toMillis();
} catch (IOException e) {
- LOGGER.error("getFileCreationTime error {}", e.getMessage());
+ LOGGER.error("getFileCreationTime error.", e);
}
return creationTime;
}
@@ -68,7 +68,7 @@ public class FileUtils {
try {
lastModify =
Files.getLastModifiedTime(Paths.get(fileName)).toMillis();
} catch (IOException e) {
- LOGGER.error("getFileLastModifyTime error {}", e.getMessage());
+ LOGGER.error("getFileLastModifyTime error.", e);
}
return lastModify;
}
diff --git a/inlong-agent/agent-installer/bin/installer.sh
b/inlong-agent/agent-installer/bin/installer.sh
index f382e29da8..09a412348d 100755
--- a/inlong-agent/agent-installer/bin/installer.sh
+++ b/inlong-agent/agent-installer/bin/installer.sh
@@ -23,12 +23,12 @@ CONSOLE_OUTPUT_FILE="${LOG_DIR}/agent-out.log"
function help() {
echo "Usage: agent.sh {status|start|stop|restart|clean}" >&2
- echo " status: the status of inlong agent"
- echo " start: start the inlong agent"
- echo " stop: stop the inlong agent"
- echo " restart: restart the inlong agent"
+ echo " status: the status of agent installer"
+ echo " start: start the agent installer"
+ echo " stop: stop the agent installer"
+ echo " restart: restart the agent installer"
echo " clean: unregister this node in manager"
- echo " help: get help from inlong agent"
+ echo " help: get help from agent installer"
}
function running() {
diff --git
a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/instance/CommonInstance.java
b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/instance/CommonInstance.java
index 7eb77c7237..415b05825a 100644
---
a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/instance/CommonInstance.java
+++
b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/instance/CommonInstance.java
@@ -114,6 +114,7 @@ public abstract class CommonInstance extends Instance {
doRun();
} catch (Throwable e) {
LOGGER.error("do run error: ", e);
+ ThreadUtils.threadThrowableHandler(Thread.currentThread(), e);
}
running = false;
}
diff --git
a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sinks/filecollect/SenderManager.java
b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sinks/filecollect/SenderManager.java
index fff55577c6..984baf6de6 100755
---
a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sinks/filecollect/SenderManager.java
+++
b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sinks/filecollect/SenderManager.java
@@ -262,6 +262,7 @@ public class SenderManager {
}
retry++;
AgentUtils.silenceSleepInMs(retrySleepTime);
+ ThreadUtils.threadThrowableHandler(Thread.currentThread(),
exception);
}
}
}
@@ -299,10 +300,9 @@ public class SenderManager {
message.getTotalSize(), auditVersion);
sendBatchWithRetryCount(callback.message,
callback.retry + 1);
}
- } catch (Exception ex) {
- LOGGER.error("error caught", ex);
- } catch (Throwable t) {
- ThreadUtils.threadThrowableHandler(Thread.currentThread(),
t);
+ } catch (Exception e) {
+ LOGGER.error("error caught", e);
+ ThreadUtils.threadThrowableHandler(Thread.currentThread(),
e);
} finally {
AgentUtils.silenceSleepInMs(batchFlushInterval);
}
diff --git
a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/LogFileSource.java
b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/LogFileSource.java
index 929a331777..4f2048b87f 100755
---
a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/LogFileSource.java
+++
b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/LogFileSource.java
@@ -332,6 +332,9 @@ public class LogFileSource extends AbstractSource {
data.setReadBytes(String.valueOf(bytePosition));
data.setReadLines(String.valueOf(linePosition));
OffsetProfile offsetProfile =
OffsetManager.getInstance().getOffset(taskId, instanceId);
+ if (offsetProfile == null) {
+ return;
+ }
data.setSendLines(offsetProfile.getOffset());
FileStaticManager.getInstance().putStaticMsg(data);
randomAccessFile.close();
diff --git
a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/file/AbstractSource.java
b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/file/AbstractSource.java
index fde4cb4ff4..6ee2950d3c 100644
---
a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/file/AbstractSource.java
+++
b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/sources/file/AbstractSource.java
@@ -32,6 +32,7 @@ import org.apache.inlong.agent.plugin.Message;
import org.apache.inlong.agent.plugin.file.Source;
import org.apache.inlong.agent.plugin.sources.file.extend.ExtendedHandler;
import org.apache.inlong.agent.utils.AgentUtils;
+import org.apache.inlong.agent.utils.ThreadUtils;
import org.apache.inlong.common.metric.MetricRegister;
import lombok.AllArgsConstructor;
@@ -153,6 +154,7 @@ public abstract class AbstractSource implements Source {
doRun();
} catch (Throwable e) {
LOGGER.error("do run error maybe file deleted: ", e);
+ ThreadUtils.threadThrowableHandler(Thread.currentThread(), e);
}
running = false;
};
diff --git
a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/task/AbstractTask.java
b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/task/AbstractTask.java
index c463543bf5..75d87bb235 100644
---
a/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/task/AbstractTask.java
+++
b/inlong-agent/agent-plugins/src/main/java/org/apache/inlong/agent/plugin/task/AbstractTask.java
@@ -28,6 +28,7 @@ import org.apache.inlong.agent.plugin.file.Task;
import org.apache.inlong.agent.state.State;
import org.apache.inlong.agent.store.Store;
import org.apache.inlong.agent.utils.AgentUtils;
+import org.apache.inlong.agent.utils.ThreadUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -111,6 +112,7 @@ public abstract class AbstractTask extends Task {
doRun();
} catch (Throwable e) {
LOGGER.error("do run error: ", e);
+ ThreadUtils.threadThrowableHandler(Thread.currentThread(), e);
}
running = false;
}
diff --git a/inlong-agent/bin/agent-env.sh b/inlong-agent/bin/agent-env.sh
index 8d00c44843..a60557f7ec 100755
--- a/inlong-agent/bin/agent-env.sh
+++ b/inlong-agent/bin/agent-env.sh
@@ -25,6 +25,7 @@ export OTEL_LOGS_EXPORTER=otlp
export ENABLE_OBSERVABILITY=false
# OTEL_EXPORTER_OTLP_ENDPOINT must be configured as a URL when
ENABLE_OBSERVABILITY=true.
export OTEL_EXPORTER_OTLP_ENDPOINT=
+export TDW_SECURITY_URL_NULL
#project directory
BASE_DIR=$(cd "$(dirname "$0")"/../;pwd)
@@ -45,17 +46,19 @@ else
fi
if [ -z "$AGENT_JVM_HEAP_OPTS" ]; then
- HEAP_OPTS="-Xmx512m -Xss512k"
+ HEAP_OPTS=" -Xmx2048m -Xms512m -Xss512k "
else
HEAP_OPTS="$AGENT_JVM_HEAP_OPTS"
fi
-GC_OPTS="-XX:+UseG1GC -XX:MaxGCPauseMillis=200
-XX:InitiatingHeapOccupancyPercent=60 -Djava.net.preferIPv4Stack=true
-Dfile.encoding=UTF-8"
-LOG_OPTS="-Xloggc:$BASE_DIR/logs/gc.log -XX:+PrintGCDetails
-XX:+PrintGCDateStamps -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10
-XX:GCLogFileSize=20M"
+
+GVM_OPTS=" -Djava.net.preferIPv4Stack=true -Dfile.encoding=UTF-8 "
+OOM_HANDLER=" -XX:OnOutOfMemoryError=$BASE_DIR/bin/oom.sh"
+GC_OPTS=" -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:+TraceClassLoading
-XX:InitiatingHeapOccupancyPercent=45 -XX:G1HeapRegionSize=16m
-XX:G1MixedGCCountTarget=16 -XX:G1HeapWastePercent=10 -XX:+PrintGCDetails
-XX:+PrintGCDateStamps"
+LOG_OPTS=" -Xloggc:$BASE_DIR/logs/gc.log -XX:+UseGCLogFileRotation
-XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=20M"
if [ -n "$NEED_TRACK_NATIVE_MEMORY" ] && [ "$NEED_TRACK_NATIVE_MEMORY" =
"true" ]; then
GC_OPTS="$GC_OPTS -XX:NativeMemoryTracking"
fi
-AGENT_JVM_ARGS="$HEAP_OPTS $GC_OPTS $LOG_OPTS"
-
+AGENT_JVM_ARGS="$HEAP_OPTS $GVM_OPTS $GC_OPTS $LOG_OPTS $OOM_HANDLER"
# Add Agent Rmi Args when necessary
AGENT_RMI_ARGS="-Dcom.sun.management.jmxremote \
-Dcom.sun.management.jmxremote.port=18080
-Dcom.sun.management.jmxremote.authenticate=false
-Dcom.sun.management.jmxremote.ssl=false"
diff --git a/inlong-agent/bin/oom.sh b/inlong-agent/bin/oom.sh
new file mode 100755
index 0000000000..ff26029165
--- /dev/null
+++ b/inlong-agent/bin/oom.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+BASE_DIR=$(dirname $0)/..
+
+AGENT_CONF="${BASE_DIR}"/conf/agent.properties
+source "${BASE_DIR}"/bin/agent-env.sh
+CONSOLE_OUTPUT_FILE="${LOG_DIR}/oom.log"
+
+function running() {
+ agent_uniq=`cat ${AGENT_CONF}|grep -Ev '^[[:space:]].*|^#' |grep -E
'agent.uniq.id'`
+ check_agent_uniq="${agent_uniq:-"agent.uniq.id=1"}"
+ arg_uniq="-D${check_agent_uniq}"
+ process=$(ps -aux | grep 'java' | grep 'inlong-agent' | grep
"$check_agent_uniq" | awk '{print $2}')
+ if [ "${process}" = "" ]; then
+ return 1;
+ else
+ return 0;
+ fi
+}
+
+function stop_agent() {
+ time=$(date "+%Y-%m-%d %H:%M:%S")
+ if ! running; then
+ echo "$time oom agent is not running." >> $CONSOLE_OUTPUT_FILE
+ exit 1
+ fi
+ count=0
+ while running;
+ do
+ (( count++ ))
+ time=$(date "+%Y-%m-%d %H:%M:%S")
+ pid=$(ps -aux | grep 'java' | grep 'inlong-agent' | grep
"$check_agent_uniq" | awk '{print $2}')
+ echo "$time oom stopping agent($pid) $count times" >> $CONSOLE_OUTPUT_FILE
+ if [ "${count}" -gt 10 ]; then
+ echo "$time oom kill -9 $pid" >> $CONSOLE_OUTPUT_FILE
+ kill -9 "${pid}"
+ else
+ echo "$time oom kill $pid" >> $CONSOLE_OUTPUT_FILE
+ kill "${pid}"
+ fi
+ sleep 6;
+ done
+ time=$(date "+%Y-%m-%d %H:%M:%S")
+ echo "$time oom stop agent($pid) successfully." >> $CONSOLE_OUTPUT_FILE
+}
+
+stop_agent;
\ No newline at end of file