This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 5932b9c35dd50df3412ad3a63aef27b6b41506e5 Author: Alexey Serbin <[email protected]> AuthorDate: Tue Nov 30 16:55:09 2021 -0800 [scripts] add hard memory limit for mini-cluster processes Before this patch, the start_kudu.sh script ran all the Kudu server processes on the same node, where every kudu-tserver and kudu-master assumed it was the only Kudu process running there, so each would automatically set its own hard memory limit to 80% of all the memory available. However, since there may be many such processes, the mini-cluster could end up in an OOM condition. This patch addresses the issue by dividing the memory available at a node among the kudu-tserver processes the script starts, so that together they use up to 80% of all available memory (the same threshold is used by the auto-detection logic that chooses the value of --memory_limit_hard_bytes in Kudu servers). The kudu-master processes are started without a hard memory limit imposed by the script, on the assumption that they usually don't consume a lot of memory: this leaves more memory for the kudu-tserver processes. This script is supposed to cover a limited set of use cases automatically, and it's always possible to add flags setting or overriding the limits set by the script for kudu-master and kudu-tserver processes via the -M/--master-flags and -T/--tserver-flags command line options, respectively. I verified that the OOM condition that I saw when running a mini-cluster in a GCP VM no longer happens with this patch. 
Change-Id: I4de6c96fb0227554edbd5e69b29840f84e408326 Reviewed-on: http://gerrit.cloudera.org:8080/18059 Tested-by: Kudu Jenkins Reviewed-by: Andrew Wong <[email protected]> Reviewed-by: Attila Bukor <[email protected]> --- src/kudu/scripts/start_kudu.sh | 58 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/src/kudu/scripts/start_kudu.sh b/src/kudu/scripts/start_kudu.sh index db33d4e..cce9316 100755 --- a/src/kudu/scripts/start_kudu.sh +++ b/src/kudu/scripts/start_kudu.sh @@ -22,6 +22,7 @@ ######################################################################## set -e +set -o pipefail ulimit -n 2048 function usage() { @@ -166,10 +167,38 @@ function set_port_vars_and_print() { echo " HTTP port $HTTP_PORT" } -pids=() +# Return a flag to set the hard memory limit for the Kudu server processes +# running at the same node. Each of the processes is able to set the hard +# memory limit based on the total amount of memory available, but such a +# provision assumes there is a single Kudu server process running at a node. +# Since there is going to be NUM_TSERVERS kudu-tserver and NUM_MASTERS +# kudu-master processes running, it's necessary to divide the available memory +# among them. +function get_memory_limit_hard_bytes_flag() { + local num_processes=$1 + local mem_size_bytes=0 + if [[ "$OSTYPE" =~ ^linux ]]; then + local mem_size_kb=$(grep -E '^MemTotal' /proc/meminfo | awk '{print $2}') + mem_size_bytes=$((mem_size_kb * 1024)) + elif [[ "$OSTYPE" =~ ^darwin ]]; then + mem_size_bytes=$(sysctl hw.memsize | awk '{print $2}') + fi + + # Do not set the limit for a non-recognized OS. + if [ $mem_size_bytes -eq 0 ]; then + echo "" + return + fi -# Start master server function + # Allocate 80% of all available memory to be used by all the Kudu processes. 
+ local mem_limit_bytes=$((mem_size_bytes * 4 / 5)) + mem_limit_bytes=$((mem_limit_bytes / num_processes)) + echo "--memory_limit_hard_bytes=$mem_limit_bytes" +} + +pids=() +# Start kudu-master process. function start_master() { create_dirs_and_set_vars $1 set_port_vars_and_print $1 $2 $3 @@ -183,14 +212,20 @@ function start_master() { ARGS="$ARGS --unlock_unsafe_flags" ARGS="$ARGS --webserver_port=$HTTP_PORT" ARGS="$ARGS --webserver_interface=$IP" - if [ -d "$WEBSERVER_DOC_ROOT" ]; then ARGS="$ARGS --webserver_doc_root=$WEBSERVER_DOC_ROOT"; fi + if [ -d "$WEBSERVER_DOC_ROOT" ]; then + ARGS="$ARGS --webserver_doc_root=$WEBSERVER_DOC_ROOT" + fi + # NOTE: a kudu-master process doesn't usually consume a lot of memory, + # so the memory hard limit isn't set for them; if kudu-master memory + # consumption becomes an issue, provide the necessary flags for + # kudu-master processing using the --master-flags/-M command line + # option ARGS="$ARGS $EXTRA_MASTER_FLAGS" $ARGS & pids+=($!) } -# Start tablet server function - +# Start kudu-tserver process. function start_tserver() { create_dirs_and_set_vars $1 set_port_vars_and_print $1 $2 $3 @@ -203,8 +238,16 @@ function start_tserver() { ARGS="$ARGS --unlock_unsafe_flags" ARGS="$ARGS --webserver_port=$HTTP_PORT" ARGS="$ARGS --webserver_interface=$IP" - if [ -d "$WEBSERVER_DOC_ROOT" ]; then ARGS="$ARGS --webserver_doc_root=$WEBSERVER_DOC_ROOT"; fi ARGS="$ARGS --tserver_master_addrs=$4" + if [ -d "$WEBSERVER_DOC_ROOT" ]; then + ARGS="$ARGS --webserver_doc_root=$WEBSERVER_DOC_ROOT" + fi + + # If applicable, set the memory hard limit. + local mem_limit_flag=$(get_memory_limit_hard_bytes_flag $NUM_TSERVERS) + if [ -n $mem_limit_flag ]; then + ARGS="$ARGS $mem_limit_flag" + fi ARGS="$ARGS $EXTRA_TSERVER_FLAGS" $ARGS & pids+=($!) 
@@ -235,6 +278,5 @@ for i in $(seq 0 $((NUM_TSERVERS - 1))); do start_tserver tserver-$i $TSERVER_RPC_PORT $TSERVER_HTTP_PORT $MASTER_ADDRESSES done -# Show status of started processes - +# Show the status of the started processes. ps -wwo args -p ${pids[@]}
