Hello! On Mon, Aug 28, 2023 at 04:30:44PM +0400, Roman Arutyunyan wrote:
> # HG changeset patch > # User Roman Arutyunyan <a...@nginx.com> > # Date 1693218974 -14400 > # Mon Aug 28 14:36:14 2023 +0400 > # Node ID 42c3166b675a6a7624bdf3d78c0d4685ce8172e3 > # Parent fdad00808cb485ab83e919be59e9211ef06ac56a > Added "max_shutdown_workers" directive. > > The directive sets maximum number of workers in shutdown state while reloading > nginx configuration. Upon reaching this value, workers restart is postponed > until shutdown workers start exiting. This allows for lower peak resource > consumption at the cost of slower reconfiguration. I can't say I like the name, as the preferred term is "worker processes", not "workers". max_shutdown_processes? Also note that this doesn't really limit the total resource usage during configuration reloads, since cache manager and cache loader processes are exempt from the limit, but new processes are started on each configuration reload. It would be nice to actually provide a limit which makes multiple/frequent configuration reloads completely safe from the resource usage point of view, that is, to limit all processes spawned during configuration reloads, and not just worker processes. > > diff --git a/src/core/nginx.c b/src/core/nginx.c > --- a/src/core/nginx.c > +++ b/src/core/nginx.c > @@ -83,6 +83,13 @@ static ngx_command_t ngx_core_commands[ > 0, > NULL }, > > + { ngx_string("max_shutdown_workers"), > + NGX_MAIN_CONF|NGX_DIRECT_CONF|NGX_CONF_TAKE1, > + ngx_conf_set_num_slot, > + 0, > + offsetof(ngx_core_conf_t, max_shutdown_workers), > + NULL }, > + > { ngx_string("debug_points"), > NGX_MAIN_CONF|NGX_DIRECT_CONF|NGX_CONF_TAKE1, > ngx_conf_set_enum_slot, > @@ -1117,6 +1124,7 @@ ngx_core_module_create_conf(ngx_cycle_t > ccf->shutdown_timeout = NGX_CONF_UNSET_MSEC; > > ccf->worker_processes = NGX_CONF_UNSET; > + ccf->max_shutdown_workers = NGX_CONF_UNSET; > ccf->debug_points = NGX_CONF_UNSET; > > ccf->rlimit_nofile = NGX_CONF_UNSET; > @@ -1146,6 +1154,7 @@ ngx_core_module_init_conf(ngx_cycle_t *c > ngx_conf_init_msec_value(ccf->shutdown_timeout, 0); > > ngx_conf_init_value(ccf->worker_processes, 1); > + ngx_conf_init_value(ccf->max_shutdown_workers, 0); > ngx_conf_init_value(ccf->debug_points, 0); > > #if (NGX_HAVE_CPU_AFFINITY) > diff --git a/src/core/ngx_cycle.h b/src/core/ngx_cycle.h > --- a/src/core/ngx_cycle.h > +++ b/src/core/ngx_cycle.h > @@ -94,6 +94,7 @@ typedef struct { > ngx_msec_t shutdown_timeout; > > ngx_int_t worker_processes; > + ngx_int_t max_shutdown_workers; > ngx_int_t debug_points; > > ngx_int_t rlimit_nofile; > diff --git a/src/os/unix/ngx_process.c b/src/os/unix/ngx_process.c > --- a/src/os/unix/ngx_process.c > +++ b/src/os/unix/ngx_process.c > @@ -216,6 +216,7 @@ ngx_spawn_process(ngx_cycle_t *cycle, ng > ngx_processes[s].data = data; > ngx_processes[s].name = name; > ngx_processes[s].exiting = 0; > + ngx_processes[s].old = 0; > > switch (respawn) { > > diff --git a/src/os/unix/ngx_process.h b/src/os/unix/ngx_process.h > --- a/src/os/unix/ngx_process.h > +++ b/src/os/unix/ngx_process.h > @@ -33,6 +33,7 @@ typedef struct { > unsigned detached:1; > unsigned exiting:1; > unsigned exited:1; > + unsigned old:1; > } ngx_process_t; > > > diff --git a/src/os/unix/ngx_process_cycle.c b/src/os/unix/ngx_process_cycle.c > --- a/src/os/unix/ngx_process_cycle.c > +++ b/src/os/unix/ngx_process_cycle.c > @@ -15,6 +15,7 @@ static void ngx_start_worker_processes(n > ngx_int_t type); > static void ngx_start_worker_process(ngx_cycle_t *cycle, ngx_int_t i, > ngx_int_t type); > +static void ngx_restart_worker_processes(ngx_cycle_t *cycle); > static void ngx_start_cache_manager_processes(ngx_cycle_t *cycle, > ngx_uint_t respawn); > static void ngx_pass_open_channel(ngx_cycle_t *cycle); > @@ -22,6 +23,7 @@ static void ngx_signal_worker_processes( > static void ngx_signal_worker_process(ngx_cycle_t *cycle, ngx_int_t i, > int signo); > static ngx_uint_t ngx_reap_children(ngx_cycle_t *cycle); > +static ngx_uint_t ngx_restart_worker_process(ngx_cycle_t *cycle); > static void ngx_master_process_exit(ngx_cycle_t *cycle); > static void ngx_worker_process_cycle(ngx_cycle_t *cycle, void *data); > static void ngx_worker_process_init(ngx_cycle_t *cycle, ngx_int_t worker); > @@ -235,8 +237,8 @@ ngx_master_process_cycle(ngx_cycle_t *cy > ngx_cycle = cycle; > ccf = (ngx_core_conf_t *) ngx_get_conf(cycle->conf_ctx, > ngx_core_module); > - ngx_start_worker_processes(cycle, ccf->worker_processes, > - NGX_PROCESS_JUST_RESPAWN); > + ngx_restart_worker_processes(cycle); > + > ngx_start_cache_manager_processes(cycle, 1); > > /* allow new processes to start */ > @@ -360,6 +362,70 @@ ngx_start_worker_process(ngx_cycle_t *cy > > > static void > +ngx_restart_worker_processes(ngx_cycle_t *cycle) > +{ > + ngx_int_t i, n, m, worker; > + ngx_core_conf_t *ccf; > + > + ccf = (ngx_core_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_core_module); > + > + n = 0; > + m = ccf->max_shutdown_workers; > + > + if (m == 0) { > + goto start; > + } > + > + for (i = 0; i < ngx_last_process; i++) { > + > + if (ngx_processes[i].pid == -1 > + || ngx_processes[i].proc != ngx_worker_process_cycle) > + { Checking "proc" here looks like a layering violation. For example, ngx_signal_worker_process() does try to depend on the process entry point, and instead relies on the "detached" flag to decide which processes need to be signalled. > + continue; > + } > + > + ngx_processes[i].old = 0; > + worker = (intptr_t) ngx_processes[i].data; > + > + if (!ngx_processes[i].exiting && worker < ccf->worker_processes) { > + n++; It is not clear what we are counting here, and why. My best guess it that it's an attempt to count worker processes we are going to restart (and not just stop). It is not clear why we are doing this though: the limit as defined does not differentiate between shutting down processes being stopped and processes being restarted. Further, I don't think it's done correctly: since "worker" is an index number of the worker process, there can be at least gaps (if worker process failed to start and cannot be respawn). > + > + } else if (m > 0) { > + m--; > + } > + } > + > + n = (n > m) ? n - m : 0; n = ngx_max(n - m, 0); ? Also, some better names for the variables and/or some comments explaining what is being done might worth the effort. (As far as I can tell, "n" here is the number of processes we want to restart but not allowed to.) > + > + if (n == 0) { > + goto start; > + } > + > + for (i = 0; i < ngx_last_process; i++) { > + > + if (ngx_processes[i].pid == -1 > + || ngx_processes[i].proc != ngx_worker_process_cycle) > + { > + continue; > + } > + > + ngx_processes[i].old = 1; > + worker = (intptr_t) ngx_processes[i].data; > + > + if (!ngx_processes[i].exiting && worker < n) { Here "n" is the number of processes we are not allowed to restart right now, while "worker" is the index number of the worker process in question. For a given "n", number of worker processes matching this condition might be at least less than "n": if some worker processes failed to start. (Just in case, the rule of thumb is: if you are using worker process index number for anything, most likely you are doing it wrong.) > + ngx_processes[i].just_spawn = 1; The "just_spawn" flag implies the process was just started, and so it is ignored by the following ngx_signal_worker_process() call. Abusing it to ignore processes during shutdown might work, but I can't say I like this approach. Using a separate flag might be better. > + } > + } > + > +start: > + > + for (i = n; i < ccf->worker_processes; i++) { > + ngx_start_worker_process(cycle, i, NGX_PROCESS_JUST_RESPAWN); > + } The order of processes being started looks somewhat counter-intuitive here. > +} > + > + > +static void > ngx_start_cache_manager_processes(ngx_cycle_t *cycle, ngx_uint_t respawn) > { > ngx_uint_t i, manager, loader; > @@ -486,15 +552,16 @@ ngx_signal_worker_process(ngx_cycle_t *c > ch.fd = -1; > > > - ngx_log_debug7(NGX_LOG_DEBUG_EVENT, cycle->log, 0, > - "child: %i %P e:%d t:%d d:%d r:%d j:%d", > + ngx_log_debug8(NGX_LOG_DEBUG_EVENT, cycle->log, 0, > + "child: %i %P e:%d t:%d d:%d r:%d j:%d o:%d", > i, > ngx_processes[i].pid, > ngx_processes[i].exiting, > ngx_processes[i].exited, > ngx_processes[i].detached, > ngx_processes[i].respawn, > - ngx_processes[i].just_spawn); > + ngx_processes[i].just_spawn, > + ngx_processes[i].old); > > if (ngx_processes[i].detached || ngx_processes[i].pid == -1) { > return; > @@ -563,15 +630,16 @@ ngx_reap_children(ngx_cycle_t *cycle) > live = 0; > for (i = 0; i < ngx_last_process; i++) { > > - ngx_log_debug7(NGX_LOG_DEBUG_EVENT, cycle->log, 0, > - "child: %i %P e:%d t:%d d:%d r:%d j:%d", > + ngx_log_debug8(NGX_LOG_DEBUG_EVENT, cycle->log, 0, > + "child: %i %P e:%d t:%d d:%d r:%d j:%d o:%d", > i, > ngx_processes[i].pid, > ngx_processes[i].exiting, > ngx_processes[i].exited, > ngx_processes[i].detached, > ngx_processes[i].respawn, > - ngx_processes[i].just_spawn); > + ngx_processes[i].just_spawn, > + ngx_processes[i].old); > > if (ngx_processes[i].pid == -1) { > continue; > @@ -660,6 +728,16 @@ ngx_reap_children(ngx_cycle_t *cycle) > ngx_processes[i].pid = -1; > } > Note: when a process dies, it is restarted if the "respawn" flag is set, regardless of the "old" flag. The "old" flag is preserved even if it is no longer relevant after the respawn: the process is restarted with the new configuration, and there is no need to restart it again. > + if (ngx_processes[i].old > + && ngx_processes[i].exiting > + && !ngx_terminate > + && !ngx_quit) > + { > + if (ngx_restart_worker_process(cycle)) { > + live = 1; > + } > + } > + > } else if (ngx_processes[i].exiting || !ngx_processes[i].detached) { > live = 1; > } > @@ -669,6 +747,53 @@ ngx_reap_children(ngx_cycle_t *cycle) > } > > > +static ngx_uint_t > +ngx_restart_worker_process(ngx_cycle_t *cycle) > +{ > + ngx_int_t i, j, m; > + ngx_core_conf_t *ccf; > + > + ccf = (ngx_core_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_core_module); > + > + j = -1; > + m = ccf->max_shutdown_workers; > + > + if (m == 0) { > + return 0; > + } > + > + for (i = 0; i < ngx_last_process; i++) { > + > + if (ngx_processes[i].pid == -1 || !ngx_processes[i].old) { > + continue; > + } > + > + if (!ngx_processes[i].exiting && !ngx_processes[i].exited) { The "exited" check here is not needed to catch the processes we were called for, or other processes being shut down - they already have "exiting" flag set. On the other hand, this can catch unexpectedly died processes (the ones which are still waiting for restart). As per the code, such processes will be counted against max_shutdown_workers as if they are shutting down - this looks wrong and can cause reload hang. > + j = i; > + continue; > + } > + > + if (--m == 0) { > + return 0; > + } > + } > + > + if (j == -1) { > + return 0; > + } > + > + ngx_start_worker_process(cycle, (intptr_t) ngx_processes[j].data, > + NGX_PROCESS_RESPAWN); > + > + ngx_msleep(100); > + > + ngx_signal_worker_process(cycle, j, > + ngx_signal_value(NGX_SHUTDOWN_SIGNAL)); Also note that the "exited" check above implies that there can be multiple exited processes at the same time, but we only restart just one process here. It might be a good idea to restart multiple processes at the same time: sleeping for 100ms for each process means that if, for example, 64 processes will exit at the same time after shutdown timeout expiration, master process will be unresponsive for 6 seconds. Also, this looks very close to ngx_restart_worker_processes(). It might be a good idea to rework things to ensure unified approach to restarting processes instead of providing multiple duplicate ways. > + > + return 1; > +} > + > + > static void > ngx_master_process_exit(ngx_cycle_t *cycle) > { Overall, I tend to think that the suggested code is very fragile, and I would rather consider re-thinking the approach. -- Maxim Dounin http://mdounin.ru/ _______________________________________________ nginx-devel mailing list nginx-devel@nginx.org https://mailman.nginx.org/mailman/listinfo/nginx-devel