Hi,
On 2023-01-05 13:44:20 -0500, Reid Thompson wrote:
> From 0a6b152e0559a250dddd33bd7d43eb0959432e0d Mon Sep 17 00:00:00 2001
> From: Reid Thompson <[email protected]>
> Date: Thu, 11 Aug 2022 12:01:25 -0400
> Subject: [PATCH 1/2] Add tracking of backend memory allocated to
> pg_stat_activity
>
> This new field displays the current bytes of memory allocated to the
> backend process. It is updated as memory for the process is
> malloc'd/free'd. Memory allocated to items on the freelist is included in
> the displayed value.
It doesn't actually malloc/free. It tracks palloc/pfree.
> Dynamic shared memory allocations are included only in the value displayed
> for the backend that created them, they are not included in the value for
> backends that are attached to them to avoid double counting.
As mentioned before, I don't think accounting DSM this way makes sense.
> --- a/src/backend/postmaster/autovacuum.c
> +++ b/src/backend/postmaster/autovacuum.c
> @@ -407,6 +407,9 @@ StartAutoVacLauncher(void)
>
> #ifndef EXEC_BACKEND
> case 0:
> + /* Zero allocated bytes to avoid double counting parent
> allocation */
> + pgstat_zero_my_allocated_bytes();
> +
> /* in postmaster child ... */
> InitPostmasterChild();
> @@ -1485,6 +1488,9 @@ StartAutoVacWorker(void)
>
> #ifndef EXEC_BACKEND
> case 0:
> + /* Zero allocated bytes to avoid double counting parent
> allocation */
> + pgstat_zero_my_allocated_bytes();
> +
> /* in postmaster child ... */
> InitPostmasterChild();
>
> diff --git a/src/backend/postmaster/postmaster.c
> b/src/backend/postmaster/postmaster.c
> index eac3450774..24278e5c18 100644
> --- a/src/backend/postmaster/postmaster.c
> +++ b/src/backend/postmaster/postmaster.c
> @@ -4102,6 +4102,9 @@ BackendStartup(Port *port)
> {
> free(bn);
>
> + /* Zero allocated bytes to avoid double counting parent
> allocation */
> + pgstat_zero_my_allocated_bytes();
> +
> /* Detangle from postmaster */
> InitPostmasterChild();
It doesn't at all seem right to call pgstat_zero_my_allocated_bytes() here,
before even InitPostmasterChild() is called. Nor does it seem right to add the
call to so many places.
Note that this is before we even delete postmaster's memory, see e.g.:
/*
* If the PostmasterContext is still around, recycle the space; we don't
* need it anymore after InitPostgres completes. Note this does not
trash
* *MyProcPort, because ConnCreate() allocated that space with malloc()
* ... else we'd need to copy the Port data first. Also, subsidiary
data
* such as the username isn't lost either; see ProcessStartupPacket().
*/
if (PostmasterContext)
{
MemoryContextDelete(PostmasterContext);
PostmasterContext = NULL;
}
calling pgstat_zero_my_allocated_bytes() before we do this will lead to
undercounting memory usage, afaict.
> +/* Enum helper for reporting memory allocated bytes */
> +enum allocation_direction
> +{
> + PG_ALLOC_DECREASE = -1,
> + PG_ALLOC_IGNORE,
> + PG_ALLOC_INCREASE,
> +};
What's the point of this?
> +/* ----------
> + * pgstat_report_allocated_bytes() -
> + *
> + * Called to report change in memory allocated for this backend.
> + *
> + * my_allocated_bytes initially points to local memory, making it safe to
> call
> + * this before pgstats has been initialized. allocation_direction is a
> + * positive/negative multiplier enum defined above.
> + * ----------
> + */
> +static inline void
> +pgstat_report_allocated_bytes(int64 allocated_bytes, int
> allocation_direction)
I don't think this should take allocation_direction as a parameter, I'd make
it two different functions.
> +{
> + uint64 temp;
> +
> + /*
> + * Avoid *my_allocated_bytes unsigned integer overflow on
> + * PG_ALLOC_DECREASE
> + */
> + if (allocation_direction == PG_ALLOC_DECREASE &&
> + pg_sub_u64_overflow(*my_allocated_bytes, allocated_bytes,
> &temp))
> + {
> + *my_allocated_bytes = 0;
> + ereport(LOG,
> + errmsg("Backend %d deallocated %lld bytes,
> exceeding the %llu bytes it is currently reporting allocated. Setting
> reported to 0.",
> + MyProcPid, (long long)
> allocated_bytes,
> + (unsigned long long)
> *my_allocated_bytes));
We certainly shouldn't have an ereport in here. This stuff really needs to be
cheap.
> + }
> + else
> + *my_allocated_bytes += (allocated_bytes) * allocation_direction;
Superfluous parens?
> +/* ----------
> + * pgstat_get_all_memory_allocated() -
> + *
> + * Return a uint64 representing the current shared memory allocated to all
> + * backends. This looks directly at the BackendStatusArray, and so will
> + * provide current information regardless of the age of our transaction's
> + * snapshot of the status array.
> + * In the future we will likely utilize additional values - perhaps limit
> + * backend allocation by user/role, etc.
> + * ----------
> + */
> +uint64
> +pgstat_get_all_backend_memory_allocated(void)
> +{
> + PgBackendStatus *beentry;
> + int i;
> + uint64 all_memory_allocated = 0;
> +
> + beentry = BackendStatusArray;
> +
> + /*
> + * We probably shouldn't get here before shared memory has been set up,
> + * but be safe.
> + */
> + if (beentry == NULL || BackendActivityBuffer == NULL)
> + return 0;
> +
> + /*
> + * We include AUX procs in all backend memory calculation
> + */
> + for (i = 1; i <= NumBackendStatSlots; i++)
> + {
> + /*
> + * We use a volatile pointer here to ensure the compiler
> doesn't try
> + * to get cute.
> + */
> + volatile PgBackendStatus *vbeentry = beentry;
> + bool found;
> + uint64 allocated_bytes = 0;
> +
> + for (;;)
> + {
> + int before_changecount;
> + int after_changecount;
> +
> + pgstat_begin_read_activity(vbeentry,
> before_changecount);
> +
> + /*
> + * Ignore invalid entries, which may contain invalid
> data.
> + * See pgstat_beshutdown_hook()
> + */
> + if (vbeentry->st_procpid > 0)
> + allocated_bytes = vbeentry->allocated_bytes;
> +
> + pgstat_end_read_activity(vbeentry, after_changecount);
> +
> + if ((found =
> pgstat_read_activity_complete(before_changecount,
> +
> after_changecount)))
> + break;
> +
> + /* Make sure we can break out of loop if stuck... */
> + CHECK_FOR_INTERRUPTS();
> + }
> +
> + if (found)
> + all_memory_allocated += allocated_bytes;
> +
> + beentry++;
> + }
> +
> + return all_memory_allocated;
> +}
> +
> +/*
> + * Determine if allocation request will exceed max backend memory allowed.
> + * Do not apply to auxiliary processes.
> + */
> +bool
> +exceeds_max_total_bkend_mem(uint64 allocation_request)
> +{
> + bool result = false;
> +
> + /* Exclude auxiliary processes from the check */
> + if (MyAuxProcType != NotAnAuxProcess)
> + return result;
> +
> + /* Convert max_total_bkend_mem to bytes for comparison */
> + if (max_total_bkend_mem &&
> + pgstat_get_all_backend_memory_allocated() +
> + allocation_request > (uint64) max_total_bkend_mem * 1024 * 1024)
> + {
> + /*
> + * Explicitly identify the OOM being a result of this
> configuration
> + * parameter vs a system failure to allocate OOM.
> + */
> + ereport(WARNING,
> + errmsg("allocation would exceed
> max_total_memory limit (%llu > %llu)",
> + (unsigned long long)
> pgstat_get_all_backend_memory_allocated() +
> + allocation_request, (unsigned long
> long) max_total_bkend_mem * 1024 * 1024));
> +
> + result = true;
> + }
I think it's completely unfeasible to execute something as expensive as
pgstat_get_all_backend_memory_allocated() on every allocation. Like,
seriously, no.
And we absolutely definitely shouldn't just add CHECK_FOR_INTERRUPT() calls
into the middle of allocator code.
Greetings,
Andres Freund