Re: [PATCH]: MEDIUM: pool little FreeBSD support improvement.

2021-11-25 Thread Willy Tarreau
On Thu, Nov 25, 2021 at 04:29:55PM +, David CARLIER wrote:
> Ok I applied your suggestions and move back the malloc_trim/mallinfo
> part as it was before.

Thanks, now merged!
Willy



Re: [PATCH]: MEDIUM: pool little FreeBSD support improvement.

2021-11-25 Thread David CARLIER
Ok I applied your suggestions and move back the malloc_trim/mallinfo
part as it was before.

On Thu, 25 Nov 2021 at 14:37, Willy Tarreau  wrote:
>
> On Thu, Nov 25, 2021 at 01:19:39PM +, David CARLIER wrote:
> > Here a patchset instead :)
>
> Thanks!
>
> I've reviewed it, I'm having some comments below:
>
> > From e8daa477b53a43ab39113cf0e9c43d9bbda1e9a9 Mon Sep 17 00:00:00 2001
> > From: David Carlier 
> > Date: Thu, 25 Nov 2021 10:26:50 +
> > Subject: [PATCH 1/3] MINOR: pool: refactoring to make it available for other
> >  systems/allocators.
> >
> > Actually it's focusing on linux/glibc, we re trying to expand it for other 
> > possible systems and allocators combinations.
> > ---
> >  include/haproxy/compat.h |  9 +++--
> >  src/pool.c   | 38 +++---
> >  2 files changed, 14 insertions(+), 33 deletions(-)
> >
> > diff --git a/include/haproxy/compat.h b/include/haproxy/compat.h
> > index 25b15a1f0..b801dac08 100644
> > --- a/include/haproxy/compat.h
> > +++ b/include/haproxy/compat.h
> > @@ -263,9 +263,14 @@ typedef struct { } empty_t;
> >  #endif
> >
> >  /* glibc 2.33 provides mallinfo2() that overcomes mallinfo()'s type 
> > limitations */
> > -#if (defined(__GNU_LIBRARY__) && (__GLIBC__ > 2 || __GLIBC__ == 2 && 
> > __GLIBC_MINOR__ >= 33))
> > +#if defined(__GNU_LIBRARY__)
> >  #include 
> > -#define HA_HAVE_MALLINFO2
> > +#if (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 33)
> > +#define HA_MALLINFO_API mallinfo2
> > +#else
> > +#define HA_MALLINFO_API mallinfo
> > +#endif
> > +#define HA_HAVE_MALLINFO
> >  #endif
> >
> >  /* FreeBSD also has malloc_usable_size() but it requires malloc_np.h */
> > diff --git a/src/pool.c b/src/pool.c
> > index af46b4469..16ef76cf0 100644
> > --- a/src/pool.c
> > +++ b/src/pool.c
> > @@ -42,14 +42,15 @@ int mem_poison_byte = -1;
> >  static int mem_fail_rate = 0;
> >  #endif
> >
> > -#if defined(HA_HAVE_MALLOC_TRIM)
> >  static int using_libc_allocator = 0;
> >
> >  /* ask the allocator to trim memory pools */
> >  static void trim_all_pools(void)
> >  {
> > +#ifdef HAVE_MALLOC_TRIM
> >   if (using_libc_allocator)
> >   malloc_trim(0);
> > +#endif
> >  }
> >
> >  /* check if we're using the same allocator as the one that provides
> > @@ -60,48 +61,23 @@ static void trim_all_pools(void)
> >   */
> >  static void detect_allocator(void)
> >  {
> > -#ifdef HA_HAVE_MALLINFO2
> > - struct mallinfo2 mi1, mi2;
> > -#else
> > - struct mallinfo mi1, mi2;
> > -#endif
> > +#if defined(HA_HAVE_MALLINFO)
> > + struct HA_MALLINFO_API mi1, mi2;
> >   void *ptr;
> >
> > -#ifdef HA_HAVE_MALLINFO2
> > - mi1 = mallinfo2();
> > -#else
> > - mi1 = mallinfo();
> > -#endif
> > + mi1 = HA_MALLINFO_API();
> >   ptr = DISGUISE(malloc(1));
> > -#ifdef HA_HAVE_MALLINFO2
> > - mi2 = mallinfo2();
> > -#else
> > - mi2 = mallinfo();
> > -#endif
> > + mi2 = HA_MALLINFO_API();
> >   free(DISGUISE(ptr));
> >
> >   using_libc_allocator = !!memcmp(, , sizeof(mi1));
> > +#endif
> >  }
>
> I find it unwelcome to use a single macro for the struct name and
> the function. It just turns out that they are the same but they're
> for different object types, and if one day we have mallinfo3() that
> works on struct mallinfo2, we're doomed.

I hope it will never come to such things, sounds terrible to me :)
even tough I have hard time to imagine a situation like this.

And the gain in legibility
> is not that great overall, I'd rather keep them with the ugly ifdefs
> here. If you prefer, just write two blocks in the function, one for
> mallinfo2 and one for the other. That's what I did the first time
> but wasn't much convinced about the benefit, but at this point it's
> a matter of taste.
>
> If you really want to have it this way, otherwise use two different
> macros, e.g.:
>   #define HA_MALLINFO_TYPE struct mallinfo
>   #define HA_MALLINFO_FUNC(x) mallinfo(x)
>
> But again this adds abstraction that doesn't help much, especially in
> tricky code which is written to diagnose certain things.
>
> > From c0f5cf6f935392e1ea69a84aae104de5ed06e68c Mon Sep 17 00:00:00 2001
> > From: David Carlier 
> > Date: Thu, 25 Nov 2021 13:00:54 +
> > Subject: [PATCH 2/3] MEDIUM: pool : detecting jemalloc usage at runtime.
> >
> > Here we're just looking up if the mallctl's jemalloc API symbol is present.
> > ---
> >  src/pool.c | 26 +++---
> >  1 file changed, 19 insertions(+), 7 deletions(-)
> >
> > diff --git a/src/pool.c b/src/pool.c
> > index 16ef76cf0..65c037911 100644
> > --- a/src/pool.c
> > +++ b/src/pool.c
> > @@ -43,6 +43,7 @@ static int mem_fail_rate = 0;
> >  #endif
> >
> >  static int using_libc_allocator = 0;
> > +static int(*my_mallctl)(const char *, void *, size_t *, void *, size_t) = 
> > NULL;
> >
> >  /* ask the allocator to trim memory pools */
> >  static void trim_all_pools(void)
> > @@ -61,17 +62,28 @@ static void trim_all_pools(void)
> >   */
> >  

Re: [PATCH]: MEDIUM: pool little FreeBSD support improvement.

2021-11-25 Thread Willy Tarreau
On Thu, Nov 25, 2021 at 01:19:39PM +, David CARLIER wrote:
> Here a patchset instead :)

Thanks!

I've reviewed it, I'm having some comments below:

> From e8daa477b53a43ab39113cf0e9c43d9bbda1e9a9 Mon Sep 17 00:00:00 2001
> From: David Carlier 
> Date: Thu, 25 Nov 2021 10:26:50 +
> Subject: [PATCH 1/3] MINOR: pool: refactoring to make it available for other
>  systems/allocators.
> 
> Actually it's focusing on linux/glibc, we re trying to expand it for other 
> possible systems and allocators combinations.
> ---
>  include/haproxy/compat.h |  9 +++--
>  src/pool.c   | 38 +++---
>  2 files changed, 14 insertions(+), 33 deletions(-)
> 
> diff --git a/include/haproxy/compat.h b/include/haproxy/compat.h
> index 25b15a1f0..b801dac08 100644
> --- a/include/haproxy/compat.h
> +++ b/include/haproxy/compat.h
> @@ -263,9 +263,14 @@ typedef struct { } empty_t;
>  #endif
>  
>  /* glibc 2.33 provides mallinfo2() that overcomes mallinfo()'s type 
> limitations */
> -#if (defined(__GNU_LIBRARY__) && (__GLIBC__ > 2 || __GLIBC__ == 2 && 
> __GLIBC_MINOR__ >= 33))
> +#if defined(__GNU_LIBRARY__)
>  #include 
> -#define HA_HAVE_MALLINFO2
> +#if (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 33)
> +#define HA_MALLINFO_API mallinfo2
> +#else
> +#define HA_MALLINFO_API mallinfo
> +#endif
> +#define HA_HAVE_MALLINFO
>  #endif
>  
>  /* FreeBSD also has malloc_usable_size() but it requires malloc_np.h */
> diff --git a/src/pool.c b/src/pool.c
> index af46b4469..16ef76cf0 100644
> --- a/src/pool.c
> +++ b/src/pool.c
> @@ -42,14 +42,15 @@ int mem_poison_byte = -1;
>  static int mem_fail_rate = 0;
>  #endif
>  
> -#if defined(HA_HAVE_MALLOC_TRIM)
>  static int using_libc_allocator = 0;
>  
>  /* ask the allocator to trim memory pools */
>  static void trim_all_pools(void)
>  {
> +#ifdef HAVE_MALLOC_TRIM
>   if (using_libc_allocator)
>   malloc_trim(0);
> +#endif
>  }
>  
>  /* check if we're using the same allocator as the one that provides
> @@ -60,48 +61,23 @@ static void trim_all_pools(void)
>   */
>  static void detect_allocator(void)
>  {
> -#ifdef HA_HAVE_MALLINFO2
> - struct mallinfo2 mi1, mi2;
> -#else
> - struct mallinfo mi1, mi2;
> -#endif
> +#if defined(HA_HAVE_MALLINFO)
> + struct HA_MALLINFO_API mi1, mi2;
>   void *ptr;
>  
> -#ifdef HA_HAVE_MALLINFO2
> - mi1 = mallinfo2();
> -#else
> - mi1 = mallinfo();
> -#endif
> + mi1 = HA_MALLINFO_API();
>   ptr = DISGUISE(malloc(1));
> -#ifdef HA_HAVE_MALLINFO2
> - mi2 = mallinfo2();
> -#else
> - mi2 = mallinfo();
> -#endif
> + mi2 = HA_MALLINFO_API();
>   free(DISGUISE(ptr));
>  
>   using_libc_allocator = !!memcmp(, , sizeof(mi1));
> +#endif
>  }

I find it unwelcome to use a single macro for the struct name and
the function. It just turns out that they are the same but they're
for different object types, and if one day we have mallinfo3() that
works on struct mallinfo2, we're doomed. And the gain in legibility
is not that great overall, I'd rather keep them with the ugly ifdefs
here. If you prefer, just write two blocks in the function, one for
mallinfo2 and one for the other. That's what I did the first time
but wasn't much convinced about the benefit, but at this point it's
a matter of taste.

If you really want to have it this way, otherwise use two different
macros, e.g.:
  #define HA_MALLINFO_TYPE struct mallinfo
  #define HA_MALLINFO_FUNC(x) mallinfo(x)

But again this adds abstraction that doesn't help much, especially in
tricky code which is written to diagnose certain things.

> From c0f5cf6f935392e1ea69a84aae104de5ed06e68c Mon Sep 17 00:00:00 2001
> From: David Carlier 
> Date: Thu, 25 Nov 2021 13:00:54 +
> Subject: [PATCH 2/3] MEDIUM: pool : detecting jemalloc usage at runtime.
> 
> Here we're just looking up if the mallctl's jemalloc API symbol is present.
> ---
>  src/pool.c | 26 +++---
>  1 file changed, 19 insertions(+), 7 deletions(-)
> 
> diff --git a/src/pool.c b/src/pool.c
> index 16ef76cf0..65c037911 100644
> --- a/src/pool.c
> +++ b/src/pool.c
> @@ -43,6 +43,7 @@ static int mem_fail_rate = 0;
>  #endif
>  
>  static int using_libc_allocator = 0;
> +static int(*my_mallctl)(const char *, void *, size_t *, void *, size_t) = 
> NULL;
>  
>  /* ask the allocator to trim memory pools */
>  static void trim_all_pools(void)
> @@ -61,17 +62,28 @@ static void trim_all_pools(void)
>   */
>  static void detect_allocator(void)
>  {
> + extern int mallctl(const char *, void *, size_t *, void *, size_t) 
> __attribute__((weak));
> +
> + my_mallctl = mallctl;
> + 
> + if (!my_mallctl)
> + my_mallctl = get_sym_curr_addr("mallctl");
> +
> + if (!my_mallctl) {
>  #if defined(HA_HAVE_MALLINFO)
> - struct HA_MALLINFO_API mi1, mi2;
> - void *ptr;
> + struct HA_MALLINFO_API mi1, mi2;
> + void *ptr;
>  
> - mi1 = HA_MALLINFO_API();
> - ptr = 

Re: [PATCH]: MEDIUM: pool little FreeBSD support improvement.

2021-11-25 Thread Илья Шипицин
чт, 25 нояб. 2021 г. в 18:22, David CARLIER :

> On Thu, 25 Nov 2021 at 12:25, Willy Tarreau  wrote:
> >
> > On Thu, Nov 25, 2021 at 04:38:27PM +0500,  ??? wrote:
> > > > Thus I think that instead of focusing on the OS we ought to continue
> > > > to focus on the allocator and improve runtime detection:
> > > >
> > > >   - glibc (currently detected using detect_allocator)
> > > > => use malloc_trim()
> > > >   - jemalloc at build time (mallctl != NULL)
> > > > => use mallctl() as you did
> > > >   - jemalloc at runtime (mallctl == NULL but dlsym("mallctl") !=
> NULL)
> > > > => use mallctl() as you did
> > > >   - others
> > > > => no trimming
> > > >
> > >
> > > I never imagined earlier that high level applications (such as reverse
> > > https/tcp proxy) cares about such low level things as allocator
> behaviour.
> > > no jokes, really.
> >
> > Yes it does count a lot. That's also why we spent a lot of time
> optimizing
> > the pools, to limit the number of calls to the system's allocator for
> > everything that uses a fixed size. I've seen some performance graphs in
> > our internal ticket tracker showing the memory consumption between and
> > after the switch to jemalloc, and the CPU usage as well, and sometimes
> > it was very important.
> >
> > Glibc improved quite a bit recently (2.28 or 2.33 I don't remember) by
> > implementing a per-thread cache in its ptmalloc. But in our case it's
> > still not as good as jemalloc, and neither perform as well as our
> > thread-local pools for fixed sizes.
> >
> > I'm seeing in a paper about snmalloc that it performs exceptionally well
> > for small allocations. I just don't know how this degrades depending on
> > the access patterns. For example some allocators are fast when you free()
> > in the exact reverse allocation order, but can start to fragment or have
> > more work to do finding holes if you don't free() in the exact same
> order.
> >
>
> If you re curious there is also mimalloc (with a pretty rich C api)
> from Microsoft too.
>
>
I had bad experience with tcmalloc on arm64. It turned out that it was not
properly tested under arm64.
actually, I think "do we really need massive alloc/free" instead of using
preallocated objects.


> > But that's something to keep an eye on in the future.
> >
> > Willy
>


Re: [PATCH]: MEDIUM: pool little FreeBSD support improvement.

2021-11-25 Thread David CARLIER
On Thu, 25 Nov 2021 at 12:25, Willy Tarreau  wrote:
>
> On Thu, Nov 25, 2021 at 04:38:27PM +0500,  ??? wrote:
> > > Thus I think that instead of focusing on the OS we ought to continue
> > > to focus on the allocator and improve runtime detection:
> > >
> > >   - glibc (currently detected using detect_allocator)
> > > => use malloc_trim()
> > >   - jemalloc at build time (mallctl != NULL)
> > > => use mallctl() as you did
> > >   - jemalloc at runtime (mallctl == NULL but dlsym("mallctl") != NULL)
> > > => use mallctl() as you did
> > >   - others
> > > => no trimming
> > >
> >
> > I never imagined earlier that high level applications (such as reverse
> > https/tcp proxy) cares about such low level things as allocator behaviour.
> > no jokes, really.
>
> Yes it does count a lot. That's also why we spent a lot of time optimizing
> the pools, to limit the number of calls to the system's allocator for
> everything that uses a fixed size. I've seen some performance graphs in
> our internal ticket tracker showing the memory consumption between and
> after the switch to jemalloc, and the CPU usage as well, and sometimes
> it was very important.
>
> Glibc improved quite a bit recently (2.28 or 2.33 I don't remember) by
> implementing a per-thread cache in its ptmalloc. But in our case it's
> still not as good as jemalloc, and neither perform as well as our
> thread-local pools for fixed sizes.
>
> I'm seeing in a paper about snmalloc that it performs exceptionally well
> for small allocations. I just don't know how this degrades depending on
> the access patterns. For example some allocators are fast when you free()
> in the exact reverse allocation order, but can start to fragment or have
> more work to do finding holes if you don't free() in the exact same order.
>

If you re curious there is also mimalloc (with a pretty rich C api)
from Microsoft too.

> But that's something to keep an eye on in the future.
>
> Willy



Re: [PATCH]: MEDIUM: pool little FreeBSD support improvement.

2021-11-25 Thread David CARLIER
Here a patchset instead :)

On Thu, 25 Nov 2021 at 12:25, Willy Tarreau  wrote:
>
> On Thu, Nov 25, 2021 at 04:38:27PM +0500,  ??? wrote:
> > > Thus I think that instead of focusing on the OS we ought to continue
> > > to focus on the allocator and improve runtime detection:
> > >
> > >   - glibc (currently detected using detect_allocator)
> > > => use malloc_trim()
> > >   - jemalloc at build time (mallctl != NULL)
> > > => use mallctl() as you did
> > >   - jemalloc at runtime (mallctl == NULL but dlsym("mallctl") != NULL)
> > > => use mallctl() as you did
> > >   - others
> > > => no trimming
> > >
> >
> > I never imagined earlier that high level applications (such as reverse
> > https/tcp proxy) cares about such low level things as allocator behaviour.
> > no jokes, really.
>
> Yes it does count a lot. That's also why we spent a lot of time optimizing
> the pools, to limit the number of calls to the system's allocator for
> everything that uses a fixed size. I've seen some performance graphs in
> our internal ticket tracker showing the memory consumption between and
> after the switch to jemalloc, and the CPU usage as well, and sometimes
> it was very important.
>
> Glibc improved quite a bit recently (2.28 or 2.33 I don't remember) by
> implementing a per-thread cache in its ptmalloc. But in our case it's
> still not as good as jemalloc, and neither perform as well as our
> thread-local pools for fixed sizes.
>
> I'm seeing in a paper about snmalloc that it performs exceptionally well
> for small allocations. I just don't know how this degrades depending on
> the access patterns. For example some allocators are fast when you free()
> in the exact reverse allocation order, but can start to fragment or have
> more work to do finding holes if you don't free() in the exact same order.
>
> But that's something to keep an eye on in the future.
>
> Willy
From 7206aff8ce6b6cb8c9b9b36d33b4f218f33cf0d1 Mon Sep 17 00:00:00 2001
From: David Carlier 
Date: Thu, 25 Nov 2021 13:14:37 +
Subject: [PATCH 3/3] MEDIUM: pool: using jemalloc when relevant.

Finally, we trim the arenas with jemalloc if detected otherwise falling back
 to glibc's malloc_trim.
---
 src/pool.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/pool.c b/src/pool.c
index 65c037911..3ec7a5386 100644
--- a/src/pool.c
+++ b/src/pool.c
@@ -48,10 +48,24 @@ static int(*my_mallctl)(const char *, void *, size_t *, void *, size_t) = NULL;
 /* ask the allocator to trim memory pools */
 static void trim_all_pools(void)
 {
+	if (using_libc_allocator) {
+		if (my_mallctl) {
+			unsigned int i, narenas = 0;
+			size_t len = sizeof(narenas);
+
+			if (my_mallctl("arenas.narenas", , , NULL, 0) == 0) {
+for (i = 0; i < narenas; i ++) {
+	char mib[32] = {0};
+	snprintf(mib, sizeof(mib), "arena.%u.purge", i);
+	(void)my_mallctl(mib, NULL, NULL, NULL, 0);
+}
+			}
+		} else {
 #ifdef HAVE_MALLOC_TRIM
-	if (using_libc_allocator)
-		malloc_trim(0);
+			malloc_trim(0);
 #endif
+		}
+	}
 }
 
 /* check if we're using the same allocator as the one that provides
-- 
2.33.1

From e8daa477b53a43ab39113cf0e9c43d9bbda1e9a9 Mon Sep 17 00:00:00 2001
From: David Carlier 
Date: Thu, 25 Nov 2021 10:26:50 +
Subject: [PATCH 1/3] MINOR: pool: refactoring to make it available for other
 systems/allocators.

Actually it's focusing on linux/glibc, we re trying to expand it for other possible systems and allocators combinations.
---
 include/haproxy/compat.h |  9 +++--
 src/pool.c   | 38 +++---
 2 files changed, 14 insertions(+), 33 deletions(-)

diff --git a/include/haproxy/compat.h b/include/haproxy/compat.h
index 25b15a1f0..b801dac08 100644
--- a/include/haproxy/compat.h
+++ b/include/haproxy/compat.h
@@ -263,9 +263,14 @@ typedef struct { } empty_t;
 #endif
 
 /* glibc 2.33 provides mallinfo2() that overcomes mallinfo()'s type limitations */
-#if (defined(__GNU_LIBRARY__) && (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 33))
+#if defined(__GNU_LIBRARY__)
 #include 
-#define HA_HAVE_MALLINFO2
+#if (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 33)
+#define HA_MALLINFO_API mallinfo2
+#else
+#define HA_MALLINFO_API mallinfo
+#endif
+#define HA_HAVE_MALLINFO
 #endif
 
 /* FreeBSD also has malloc_usable_size() but it requires malloc_np.h */
diff --git a/src/pool.c b/src/pool.c
index af46b4469..16ef76cf0 100644
--- a/src/pool.c
+++ b/src/pool.c
@@ -42,14 +42,15 @@ int mem_poison_byte = -1;
 static int mem_fail_rate = 0;
 #endif
 
-#if defined(HA_HAVE_MALLOC_TRIM)
 static int using_libc_allocator = 0;
 
 /* ask the allocator to trim memory pools */
 static void trim_all_pools(void)
 {
+#ifdef HAVE_MALLOC_TRIM
 	if (using_libc_allocator)
 		malloc_trim(0);
+#endif
 }
 
 /* check if we're using the same allocator as the one that provides
@@ -60,48 +61,23 @@ static void trim_all_pools(void)
  */
 static void detect_allocator(void)
 {

Re: [PATCH]: MEDIUM: pool little FreeBSD support improvement.

2021-11-25 Thread Willy Tarreau
On Thu, Nov 25, 2021 at 04:38:27PM +0500,  ??? wrote:
> > Thus I think that instead of focusing on the OS we ought to continue
> > to focus on the allocator and improve runtime detection:
> >
> >   - glibc (currently detected using detect_allocator)
> > => use malloc_trim()
> >   - jemalloc at build time (mallctl != NULL)
> > => use mallctl() as you did
> >   - jemalloc at runtime (mallctl == NULL but dlsym("mallctl") != NULL)
> > => use mallctl() as you did
> >   - others
> > => no trimming
> >
> 
> I never imagined earlier that high level applications (such as reverse
> https/tcp proxy) cares about such low level things as allocator behaviour.
> no jokes, really.

Yes it does count a lot. That's also why we spent a lot of time optimizing
the pools, to limit the number of calls to the system's allocator for
everything that uses a fixed size. I've seen some performance graphs in
our internal ticket tracker showing the memory consumption between and
after the switch to jemalloc, and the CPU usage as well, and sometimes
it was very important.

Glibc improved quite a bit recently (2.28 or 2.33 I don't remember) by
implementing a per-thread cache in its ptmalloc. But in our case it's
still not as good as jemalloc, and neither perform as well as our
thread-local pools for fixed sizes.

I'm seeing in a paper about snmalloc that it performs exceptionally well
for small allocations. I just don't know how this degrades depending on
the access patterns. For example some allocators are fast when you free()
in the exact reverse allocation order, but can start to fragment or have
more work to do finding holes if you don't free() in the exact same order.

But that's something to keep an eye on in the future.

Willy



Re: [PATCH]: MEDIUM: pool little FreeBSD support improvement.

2021-11-25 Thread David CARLIER
On Thu, 25 Nov 2021 at 11:38, Илья Шипицин  wrote:
>
>
>
> чт, 25 нояб. 2021 г. в 14:54, Willy Tarreau :
>>
>> Hi David,
>>
>> On Wed, Nov 24, 2021 at 08:08:39PM +, David CARLIER wrote:
>> > Hi
>> >
>> > here a little patch for FreeBSD to support memory arenas trimming.
>> (...)
>> > FreeBSD uses a slighty simplified version of jemalloc as libc allocator
>> > since many years (there is thoughts to eventually switch to snmalloc
>> >  but not before a long time).
>> > We detect the libc in the least hacky way in this case aiming as jemalloc
>> >  specific API then we try to purge arenas as much as we can.
>>
>> This one is interesting because according to the jemalloc doc on
>> my machine it could also work on linux with jemalloc, provided that
>> jemalloc is linked at build time (otherwise mallctl remains NULL).
>> However it remains available uding dlopen().
>>
>> Thus I think that instead of focusing on the OS we ought to continue
>> to focus on the allocator and improve runtime detection:
>>
>>   - glibc (currently detected using detect_allocator)
>> => use malloc_trim()
>>   - jemalloc at build time (mallctl != NULL)
>> => use mallctl() as you did
>>   - jemalloc at runtime (mallctl == NULL but dlsym("mallctl") != NULL)
>> => use mallctl() as you did
>>   - others
>> => no trimming
>

Fair enough I thought it was solely about system libc, but indeed the
Linux/jemalloc
 combination is very common, interesting to look at indeed.
Ok will provide the patchsets later on :-)
>
> I never imagined earlier that high level applications (such as reverse 
> https/tcp proxy) cares about such low level things as allocator behaviour.
> no jokes, really.
>

This is how you stand out ;-)
>
>>
>>
>> That would cover the vast majority of use cases where trimming is relevant.
>> I'm quite interested in jemalloc on linux, including when used at runtime
>> using LD_PRELOAD, because I see it quite frequently in high-performance
>> environments. When site operators see their memory usage fall two-fold and
>> the CPU as well by just prepending an LD_PRELOAD in their startup script,
>> they don't need to think twice and it's quickly done.
>>
>> Thus I think we could proceed like this in the pools init code:
>>
>>void detect_allocator()
>>{
>> extern int mallctl(const char *, void *, size_t *, void *, size_t) 
>> __attribute__((weak));
>>
>> my_mallctl = mallctl;
>> if (!my_mallctl)
>> my_mallctl = get_sym_curr_addr("mallctl");
>>
>> if (!my_mallctl) {
>> /* existing tests based on mallinfo/mallinfo2 */
>> }
>>}
>>
>> At this point we'd have:
>>   - my_mallctl not null when using jemalloc, hence trim() would always
>> use your code that purges all arenas
>>
>>   - otherwise if using_libc_allocator is set, we can call malloc_trim()
>>
>>   - otherwise it's an unknown allocator and we don't do anything.
>>
>> Thus could you please turn your patch into a small series that does
>> this ? The steps I'm seeing are:
>>   1. changing the detection logic so that it's always performed, and
>>  not just on HA_HAVE_MALLOC_TRIM. This means the variants of
>>  functions is_trim_enabled, trim_all_pools and detect_allocator()
>>  are all remerged into a single variant. trim_all_pools() will be
>>  the only one to rely on HA_HAVE_MALLOC_TRIM.
>>
>>   2. introduce mallctl availability detection and put its pointer into
>>  a separate one that we can get unsing dlfcn when linked in.
>>
>>   3. modify trim_all_pools() to use mallctl() when available, otherwise
>>  rely on current code with malloc_trim().
>>
>> This should make a significant improvement with plug-n-play detection
>> for the majority of users.
>>
>> Then if you want to introduce runtime detection for snmalloc to do the
>> same, this would make everything much easier.
>>
>> Thanks,
>> Willy
>>



Re: [PATCH]: MEDIUM: pool little FreeBSD support improvement.

2021-11-25 Thread Илья Шипицин
чт, 25 нояб. 2021 г. в 14:54, Willy Tarreau :

> Hi David,
>
> On Wed, Nov 24, 2021 at 08:08:39PM +, David CARLIER wrote:
> > Hi
> >
> > here a little patch for FreeBSD to support memory arenas trimming.
> (...)
> > FreeBSD uses a slighty simplified version of jemalloc as libc allocator
> > since many years (there is thoughts to eventually switch to snmalloc
> >  but not before a long time).
> > We detect the libc in the least hacky way in this case aiming as jemalloc
> >  specific API then we try to purge arenas as much as we can.
>
> This one is interesting because according to the jemalloc doc on
> my machine it could also work on linux with jemalloc, provided that
> jemalloc is linked at build time (otherwise mallctl remains NULL).
> However it remains available uding dlopen().
>
> Thus I think that instead of focusing on the OS we ought to continue
> to focus on the allocator and improve runtime detection:
>
>   - glibc (currently detected using detect_allocator)
> => use malloc_trim()
>   - jemalloc at build time (mallctl != NULL)
> => use mallctl() as you did
>   - jemalloc at runtime (mallctl == NULL but dlsym("mallctl") != NULL)
> => use mallctl() as you did
>   - others
> => no trimming
>

I never imagined earlier that high level applications (such as reverse
https/tcp proxy) cares about such low level things as allocator behaviour.
no jokes, really.



>
> That would cover the vast majority of use cases where trimming is relevant.
> I'm quite interested in jemalloc on linux, including when used at runtime
> using LD_PRELOAD, because I see it quite frequently in high-performance
> environments. When site operators see their memory usage fall two-fold and
> the CPU as well by just prepending an LD_PRELOAD in their startup script,
> they don't need to think twice and it's quickly done.
>
> Thus I think we could proceed like this in the pools init code:
>
>void detect_allocator()
>{
> extern int mallctl(const char *, void *, size_t *, void *, size_t)
> __attribute__((weak));
>
> my_mallctl = mallctl;
> if (!my_mallctl)
> my_mallctl = get_sym_curr_addr("mallctl");
>
> if (!my_mallctl) {
> /* existing tests based on mallinfo/mallinfo2 */
> }
>}
>
> At this point we'd have:
>   - my_mallctl not null when using jemalloc, hence trim() would always
> use your code that purges all arenas
>
>   - otherwise if using_libc_allocator is set, we can call malloc_trim()
>
>   - otherwise it's an unknown allocator and we don't do anything.
>
> Thus could you please turn your patch into a small series that does
> this ? The steps I'm seeing are:
>   1. changing the detection logic so that it's always performed, and
>  not just on HA_HAVE_MALLOC_TRIM. This means the variants of
>  functions is_trim_enabled, trim_all_pools and detect_allocator()
>  are all remerged into a single variant. trim_all_pools() will be
>  the only one to rely on HA_HAVE_MALLOC_TRIM.
>
>   2. introduce mallctl availability detection and put its pointer into
>  a separate one that we can get unsing dlfcn when linked in.
>
>   3. modify trim_all_pools() to use mallctl() when available, otherwise
>  rely on current code with malloc_trim().
>
> This should make a significant improvement with plug-n-play detection
> for the majority of users.
>
> Then if you want to introduce runtime detection for snmalloc to do the
> same, this would make everything much easier.
>
> Thanks,
> Willy
>
>


Re: [PATCH]: MEDIUM: pool little FreeBSD support improvement.

2021-11-25 Thread Willy Tarreau
Hi David,

On Wed, Nov 24, 2021 at 08:08:39PM +, David CARLIER wrote:
> Hi
> 
> here a little patch for FreeBSD to support memory arenas trimming.
(...)
> FreeBSD uses a slighty simplified version of jemalloc as libc allocator
> since many years (there is thoughts to eventually switch to snmalloc
>  but not before a long time).
> We detect the libc in the least hacky way in this case aiming as jemalloc
>  specific API then we try to purge arenas as much as we can.

This one is interesting because according to the jemalloc doc on
my machine it could also work on linux with jemalloc, provided that
jemalloc is linked at build time (otherwise mallctl remains NULL).
However it remains available uding dlopen().

Thus I think that instead of focusing on the OS we ought to continue
to focus on the allocator and improve runtime detection:

  - glibc (currently detected using detect_allocator)
=> use malloc_trim()
  - jemalloc at build time (mallctl != NULL)
=> use mallctl() as you did
  - jemalloc at runtime (mallctl == NULL but dlsym("mallctl") != NULL)
=> use mallctl() as you did
  - others
=> no trimming

That would cover the vast majority of use cases where trimming is relevant.
I'm quite interested in jemalloc on linux, including when used at runtime
using LD_PRELOAD, because I see it quite frequently in high-performance
environments. When site operators see their memory usage fall two-fold and
the CPU as well by just prepending an LD_PRELOAD in their startup script,
they don't need to think twice and it's quickly done.

Thus I think we could proceed like this in the pools init code:

   void detect_allocator()
   {
extern int mallctl(const char *, void *, size_t *, void *, size_t) 
__attribute__((weak));

my_mallctl = mallctl;
if (!my_mallctl)
my_mallctl = get_sym_curr_addr("mallctl");

if (!my_mallctl) {
/* existing tests based on mallinfo/mallinfo2 */
}
   }

At this point we'd have:
  - my_mallctl not null when using jemalloc, hence trim() would always
use your code that purges all arenas

  - otherwise if using_libc_allocator is set, we can call malloc_trim()

  - otherwise it's an unknown allocator and we don't do anything.

Thus could you please turn your patch into a small series that does
this ? The steps I'm seeing are:
  1. changing the detection logic so that it's always performed, and
 not just on HA_HAVE_MALLOC_TRIM. This means the variants of
 functions is_trim_enabled, trim_all_pools and detect_allocator()
 are all remerged into a single variant. trim_all_pools() will be
 the only one to rely on HA_HAVE_MALLOC_TRIM.

  2. introduce mallctl availability detection and put its pointer into
 a separate one that we can get unsing dlfcn when linked in.

  3. modify trim_all_pools() to use mallctl() when available, otherwise
 rely on current code with malloc_trim().

This should make a significant improvement with plug-n-play detection
for the majority of users.

Then if you want to introduce runtime detection for snmalloc to do the
same, this would make everything much easier.

Thanks,
Willy



[PATCH]: MEDIUM: pool little FreeBSD support improvement.

2021-11-24 Thread David CARLIER
Hi

here a little patch for FreeBSD to support memory arenas trimming.

Thanks.

regards.
From 1d6386a626f56ca64c25e2dfbf2f9d90a81bd7ae Mon Sep 17 00:00:00 2001
From: David Carlier 
Date: Wed, 24 Nov 2021 20:02:41 +
Subject: [PATCH] MEDIUM: pool: trimming arenas on FreeBSD.

FreeBSD uses a slighty simplified version of jemalloc as libc allocator
since many years (there is thoughts to eventually switch to snmalloc
 but not before a long time).
We detect the libc in the least hacky way in this case aiming as jemalloc
 specific API then we try to purge arenas as much as we can.
---
 include/haproxy/compat.h |  2 +-
 src/pool.c   | 32 
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/include/haproxy/compat.h b/include/haproxy/compat.h
index 25b15a1f0..daa58be5d 100644
--- a/include/haproxy/compat.h
+++ b/include/haproxy/compat.h
@@ -269,7 +269,7 @@ typedef struct { } empty_t;
 #endif
 
 /* FreeBSD also has malloc_usable_size() but it requires malloc_np.h */
-#if defined(USE_MEMORY_PROFILING) && defined(__FreeBSD__) && (__FreeBSD_version >= 72)
+#if defined(__FreeBSD__) && (__FreeBSD_version >= 72)
 #include 
 #endif
 
diff --git a/src/pool.c b/src/pool.c
index af46b4469..f3ea8c7a7 100644
--- a/src/pool.c
+++ b/src/pool.c
@@ -42,8 +42,8 @@ int mem_poison_byte = -1;
 static int mem_fail_rate = 0;
 #endif
 
-#if defined(HA_HAVE_MALLOC_TRIM)
 static int using_libc_allocator = 0;
+#if defined(HA_HAVE_MALLOC_TRIM)
 
 /* ask the allocator to trim memory pools */
 static void trim_all_pools(void)
@@ -82,26 +82,42 @@ static void detect_allocator(void)
 
 	using_libc_allocator = !!memcmp(, , sizeof(mi1));
 }
-
-static int is_trim_enabled(void)
-{
-	return using_libc_allocator;
-}
 #else
 
+#if defined(__FreeBSD__)
+extern void sdallocx(void *, size_t, int) __attribute__((weak));
+#endif
+
 static void trim_all_pools(void)
 {
+#if defined(__FreeBSD__)
+	if (using_libc_allocator) {
+		unsigned int narenas = 0;
+		size_t len = sizeof(narenas);
+
+		if (mallctl("arenas.narenas", , , NULL, 0) == 0) {
+			for (unsigned int i = 0; i < narenas; i ++) {
+char mib[32] = {0};
+snprintf(mib, sizeof(mib), "arena.%u.purge", i);
+(void)mallctl(mib, NULL, NULL, NULL, 0);
+			}
+		}
+	}
+#endif
 }
 
 static void detect_allocator(void)
 {
+#if defined(__FreeBSD__)
+	using_libc_allocator = (sdallocx != NULL);
+#endif
 }
+#endif
 
 static int is_trim_enabled(void)
 {
-	return 0;
+	return using_libc_allocator;
 }
-#endif
 
 /* Try to find an existing shared pool with the same characteristics and
  * returns it, otherwise creates this one. NULL is returned if no memory
-- 
2.33.1