Some places in the startup code use char * values that can sometimes be string literals, and can sometimes be dynamically built using xstrdup or concat.
This isn't a problem for cc1 etc since this is only called once, but for libgccjit they are small per-invocation leaks. There's no clean way to release them: retrofitting logic to decide if we're dealing with a string literal vs a dynamically-allocated buffer (and if something else is pointing to said buffer) is messy and error-prone; they are also unconnected to the GC. Hence the cleanest way to fix the leak is to add a new "long-term" allocator, to gcc::context. This gives back buffers that will persist until the gcc::context is cleaned away. Using this fixes these leaks seen via valgrind: 28 bytes in 4 blocks are definitely lost in loss record 13 of 209 at 0x4A0645D: malloc (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) by 0x5D76417: xmalloc (xmalloc.c:147) by 0x5D76509: xstrdup (xstrdup.c:34) by 0x532CC38: process_options() (toplev.c:1316) by 0x532DFF4: do_compile() (toplev.c:1976) by 0x532E3B0: toplev::main(int, char**) (toplev.c:2117) by 0x4DE7983: gcc::jit::playback::context::compile() (jit-playback.c:1646) by 0x4DD791A: gcc::jit::recording::context::compile() (jit-recording.c:861) by 0x4DD5E12: gcc_jit_context_compile (libgccjit.c:2014) by 0x401CA4: test_jit (harness.h:190) by 0x401D88: main (harness.h:232) 108 bytes in 12 blocks are definitely lost in loss record 135 of 241 at 0x4A0645D: malloc (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) by 0x5D75E07: xmalloc (xmalloc.c:147) by 0x5D7098B: concat (concat.c:147) by 0x563A342: build_common_builtin_nodes() (tree.c:10145) by 0x4DC91FC: jit_langhook_init() (dummy-frontend.c:128) by 0x532D3F4: lang_dependent_init(char const*) (toplev.c:1790) by 0x532D9C8: do_compile() (toplev.c:2007) by 0x532DCAC: toplev::main(int, char**) (toplev.c:2118) by 0x4DE76AF: gcc::jit::playback::context::compile() (jit-playback.c:1615) by 0x4DD76DA: gcc::jit::recording::context::compile() (jit-recording.c:861) by 0x4DD5BD2: gcc_jit_context_compile (libgccjit.c:2014) by 0x401DC4: test_jit (harness.h:190) 108 bytes in 12 blocks are definitely lost in loss record 136 of 241 at 0x4A0645D: malloc (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) by 0x5D75E07: xmalloc (xmalloc.c:147) by 0x5D7098B: concat (concat.c:147) by 0x563A3B6: build_common_builtin_nodes() (tree.c:10151) by 0x4DC91FC: jit_langhook_init() (dummy-frontend.c:128) by 0x532D3F4: lang_dependent_init(char const*) (toplev.c:1790) by 0x532D9C8: do_compile() (toplev.c:2007) by 0x532DCAC: toplev::main(int, char**) (toplev.c:2118) by 0x4DE76AF: gcc::jit::playback::context::compile() (jit-playback.c:1615) by 0x4DD76DA: gcc::jit::recording::context::compile() (jit-recording.c:861) by 0x4DD5BD2: gcc_jit_context_compile (libgccjit.c:2014) by 0x401DC4: test_jit (harness.h:190) 136 bytes in 4 blocks are definitely lost in loss record 129 of 209 at 0x4A0645D: malloc (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) by 0x5D76417: xmalloc (xmalloc.c:147) by 0x532BE28: init_asm_output(char const*) (toplev.c:936) by 0x532DB34: lang_dependent_init(char const*) (toplev.c:1795) by 0x532E0CC: do_compile() (toplev.c:2006) by 0x532E3B0: toplev::main(int, char**) (toplev.c:2117) by 0x4DE7983: gcc::jit::playback::context::compile() (jit-playback.c:1646) by 0x4DD791A: gcc::jit::recording::context::compile() (jit-recording.c:861) by 0x4DD5E12: gcc_jit_context_compile (libgccjit.c:2014) by 0x401CA4: test_jit (harness.h:190) by 0x401D88: main (harness.h:232) gcc/ChangeLog: PR jit/63854 * context.c (gcc::context::context): Initialize m_longterm_obstack. (gcc::context::~context): Free m_longterm_obstack. (gcc::context::longterm_xalloc): New method. (gcc::context::longterm_xstrdup): New method. * context.h: Include obstack.h (gcc::context::longterm_xalloc): New method. (gcc::context::longterm_xallocvec<T>): New method (gcc::context::longterm_xstrdup): New method. (gcc::context::m_longterm_obstack): New field. * toplev.c (init_asm_output): Use g->longterm_xallocvec <char> rather than XNEWVEC when allocating "dumpname" to avoid a leak. (process_options): Likewise, use g->longterm_xstrdup rather than xstrdup when allocating "name" (and hence aux_base_name). * tree.c: Include context.h. (build_common_builtin_nodes): When writing dynamically-allocated entries into built_in_names, use g->longterm_xstrdup to take a copy of the buffer acquired by concat to avoid a leak. --- gcc/context.c | 18 ++++++++++++++++++ gcc/context.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ gcc/toplev.c | 4 ++-- gcc/tree.c | 17 +++++++++++------ 4 files changed, 77 insertions(+), 8 deletions(-) diff --git a/gcc/context.c b/gcc/context.c index d132946..ead9f33 100644 --- a/gcc/context.c +++ b/gcc/context.c @@ -37,10 +37,28 @@ gcc::context::context () before the pass manager. */ m_dumps = new gcc::dump_manager (); m_passes = new gcc::pass_manager (this); + obstack_init (&m_longterm_obstack); } gcc::context::~context () { delete m_passes; delete m_dumps; + obstack_free (&m_longterm_obstack, NULL); +} + +void * +gcc::context::longterm_xalloc (size_t sz) +{ + return obstack_alloc (&m_longterm_obstack, sz); +} + +char * +gcc::context::longterm_xstrdup (const char *str) +{ + gcc_assert (str); + size_t sz = strlen (str) + 1; + char *buf = reinterpret_cast <char *> (longterm_xalloc (sz)); + memcpy (buf, str, sz); + return buf; } diff --git a/gcc/context.h b/gcc/context.h index 2bf28a7..068f973 100644 --- a/gcc/context.h +++ b/gcc/context.h @@ -20,6 +20,8 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_CONTEXT_H #define GCC_CONTEXT_H +#include <obstack.h> + namespace gcc { class pass_manager; @@ -45,6 +47,40 @@ public: dump_manager *get_dumps () {gcc_assert (m_dumps); return m_dumps; } + /* Long-term allocation. + + GCC's startup code sometimes uses a mix of static and dynamic memory + allocation, with no cleanup of the latter. Such allocations were + written under the assumption that the buffers can persist for the + rest of the compile and be effectively freed when the process + terminates. + + This works for cc1 etc since these allocations are only called once, + but for libgccjit they are small per-invocation leaks. + + The following functions provide a simple way to retrofit cleanup + into such places that weren't expecting the compiler to be called + more than once in-process. + + They allocate buffers that will persist until the context is + deleted, when they are all cleaned up at once. */ + + /* Allocate a buffer, to persist until the context is deleted, without + needing explicit cleanup. */ + void *longterm_xalloc (size_t) ATTRIBUTE_RETURNS_NONNULL; + + /* Allocate an array of T, to persist until the context is deleted, + without needing explicit cleanup. */ + template <typename T> + T *longterm_xallocvec (size_t len) ATTRIBUTE_RETURNS_NONNULL; + + /* Copy a string into a memory buffer without fail. + + As per libiberty's xstrdup, but the buffer is longterm-allocated so + that it persists until the context is deleted, without needing + explicit cleanup. */ + char *longterm_xstrdup (const char *) ATTRIBUTE_RETURNS_NONNULL; + private: /* Pass-management. */ pass_manager *m_passes; @@ -52,10 +88,20 @@ private: /* Dump files. */ dump_manager *m_dumps; + /* Long-term allocation is implemented using an obstack. */ + struct obstack m_longterm_obstack; + }; // class context } // namespace gcc +template <typename T> +inline T * +gcc::context::longterm_xallocvec (size_t len) +{ + return reinterpret_cast <T *> (longterm_xalloc (sizeof (T) * len)); +} + /* The global singleton context aka "g". (the name is chosen to be easy to type in a debugger). */ extern gcc::context *g; diff --git a/gcc/toplev.c b/gcc/toplev.c index 0181a3f..c0c418c 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -936,7 +936,7 @@ init_asm_output (const char *name) if (asm_file_name == 0) { int len = strlen (dump_base_name); - char *dumpname = XNEWVEC (char, len + 6); + char *dumpname = g->longterm_xallocvec <char> (len + 6); memcpy (dumpname, dump_base_name, len + 1); strip_off_ending (dumpname, len); @@ -1332,7 +1332,7 @@ process_options (void) ; else if (main_input_filename) { - char *name = xstrdup (lbasename (main_input_filename)); + char *name = g->longterm_xstrdup (lbasename (main_input_filename)); strip_off_ending (name, strlen (name)); aux_base_name = name; diff --git a/gcc/tree.c b/gcc/tree.c index 3d1d637..ac358bc 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -88,6 +88,7 @@ along with GCC; see the file COPYING3. If not see #include "intl.h" #include "wide-int.h" #include "builtins.h" +#include "context.h" /* Tree code classes. */ @@ -10126,6 +10127,7 @@ build_common_builtin_nodes (void) enum built_in_function mcode, dcode; tree type, inner_type; const char *prefix = "__"; + char *name; if (targetm.libfunc_gnu_prefix) prefix = "__gnu_"; @@ -10147,15 +10149,18 @@ build_common_builtin_nodes (void) *q = TOLOWER (*p); *q = '\0'; - built_in_names[mcode] = concat (prefix, "mul", mode_name_buf, "3", - NULL); - local_define_builtin (built_in_names[mcode], ftype, mcode, + name = concat (prefix, "mul", mode_name_buf, "3", NULL); + built_in_names[mcode] = g->longterm_xstrdup (name); + free (name); + local_define_builtin (built_in_names[mcode], ftype, mcode, built_in_names[mcode], ECF_CONST | ECF_NOTHROW | ECF_LEAF); - built_in_names[dcode] = concat (prefix, "div", mode_name_buf, "3", - NULL); - local_define_builtin (built_in_names[dcode], ftype, dcode, + name = concat (prefix, "div", mode_name_buf, "3", NULL); + built_in_names[dcode] = g->longterm_xstrdup (name); + free (name); + + local_define_builtin (built_in_names[dcode], ftype, dcode, built_in_names[dcode], ECF_CONST | ECF_NOTHROW | ECF_LEAF); } -- 1.8.5.3