Re: [PATCH 2/3] posix: Remove alloca usage on regex build_trtable

2021-01-11 Thread Adhemerval Zanella



On 08/01/2021 19:30, Paul Eggert wrote:
> On 1/6/21 10:17 AM, Adhemerval Zanella wrote:
>> __libc_use_alloca/alloca is replaced with malloc regardless.
> 
> These allocations are so small that they should be put on the stack instead 
> of using malloc. I did that in Gnulib by installing the attached patch. The 
> idea is that the resulting regexec.c file should be copyable unchanged into 
> glibc.
> 
> From a Gnulib point of view this code uses a 20 KiB frame (on a 64-bit host) 
> which goes past the usual 4032-byte limit for stack frames, but I think we 
> can stretch the point here.  In glibc the limit is 64 KiB so there's no 
> problem.

Right, I think we can maybe use scratch_buffer for these or even a dynamic array
with a more proper initial size as an improvement.



Re: [PATCH 2/3] posix: Remove alloca usage on regex build_trtable

2021-01-08 Thread Paul Eggert

On 1/6/21 10:17 AM, Adhemerval Zanella wrote:

__libc_use_alloca/alloca is replaced with malloc regardless.


These allocations are so small that they should be put on the stack 
instead of using malloc. I did that in Gnulib by installing the attached 
patch. The idea is that the resulting regexec.c file should be copyable 
unchanged into glibc.


From a Gnulib point of view this code uses a 20 KiB frame (on a 64-bit 
host) which goes past the usual 4032-byte limit for stack frames, but I 
think we can stretch the point here.  In glibc the limit is 64 KiB so 
there's no problem.
From 025e89118912adaa309374201c9012f6fb46d583 Mon Sep 17 00:00:00 2001
From: Paul Eggert 
Date: Fri, 8 Jan 2021 14:22:15 -0800
Subject: [PATCH] regexec: remove alloca usage in build_trtable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Prompted by this different change proposed by Adhemerval Zanella:
https://sourceware.org/pipermail/libc-alpha/2021-January/121373.html
* lib/regexec.c (build_trtable): Prevent inlining,
so that it doesn’t bloat the caller’s stack.
Use auto variables instead of alloca/malloc.
After these changes, build_trtable’s total stack allocation is
only 20 KiB on a 64-bit machine, and this is less than glibc’s 64
KiB cutoff so there’s little point to using alloca to shrink it.
Although Gnulib traditionally has used a 4 KiB cutoff, going to 20
KiB here should not be a significant problem in practice;
Gnulib-using packages concerned about overflow of tiny stacks can
compile with something like gcc -fstack-clash-protection.
* config/srclist.txt: Comment out regexec.c for now.
---
 ChangeLog  | 14 +
 config/srclist.txt |  2 +-
 lib/regexec.c  | 75 --
 3 files changed, 28 insertions(+), 63 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 540f15a3c..708a266b0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,19 @@
 2021-01-08  Paul Eggert  
 
+	regexec: remove alloca usage in build_trtable
+	Prompted by this different change proposed by Adhemerval Zanella:
+	https://sourceware.org/pipermail/libc-alpha/2021-January/121373.html
+	* lib/regexec.c (build_trtable): Prevent inlining,
+	so that it doesn’t bloat the caller’s stack.
+	Use auto variables instead of alloca/malloc.
+	After these changes, build_trtable’s total stack allocation is
+	only 20 KiB on a 64-bit machine, and this is less than glibc’s 64
+	KiB cutoff so there’s little point to using alloca to shrink it.
+	Although Gnulib traditionally has used a 4 KiB cutoff, going to 20
+	KiB here should not be a significant problem in practice;
+	Gnulib-using packages concerned about overflow of tiny stacks can
+	compile with something like gcc -fstack-clash-protection.
+
 	scratch_buffer: add scratch_buffer_dupfree macro
 	* lib/scratch_buffer.h (__libc_scratch_buffer_dupfree):
 	New macro, needed to support recent changes in this module.
diff --git a/config/srclist.txt b/config/srclist.txt
index e7ceeb088..d669fd8f0 100644
--- a/config/srclist.txt
+++ b/config/srclist.txt
@@ -69,7 +69,7 @@ $LIBCSRC posix/regex.c			lib
 #$LIBCSRC posix/regex.h			lib
 #$LIBCSRC posix/regex_internal.c	lib
 #$LIBCSRC posix/regex_internal.h		lib
-$LIBCSRC posix/regexec.c		lib
+#$LIBCSRC posix/regexec.c		lib
 #$LIBCSRC stdlib/canonicalize   lib/canonicalize-lgpl.c
 #$LIBCSRC sysdeps/generic/eloop-threshold.h	lib
 $LIBCSRC time/timegm.c			lib
diff --git a/lib/regexec.c b/lib/regexec.c
index 889b10be9..f7b4f9cfc 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
@@ -3247,7 +3247,7 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
 /* Build transition table for the state.
Return true if successful.  */
 
-static bool
+static bool __attribute_noinline__
 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
 {
   reg_errcode_t err;
@@ -3255,36 +3255,20 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
   int ch;
   bool need_word_trtable = false;
   bitset_word_t elem, mask;
-  bool dests_node_malloced = false;
-  bool dest_states_malloced = false;
   Idx ndests; /* Number of the destination states from 'state'.  */
   re_dfastate_t **trtable;
-  re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
-  re_node_set follows, *dests_node;
-  bitset_t *dests_ch;
+  re_dfastate_t *dest_states[SBC_MAX];
+  re_dfastate_t *dest_states_word[SBC_MAX];
+  re_dfastate_t *dest_states_nl[SBC_MAX];
+  re_node_set follows;
   bitset_t acceptable;
 
-  struct dests_alloc
-  {
-re_node_set dests_node[SBC_MAX];
-bitset_t dests_ch[SBC_MAX];
-  } *dests_alloc;
-
   /* We build DFA states which corresponds to the destination nodes
  from 'state'.  'dests_node[i]' represents the nodes which i-th
  destination state contains, and 'dests_ch[i]' represents the
  characters which i-th destination state accepts.  */
-  if (__libc_use_alloca (sizeof (struct dests_alloc)))
-dests_alloc = (struct dests_alloc *) alloca 

[PATCH 2/3] posix: Remove alloca usage on regex build_trtable

2021-01-06 Thread Adhemerval Zanella
__libc_use_alloca/alloca is replaced with malloc regardless.

Checked on x86_64-linux-gnu.
---
 posix/regexec.c | 50 +++--
 1 file changed, 15 insertions(+), 35 deletions(-)

diff --git a/posix/regexec.c b/posix/regexec.c
index 5e22f90842..a8e9a9cd01 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -3259,8 +3259,6 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
   int ch;
   bool need_word_trtable = false;
   bitset_word_t elem, mask;
-  bool dests_node_malloced = false;
-  bool dest_states_malloced = false;
   Idx ndests; /* Number of the destination states from 'state'.  */
   re_dfastate_t **trtable;
   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
@@ -3278,15 +3276,9 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
  from 'state'.  'dests_node[i]' represents the nodes which i-th
  destination state contains, and 'dests_ch[i]' represents the
  characters which i-th destination state accepts.  */
-  if (__libc_use_alloca (sizeof (struct dests_alloc)))
-dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
-  else
-{
-  dests_alloc = re_malloc (struct dests_alloc, 1);
-  if (__glibc_unlikely (dests_alloc == NULL))
-   return false;
-  dests_node_malloced = true;
-}
+  dests_alloc = re_malloc (struct dests_alloc, 1);
+  if (__glibc_unlikely (dests_alloc == NULL))
+return false;
   dests_node = dests_alloc->dests_node;
   dests_ch = dests_alloc->dests_ch;
 
@@ -3298,8 +3290,7 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
   if (__glibc_unlikely (ndests <= 0))
 {
-  if (dests_node_malloced)
-   re_free (dests_alloc);
+  re_free (dests_alloc);
   /* Return false in case of an error, true otherwise.  */
   if (ndests == 0)
{
@@ -3323,26 +3314,17 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t 
*state)
   if (__glibc_unlikely (ndests_max < ndests))
 goto out_free;
 
-  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
-+ ndests * 3 * sizeof (re_dfastate_t *)))
-dest_states = (re_dfastate_t **)
-  alloca (ndests * 3 * sizeof (re_dfastate_t *));
-  else
+  dest_states =
+re_malloc (re_dfastate_t *, ndests * 3 * sizeof (re_dfastate_t *));
+  if (__glibc_unlikely (dest_states == NULL))
 {
-  dest_states = re_malloc (re_dfastate_t *, ndests * 3);
-  if (__glibc_unlikely (dest_states == NULL))
-   {
 out_free:
- if (dest_states_malloced)
-   re_free (dest_states);
- re_node_set_free ();
- for (i = 0; i < ndests; ++i)
-   re_node_set_free (dests_node + i);
- if (dests_node_malloced)
-   re_free (dests_alloc);
- return false;
-   }
-  dest_states_malloced = true;
+  re_free (dest_states);
+  re_node_set_free ();
+  for (i = 0; i < ndests; ++i)
+   re_node_set_free (dests_node + i);
+  re_free (dests_alloc);
+  return false;
 }
   dest_states_word = dest_states + ndests;
   dest_states_nl = dest_states_word + ndests;
@@ -3470,15 +3452,13 @@ out_free:
  }
 }
 
-  if (dest_states_malloced)
-re_free (dest_states);
+  re_free (dest_states);
 
   re_node_set_free ();
   for (i = 0; i < ndests; ++i)
 re_node_set_free (dests_node + i);
 
-  if (dests_node_malloced)
-re_free (dests_alloc);
+  re_free (dests_alloc);
 
   return true;
 }
-- 
2.25.1