wingo pushed a commit to branch wip-whippet
in repository guile.

commit ace42e191e43a1c589dabe99cad7c3d2ac94cf70
Author: Andy Wingo <wi...@pobox.com>
AuthorDate: Tue Jun 17 14:35:51 2025 +0200

    Convert regexps to use statically-allocated tc16
    
    * libguile/scm.h: Allocate tc16.
    * libguile/regex-posix.h:
    * libguile/regex-posix.c:
    * libguile/finalizers.h:
    * libguile/finalizers.c: Adapt.
---
 libguile/finalizers.c  | 17 +++++++++++
 libguile/finalizers.h  |  2 ++
 libguile/regex-posix.c | 81 ++++++++++++++++++++++++++++----------------------
 libguile/regex-posix.h |  1 +
 libguile/scm.h         |  5 ++--
 5 files changed, 68 insertions(+), 38 deletions(-)

diff --git a/libguile/finalizers.c b/libguile/finalizers.c
index 54c1bc38a..5bf68602a 100644
--- a/libguile/finalizers.c
+++ b/libguile/finalizers.c
@@ -43,6 +43,9 @@
 #include "init.h"
 #include "numbers.h"
 #include "ports.h"
+#ifdef ENABLE_REGEX
+#include "regex-posix.h"
+#endif
 #include "smob.h"
 #include "struct.h"
 #include "symbols.h"
@@ -78,6 +81,7 @@ enum builtin_finalizer_kind
     FINALIZE_KIND_SMOB,
     FINALIZE_KIND_PORT,
     FINALIZE_KIND_DIRECTORY,
+    FINALIZE_KIND_REGEXP,
   };
 
 static SCM
@@ -133,6 +137,12 @@ scm_i_add_directory_finalizer (struct scm_thread *thread, 
SCM obj)
   return add_builtin_finalizer (thread, obj, FINALIZE_KIND_DIRECTORY);
 }
 
+SCM
+scm_i_add_regexp_finalizer (struct scm_thread *thread, SCM obj)
+{
+  return add_builtin_finalizer (thread, obj, FINALIZE_KIND_REGEXP);
+}
+
 SCM
 scm_i_add_pointer_finalizer (struct scm_thread *thread, SCM obj, SCM free)
 {
@@ -184,6 +194,13 @@ run_finalizer (struct scm_thread *thread, SCM obj, SCM 
closure)
         case FINALIZE_KIND_DIRECTORY:
           scm_i_finalize_directory (thread, obj);
           break;
+        case FINALIZE_KIND_REGEXP:
+#ifdef ENABLE_REGEX
+          scm_i_finalize_regexp (thread, obj);
+#else
+          abort ();
+#endif
+          break;
         default:
           abort ();
         }
diff --git a/libguile/finalizers.h b/libguile/finalizers.h
index 9ceb9e2d4..b3dd8160a 100644
--- a/libguile/finalizers.h
+++ b/libguile/finalizers.h
@@ -34,6 +34,8 @@ SCM_INTERNAL SCM scm_i_add_port_finalizer (struct scm_thread 
*thread,
                                            SCM obj);
 SCM_INTERNAL SCM scm_i_add_directory_finalizer (struct scm_thread *thread,
                                                 SCM obj);
+SCM_INTERNAL SCM scm_i_add_regexp_finalizer (struct scm_thread *thread,
+                                             SCM obj);
 SCM_INTERNAL SCM scm_i_add_pointer_finalizer (struct scm_thread *thread,
                                               SCM obj, SCM free);
 SCM_INTERNAL SCM scm_i_add_finalizer (struct scm_thread *thread, SCM obj,
diff --git a/libguile/regex-posix.c b/libguile/regex-posix.c
index b27a8daa7..b064c2b65 100644
--- a/libguile/regex-posix.c
+++ b/libguile/regex-posix.c
@@ -40,13 +40,13 @@
 #include "async.h"
 #include "extensions.h"
 #include "feature.h"
+#include "finalizers.h"
 #include "gsubr.h"
 #include "list.h"
 #include "modules.h"
 #include "numbers.h"
 #include "pairs.h"
 #include "ports.h"
-#include "smob.h"
 #include "strings.h"
 #include "strports.h"
 #include "symbols.h"
@@ -60,24 +60,41 @@
 #define REG_BASIC 0
 #endif
 
-scm_t_bits scm_tc16_regex;
+struct scm_regexp
+{
+  scm_t_bits tag;
+  regex_t regex;
+};
 
 static inline int
 scm_is_regexp (SCM x)
 {
-  return SCM_HAS_TYP16 (x, scm_tc16_regex);
+  return SCM_HAS_TYP16 (x, scm_tc16_regexp);
+}
+
+static inline struct scm_regexp*
+scm_to_regexp (SCM x)
+{
+  if (!scm_is_regexp (x))
+    abort ();
+  return (struct scm_regexp *) SCM_UNPACK_POINTER (x);
+}
+
+static inline SCM
+scm_from_regexp (struct scm_regexp *x)
+{
+  return SCM_PACK_POINTER (x);
 }
 
 #define SCM_REGEXP_P(x)        (scm_is_regexp (x))
-#define SCM_RGX(X)     ((regex_t *) SCM_SMOB_DATA (X))
-#define SCM_VALIDATE_RGXP(pos, a) \
+#define SCM_VALIDATE_REGEXP(pos, a) \
   SCM_MAKE_VALIDATE_MSG (pos, a, REGEXP_P, "regexp")
 
-static size_t
-regex_free (SCM obj)
+void
+scm_i_finalize_regexp (struct scm_thread *thread, SCM obj)
 {
-  regfree (SCM_RGX (obj));
-  return 0;
+  struct scm_regexp *rx = scm_to_regexp (obj);
+  regfree (&rx->regex);
 }
 
 
@@ -87,18 +104,10 @@ SCM_SYMBOL (scm_regexp_error_key, 
"regular-expression-syntax");
 static SCM
 scm_regexp_error_msg (int regerrno, regex_t *rx)
 {
-  char *errmsg;
-  int l;
-
-  errmsg = scm_malloc (80);
-  l = regerror (regerrno, rx, errmsg, 80);
-  if (l > 80)
-    {
-      free (errmsg);
-      errmsg = scm_malloc (l);
-      regerror (regerrno, rx, errmsg, l);
-    }
-  return scm_take_locale_string (errmsg);
+  size_t len = regerror (regerrno, rx, NULL, 0);
+  char *errmsg = scm_malloc (len);
+  regerror (regerrno, rx, errmsg, len);
+  return scm_take_locale_stringn (errmsg, len - 1);
 }
 
 SCM_DEFINE_STATIC (regexp_p, "regexp?", 1, 0, 0,
@@ -153,7 +162,7 @@ SCM_DEFINE_STATIC (make_regexp, "make-regexp", 1, 0, 1,
 #define FUNC_NAME s_make_regexp
 {
   SCM flag;
-  regex_t *rx;
+  struct scm_regexp *rx;
   int status, cflags;
   char *c_pat;
 
@@ -173,16 +182,17 @@ SCM_DEFINE_STATIC (make_regexp, "make-regexp", 1, 0, 1,
       flag = SCM_CDR (flag);
     }
 
-  rx = scm_gc_malloc_pointerless (sizeof (regex_t), "regex");
+  rx = scm_gc_malloc_pointerless (sizeof (*rx), "regex");
+  rx->tag = scm_tc16_regexp;
   c_pat = scm_to_locale_string (pat);
-  status = regcomp (rx, c_pat,
+  status = regcomp (&rx->regex, c_pat,
                    /* Make sure they're not passing REG_NOSUB;
                        regexp-exec assumes we're getting match data.  */
                    cflags & ~REG_NOSUB);
   free (c_pat);
   if (status != 0)
     {
-      SCM errmsg = scm_regexp_error_msg (status, rx);
+      SCM errmsg = scm_regexp_error_msg (status, &rx->regex);
       scm_error_scm (scm_regexp_error_key,
                     scm_from_utf8_string (FUNC_NAME),
                     errmsg,
@@ -191,7 +201,9 @@ SCM_DEFINE_STATIC (make_regexp, "make-regexp", 1, 0, 1,
       
       /* never returns */
     }
-  SCM_RETURN_NEWSMOB (scm_tc16_regex, rx);
+  SCM ret = scm_from_regexp (rx);
+  scm_i_add_regexp_finalizer (SCM_I_CURRENT_THREAD, ret);
+  return ret;
 }
 #undef FUNC_NAME
 
@@ -231,8 +243,8 @@ fixup_multibyte_match (regmatch_t *matches, int nmatches, 
char *str)
 }
 
 SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0,
-                   (SCM rx, SCM str, SCM start, SCM flags),
-                   "Match the compiled regular expression @var{rx} against\n"
+                   (SCM regexp, SCM str, SCM start, SCM flags),
+                   "Match the compiled regular expression @var{regexp} 
against\n"
                    "@code{str}.  If the optional integer @var{start} argument 
is\n"
                    "provided, begin matching from that position in the 
string.\n"
                    "Return a match structure describing the results of the 
match,\n"
@@ -259,9 +271,11 @@ SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0,
   SCM mvec = SCM_BOOL_F;
   SCM substr;
 
-  SCM_VALIDATE_RGXP (1, rx);
+  SCM_VALIDATE_REGEXP (1, regexp);
   SCM_VALIDATE_STRING (2, str);
 
+  struct scm_regexp *rx = scm_to_regexp (regexp);
+
   if (SCM_UNBNDP (start))
     {
       substr = str;
@@ -281,9 +295,9 @@ SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0,
 
   c_str = scm_to_locale_string (substr);
 
-  nmatches = SCM_RGX(rx)->re_nsub + 1;
+  nmatches = rx->regex.re_nsub + 1;
   matches = scm_malloc (sizeof (regmatch_t) * nmatches);
-  status = regexec (SCM_RGX (rx), c_str, nmatches, matches,
+  status = regexec (&rx->regex, c_str, nmatches, matches,
                    scm_to_int (flags));
 
   if (!status)
@@ -312,7 +326,7 @@ SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0,
   if (status != 0 && status != REG_NOMATCH)
     scm_error_scm (scm_regexp_error_key,
                   scm_from_utf8_string (FUNC_NAME),
-                  scm_regexp_error_msg (status, SCM_RGX (rx)),
+                  scm_regexp_error_msg (status, &rx->regex),
                   SCM_BOOL_F, SCM_BOOL_F);
   return mvec;
 }
@@ -321,9 +335,6 @@ SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0,
 static void
 scm_init_ice_9_regex (void *unused)
 {
-  scm_tc16_regex = scm_make_smob_type ("regexp", sizeof (regex_t));
-  scm_set_smob_free (scm_tc16_regex, regex_free);
-
   /* Compilation flags.  */
   scm_c_define ("regexp/basic",    scm_from_int (REG_BASIC));
   scm_c_define ("regexp/extended", scm_from_int (REG_EXTENDED));
diff --git a/libguile/regex-posix.h b/libguile/regex-posix.h
index 5af7c15d5..b0566e55f 100644
--- a/libguile/regex-posix.h
+++ b/libguile/regex-posix.h
@@ -24,6 +24,7 @@
 
 #include <libguile/scm.h>
 
+SCM_INTERNAL void scm_i_finalize_regexp (struct scm_thread*, SCM);
 SCM_INTERNAL void scm_init_regex_posix (void);
 
 #endif  /* SCM_REGEX_POSIX_H */
diff --git a/libguile/scm.h b/libguile/scm.h
index 10aa0d2c8..9f7054d43 100644
--- a/libguile/scm.h
+++ b/libguile/scm.h
@@ -515,9 +515,8 @@ typedef uintptr_t scm_t_bits;
 #define scm_tc16_directory             0x047f
 #define scm_tc16_syntax_transformer    0x057f
 #define scm_tc16_random_state          0x067f
-/*
-#define scm_tc16_regexp                        0x107f
-*/
+#define scm_tc16_regexp                        0x077f
+
 
 /* Definitions for tc16: */
 #define SCM_TYP16(x)           (0xffff & SCM_CELL_TYPE (x))

Reply via email to