wingo pushed a commit to branch wip-whippet in repository guile. commit 521662d8b75ceec72e1b882c08ad29494aaf8399 Author: Andy Wingo <wi...@pobox.com> AuthorDate: Tue Jun 17 14:10:12 2025 +0200
Move make-regexp, regexp?, regexp-exec to (ice-9 regex) Also deprecate the C interface. * libguile/Makefile.am: Don't install regex-posix.h. * libguile/deprecated.c: * libguile/deprecated.h: Add deprecated shims for scm_make_regexp et al. * libguile/init.c: Fix comment. * libguile/regex-posix.c: Privatize some of the implementation details. Arrange to install into (ice-9 regex) instead of default environment. * module/ice-9/deprecated.scm: Add deprecation shims. * module/ice-9/regex.scm: Add new definitions. * module/ice-9/sandbox.scm: * module/scripts/read-scheme-source.scm: * module/system/repl/server.scm: * module/texinfo/reflection.scm: * test-suite/tests/r6rs-exceptions.test: * test-suite/tests/srfi-10.test: Import (ice-9 regex). --- libguile/Makefile.am | 2 +- libguile/deprecated.c | 49 ++++++++++ libguile/deprecated.h | 7 ++ libguile/init.c | 2 +- libguile/regex-posix.c | 163 +++++++++++++++++++--------------- libguile/regex-posix.h | 13 +-- module/ice-9/deprecated.scm | 30 +++++++ module/ice-9/regex.scm | 34 +++++-- module/ice-9/sandbox.scm | 2 +- module/scripts/read-scheme-source.scm | 3 +- module/system/repl/server.scm | 1 + module/texinfo/reflection.scm | 3 +- test-suite/tests/r6rs-exceptions.test | 3 +- test-suite/tests/srfi-10.test | 3 +- 14 files changed, 222 insertions(+), 93 deletions(-) diff --git a/libguile/Makefile.am b/libguile/Makefile.am index 23d18f40a..141083f21 100644 --- a/libguile/Makefile.am +++ b/libguile/Makefile.am @@ -524,6 +524,7 @@ noinst_HEADERS = custom-ports.h \ private-options.h \ programs.h \ ports-internal.h \ + regex-posix.h \ syntax.h \ trace.h \ whippet-embedder.h @@ -653,7 +654,6 @@ modinclude_HEADERS = \ random.h \ rdelim.h \ read.h \ - regex-posix.h \ rw.h \ scmsigs.h \ script.h \ diff --git a/libguile/deprecated.c b/libguile/deprecated.c index cfec1d729..3b6290d9e 100644 --- a/libguile/deprecated.c +++ b/libguile/deprecated.c @@ -686,6 +686,55 @@ scm_force (SCM promise) return scm_call_1 (scm_variable_ref (force_var), promise); } + + +static SCM make_regexp_var; +static SCM regexp_p_var; +static SCM regexp_exec_var; + +static void +init_regexp_vars (void) +{ + make_regexp_var = scm_c_public_lookup ("ice-9 regex", "make-regexp"); + regexp_p_var = scm_c_public_lookup ("ice-9 regex", "regexp?"); + regexp_exec_var = scm_c_public_lookup ("ice-9 regex", "regexp-exec"); +} + +static void +init_regexp_functions (void) +{ + static scm_i_pthread_once_t once = SCM_I_PTHREAD_ONCE_INIT; + scm_c_issue_deprecation_warning + ("Using the SCM regexp functions from C is deprecated. Invoke" + "make-regexp, etc. from (ice-9 regex) instead."); + scm_i_pthread_once (&once, init_regexp_vars); +} + + +SCM +scm_make_regexp (SCM pat, SCM flags) +{ + init_regexp_functions (); + return scm_apply_1 (scm_variable_ref (make_regexp_var), pat, flags); +} + +SCM +scm_regexp_p (SCM x) +{ + init_regexp_functions (); + return scm_call_1 (scm_variable_ref (regexp_p_var), x); +} + +SCM +scm_regexp_exec (SCM rx, SCM str, SCM start, SCM flags) +{ + init_regexp_functions (); + if (SCM_UNBNDP (start)) + start = SCM_INUM0; + if (SCM_UNBNDP (flags)) + flags = SCM_INUM0; + return scm_call_4 (scm_variable_ref (regexp_exec_var), rx, str, start, flags); +} diff --git a/libguile/deprecated.h b/libguile/deprecated.h index 0403f7e59..fed5bc94b 100644 --- a/libguile/deprecated.h +++ b/libguile/deprecated.h @@ -113,6 +113,13 @@ SCM_DEPRECATED SCM scm_make_promise (SCM thunk); SCM_DEPRECATED SCM scm_force (SCM x); SCM_DEPRECATED SCM scm_promise_p (SCM x); +SCM_DEPRECATED SCM scm_make_regexp (SCM pat, SCM flags); +SCM_DEPRECATED SCM scm_regexp_p (SCM x); +SCM_DEPRECATED SCM scm_regexp_exec (SCM rx, SCM str, SCM start, SCM flags); + +#define SCM_RGXP(X) (scm_is_true (scm_regexp_p (x))) +#define SCM_VALIDATE_RGXP(pos, a) SCM_MAKE_VALIDATE_MSG (pos, a, RGXP, "regexp") + /* Deprecated declarations go here. */ void scm_i_init_deprecated (void); diff --git a/libguile/init.c b/libguile/init.c index 0118c8a99..9e74b9df5 100644 --- a/libguile/init.c +++ b/libguile/init.c @@ -411,7 +411,7 @@ scm_i_init_guile (struct gc_stack_addr base) scm_init_posix (); #endif #ifdef ENABLE_REGEX - scm_init_regex_posix (); /* Requires smob_prehistory */ + scm_init_regex_posix (); #endif scm_init_procs (); scm_init_scmsigs (); diff --git a/libguile/regex-posix.c b/libguile/regex-posix.c index 1c40f3cfa..b27a8daa7 100644 --- a/libguile/regex-posix.c +++ b/libguile/regex-posix.c @@ -38,6 +38,7 @@ #include <wchar.h> #include "async.h" +#include "extensions.h" #include "feature.h" #include "gsubr.h" #include "list.h" @@ -50,6 +51,7 @@ #include "strports.h" #include "symbols.h" #include "vectors.h" +#include "version.h" #include "regex-posix.h" @@ -60,6 +62,17 @@ scm_t_bits scm_tc16_regex; +static inline int +scm_is_regexp (SCM x) +{ + return SCM_HAS_TYP16 (x, scm_tc16_regex); +} + +#define SCM_REGEXP_P(x) (scm_is_regexp (x)) +#define SCM_RGX(X) ((regex_t *) SCM_SMOB_DATA (X)) +#define SCM_VALIDATE_RGXP(pos, a) \ + SCM_MAKE_VALIDATE_MSG (pos, a, REGEXP_P, "regexp") + static size_t regex_free (SCM obj) { @@ -88,56 +101,56 @@ scm_regexp_error_msg (int regerrno, regex_t *rx) return scm_take_locale_string (errmsg); } -SCM_DEFINE (scm_regexp_p, "regexp?", 1, 0, 0, - (SCM obj), - "Return @code{#t} if @var{obj} is a compiled regular expression,\n" - "or @code{#f} otherwise.") -#define FUNC_NAME s_scm_regexp_p +SCM_DEFINE_STATIC (regexp_p, "regexp?", 1, 0, 0, + (SCM obj), + "Return @code{#t} if @var{obj} is a compiled regular expression,\n" + "or @code{#f} otherwise.") +#define FUNC_NAME s_regexp_p { - return scm_from_bool(SCM_RGXP (obj)); + return scm_from_bool (scm_is_regexp (obj)); } #undef FUNC_NAME -SCM_DEFINE (scm_make_regexp, "make-regexp", 1, 0, 1, - (SCM pat, SCM flags), - "Compile the regular expression described by @var{pat}, and\n" - "return the compiled regexp structure. If @var{pat} does not\n" - "describe a legal regular expression, @code{make-regexp} throws\n" - "a @code{regular-expression-syntax} error.\n" - "\n" - "The @var{flags} arguments change the behavior of the compiled\n" - "regular expression. The following flags may be supplied:\n" - "\n" - "@table @code\n" - "@item regexp/icase\n" - "Consider uppercase and lowercase letters to be the same when\n" - "matching.\n" - "@item regexp/newline\n" - "If a newline appears in the target string, then permit the\n" - "@samp{^} and @samp{$} operators to match immediately after or\n" - "immediately before the newline, respectively. Also, the\n" - "@samp{.} and @samp{[^...]} operators will never match a newline\n" - "character. The intent of this flag is to treat the target\n" - "string as a buffer containing many lines of text, and the\n" - "regular expression as a pattern that may match a single one of\n" - "those lines.\n" - "@item regexp/basic\n" - "Compile a basic (``obsolete'') regexp instead of the extended\n" - "(``modern'') regexps that are the default. Basic regexps do\n" - "not consider @samp{|}, @samp{+} or @samp{?} to be special\n" - "characters, and require the @samp{@{...@}} and @samp{(...)}\n" - "metacharacters to be backslash-escaped (@pxref{Backslash\n" - "Escapes}). There are several other differences between basic\n" - "and extended regular expressions, but these are the most\n" - "significant.\n" - "@item regexp/extended\n" - "Compile an extended regular expression rather than a basic\n" - "regexp. This is the default behavior; this flag will not\n" - "usually be needed. If a call to @code{make-regexp} includes\n" - "both @code{regexp/basic} and @code{regexp/extended} flags, the\n" - "one which comes last will override the earlier one.\n" - "@end table") -#define FUNC_NAME s_scm_make_regexp +SCM_DEFINE_STATIC (make_regexp, "make-regexp", 1, 0, 1, + (SCM pat, SCM flags), + "Compile the regular expression described by @var{pat}, and\n" + "return the compiled regexp structure. If @var{pat} does not\n" + "describe a legal regular expression, @code{make-regexp} throws\n" + "a @code{regular-expression-syntax} error.\n" + "\n" + "The @var{flags} arguments change the behavior of the compiled\n" + "regular expression. The following flags may be supplied:\n" + "\n" + "@table @code\n" + "@item regexp/icase\n" + "Consider uppercase and lowercase letters to be the same when\n" + "matching.\n" + "@item regexp/newline\n" + "If a newline appears in the target string, then permit the\n" + "@samp{^} and @samp{$} operators to match immediately after or\n" + "immediately before the newline, respectively. Also, the\n" + "@samp{.} and @samp{[^...]} operators will never match a newline\n" + "character. The intent of this flag is to treat the target\n" + "string as a buffer containing many lines of text, and the\n" + "regular expression as a pattern that may match a single one of\n" + "those lines.\n" + "@item regexp/basic\n" + "Compile a basic (``obsolete'') regexp instead of the extended\n" + "(``modern'') regexps that are the default. Basic regexps do\n" + "not consider @samp{|}, @samp{+} or @samp{?} to be special\n" + "characters, and require the @samp{@{...@}} and @samp{(...)}\n" + "metacharacters to be backslash-escaped (@pxref{Backslash\n" + "Escapes}). There are several other differences between basic\n" + "and extended regular expressions, but these are the most\n" + "significant.\n" + "@item regexp/extended\n" + "Compile an extended regular expression rather than a basic\n" + "regexp. This is the default behavior; this flag will not\n" + "usually be needed. If a call to @code{make-regexp} includes\n" + "both @code{regexp/basic} and @code{regexp/extended} flags, the\n" + "one which comes last will override the earlier one.\n" + "@end table") +#define FUNC_NAME s_make_regexp { SCM flag; regex_t *rx; @@ -217,28 +230,28 @@ fixup_multibyte_match (regmatch_t *matches, int nmatches, char *str) } -SCM_DEFINE (scm_regexp_exec, "regexp-exec", 2, 2, 0, - (SCM rx, SCM str, SCM start, SCM flags), - "Match the compiled regular expression @var{rx} against\n" - "@code{str}. If the optional integer @var{start} argument is\n" - "provided, begin matching from that position in the string.\n" - "Return a match structure describing the results of the match,\n" - "or @code{#f} if no match could be found.\n" - "\n" - "The @var{flags} arguments change the matching behavior.\n" - "The following flags may be supplied:\n" - "\n" - "@table @code\n" - "@item regexp/notbol\n" - "Operator @samp{^} always fails (unless @code{regexp/newline}\n" - "is used). Use this when the beginning of the string should\n" - "not be considered the beginning of a line.\n" - "@item regexp/noteol\n" - "Operator @samp{$} always fails (unless @code{regexp/newline}\n" - "is used). Use this when the end of the string should not be\n" - "considered the end of a line.\n" - "@end table") -#define FUNC_NAME s_scm_regexp_exec +SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0, + (SCM rx, SCM str, SCM start, SCM flags), + "Match the compiled regular expression @var{rx} against\n" + "@code{str}. If the optional integer @var{start} argument is\n" + "provided, begin matching from that position in the string.\n" + "Return a match structure describing the results of the match,\n" + "or @code{#f} if no match could be found.\n" + "\n" + "The @var{flags} arguments change the matching behavior.\n" + "The following flags may be supplied:\n" + "\n" + "@table @code\n" + "@item regexp/notbol\n" + "Operator @samp{^} always fails (unless @code{regexp/newline}\n" + "is used). Use this when the beginning of the string should\n" + "not be considered the beginning of a line.\n" + "@item regexp/noteol\n" + "Operator @samp{$} always fails (unless @code{regexp/newline}\n" + "is used). Use this when the end of the string should not be\n" + "considered the end of a line.\n" + "@end table") +#define FUNC_NAME s_regexp_exec { int status, nmatches, offset; regmatch_t *matches; @@ -305,8 +318,8 @@ SCM_DEFINE (scm_regexp_exec, "regexp-exec", 2, 2, 0, } #undef FUNC_NAME -void -scm_init_regex_posix () +static void +scm_init_ice_9_regex (void *unused) { scm_tc16_regex = scm_make_smob_type ("regexp", sizeof (regex_t)); scm_set_smob_free (scm_tc16_regex, regex_free); @@ -321,7 +334,17 @@ scm_init_regex_posix () scm_c_define ("regexp/notbol", scm_from_int (REG_NOTBOL)); scm_c_define ("regexp/noteol", scm_from_int (REG_NOTEOL)); +#ifndef SCM_MAGIC_SNARFER #include "regex-posix.x" +#endif +} +void +scm_init_regex_posix () +{ + scm_c_register_extension ("libguile-" SCM_EFFECTIVE_VERSION, + "scm_init_ice_9_regex", + scm_init_ice_9_regex, + NULL); scm_add_feature ("regex"); } diff --git a/libguile/regex-posix.h b/libguile/regex-posix.h index c2425603b..5af7c15d5 100644 --- a/libguile/regex-posix.h +++ b/libguile/regex-posix.h @@ -1,7 +1,7 @@ #ifndef SCM_REGEX_POSIX_H #define SCM_REGEX_POSIX_H -/* Copyright 1997-1998,2000-2001,2006,2008,2018 +/* Copyright 1997-1998,2000-2001,2006,2008,2018,2025 Free Software Foundation, Inc. This file is part of Guile. @@ -22,17 +22,8 @@ -#include <libguile/error.h> +#include <libguile/scm.h> -SCM_API scm_t_bits scm_tc16_regex; -#define SCM_RGX(X) ((regex_t *) SCM_SMOB_DATA (X)) -#define SCM_RGXP(X) (SCM_SMOB_PREDICATE (scm_tc16_regex, (X))) - -#define SCM_VALIDATE_RGXP(pos, a) SCM_MAKE_VALIDATE_MSG (pos, a, RGXP, "regexp") - -SCM_API SCM scm_make_regexp (SCM pat, SCM flags); -SCM_API SCM scm_regexp_p (SCM x); -SCM_API SCM scm_regexp_exec (SCM rx, SCM str, SCM start, SCM flags); SCM_INTERNAL void scm_init_regex_posix (void); #endif /* SCM_REGEX_POSIX_H */ diff --git a/module/ice-9/deprecated.scm b/module/ice-9/deprecated.scm index bdf3beb75..64ceb7575 100644 --- a/module/ice-9/deprecated.scm +++ b/module/ice-9/deprecated.scm @@ -385,3 +385,33 @@ (make-promise thunk) (force promise)) (define-deprecated*/stx (ice-9 promises) delay) + +(cond-expand + ;; FIXME: Don't include this if there is no regexp support! + ((or regex guile) + (use-modules (ice-9 regex)) + + (define-deprecated-trampoline (((ice-9 regex) make-regexp) pat . flags) + (apply make-regexp pat flags)) + (define-deprecated-trampoline (((ice-9 regex) regexp?) x) + (regexp? x)) + (define-deprecated-trampoline (((ice-9 regex) regexp-exec) rx str #:optional (start 0) (flags 0)) + (regexp-exec rx str start flags)) + (define-deprecated*/stx (ice-9 regex) + regexp/basic + regexp/extended + regexp/icase + regexp/newline + regexp/notbol + regexp/noteol) + + (export (make-regexp* . make-regexp) + (regexp?* . regexp?) + (regexp-exec* . regexp-exec) + (regexp/basic* . regexp/basic) + (regexp/extended* . regexp/extended) + (regexp/icase* . regexp/icase) + (regexp/newline* . regexp/newline) + (regexp/notbol* . regexp/notbol) + (regexp/noteol* . regexp/noteol))) + (else)) diff --git a/module/ice-9/regex.scm b/module/ice-9/regex.scm index 08ae2c2f5..6d3cfb9e6 100644 --- a/module/ice-9/regex.scm +++ b/module/ice-9/regex.scm @@ -1,4 +1,4 @@ -;;;; Copyright (C) 1997, 1999, 2001, 2004, 2005, 2006, 2008, 2010 Free Software Foundation, Inc. +;;;; Copyright (C) 1997, 1999, 2001, 2004, 2005, 2006, 2008, 2010, 2025 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -38,10 +38,34 @@ ;;;; POSIX regex support functions. (define-module (ice-9 regex) - #:export (match:count match:string match:prefix match:suffix - regexp-match? regexp-quote match:start match:end match:substring - string-match regexp-substitute fold-matches list-matches - regexp-substitute/global)) + ;; FIXME: #:export instead of #:replace when deprecated code removed. + #:replace (make-regexp + regexp? + regexp-exec + regexp/basic + regexp/extended + regexp/icase + regexp/newline + regexp/notbol + regexp/noteol) + #:export (fold-matches + list-matches + match:count + match:end + match:prefix + match:start + match:string + match:substring + match:suffix + regexp-match? + regexp-quote + regexp-substitute + regexp-substitute/global + string-match)) + +(eval-when (expand load eval) + (load-extension (string-append "libguile-" (effective-version)) + "scm_init_ice_9_regex")) ;; References: ;; diff --git a/module/ice-9/sandbox.scm b/module/ice-9/sandbox.scm index e633c9a99..3a441402b 100644 --- a/module/ice-9/sandbox.scm +++ b/module/ice-9/sandbox.scm @@ -590,7 +590,7 @@ allocation limit is exceeded, an exception will be thrown to the restricted-vector-sort!))) (define regexp-bindings - '(((guile) + '(((ice-9 regex) make-regexp regexp-exec regexp/basic diff --git a/module/scripts/read-scheme-source.scm b/module/scripts/read-scheme-source.scm index 1bca6a4c4..2831cd334 100644 --- a/module/scripts/read-scheme-source.scm +++ b/module/scripts/read-scheme-source.scm @@ -1,6 +1,6 @@ ;;; read-scheme-source --- Read a file, recognizing scheme forms and comments -;; Copyright (C) 2001, 2006, 2011 Free Software Foundation, Inc. +;; Copyright (C) 2001, 2006, 2011, 2025 Free Software Foundation, Inc. ;; ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU Lesser General Public License @@ -85,6 +85,7 @@ ;;; Code: (define-module (scripts read-scheme-source) + :use-module (ice-9 regex) :use-module (ice-9 rdelim) :export (read-scheme-source read-scheme-source-silently diff --git a/module/system/repl/server.scm b/module/system/repl/server.scm index af0d50ed5..aa88fd78a 100644 --- a/module/system/repl/server.scm +++ b/module/system/repl/server.scm @@ -24,6 +24,7 @@ #:use-module (system repl hooks) #:use-module (ice-9 threads) #:use-module (ice-9 rdelim) + #:use-module (ice-9 regex) #:use-module (ice-9 match) #:use-module (ice-9 iconv) #:use-module (rnrs bytevectors) diff --git a/module/texinfo/reflection.scm b/module/texinfo/reflection.scm index 8033eb047..4f609692d 100644 --- a/module/texinfo/reflection.scm +++ b/module/texinfo/reflection.scm @@ -1,6 +1,6 @@ ;;;; (texinfo reflection) -- documenting Scheme as stexinfo ;;;; -;;;; Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc. +;;;; Copyright (C) 2009, 2010, 2011, 2025 Free Software Foundation, Inc. ;;;; Copyright (C) 2003,2004,2009 Andy Wingo <wingo at pobox dot com> ;;;; ;;;; This library is free software; you can redistribute it and/or @@ -33,6 +33,7 @@ #:use-module (oop goops) #:use-module (texinfo) #:use-module (texinfo plain-text) + #:use-module (ice-9 regex) #:use-module (ice-9 session) #:use-module (ice-9 documentation) #:use-module ((sxml transform) #:select (pre-post-order)) diff --git a/test-suite/tests/r6rs-exceptions.test b/test-suite/tests/r6rs-exceptions.test index 8ad6bdca8..772e72836 100644 --- a/test-suite/tests/r6rs-exceptions.test +++ b/test-suite/tests/r6rs-exceptions.test @@ -1,6 +1,6 @@ ;;; r6rs-exceptions.test --- Test suite for R6RS (rnrs exceptions) -*- scheme -*- -;; Copyright (C) 2010, 2013 Free Software Foundation, Inc. +;; Copyright (C) 2010, 2013, 2025, 2025 Free Software Foundation, Inc. ;; ;; This library is free software; you can redistribute it and/or ;; modify it under the terms of the GNU Lesser General Public @@ -20,6 +20,7 @@ (define-module (test-suite test-rnrs-exceptions) :use-module ((rnrs conditions) :version (6)) :use-module ((rnrs exceptions) :version (6)) + :use-module (ice-9 regex) :use-module (system foreign) :use-module (test-suite lib)) diff --git a/test-suite/tests/srfi-10.test b/test-suite/tests/srfi-10.test index bc7a79157..0ae135bef 100644 --- a/test-suite/tests/srfi-10.test +++ b/test-suite/tests/srfi-10.test @@ -1,7 +1,7 @@ ;;;; srfi-10.test --- Test suite for Guile's SRFI-10 functions. -*- scheme -*- ;;;; Martin Grabmueller, 2001-05-10 ;;;; -;;;; Copyright (C) 2001, 2006, 2009 Free Software Foundation, Inc. +;;;; Copyright (C) 2001, 2006, 2009, 2025 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -19,6 +19,7 @@ (use-modules (srfi srfi-10) + (ice-9 regex) ((test-suite lib) #:select (pass-if with-test-prefix))) (define-reader-ctor 'rx make-regexp)