wingo pushed a commit to branch wip-whippet
in repository guile.

commit 7a1406891fbcf99e9d8fe3794bc7a816a1f6fb46
Author: Andy Wingo <wi...@pobox.com>
AuthorDate: Fri Jun 13 11:10:22 2025 +0200

    Move char-set-cursor implementation to Scheme
    
    Also deprecate the C interface.
    
    * libguile/deprecated.h:
    * libguile/deprecated.c (scm_char_set_cursor):
    (scm_char_set_ref):
    (scm_char_set_cursor_next):
    (scm_end_of_char_set_p): Deprecate.
    * libguile/srfi-14.c (charset-mutable?, char-set-ranges)
    (charset-set-ranges!): New accessors, exposed internally to srfi-14.scm.
    * libguile/srfi-14.c (scm_boot_srfi_14): Remove scm_tc16_charset_cursor.
    * module/srfi/srfi-14.scm (<char-set-cursor>): Implement as a record.
---
 libguile/deprecated.c   |  55 ++++++++++++++
 libguile/deprecated.h   |   5 ++
 libguile/srfi-14.c      | 194 ++++++++++--------------------------------------
 libguile/srfi-14.h      |   4 -
 module/srfi/srfi-14.scm |  82 ++++++++++++++++++++
 5 files changed, 182 insertions(+), 158 deletions(-)

diff --git a/libguile/deprecated.c b/libguile/deprecated.c
index 7da74fb8a..2423410f1 100644
--- a/libguile/deprecated.c
+++ b/libguile/deprecated.c
@@ -417,6 +417,61 @@ scm_array_cell_set_x (SCM array, SCM val, SCM indices)
 
 
 
+static SCM char_set_cursor_var;
+static SCM char_set_ref_var;
+static SCM char_set_cursor_next_var;
+static SCM end_of_char_set_p_var;
+
+static void
+init_char_set_cursor_vars (void)
+{
+  char_set_cursor_var = scm_c_public_lookup ("srfi srfi-14", 
"char-set-cursor");
+  char_set_ref_var = scm_c_public_lookup ("srfi srfi-14", "char-set-ref");
+  char_set_cursor_next_var = scm_c_public_lookup ("srfi srfi-14", 
"char-set-cursor-next");
+  end_of_char_set_p_var = scm_c_public_lookup ("srfi srfi-14", 
"end-of-char-set?");
+}
+
+static void
+init_char_set_cursor_functions (void)
+{
+  static scm_i_pthread_once_t once = SCM_I_PTHREAD_ONCE_INIT;
+  scm_c_issue_deprecation_warning
+    ("Using the char set cursor functions from C is deprecated.  Invoke"
+     "char-set-cursor, etc. from (srfi srfi-14) instead.");
+  scm_i_pthread_once (&once, init_char_set_cursor_vars);
+}
+
+SCM
+scm_char_set_cursor (SCM cs)
+{
+  init_char_set_cursor_functions ();
+  return scm_call_1 (scm_variable_ref (char_set_cursor_var), cs);
+}
+
+SCM
+scm_char_set_ref (SCM cs, SCM cursor)
+{
+  init_char_set_cursor_functions ();
+  return scm_call_2 (scm_variable_ref (char_set_ref_var), cs, cursor);
+}
+
+SCM
+scm_char_set_cursor_next (SCM cs, SCM cursor)
+{
+  init_char_set_cursor_functions ();
+  return scm_call_2 (scm_variable_ref (char_set_cursor_next_var), cs, cursor);
+}
+
+SCM
+scm_end_of_char_set_p (SCM cursor)
+{
+  init_char_set_cursor_functions ();
+  return scm_call_1 (scm_variable_ref (end_of_char_set_p_var), cursor);
+}
+
+
+
+
 void
 scm_i_init_deprecated ()
 {
diff --git a/libguile/deprecated.h b/libguile/deprecated.h
index bade32244..905792970 100644
--- a/libguile/deprecated.h
+++ b/libguile/deprecated.h
@@ -75,6 +75,11 @@ SCM_DEPRECATED SCM scm_array_slice_for_each_in_order (SCM 
frank, SCM op, SCM arg
 SCM_DEPRECATED SCM scm_array_cell_ref (SCM array, SCM indices);
 SCM_DEPRECATED SCM scm_array_cell_set_x (SCM array, SCM val, SCM indices);
 
+SCM_DEPRECATED SCM scm_char_set_cursor (SCM cs);
+SCM_DEPRECATED SCM scm_char_set_ref (SCM cs, SCM cursor);
+SCM_DEPRECATED SCM scm_char_set_cursor_next (SCM cs, SCM cursor);
+SCM_DEPRECATED SCM scm_end_of_char_set_p (SCM cursor);
+
 /* Deprecated declarations go here.  */
 
 void scm_i_init_deprecated (void);
diff --git a/libguile/srfi-14.c b/libguile/srfi-14.c
index f38e305a9..265d8914b 100644
--- a/libguile/srfi-14.c
+++ b/libguile/srfi-14.c
@@ -63,12 +63,6 @@ struct scm_charset
   struct scm_bytevector *ranges;
 };
 
-typedef struct
-{
-  size_t range;
-  scm_t_wchar n;
-} scm_t_char_set_cursor;
-
 static inline struct scm_charset*
 scm_to_charset (SCM scm)
 {
@@ -680,29 +674,6 @@ scm_i_print_char_set (SCM charset, SCM port, 
scm_print_state *pstate SCM_UNUSED)
   return 1;
 }
 
-/* Smob print hook for character sets cursors.  */
-int scm_tc16_charset_cursor = 0;
-static int
-charset_cursor_print (SCM cursor, SCM port,
-                      scm_print_state *pstate SCM_UNUSED)
-{
-  scm_t_char_set_cursor *cur;
-
-  cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
-
-  scm_puts ("#<charset-cursor ", port);
-  if (cur->range == (size_t) (-1))
-    scm_puts ("(empty)", port);
-  else
-    {
-      scm_write (scm_from_size_t (cur->range), port);
-      scm_puts (":", port);
-      scm_write (scm_from_int32 (cur->n), port);
-    }
-  scm_puts (">", port);
-  return 1;
-}
-
 SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0,
             (SCM obj),
            "Return @code{#t} if @var{obj} is a character set, @code{#f}\n"
@@ -713,6 +684,46 @@ SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0,
 }
 #undef FUNC_NAME
 
+SCM_DEFINE_STATIC (scm_charset_mutable_p, "charset-mutable?", 1, 0, 0,
+                   (SCM cs),
+                   "Return @code{#t} if the character set @var{cs} is 
mutable,\n"
+                   "@code{#f} otherwise.")
+#define FUNC_NAME s_scm_char_set_p
+{
+  SCM_VALIDATE_CHARSET (1, cs);
+  return scm_from_bool (!charset_is_immutable (cs));
+}
+#undef FUNC_NAME
+
+SCM_DEFINE_STATIC (scm_charset_ranges, "charset-ranges", 1, 0, 0,
+                   (SCM cs),
+                   "Return the {lo, hi} packed sorted array of inclusive\n"
+                   "ranges of the character set @var{cs}, as a u32vector.")
+#define FUNC_NAME s_scm_char_set_p
+{
+  SCM_VALIDATE_CHARSET (1, cs);
+  return scm_from_bytevector (scm_to_charset (cs)->ranges);
+}
+#undef FUNC_NAME
+
+SCM_DEFINE_STATIC (scm_charset_set_ranges_x, "charset-set-ranges!", 2, 0, 0,
+                   (SCM cs, SCM ranges),
+                   "Replace the {lo, hi} packed sorted array of inclusive\n"
+                   "ranges of the character set @var{cs} with the given\n"
+                   "u32vector.")
+#define FUNC_NAME s_scm_char_set_p
+{
+  SCM_VALIDATE_MUTABLE_CHARSET (1, cs);
+  SCM_VALIDATE_BYTEVECTOR (2, ranges);
+  struct scm_bytevector *bv = scm_to_bytevector (ranges);
+  SCM_ASSERT_TYPE
+    ((bv->length % sizeof (scm_t_char_range)) == 0
+     && scm_bytevector_element_type (bv) == SCM_ARRAY_ELEMENT_TYPE_U32,
+     ranges, 2, FUNC_NAME, "char ranges");
+  scm_to_charset (cs)->ranges = bv;
+  return SCM_UNSPECIFIED;
+}
+#undef FUNC_NAME
 
 int
 scm_i_char_sets_equal (SCM a, SCM b)
@@ -817,128 +828,6 @@ SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0,
 #undef FUNC_NAME
 
 
-SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0,
-            (SCM cs), "Return a cursor into the character set @var{cs}.")
-#define FUNC_NAME s_scm_char_set_cursor
-{
-  struct scm_charset *cs_data;
-  scm_t_char_set_cursor *cur_data;
-
-  SCM_VALIDATE_CHARSET (1, cs);
-  cs_data = scm_to_charset (cs);
-  cur_data =
-    (scm_t_char_set_cursor *) scm_gc_malloc (sizeof (scm_t_char_set_cursor),
-                                             "charset-cursor");
-  if (charset_len (cs_data) == 0)
-    {
-      cur_data->range = (size_t) (-1);
-      cur_data->n = 0;
-    }
-  else
-    {
-      cur_data->range = 0;
-      cur_data->n = charset_range_lo (cs_data, 0);
-    }
-  SCM_RETURN_NEWSMOB (scm_tc16_charset_cursor, cur_data);
-}
-#undef FUNC_NAME
-
-
-SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0,
-            (SCM cs, SCM cursor),
-            "Return the character at the current cursor position\n"
-            "@var{cursor} in the character set @var{cs}.  It is an error to\n"
-            "pass a cursor for which @code{end-of-char-set?} returns true.")
-#define FUNC_NAME s_scm_char_set_ref
-{
-  struct scm_charset *cs_data;
-  scm_t_char_set_cursor *cur_data;
-  size_t i;
-
-  SCM_VALIDATE_CHARSET (1, cs);
-  SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
-
-  cs_data = scm_to_charset (cs);
-  cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
-
-  /* Validate that this cursor is still true.  */
-  i = cur_data->range;
-  if (i == (size_t) (-1)
-      || i >= charset_len (cs_data)
-      || cur_data->n < charset_range_lo (cs_data, i)
-                                       || cur_data->n > charset_range_hi 
(cs_data, i))
-    SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
-  return SCM_MAKE_CHAR (cur_data->n);
-}
-#undef FUNC_NAME
-
-
-SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0,
-            (SCM cs, SCM cursor),
-            "Advance the character set cursor @var{cursor} to the next\n"
-            "character in the character set @var{cs}.  It is an error if the\n"
-            "cursor given satisfies @code{end-of-char-set?}.")
-#define FUNC_NAME s_scm_char_set_cursor_next
-{
-  struct scm_charset *cs_data;
-  scm_t_char_set_cursor *cur_data;
-  size_t i;
-
-  SCM_VALIDATE_CHARSET (1, cs);
-  SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
-
-  cs_data = scm_to_charset (cs);
-  cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
-
-  /* Validate that this cursor is still true.  */
-  i = cur_data->range;
-  if (i == (size_t) (-1)
-      || i >= charset_len (cs_data)
-      || !charset_range_contains (cs_data, i, cur_data->n))
-    SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
-  /* Increment the cursor.  */
-  if (cur_data->n == charset_range_hi (cs_data, i))
-    {
-      if (i + 1 < charset_len (cs_data))
-        {
-          cur_data->range = i + 1;
-          cur_data->n = charset_range_lo (cs_data, i + 1);
-        }
-      else
-        {
-          /* This is the end of the road.  */
-          cur_data->range = (size_t) (-1);
-          cur_data->n = 0;
-        }
-    }
-  else
-    {
-      cur_data->n = cur_data->n + 1;
-    }
-
-  return cursor;
-}
-#undef FUNC_NAME
-
-
-SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0,
-            (SCM cursor),
-            "Return @code{#t} if @var{cursor} has reached the end of a\n"
-            "character set, @code{#f} otherwise.")
-#define FUNC_NAME s_scm_end_of_char_set_p
-{
-  scm_t_char_set_cursor *cur_data;
-  SCM_VALIDATE_SMOB (1, cursor, charset_cursor);
-
-  cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
-  if (cur_data->range == (size_t) (-1))
-    return SCM_BOOL_T;
-
-  return SCM_BOOL_F;
-}
-#undef FUNC_NAME
-
-
 SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0,
             (SCM kons, SCM knil, SCM cs),
             "Fold the procedure @var{kons} over the character set @var{cs},\n"
@@ -2079,9 +1968,6 @@ scm_boot_srfi_14 (void)
   empty_charset_ranges =
     scm_i_make_typed_bytevector (0, SCM_ARRAY_ELEMENT_TYPE_U32);
 
-  scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0);
-  scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print);
-
   FOR_EACH_STANDARD_CHARSET (DEFINE_C_CHARSET);
 
   scm_c_register_extension ("libguile-" SCM_EFFECTIVE_VERSION,
diff --git a/libguile/srfi-14.h b/libguile/srfi-14.h
index 2cc5aa8f5..3496ba46d 100644
--- a/libguile/srfi-14.h
+++ b/libguile/srfi-14.h
@@ -35,10 +35,6 @@ SCM_API SCM scm_char_set_p (SCM obj);
 SCM_API SCM scm_char_set_eq (SCM char_sets);
 SCM_API SCM scm_char_set_leq (SCM char_sets);
 SCM_API SCM scm_char_set_hash (SCM cs, SCM bound);
-SCM_API SCM scm_char_set_cursor (SCM cs);
-SCM_API SCM scm_char_set_ref (SCM cs, SCM cursor);
-SCM_API SCM scm_char_set_cursor_next (SCM cs, SCM cursor);
-SCM_API SCM scm_end_of_char_set_p (SCM cursor);
 SCM_API SCM scm_char_set_fold (SCM kons, SCM knil, SCM cs);
 SCM_API SCM scm_char_set_unfold (SCM p, SCM f, SCM g, SCM seed, SCM base_cs);
 SCM_API SCM scm_char_set_unfold_x (SCM p, SCM f, SCM g, SCM seed, SCM base_cs);
diff --git a/module/srfi/srfi-14.scm b/module/srfi/srfi-14.scm
index 97dcdf328..4fad5b721 100644
--- a/module/srfi/srfi-14.scm
+++ b/module/srfi/srfi-14.scm
@@ -25,6 +25,9 @@
 (define-module (srfi srfi-14)
   ;; FIXME: Use #:export instead of #:replace once deprecated bindings
   ;; are removed.
+  #:use-module (ice-9 match)
+  #:use-module (srfi srfi-9)
+  #:use-module (rnrs bytevectors)
   #:replace (;; General procedures
              char-set?
              char-set=
@@ -98,6 +101,85 @@
   (load-extension (string-append "libguile-" (effective-version))
                   "scm_init_srfi_14"))
 
+(define-record-type <char-set-cursor>
+  (make-char-set-cursor cs range n limit)
+  char-set-cursor?
+  (cs char-set-cursor-charset)
+  (range char-set-cursor-range set-char-set-cursor-range!)
+  (n char-set-cursor-n set-char-set-cursor-n!)
+  (limit char-set-cursor-limit set-char-set-cursor-limit!))
+
+(define (charset-ranges-len ranges)
+  (ash (bytevector-length ranges) -3))
+
+(define (charset-range-lo ranges idx)
+  (bytevector-u32-native-ref ranges (ash idx 3)))
+(define (charset-range-hi ranges idx)
+  (bytevector-u32-native-ref ranges (+ (ash idx 3) 4)))
+
+(define (char-set-cursor cs)
+  "Return a cursor into the character set @var{cs}."
+  (let* ((ranges (charset-ranges cs))
+         (len (charset-ranges-len ranges)))
+    (if (zero? len)
+        (make-char-set-cursor cs #f #f #f)
+        (make-char-set-cursor cs 0 (charset-range-lo ranges 0)
+                              (charset-range-hi ranges 0)))))
+
+(define (char-set-ref cs cursor)
+  "Return the character at the current cursor position @var{cursor} in the
+character set @var{cs}.  It is an error to pass a cursor for which
+@code{end-of-char-set?} returns true."
+  (match cursor
+    (($ <char-set-cursor> cs* range n limit)
+     (unless (eq? cs cs*)
+       (error "charset cursors can only be used with their original charsets"
+              cursor))
+     (unless n
+       (error "char-set-ref on cursor that is end-of-char-set?" cursor))
+     (integer->char n))
+    (_
+     (scm-error 'wrong-type-arg "char-set-ref" "Wrong type argument: ~S" 
+               (list cursor)
+               '()))))
+
+(define (char-set-cursor-next cs cursor)
+  "Advance the character set cursor @var{cursor} to the next character in
+the character set @var{cs}.  It is an error if the cursor given
+satisfies @code{end-of-char-set?}."
+  (match cursor
+    (($ <char-set-cursor> cs* range n limit)
+     (unless (eq? cs cs*)
+       (error "charset cursors can only be used with their original charsets"
+              cursor))
+     (unless n
+       (error "char-set-next on cursor that is end-of-char-set?" cursor))
+     (cond
+      ((< n limit)
+       (set-char-set-cursor-n! cursor (1+ n)))
+      (else
+       (let* ((ranges (charset-ranges cs))
+              (len (charset-ranges-len ranges)))
+         (cond
+          ((< (1+ range) len)
+           (set-char-set-cursor-range! cursor (1+ range))
+           (set-char-set-cursor-n! cursor (charset-range-lo cs range))
+           (set-char-set-cursor-limit! cursor (charset-range-hi cs range)))
+          (else
+           (set-char-set-cursor-range! cursor #f)
+           (set-char-set-cursor-n! cursor #f)
+           (set-char-set-cursor-limit! cursor #f))))))
+     cursor)
+    (_
+     (scm-error 'wrong-type-arg "char-set-ref" "Wrong type argument: ~S" 
+               (list cursor)
+               '()))))
+
+(define (end-of-char-set? cursor)
+  "Return @code{#t} if @var{cursor} has reached the end of a character set,
+@code{#f} otherwise."
+  (not (char-set-cursor-range cursor)))
+
 (cond-expand-provide (current-module) '(srfi-14))
 
 ;;; srfi-14.scm ends here

Reply via email to