lloda pushed a commit to branch main in repository guile. commit 79c22b16752e70a9928c58626bd81e6b2f90d365 Author: Rob Browning <r...@defaultvalue.org> AuthorDate: Thu Apr 10 14:08:22 2025 -0500
Add (srfi srfi-207) with read-textual-bytestring; enable tests https://srfi.schemers.org/srfi-207/srfi-207.html Add a (srfi srfi-207) module to integrate the upstream code and provide the remaining elements like the parser which will eventually be moved to and shared with the reader in (ice-9 read). Rewrite the relevant functions to avoid needing list->generator, u8vector-for-each, and u8vector-unfold. * am/bootstrap.am: Add srfi-207 include-related deps. (SOURCES): Add srfi-207.scm. * module/srfi/srfi-207.scm: Add (srfi srfi-207) module. * module/srfi/srfi-207/upstream/bytestrings-impl.scm (hex-string->bytevector): Replace u8vector-unfold with loop. (make-bytestring-generator): Drop list->generator. (write-textual-bytestring): drop u8vector-for-each. * test-suite/Makefile.am: Add srfi-207.test. * test-suite/tests/srfi-207.test: port to (test-suite lib). --- am/bootstrap.am | 6 + module/srfi/srfi-207.scm | 220 +++++++++++++++++++++ module/srfi/srfi-207/upstream/bytestrings-impl.scm | 99 +++++----- test-suite/Makefile.am | 1 + test-suite/tests/srfi-207.test | 155 ++++++++------- 5 files changed, 361 insertions(+), 120 deletions(-) diff --git a/am/bootstrap.am b/am/bootstrap.am index 8faed0934..bec34ee1f 100644 --- a/am/bootstrap.am +++ b/am/bootstrap.am @@ -59,6 +59,11 @@ ice-9/match.go: ice-9/match.scm ice-9/match.upstream.scm ice-9/psyntax-pp.go: ice-9/psyntax.scm ice-9/psyntax-pp.scm $(COMPILE) -o "$@" "$<" +srfi/srfi-207.go: \ + srfi/srfi-207.scm \ + srfi/srfi-207/upstream/base64.scm \ + srfi/srfi-207/upstream/bytestrings-impl.scm + # All sources. We can compile these in any order; the order below is # designed to hopefully result in the lowest total compile time. SOURCES = \ @@ -352,6 +357,7 @@ SOURCES = \ srfi/srfi-171/gnu.scm \ srfi/srfi-171/meta.scm \ srfi/srfi-197.scm \ + srfi/srfi-207.scm \ \ statprof.scm \ \ diff --git a/module/srfi/srfi-207.scm b/module/srfi/srfi-207.scm new file mode 100644 index 000000000..b77016f3c --- /dev/null +++ b/module/srfi/srfi-207.scm @@ -0,0 +1,220 @@ +;;;; SRFI 207: String-notated bytevectors +;;;; +;;;; Copyright (C) 2025 Free Software Foundation, Inc. +;;;; +;;;; This library is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU Lesser General Public License +;;;; as published by the Free Software Foundation, either version 3 of +;;;; the License, or (at your option) any later version. +;;;; +;;;; This library is distributed in the hope that it will be useful, but +;;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; Lesser General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU Lesser General Public +;;;; License along with this program. If not, see +;;;; <http://www.gnu.org/licenses/>. + +;;; Commentary: +;;; +;;; This is an implementation of SRFI 207: String-notated bytevectors. +;;; +;;; Code: + +(define-module (srfi srfi-207) + #:use-module ((ice-9 exceptions) + #:select (&error + define-exception-type + make-exception-with-message + make-exception-with-irritants)) + #:use-module ((rnrs arithmetic bitwise) #:select (bitwise-and bitwise-ior)) + #:use-module ((rnrs bytevectors) + #:select (bytevector->u8-list u8-list->bytevector)) + #:use-module ((rnrs io ports) #:select (string->bytevector)) + #:use-module ((scheme base) + #:select (binary-port? + bytevector + bytevector-copy + bytevector-copy! + bytevector-length + bytevector-u8-ref + bytevector-u8-set! + bytevector? + define-record-type + eof-object + get-output-bytevector + let-values + make-bytevector + open-output-bytevector + read-string + utf8->string + write-bytevector + write-string + write-u8)) + #:use-module ((srfi srfi-1) + #:select (fold list-tabulate fold-right unfold unfold-right)) + #:use-module ((srfi srfi-43) #:select (vector-unfold)) + #:use-module ((srfi srfi-60) #:select (arithmetic-shift bit-field)) + #:export (base64->bytevector + bytestring + bytestring->list + bytestring-break + bytestring-error? + bytestring-index + bytestring-index-right + bytestring-join + bytestring-pad + bytestring-pad-right + bytestring-replace + bytestring-span + bytestring-split + bytestring-trim + bytestring-trim-both + bytestring-trim-right + bytestring<=? + bytestring<? + bytestring>=? + bytestring>? + bytevector->base64 + bytevector->hex-string + hex-string->bytevector + make-bytestring + make-bytestring! + make-bytestring-generator + read-textual-bytestring + write-binary-bytestring + write-textual-bytestring)) + +(cond-expand-provide (current-module) '(srfi-207)) + +;; From the upstream 207.sld library definition +(define-syntax assume + (syntax-rules () + ((_ pred) (unless pred (error "invalid assumption:" (quote pred)))) + ((_ pred msg ...) (unless pred (error msg ...))))) + +(define-exception-type &bytestring-error &error + make-bytestring-error bytestring-error?) + +(define (bytestring-error message . irritants) + (raise-exception (make-exception (make-bytestring-error) + (make-exception-with-message message) + (make-exception-with-irritants irritants)))) + +(include-from-path "srfi/srfi-207/upstream/base64.scm") +(include-from-path "srfi/srfi-207/upstream/bytestrings-impl.scm") + +(define (read-bytestring-content port) + ;; Must use port, not (peek)/(next). + (let ((ch (read-char port))) + (when (eof-object? ch) + (bytestring-error "end of input instead of bytestring opening #\\\"")) + (unless (eqv? ch #\") + (bytestring-error "expected bytestring opening #\\\"" ch))) + (let lp ((out '())) + (let ((ch (read-char port))) + (cond + ((eof-object? ch) + (bytestring-error "unexpected end of input while reading bytestring")) + ((eqv? ch #\") + (list->typed-array 'vu8 1 (reverse! out))) + ((eqv? ch #\\) + (let* ((ch (read-char port))) + (when (eof-object? ch) + (bytestring-error "unexpected end of input within escape sequence")) + (case ch + ((#\a) (lp (cons 7 out))) + ((#\b) (lp (cons 8 out))) + ((#\t) (lp (cons 9 out))) + ((#\n) (lp (cons 10 out))) + ((#\r) (lp (cons 13 out))) + ((#\") (lp (cons 34 out))) + ((#\\) (lp (cons 92 out))) + ((#\|) (lp (cons 124 out))) + ((#\x) + (define (skip-prefix-zeros) + ;; Leave one zero before a ; to handle \x0; + (let ((ch (peek-char port))) + (cond + ((eof-object? ch) ch) + ((char=? ch #\0) + (let ((zero (read-char port))) + (if (char=? (peek-char port) #\;) + (unread-char zero port) + (skip-prefix-zeros))))))) + (define (read-hex which) + (let* ((h (read-char port))) + (when (eof-object? h) + (bytestring-error + (format #f "end of input at ~s bytestring hex escape char" which))) + (case h + ((#\;) h) + ((#\0) 0) + ((#\1) 1) + ((#\2) 2) + ((#\3) 3) + ((#\4) 4) + ((#\5) 5) + ((#\6) 6) + ((#\7) 7) + ((#\8) 8) + ((#\9) 9) + ((#\a #\A) 10) + ((#\b #\B) 11) + ((#\c #\C) 12) + ((#\d #\D) 13) + ((#\e #\E) 14) + ((#\f #\F) 15) + (else + (bytestring-error + (format #f "non-hex ~a character in bytestring hex escape" which) + h))))) + (skip-prefix-zeros) + (let* ((h1 (read-hex "first")) + (h2 (read-hex "second"))) + (if (eqv? h2 #\;) + (lp (cons h1 out)) + (let ((term (read-char port))) + + (unless (char=? term #\;) + (bytestring-error "not bytestring hex escape semicolon" term)) + (lp (cons (+ (* 16 h1) h2) out)))))) + (else ;; newline surrounded by optional interline blanks + (define (intraline? ch) + (and (char-whitespace? ch) (not (char=? ch #\newline)))) + (define (skip-intraline) + (let ((ch (peek-char port))) + (when (and (not (eof-object? ch)) (intraline? ch)) + (read-char port) + (skip-intraline)))) + (cond + ((char=? ch #\newline) (skip-intraline) (lp out)) + ((char-whitespace? ch) + (skip-intraline) + (unless (char=? (read-char port) #\newline) + (bytestring-error "expected newline after backslash and optional spaces" ch)) + (skip-intraline) + (lp out)) + (else + (bytestring-error "unexpected character after bytesstring backslash" ch))))))) + (else + (let ((i (char->integer ch))) + (unless (<= 20 i 127) + (bytestring-error "bytestring char not in valid ASCII range" ch)) + (lp (cons i out)))))))) + +(define read-textual-bytestring + (case-lambda + ((prefix) (read-textual-bytestring prefix (current-input-port))) + ((prefix in) + (unless (boolean? prefix) + (scm-error 'wrong-type-arg "read-textual-bytestring" + "Non-boolean prefix argument: ~s" (list prefix) (list prefix))) + (when prefix + (let ((s (read-string 3 in))) + (cond ((eof-object? s) + (bytestring-error "end of input within bytestring content")) + ((string=? s "#u8") #t) + (else (bytestring-error "invalid bytestring prefix" s))))) + (read-bytestring-content in)))) diff --git a/module/srfi/srfi-207/upstream/bytestrings-impl.scm b/module/srfi/srfi-207/upstream/bytestrings-impl.scm index d30424867..fb8908b60 100644 --- a/module/srfi/srfi-207/upstream/bytestrings-impl.scm +++ b/module/srfi/srfi-207/upstream/bytestrings-impl.scm @@ -120,20 +120,21 @@ (integer->hex-string (bytevector-u8-ref bv i)))))) (define (hex-string->bytevector hex-str) - (assume (string? hex-str)) - (let ((len (string-length hex-str))) - (unless (even? len) + (unless (string? hex-str) + (bytestring-error "invalid hex-str argument" hex-str)) + (let ((sn (string-length hex-str))) + (unless (even? sn) (bytestring-error "incomplete hexadecimal string" hex-str)) - (u8vector-unfold - (lambda (_ i) - (let* ((end (+ i 2)) - (s (substring hex-str i end)) - (n (string->number s 16))) - (if n - (values n end) - (bytestring-error "invalid hexadecimal sequence" s)))) - (truncate-quotient len 2) - 0))) + (let* ((result (make-bytevector (/ sn 2)))) + (do ((si 0 (+ si 2)) + (vi 0 (1+ vi))) + ((= si sn) result) + (let* ((s (substring hex-str si (+ si 2))) + (n (string->number s 16))) + (unless n + (bytestring-error "invalid hexadecimal sequence in hex-str" + s hex-str)) + (bytevector-u8-set! result vi n)))))) (define bytevector->base64 (case-lambda @@ -176,26 +177,36 @@ (lambda (i) (+ i 1)) start)))) -;; Lazily generate the bytestring constructed from objs. -(define (make-bytestring-generator . objs) - (list->generator (flatten-bytestring-segments objs))) - -;; Convert and flatten chars and strings, and flatten bytevectors -;; to yield a flat list of bytes. -(define (flatten-bytestring-segments objs) - (fold-right - (lambda (x res) - (cond ((and (exact-natural? x) (< x 256)) (cons x res)) - ((and (char? x) (char<=? x #\delete)) - (cons (char->integer x) res)) +(define (make-bytestring-generator . args) + "Return a thunk that returns the consecutive bytes, one per +invocation, of the bytevector that (apply bytestring args) would +produce. The elements of args are validated before +make-bytestring-generator returns, and if invalid, an error satisfying +bytestring-error? is raised." + (define (generate) + (if (null? args) + (eof-object) + (let ((x (car args))) + (cond + ((integer? x) + (set! args (cdr args)) + x) + ((char? x) + (set! args (cdr args)) + (char->integer x)) ((bytevector? x) - (append (bytevector->u8-list x) res)) - ((string-ascii? x) - (append (map char->integer (string->list x)) res)) + (set! args (append! (bytevector->u8-list x) (cdr args))) + (generate)) + ((string? x) + (set! args (append! (string->list x) (cdr args))) + (generate)) (else - (bytestring-error "invalid bytestring segment" x)))) - '() - objs)) + (bytestring-error "invalid bytestring segment" x)))))) + (for-each (λ (arg) + (or (valid-bytestring-segment? arg) + (bytestring-error "invalid bytestring segment" arg))) + args) + generate) ;;;; Selection @@ -475,19 +486,19 @@ ((bstring port) (parameterize ((current-output-port port)) (write-string "#u8\"") - (u8vector-for-each - (lambda (b) - (cond ((assv b backslash-codepoints) => - (lambda (p) - (write-char #\\) - (write-char (cdr p)))) - ((and (>= b #x20) (<= b #x7e)) - (write-char (integer->char b))) - (else - (write-string "\\x") - (write-string (number->string b 16)) - (write-char #\;)))) - bstring) + (do ((i 0 (1+ i))) + ((= i (bytevector-length bstring))) + (let ((b (bytevector-u8-ref bstring i))) + (cond ((assv b backslash-codepoints) => + (lambda (p) + (write-char #\\) + (write-char (cdr p)))) + ((and (>= b #x20) (<= b #x7e)) + (write-char (integer->char b))) + (else + (write-string "\\x") + (write-string (number->string b 16)) + (write-char #\;))))) (write-char #\"))))) (define (write-binary-bytestring port . args) diff --git a/test-suite/Makefile.am b/test-suite/Makefile.am index ecf4a3175..492214845 100644 --- a/test-suite/Makefile.am +++ b/test-suite/Makefile.am @@ -162,6 +162,7 @@ SCM_TESTS = tests/00-initial-env.test \ tests/srfi-119.test \ tests/srfi-171.test \ tests/srfi-197.sr64 \ + tests/srfi-207.test \ tests/srfi-4.test \ tests/srfi-9.test \ tests/statprof.test \ diff --git a/test-suite/tests/srfi-207.test b/test-suite/tests/srfi-207.test index b5c55cbf7..8f21b1d73 100644 --- a/test-suite/tests/srfi-207.test +++ b/test-suite/tests/srfi-207.test @@ -1,4 +1,7 @@ +;;;; srfi-207.test --- SRFI 207 test suite -*- scheme -*- + ;;; Copyright (C) 2020 Wolfgang Corcoran-Mathe +;;; Copyright (C) 2025 Free Software Foundation, Inc. ;;; ;;; Permission is hereby granted, free of charge, to any person obtaining a ;;; copy of this software and associated documentation files (the @@ -19,63 +22,65 @@ ;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -(import (scheme base)) -(import (scheme write)) -(import (srfi 207)) -(import (only (srfi 1) list-tabulate every)) - -(cond-expand - ((library (srfi 158)) - (import (only (srfi 158) generator->list))) - (else - (begin - (define (generator->list gen) - (let rec ((x (gen))) - (if (eof-object? x) - '() - (cons x (rec (gen))))))))) - -(cond-expand - ((library (srfi 78)) - (import (srfi 78))) - (else - (begin - (define *tests-failed* 0) - (define-syntax check - (syntax-rules (=>) - ((check expr => expected) - (if (equal? expr expected) - (begin - (display 'expr) - (display " => ") - (display expected) - (display " ; correct") - (newline)) - (begin - (set! *tests-failed* (+ *tests-failed* 1)) - (display "FAILED: for ") - (display 'expr) - (display " expected ") - (display expected) - (display " but got ") - (display expr) - (newline)))))) - (define (check-report) - (if (zero? *tests-failed*) - (begin - (display "All tests passed.") - (newline)) - (begin - (display "TESTS FAILED: ") - (display *tests-failed*) - (newline))))))) +(define-module (srfi-207-test) + #:use-module ((srfi srfi-1) #:select (every list-tabulate)) + #:use-module ((srfi srfi-207) + #:select (base64->bytevector + bytestring + bytestring->list + bytestring-break + bytestring-error? + bytestring-index + bytestring-index-right + bytestring-join + bytestring-pad + bytestring-pad-right + bytestring-replace + bytestring-span + bytestring-split + bytestring-trim + bytestring-trim-both + bytestring-trim-right + bytestring<=? + bytestring<? + bytestring>=? + bytestring>? + bytevector->base64 + bytevector->hex-string + hex-string->bytevector + make-bytestring + make-bytestring! + make-bytestring-generator + read-textual-bytestring + write-binary-bytestring + write-textual-bytestring)) + #:use-module ((srfi srfi-34) #:select (guard)) + #:use-module ((test-suite lib) + #:select (pass-if-equal pass-if-exception with-test-prefix)) + #:use-module ((scheme base) + #:select (bytevector + bytevector-length + bytevector-u8-ref + get-output-bytevector + make-bytevector + open-output-bytevector + utf8->string))) + +;; No srfi-158 +(define (generator->list gen) + (let rec ((x (gen))) + (if (eof-object? x) + '() + (cons x (rec (gen)))))) + +(define-syntax check + (syntax-rules (=>) + ((check expr => expected) + (pass-if-equal expected expr)))) ;;;; Utility -(define (print-header message) - (newline) - (display (string-append ";;; " message)) - (newline)) +(define (print-header message) #t) (define-syntax constantly (syntax-rules () @@ -96,7 +101,9 @@ (syntax-rules () ((_ expr) (guard (condition ((bytestring-error? condition) 'bytestring-error) - (else #f)) + (else + (format (current-error-port) "exception: ~s\n" condition) + #f)) expr)))) ;; Testing shorthand for write-binary-bytestring. @@ -128,16 +135,19 @@ (define test-bstring (bytestring "lorem")) (define homer - (bytestring "The Man, O Muse, informe, who many a way / \ - Wound in his wisedome to his wished stay;")) + (bytestring + (string-append "The Man, O Muse, informe, who many a way / " + "Wound in his wisedome to his wished stay;"))) (define homer64 - "VGhlIE1hbiwgTyBNdXNlLCBpbmZvcm1lLCB3aG8gbWFueSBhIHdheSAvIFdvd\ - W5kIGluIGhpcyB3aXNlZG9tZSB0byBoaXMgd2lzaGVkIHN0YXk7") + (string-append + "VGhlIE1hbiwgTyBNdXNlLCBpbmZvcm1lLCB3aG8gbWFueSBhIHdheSAvIFdvd" + "W5kIGluIGhpcyB3aXNlZG9tZSB0byBoaXMgd2lzaGVkIHN0YXk7")) (define homer64-w - "VGhlIE1hb iwgTyBNdXNlL CBpbmZvcm1lL\nCB3aG8gbWF\tueSBhIH\rdheSAvIFdvd\ - W5kIGluI GhpcyB 3aXNlZ\t\t\nG9tZSB0b yBoaXMgd\t2lzaGVkIHN0YXk7") + (string-append + "VGhlIE1hb iwgTyBNdXNlL CBpbmZvcm1lL\nCB3aG8gbWF\tueSBhIH\rdheSAvIFdvd" + "W5kIGluI GhpcyB 3aXNlZ\t\t\nG9tZSB0b yBoaXMgd\t2lzaGVkIHN0YXk7")) ;;;; Constructors @@ -169,14 +179,14 @@ (check (bytevector->base64 test-bstring) => "bG9yZW0=") (check (bytevector->base64 #u8(#xff #xef #xff)) => "/+//") (check (bytevector->base64 #u8(#xff #xef #xff) "*@") => "@*@@") - (check (equal? (bytevector->base64 homer) homer64) => #t) + (check (bytevector->base64 homer) => homer64) (check (bytevector->base64 #u8(1)) => "AQ==") (check (bytevector->base64 #u8()) => "") (check (base64->bytevector "bG9yZW0=") => test-bstring) (check (base64->bytevector "/+//") => #u8(#xff #xef #xff)) (check (base64->bytevector "@*@@" "*@") => #u8(#xff #xef #xff)) - (check (equal? (base64->bytevector homer64) homer) => #t) - (check (equal? (base64->bytevector homer64-w) homer) => #t) + (check (base64->bytevector homer64) => homer) + (check (base64->bytevector homer64-w) => homer) (check (base64->bytevector "AQ==") => #u8(1)) (check (base64->bytevector "") => #u8()) (check (base64->bytevector "\n\n\n==\t\r\n") => #u8()) @@ -215,8 +225,7 @@ (check (catch-bytestring-error (make-bytestring-generator "λ" #\m #\u)) => 'bytestring-error) (check (catch-bytestring-error (make-bytestring-generator 89 90 300)) - => 'bytestring-error) -) + => 'bytestring-error)) (define (check-selection) (print-header "Running selection tests...") @@ -268,8 +277,7 @@ (check (bytestring-replace bv1 bv2 2 2 0 5) => (bytestring "food food"))) (let ((bv1 (bytestring "food food"))) (check (bytestring-replace bv1 (bytevector) 2 7 0 0) - => (bytestring "food"))) -) + => (bytestring "food")))) (define (check-comparison) (define short-bstring (bytestring "lore")) @@ -292,8 +300,7 @@ (check (bytestring>=? test-bstring short-bstring) => #t) (check (bytestring>=? test-bstring mixed-case-bstring) => #t) (check (bytestring>=? mixed-case-bstring test-bstring) => #f) - (check (bytestring>=? short-bstring test-bstring) => #f) -) + (check (bytestring>=? short-bstring test-bstring) => #f)) (define (check-searching) (define (eq-r? b) (= b #x72)) @@ -425,8 +432,7 @@ (every (lambda (bvec) (equal? bvec (parse-SNB/prefix (%bytestring->SNB bvec)))) test-bstrings) - => #t)) -) + => #t))) (define (check-all) (check-constructor) @@ -436,9 +442,6 @@ (check-comparison) (check-searching) (check-join-and-split) - (check-io) - - (newline) - (check-report)) + (check-io)) (check-all)