mike121 pushed a commit to branch wip-mingw-2025
in repository guile.

commit 9bb8793a6cea35969fefab8fd535fcab16219e5e
Author: Michael Gran <spk...@yahoo.com>
AuthorDate: Sun Jun 25 08:16:34 2023 -0700

    Allow read-line to handle "\r\n" as a line terminator
    
    Adds CRLF as a line ending. %read-line will return
    these. In the case of CRLF, %read-line will return a string "\r\n"
    as the line ending.
    
    * libguile/rdelim.c (scm_read_line): handle CRLF line delimiter
    * module/ice-9/suspendable-ports.scm (%read-line): modify to handle CRLF
        line delimiter
      (read-line): use modified %read-line
    * test-suite/tests/rdelim.test ("two lines, split, CRLF"): new test
       ("two long lines, split, CRLF"): new test
    * doc/ref/api-io.texi: update read-line documentation
---
 NEWS                               | 11 +++++++++
 doc/ref/api-io.texi                |  6 +++--
 libguile/rdelim.c                  | 38 +++++++++++++++++++++++++------
 module/ice-9/suspendable-ports.scm | 46 +++++++++++++++++++++++++++++++++-----
 test-suite/tests/rdelim.test       | 22 ++++++++++++++++++
 5 files changed, 109 insertions(+), 14 deletions(-)

diff --git a/NEWS b/NEWS
index dfcd4fe0d..3d13bda74 100644
--- a/NEWS
+++ b/NEWS
@@ -109,6 +109,17 @@ eval-in-sandbox is modified so that #:time-limit accepts 
#f to disable
 the time limit.  Systems without SIGALRM can use eval-in-sandbox if the
 time limit is disabled.
 
+** 'read-line' detects '\r\n' as a line delimiter
+
+read-line is updated to detect a carriage return / line feed pair as a
+line delimiter.  When CRLF is detected at the end of a line, the
+returned line delimiter is "\r\n".  Previously, when a line terminated
+with '\r\n' was read, the return character was appended to the string,
+and the returned delimiter was '\n'.
+
+Carriage return / line feed is a common line deliminator for Windows
+text files.
+
 * Bug fixes
 
 ** `basename` now checks the suffix against the base name, not the full path
diff --git a/doc/ref/api-io.texi b/doc/ref/api-io.texi
index 1bc5c0836..70e30dc48 100644
--- a/doc/ref/api-io.texi
+++ b/doc/ref/api-io.texi
@@ -894,8 +894,10 @@ a specified set of characters.
 
 @deffn {Scheme Procedure} read-line [port] [handle-delim]
 Return a line of text from @var{port} if specified, otherwise from the
-value returned by @code{(current-input-port)}.  Under Unix, a line of text
-is terminated by the first end-of-line character or by end-of-file.
+value returned by @code{(current-input-port)}.  A line of
+text is terminated by a single linefeed character, a return followed
+by a linefeed, or by end-of-file.
+
 
 If @var{handle-delim} is specified, it should be one of the following
 symbols:
diff --git a/libguile/rdelim.c b/libguile/rdelim.c
index 9d41712dd..c0ae0f377 100644
--- a/libguile/rdelim.c
+++ b/libguile/rdelim.c
@@ -127,6 +127,7 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
   SCM line, strings, result;
   scm_t_wchar buf[LINE_BUFFER_SIZE], delim;
   size_t index;
+  int cr = 0;
 
   if (SCM_UNBNDP (port))
     port = scm_current_input_port ();
@@ -152,12 +153,22 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
          buf[index] = scm_getc (port);
          switch (buf[index])
            {
-           case EOF:
            case '\n':
              delim = buf[index];
-             break;
+              break;
+
+            case EOF:
+              cr = 0;
+              delim = buf[index];
+              break;
+
+            case '\r':
+              cr = 1;
+              index ++;
+              break;
 
            default:
+              cr = 0;
              index++;
            }
        }
@@ -165,20 +176,33 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
   while (delim == 0);
 
   if (SCM_LIKELY (scm_is_false (strings)))
-    /* The fast path.  */
-    line = scm_from_utf32_stringn (buf, index);
+    {
+      /* The fast path.  */
+      if (cr)
+        line = scm_from_utf32_stringn (buf, index - 1);
+      else
+        line = scm_from_utf32_stringn (buf, index);
+    }
   else
     {
       /* Aggregate the intermediary results.  */
-      strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
+      if (cr)
+        strings = scm_cons (scm_from_utf32_stringn (buf, index - 1), strings);
+      else
+        strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
       line = scm_string_concatenate (scm_reverse (strings));
     }
 
   if (delim == EOF && scm_i_string_length (line) == 0)
     result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL);
   else
-    result = scm_cons (line,
-                      delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
+    {
+      if (cr)
+        result = scm_cons (line, scm_from_latin1_string("\r\n"));
+      else
+        result = scm_cons (line,
+                           delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
+    }
 
   return result;
 #undef LINE_BUFFER_SIZE
diff --git a/module/ice-9/suspendable-ports.scm 
b/module/ice-9/suspendable-ports.scm
index 720f10c0f..6645bd874 100644
--- a/module/ice-9/suspendable-ports.scm
+++ b/module/ice-9/suspendable-ports.scm
@@ -1,5 +1,5 @@
 ;;; Ports, implemented in Scheme
-;;; Copyright (C) 2016, 2019 Free Software Foundation, Inc.
+;;; Copyright (C) 2016, 2018, 2019 Free Software Foundation, Inc.
 ;;;
 ;;; This library is free software: you can redistribute it and/or modify
 ;;; it under the terms of the GNU Lesser General Public License as
@@ -691,12 +691,48 @@
                          (or (eqv? char (string-ref delims i))
                              (lp (1+ i)))))))))))
 
+(define* (%read-line port)
+  (let* ((return-flag-delim-and-chars
+          (let loop ((cr #f)
+                     (chars '())
+                     (c (read-char port)))
+            (cond
+             ((eof-object? c)
+              (list #f c chars))
+             ((char=? c #\newline)
+              (list cr c chars))
+             ((char=? c #\return)
+              (loop #t (cons c chars) (read-char port)))
+             (else
+              (loop #f (cons c chars) (read-char port))))))
+         (return-flag (car return-flag-delim-and-chars))
+         (delim (cadr return-flag-delim-and-chars))
+         (chars (caddr return-flag-delim-and-chars)))
+
+    (if (and (eof-object? delim)
+             (null? chars))
+        (cons the-eof-object the-eof-object)
+        ;; Else
+        (if return-flag
+            (cons (list->string (reverse (cdr chars))) "\r\n")
+            (cons (list->string (reverse chars)) delim)))))
+
 (define* (read-line #:optional (port (current-input-port))
                     (handle-delim 'trim))
-  (read-delimited "\n" port handle-delim))
-
-(define* (%read-line port)
-  (read-line port 'split))
+  (let* ((line/delim   (%read-line port))
+        (line          (car line/delim))
+        (delim         (cdr line/delim)))
+    (case handle-delim
+      ((trim) line)
+      ((split) line/delim)
+      ((concat) (if (and (string? line) (char? delim))
+                   (string-append line (string delim))
+                   line))
+      ((peek) (if (char? delim)
+                 (unread-char delim port))
+             line)
+      (else
+       (error "unexpected handle-delim value: " handle-delim)))))
 
 (define* (put-string port str #:optional (start 0)
                      (count (- (string-length str) start)))
diff --git a/test-suite/tests/rdelim.test b/test-suite/tests/rdelim.test
index ad44278d2..80ea86e76 100644
--- a/test-suite/tests/rdelim.test
+++ b/test-suite/tests/rdelim.test
@@ -63,6 +63,28 @@
                          (read-line p 'split)))
            (eof-object? (read-line p)))))
 
+  (pass-if "two lines, split, CRLF"
+    (let* ((s "foo\r\nbar\r\n")
+           (p (open-input-string s)))
+      (and (equal? '(("foo" . "\r\n")
+                     ("bar" . "\r\n"))
+                   (list (read-line p 'split)
+                         (read-line p 'split)))
+           (eof-object? (read-line p)))))
+
+  (pass-if "two long lines, split, CRLF"
+    ;; Must be longer than 256 codepoints
+    (let* ((text0 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
+           (text1 (string-append text0 text0 text0 text0 text0))
+           (text2 (string-append text1 "\r\n" text1 "\r\n")))
+      (let* ((s text2)
+             (p (open-input-string s)))
+        (and (equal? `((,text1 . "\r\n")
+                       (,text1 . "\r\n"))
+                     (list (read-line p 'split)
+                           (read-line p 'split)))
+             (eof-object? (read-line p))))))
+
   (pass-if "two Greek lines, trim"
     (let* ((s "λαμβδα\nμυ\n")
            (p (open-input-string s)))

Reply via email to