Hello, This is a followup to these threads:
http://thread.gmane.org/gmane.lisp.guile.devel/6549 http://thread.gmane.org/gmane.lisp.guile.devel/7155 (to a lesser extent) The attached patch aims to allow an `scm_c_read ()' call for N bytes to translate into a `read(2)' for N bytes in the case of unbuffered file ports (such as sockets); it also allows the port's buffer to be bypassed when more data is requested than can fit in the buffer. The first patch adds a `read' method to ports, which should read up to N bytes into a caller-supplied buffer. It is meant as a replacement for `fill_input', which fills in the port's buffer regardless of the application request, that is, reads a single byte in the case of unbuffered ports. The change is ABI-compatible since: 1. `scm_t_ptob_descriptor's are always allocated by libguile code, not by the application code, so changing its size doesn't break the ABI; 2. Apart from the added `read' field, the layout of `scm_t_ptob_descriptor' is unchanged, so inlines and macros that refer to it still work; 3. `fill_input' is still honored when provided. Subsequent patches make use of `scm_set_port_read ()' in file and string ports, change `uniform-vector-read!' to use `scm_c_read ()', and add a benchmark for this. The benchmark shows that `uniform-vector-read!' is around 3 orders of magnitude (!) faster on unbuffered file ports with the new method. OK to commit to 1.8 and master? Thanks, Ludovic.
>From 3f9bae2580bca420b20d31e1074a2bab19aa3c09 Mon Sep 17 00:00:00 2001 From: =?utf-8?q?Ludovic=20Court=C3=A8s?= <[EMAIL PROTECTED]> Date: Sat, 31 May 2008 22:24:45 +0200 Subject: [PATCH] Add `scm_set_port_read ()'. --- libguile/ports.c | 120 +++++++++++++++++++++++++++++++++++++++++++++--------- libguile/ports.h | 5 ++- 2 files changed, 104 insertions(+), 21 deletions(-) diff --git a/libguile/ports.c b/libguile/ports.c index b25a7d0..f887ec9 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -83,6 +83,10 @@ #define HAVE_FTRUNCATE 1 #endif +#ifndef SCM_MIN +# define SCM_MIN(A, B) ((A) < (B) ? (A) : (B)) +#endif + /* The port kind table --- a dynamically resized array of port types. */ @@ -162,6 +166,8 @@ scm_make_port_type (char *name, scm_ptobs[scm_numptob].seek = 0; scm_ptobs[scm_numptob].truncate = 0; + scm_ptobs[scm_numptob].read = NULL; + scm_numptob++; } SCM_CRITICAL_SECTION_END; @@ -177,6 +183,15 @@ scm_make_port_type (char *name, } void +scm_set_port_read (scm_t_bits tc, + size_t (* read) (SCM, void *, size_t)) +{ + /* The provided `read' method overrides `fill_input'. */ + scm_ptobs[SCM_TC2PTOBNUM (tc)].fill_input = NULL; + scm_ptobs[SCM_TC2PTOBNUM (tc)].read = read; +} + +void scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM)) { scm_ptobs[SCM_TC2PTOBNUM (tc)].mark = mark; @@ -946,6 +961,7 @@ int scm_fill_input (SCM port) { scm_t_port *pt = SCM_PTAB_ENTRY (port); + const scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)]; if (pt->read_buf == pt->putback_buf) { @@ -957,7 +973,23 @@ scm_fill_input (SCM port) if (pt->read_pos < pt->read_end) return *(pt->read_pos); } - return scm_ptobs[SCM_PTOBNUM (port)].fill_input (port); + + if (ptob->fill_input != NULL) + /* Kept for backward compatibility. */ + return ptob->fill_input (port); + else + { + size_t count; + + count = ptob->read (port, pt->read_buf, pt->read_buf_size); + pt->read_pos = pt->read_buf; + pt->read_end = pt->read_buf + count; + + if (count == 0) + return EOF; + else + return (int) *pt->read_buf; + } } @@ -1015,45 +1047,93 @@ scm_c_read (SCM port, void *buffer, size_t size) { scm_t_port *pt; size_t n_read = 0, n_available; + const scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)]; SCM_VALIDATE_OPINPORT (1, port); pt = SCM_PTAB_ENTRY (port); + ptob = &scm_ptobs[SCM_PTOBNUM (port)]; if (pt->rw_active == SCM_PORT_WRITE) - scm_ptobs[SCM_PTOBNUM (port)].flush (port); + ptob->flush (port); if (pt->rw_random) pt->rw_active = SCM_PORT_READ; if (SCM_READ_BUFFER_EMPTY_P (pt)) { - if (scm_fill_input (port) == EOF) - return 0; + if ((ptob->fill_input != NULL) || (size <= pt->read_buf_size)) + { + /* Fill in PORT's buffer. */ + if (scm_fill_input (port) == EOF) + return 0; + } + else + /* Read directly into BUFFER, bypassing PORT's own buffer. */ + return (ptob->read (port, buffer, size)); } - + n_available = pt->read_end - pt->read_pos; - - while (n_available < size) + + if (ptob->fill_input == NULL) { - memcpy (buffer, pt->read_pos, n_available); - buffer = (char *) buffer + n_available; - pt->read_pos += n_available; - n_read += n_available; - - if (SCM_READ_BUFFER_EMPTY_P (pt)) + /* Use PTOB's `read' method. */ + size_t count; + + count = SCM_MIN (n_available, size); + memcpy (buffer, pt->read_pos, count); + pt->read_pos += count; + n_read += count; + + if (n_read < size) { - if (scm_fill_input (port) == EOF) - return n_read; + /* PORT's buffer is now empty but we haven't yet read SIZE + bytes. */ + size_t remaining = size - n_read; + + pt->read_pos = pt->read_end = pt->read_buf; + + if (remaining >= pt->read_buf_size) + /* Read directly into BUFFER. */ + n_read += ptob->read (port, buffer + n_read, remaining); + else + { + /* The remaining size is less than PORT's buffer size. */ + count = ptob->read (port, pt->read_buf, pt->read_buf_size); + pt->read_end += count; + + memcpy (buffer + n_read, pt->read_buf, SCM_MIN (remaining, count)); + pt->read_pos += SCM_MIN (remaining, count); + n_read += SCM_MIN (remaining, count); + } } - size -= n_available; - n_available = pt->read_end - pt->read_pos; + return n_read; } + else + { + /* For backward compatibility: use the `fill_input' method. */ + while (n_available < size) + { + memcpy (buffer, pt->read_pos, n_available); + buffer = (char *) buffer + n_available; + pt->read_pos += n_available; + n_read += n_available; + + if (SCM_READ_BUFFER_EMPTY_P (pt)) + { + if (scm_fill_input (port) == EOF) + return n_read; + } + + size -= n_available; + n_available = pt->read_end - pt->read_pos; + } - memcpy (buffer, pt->read_pos, size); - pt->read_pos += size; + memcpy (buffer, pt->read_pos, size); + pt->read_pos += size; - return n_read + size; + return n_read + size; + } } #undef FUNC_NAME diff --git a/libguile/ports.h b/libguile/ports.h index 084a555..17bbe09 100644 --- a/libguile/ports.h +++ b/libguile/ports.h @@ -178,12 +178,13 @@ typedef struct scm_t_ptob_descriptor void (*flush) (SCM port); void (*end_input) (SCM port, int offset); - int (*fill_input) (SCM port); + int (*fill_input) (SCM port); /* deprecated: use `read' instead */ int (*input_waiting) (SCM port); off_t (*seek) (SCM port, off_t OFFSET, int WHENCE); void (*truncate) (SCM port, off_t length); + size_t (*read) (SCM port, void *data, size_t size); } scm_t_ptob_descriptor; #define SCM_TC2PTOBNUM(x) (0x0ff & ((x) >> 8)) @@ -205,6 +206,8 @@ SCM_API scm_t_bits scm_make_port_type (char *name, void (*write) (SCM port, const void *data, size_t size)); +SCM_API void scm_set_port_read (scm_t_bits tc, + size_t (* read) (SCM, void *, size_t)); SCM_API void scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM)); SCM_API void scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM)); SCM_API void scm_set_port_print (scm_t_bits tc, -- 1.5.5
>From 406d36e849d2d88452f816e2325c5607250b28dc Mon Sep 17 00:00:00 2001 From: =?utf-8?q?Ludovic=20Court=C3=A8s?= <[EMAIL PROTECTED]> Date: Sat, 31 May 2008 22:26:29 +0200 Subject: [PATCH] Use `scm_set_port_read ()' in file ports. --- libguile/fports.c | 23 ++++++++--------------- 1 files changed, 8 insertions(+), 15 deletions(-) diff --git a/libguile/fports.c b/libguile/fports.c index efbd278..1208836 100644 --- a/libguile/fports.c +++ b/libguile/fports.c @@ -582,29 +582,21 @@ fport_wait_for_input (SCM port) static void fport_flush (SCM port); -/* fill a port's read-buffer with a single read. returns the first - char or EOF if end of file. */ -static int -fport_fill_input (SCM port) +/* Read up to SIZE bytes from PORT into BUFFER. */ +static size_t +fport_read (SCM port, void *buffer, size_t size) { long count; - scm_t_port *pt = SCM_PTAB_ENTRY (port); scm_t_fport *fp = SCM_FSTREAM (port); #ifndef __MINGW32__ fport_wait_for_input (port); #endif /* !__MINGW32__ */ - SCM_SYSCALL (count = read (fp->fdes, pt->read_buf, pt->read_buf_size)); + SCM_SYSCALL (count = read (fp->fdes, buffer, size)); if (count == -1) scm_syserror ("fport_fill_input"); - if (count == 0) - return EOF; - else - { - pt->read_pos = pt->read_buf; - pt->read_end = pt->read_buf + count; - return *pt->read_buf; - } + + return (size_t) count; } static off_t_or_off64_t @@ -906,8 +898,9 @@ fport_free (SCM port) static scm_t_bits scm_make_fptob () { - scm_t_bits tc = scm_make_port_type ("file", fport_fill_input, fport_write); + scm_t_bits tc = scm_make_port_type ("file", NULL, fport_write); + scm_set_port_read (tc, fport_read); scm_set_port_free (tc, fport_free); scm_set_port_print (tc, fport_print); scm_set_port_flush (tc, fport_flush); -- 1.5.5
>From a4e465c07d450566115f43844a042e127cb0eeea Mon Sep 17 00:00:00 2001 From: =?utf-8?q?Ludovic=20Court=C3=A8s?= <[EMAIL PROTECTED]> Date: Sat, 31 May 2008 22:27:46 +0200 Subject: [PATCH] Use `scm_c_read ()' in `uniform-vector-read!'. --- libguile/srfi-4.c | 61 ++++++++++++++++++++--------------------------------- 1 files changed, 23 insertions(+), 38 deletions(-) diff --git a/libguile/srfi-4.c b/libguile/srfi-4.c index 7d22f8b..5bcdcd5 100644 --- a/libguile/srfi-4.c +++ b/libguile/srfi-4.c @@ -1,6 +1,6 @@ /* srfi-4.c --- Uniform numeric vector datatypes. * - * Copyright (C) 2001, 2004, 2006 Free Software Foundation, Inc. + * Copyright (C) 2001, 2004, 2006, 2008 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -824,6 +824,17 @@ SCM_DEFINE (scm_uniform_vector_length, "uniform-vector-length", 1, 0, 0, } #undef FUNC_NAME +/* Release the array handle pointed to by ARG. */ +static void +release_array (void *arg) +{ + scm_t_array_handle *handle; + + handle = (scm_t_array_handle *) arg; + scm_array_handle_release (handle); +} + + SCM_DEFINE (scm_uniform_vector_read_x, "uniform-vector-read!", 1, 3, 0, (SCM uvec, SCM port_or_fd, SCM start, SCM end), "Fill the elements of @var{uvec} by reading\n" @@ -846,7 +857,7 @@ SCM_DEFINE (scm_uniform_vector_read_x, "uniform-vector-read!", 1, 3, 0, #define FUNC_NAME s_scm_uniform_vector_read_x { scm_t_array_handle handle; - size_t vlen, sz, ans; + size_t vlen, sz, ans, bytes_read; ssize_t inc; size_t cstart, cend; size_t remaining, off; @@ -886,52 +897,26 @@ SCM_DEFINE (scm_uniform_vector_read_x, "uniform-vector-read!", 1, 3, 0, if (SCM_NIMP (port_or_fd)) { - scm_t_port *pt = SCM_PTAB_ENTRY (port_or_fd); + scm_dynwind_begin (0); + scm_dynwind_unwind_handler (release_array, &handle, 0); - if (pt->rw_active == SCM_PORT_WRITE) - scm_flush (port_or_fd); + bytes_read = scm_c_read (port_or_fd, base + off, remaining); - ans = cend - cstart; - while (remaining > 0) - { - if (pt->read_pos < pt->read_end) - { - size_t to_copy = min (pt->read_end - pt->read_pos, - remaining); - - memcpy (base + off, pt->read_pos, to_copy); - pt->read_pos += to_copy; - remaining -= to_copy; - off += to_copy; - } - else - { - if (scm_fill_input (port_or_fd) == EOF) - { - if (remaining % sz != 0) - SCM_MISC_ERROR ("unexpected EOF", SCM_EOL); - ans -= remaining / sz; - break; - } - } - } - - if (pt->rw_random) - pt->rw_active = SCM_PORT_READ; + scm_dynwind_end (); } else /* file descriptor. */ { int fd = scm_to_int (port_or_fd); - int n; - SCM_SYSCALL (n = read (fd, base + off, remaining)); - if (n == -1) + SCM_SYSCALL (bytes_read = read (fd, base + off, remaining)); + if (bytes_read == -1) SCM_SYSERROR; - if (n % sz != 0) - SCM_MISC_ERROR ("unexpected EOF", SCM_EOL); - ans = n / sz; } + if (bytes_read % sz != 0) + SCM_MISC_ERROR ("unexpected EOF", SCM_EOL); + ans = bytes_read / sz; + scm_array_handle_release (&handle); return scm_from_size_t (ans); -- 1.5.5
>From 9ff77d3b9fe8384841743c4e7a3ef5b33d9a83b5 Mon Sep 17 00:00:00 2001 From: =?utf-8?q?Ludovic=20Court=C3=A8s?= <[EMAIL PROTECTED]> Date: Sun, 1 Jun 2008 19:21:35 +0200 Subject: [PATCH] Use `scm_set_port_read ()' in string ports. --- libguile/strports.c | 27 ++++++++++++++++++--------- 1 files changed, 18 insertions(+), 9 deletions(-) diff --git a/libguile/strports.c b/libguile/strports.c index 8659ccf..f6fb24b 100644 --- a/libguile/strports.c +++ b/libguile/strports.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995,1996,1998,1999,2000,2001,2002, 2003, 2005, 2006 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1998,1999,2000,2001,2002, 2003, 2005, 2006, 2008 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -45,6 +45,10 @@ #include <string.h> #endif +#ifndef SCM_MIN +# define SCM_MIN(A, B) ((A) < (B) ? (A) : (B)) +#endif + /* {Ports - string ports} @@ -93,15 +97,19 @@ scm_t_bits scm_tc16_strport; -static int -stfill_buffer (SCM port) +/* Read up to SIZE bytes from PORT into BUFFER. */ +static size_t +st_read (SCM port, void *buffer, size_t size) { + size_t available, count; scm_t_port *pt = SCM_PTAB_ENTRY (port); - - if (pt->read_pos >= pt->read_end) - return EOF; - else - return scm_return_first_int (*pt->read_pos, port); + + available = pt->read_end - pt->read_pos; + count = SCM_MIN (available, size); + + memcpy (buffer, pt->read_pos, count); + + return count; } /* change the size of a port's string to new_size. this doesn't @@ -538,8 +546,9 @@ scm_eval_string (SCM string) static scm_t_bits scm_make_stptob () { - scm_t_bits tc = scm_make_port_type ("string", stfill_buffer, st_write); + scm_t_bits tc = scm_make_port_type ("string", NULL, st_write); + scm_set_port_read (tc, st_read); scm_set_port_mark (tc, scm_markstream); scm_set_port_end_input (tc, st_end_input); scm_set_port_flush (tc, st_flush); -- 1.5.5
>From a627bb3639d2a1cffccddf55de679c129ee0cda1 Mon Sep 17 00:00:00 2001 From: =?utf-8?q?Ludovic=20Court=C3=A8s?= <[EMAIL PROTECTED]> Date: Sun, 1 Jun 2008 19:31:36 +0200 Subject: [PATCH] Add `uniform-vector-read!' benchmark. --- benchmark-suite/Makefile.am | 11 ++-- benchmark-suite/benchmarks/uniform-vector-read.bm | 53 +++++++++++++++++++++ 2 files changed, 59 insertions(+), 5 deletions(-) create mode 100644 benchmark-suite/benchmarks/uniform-vector-read.bm diff --git a/benchmark-suite/Makefile.am b/benchmark-suite/Makefile.am index a8f4719..3993faf 100644 --- a/benchmark-suite/Makefile.am +++ b/benchmark-suite/Makefile.am @@ -1,7 +1,8 @@ -SCM_BENCHMARKS = benchmarks/0-reference.bm \ - benchmarks/continuations.bm \ - benchmarks/if.bm \ - benchmarks/logand.bm \ - benchmarks/read.bm +SCM_BENCHMARKS = benchmarks/0-reference.bm \ + benchmarks/continuations.bm \ + benchmarks/if.bm \ + benchmarks/logand.bm \ + benchmarks/read.bm \ + benchmarks/uniform-vector-read.bm EXTRA_DIST = guile-benchmark lib.scm $(SCM_BENCHMARKS) diff --git a/benchmark-suite/benchmarks/uniform-vector-read.bm b/benchmark-suite/benchmarks/uniform-vector-read.bm new file mode 100644 index 0000000..d288f0b --- /dev/null +++ b/benchmark-suite/benchmarks/uniform-vector-read.bm @@ -0,0 +1,53 @@ +;;; uniform-vector-read.bm --- Exercise binary I/O primitives. -*- Scheme -*- +;;; +;;; Copyright (C) 2008 Free Software Foundation, Inc. +;;; +;;; This program is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 2, or (at your option) +;;; any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this software; see the file COPYING. If not, write to +;;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +;;; Boston, MA 02110-1301 USA + +(define-module (benchmarks uniform-vector-read) + :use-module (benchmark-suite lib) + :use-module (srfi srfi-4)) + +(define file-name + (tmpnam)) + +(define %buffer-size + 7777) + +(define buf + (make-u8vector %buffer-size)) + +(define str + (make-string %buffer-size)) + + +(with-benchmark-prefix "uniform-vector-read!" + + (benchmark "uniform-vector-write" 500 + (let ((output (open-output-file file-name))) + (uniform-vector-write buf output) + (close output))) + + (benchmark "uniform-vector-read!" 500 + (let ((input (open-input-file file-name))) + (setvbuf input _IONBF) + (uniform-vector-read! buf input) + (close input))) + + (benchmark "string port" 5000 + (let ((input (open-input-string str))) + (uniform-vector-read! buf input) + (close input)))) -- 1.5.5