wez Sun Oct 13 18:52:33 2002 EDT Modified files: /php4/main streams.c Log: Fix a nasty nasty bug: When not enough data to satisfy a read was found in the buffer, fgets modifies the buf pointer to point to the position to store the next chunk. It then returned the modified buf pointer, instead of a pointer to the start of the buffer. Also added some infrastructure for making fgets grow the buffer on-demand to the correct line-size. Since streams uses reasonable chunk sizes, the performance of the realloc's should be pretty good; in the best case, the line is already found completely in the buffer, so the returned buffer will be allocated to precisely the correct size. In the worst case, where the buffer only contains part of the line, we get a realloc per buffer fill. The reallocs are either the size of the remainder of the line, or the chunk_size (if the buffer sill does not contain a complete line). Each realloc adds an extra byte for a NUL terminator. I think this will perform quite well using the default chunk size of 8K. Index: php4/main/streams.c diff -u php4/main/streams.c:1.101 php4/main/streams.c:1.102 --- php4/main/streams.c:1.101 Fri Oct 11 22:56:34 2002 +++ php4/main/streams.c Sun Oct 13 18:52:33 2002 @@ -20,7 +20,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: streams.c,v 1.101 2002/10/12 02:56:34 wez Exp $ */ +/* $Id: streams.c,v 1.102 2002/10/13 22:52:33 wez Exp $ */ #define _GNU_SOURCE #include "php.h" @@ -668,12 +668,20 @@ return eol; } +/* If buf == NULL, the buffer will be allocated automatically and will be of an + * appropriate length to hold the line, regardless of the line length, memory + * permitting */ PHPAPI char *_php_stream_gets(php_stream *stream, char *buf, size_t maxlen TSRMLS_DC) { size_t avail = 0; int did_copy = 0; - - if (maxlen == 0) + size_t current_buf_size = 0; + int grow_mode = 0; + char *bufstart = buf; + + if (buf == NULL) + grow_mode = 1; + else if (maxlen == 0) return NULL; /* @@ -708,9 +716,21 @@ cpysz = avail; } - if (cpysz >= maxlen - 1) { - cpysz = maxlen - 1; - done = 1; + if (grow_mode) { + /* allow room for a NUL. If this realloc is really a +realloc + * (ie: second time around), we get an extra byte. In +most + * cases, with the default chunk size of 8K, we will +only + * incur that overhead once. When people have lines +longer + * than 8K, we waste 1 byte per additional 8K or so. + * That seems acceptable to me, to avoid making this +code + * hard to follow */ + bufstart = erealloc(bufstart, current_buf_size + cpysz ++ 1); + current_buf_size += cpysz + 1; + } else { + if (cpysz >= maxlen - 1) { + cpysz = maxlen - 1; + done = 1; + } } memcpy(buf, readptr, cpysz); @@ -728,9 +748,15 @@ break; } else { /* XXX: Should be fine to always read chunk_size */ - size_t toread = maxlen - 1; - if (toread > stream->chunk_size) + size_t toread; + + if (grow_mode) { toread = stream->chunk_size; + } else { + toread = maxlen - 1; + if (toread > stream->chunk_size) + toread = stream->chunk_size; + } php_stream_fill_read_buffer(stream, toread TSRMLS_CC); @@ -745,7 +771,7 @@ buf[0] = '\0'; - return buf; + return bufstart; } PHPAPI int _php_stream_flush(php_stream *stream, int closing TSRMLS_DC)
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php