pollita Thu Mar 30 00:22:51 2006 UTC Modified files: /php-src/ext/standard file.c /php-src/main php_streams.h /php-src/main/streams streams.c Log: Make php_stream_copy_to_mem() Unicode-aware and update the userspace function file_get_contents() accordingly. Note: the second parameter of file_get_contents() (use_include_path) has been changed to a bitmask "flags" parameter. For the most commonly used values (TRUE, 1) this will continue to function as expected, since the value of FILE_USE_INCLUDE_PATH is (coincidentally) 1. The impact on other values should be noted in the migration6 guide. This change allows file_get_contents() to return either binary file contents (the default) or Unicode-transcoded contents (using the FILE_TEXT flag).
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/file.c?r1=1.432&r2=1.433&diff_format=u Index: php-src/ext/standard/file.c diff -u php-src/ext/standard/file.c:1.432 php-src/ext/standard/file.c:1.433 --- php-src/ext/standard/file.c:1.432 Wed Mar 29 22:52:24 2006 +++ php-src/ext/standard/file.c Thu Mar 30 00:22:51 2006 @@ -21,7 +21,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: file.c,v 1.432 2006/03/29 22:52:24 pollita Exp $ */ +/* $Id: file.c,v 1.433 2006/03/30 00:22:51 pollita Exp $ */ /* Synced with php 3.0 revision 1.218 1999-06-16 [ssb] */ @@ -497,32 +497,32 @@ /* }}} */ -/* {{{ proto string file_get_contents(string filename [, bool use_include_path [, resource context [, long offset [, long maxlen]]]]) +/* {{{ proto string file_get_contents(string filename [, long flags [, resource context [, long offset [, long maxlen]]]]) U Read the entire file into a string */ -/* UTODO: Accept unicode contents -- Maybe? Perhaps a binary fetch leaving the script to icu_ucnv_toUnicode() on its own is best? */ PHP_FUNCTION(file_get_contents) { char *filename; int filename_len; char *contents; + long flags = 0; zend_bool use_include_path = 0; php_stream *stream; int len; long offset = -1; - long maxlen = PHP_STREAM_COPY_ALL; + long maxlen = PHP_STREAM_COPY_ALL, real_maxlen; zval *zcontext = NULL; php_stream_context *context = NULL; /* Parse arguments */ - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|br!ll", - &filename, &filename_len, &use_include_path, &zcontext, &offset, &maxlen) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|lr!ll", + &filename, &filename_len, &flags, &zcontext, &offset, &maxlen) == FAILURE) { return; } context = php_stream_context_from_zval(zcontext, 0); - stream = php_stream_open_wrapper_ex(filename, "rb", - (use_include_path ? USE_PATH : 0) | REPORT_ERRORS, + stream = php_stream_open_wrapper_ex(filename, (flags & PHP_FILE_TEXT) ? 
"rt" : "rb", + ((flags & PHP_FILE_USE_INCLUDE_PATH) ? USE_PATH : 0) | REPORT_ERRORS, NULL, context); if (!stream) { RETURN_FALSE; @@ -533,9 +533,20 @@ RETURN_FALSE; } + if (maxlen <= 0 || stream->readbuf_type == IS_STRING) { + real_maxlen = maxlen; + } else { + /* Allows worst case scenario of each input char being turned into two UChars */ + real_maxlen = (maxlen * 2); + } + /* uses mmap if possible */ - if ((len = php_stream_copy_to_mem(stream, &contents, maxlen, 0)) > 0) { + len = php_stream_copy_to_mem_ex(stream, stream->readbuf_type, &contents, real_maxlen, maxlen, 0); + + if (stream->readbuf_type == IS_STRING && len > 0) { RETVAL_STRINGL(contents, len, 0); + } else if (stream->readbuf_type == IS_UNICODE && len > 0) { + RETVAL_UNICODEL(contents, len, 0); } else if (len == 0) { RETVAL_EMPTY_STRING(); } else { http://cvs.php.net/viewcvs.cgi/php-src/main/php_streams.h?r1=1.109&r2=1.110&diff_format=u Index: php-src/main/php_streams.h diff -u php-src/main/php_streams.h:1.109 php-src/main/php_streams.h:1.110 --- php-src/main/php_streams.h:1.109 Wed Mar 29 01:20:43 2006 +++ php-src/main/php_streams.h Thu Mar 30 00:22:51 2006 @@ -16,7 +16,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: php_streams.h,v 1.109 2006/03/29 01:20:43 pollita Exp $ */ +/* $Id: php_streams.h,v 1.110 2006/03/30 00:22:51 pollita Exp $ */ #ifndef PHP_STREAMS_H #define PHP_STREAMS_H @@ -284,6 +284,7 @@ /* Convert using runtime_encoding if necessary -- return unicode */ PHPAPI size_t _php_stream_read_unicode(php_stream *stream, UChar *buf, int maxlen, int maxchars TSRMLS_DC); #define php_stream_read_unicode(stream, buf, maxlen) _php_stream_read_unicode((stream), (buf), (maxlen), -1 TSRMLS_CC) +#define php_stream_read_unicode_ex(stream, buf, maxlen, maxchars) _php_stream_read_unicode((stream), (buf), (maxlen), (maxchars) TSRMLS_CC) PHPAPI UChar *_php_stream_read_unicode_chars(php_stream *stream, int *pchars TSRMLS_DC); #define 
php_stream_read_unicode_chars(stream, pchars) _php_stream_read_unicode_chars((stream), (pchars) TSRMLS_CC) @@ -443,9 +444,12 @@ /* read all data from stream and put into a buffer. Caller must free buffer when done. * The copy will use mmap if available. */ -PHPAPI size_t _php_stream_copy_to_mem(php_stream *src, char **buf, size_t maxlen, +PHPAPI size_t _php_stream_copy_to_mem_ex(php_stream *src, zend_uchar rettype, void **buf, size_t maxlen, size_t maxchars, int persistent STREAMS_DC TSRMLS_DC); -#define php_stream_copy_to_mem(src, buf, maxlen, persistent) _php_stream_copy_to_mem((src), (buf), (maxlen), (persistent) STREAMS_CC TSRMLS_CC) +#define php_stream_copy_to_mem(src, buf, maxlen, persistent) \ + _php_stream_copy_to_mem_ex((src), IS_STRING, (buf), (maxlen), -1, (persistent) STREAMS_CC TSRMLS_CC) +#define php_stream_copy_to_mem_ex(src, rettype, buf, maxlen, maxchars, persistent) \ + _php_stream_copy_to_mem_ex((src), (rettype), (buf), (maxlen), (maxchars), (persistent) STREAMS_CC TSRMLS_CC) /* output all data from a stream */ PHPAPI size_t _php_stream_passthru(php_stream * src STREAMS_DC TSRMLS_DC); http://cvs.php.net/viewcvs.cgi/php-src/main/streams/streams.c?r1=1.116&r2=1.117&diff_format=u Index: php-src/main/streams/streams.c diff -u php-src/main/streams/streams.c:1.116 php-src/main/streams/streams.c:1.117 --- php-src/main/streams/streams.c:1.116 Wed Mar 29 22:52:24 2006 +++ php-src/main/streams/streams.c Thu Mar 30 00:22:51 2006 @@ -19,7 +19,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: streams.c,v 1.116 2006/03/29 22:52:24 pollita Exp $ */ +/* $Id: streams.c,v 1.117 2006/03/30 00:22:51 pollita Exp $ */ #define _GNU_SOURCE #include "php.h" @@ -1568,19 +1568,24 @@ } -PHPAPI size_t _php_stream_copy_to_mem(php_stream *src, char **buf, size_t maxlen, int persistent STREAMS_DC TSRMLS_DC) +PHPAPI size_t _php_stream_copy_to_mem_ex(php_stream *src, zend_uchar rettype, void **buf, size_t maxlen, size_t maxchars, int 
persistent STREAMS_DC TSRMLS_DC) { size_t ret = 0; - char *ptr; + zstr ptr; size_t len = 0, max_len; int step = CHUNK_SIZE; int min_room = CHUNK_SIZE / 4; php_stream_statbuf ssbuf; - if (buf) { + if (buf) { *buf = NULL; } + if (rettype != src->readbuf_type) { + /* UTODO: Introduce sloppy buffer conversion */ + return 0; + } + if (maxlen == 0) { return 0; } @@ -1590,6 +1595,7 @@ } if (php_stream_mmap_possible(src)) { + /* guarantees src->readbuf_type == IS_STRING */ char *p; size_t mapped; @@ -1600,7 +1606,7 @@ if (*buf) { memcpy(*buf, p, mapped); - (*buf)[mapped] = '\0'; + ((char*)(*buf))[mapped] = 0; } php_stream_mmap_unmap(src); @@ -1610,14 +1616,29 @@ } if (maxlen > 0) { - ptr = *buf = pemalloc_rel_orig(maxlen + 1, persistent); - while ((len < maxlen) & !php_stream_eof(src)) { - ret = php_stream_read(src, ptr, maxlen - len); - len += ret; - ptr += ret; + if (rettype == IS_UNICODE) { + ptr.u = *buf = pemalloc_rel_orig(UBYTES(maxlen + 1), persistent); + while ((len < maxlen) & !php_stream_eof(src)) { + int ulen; + + ret = php_stream_read_unicode_ex(src, ptr.u, maxlen - len, maxchars); + ulen = u_countChar32(ptr.u, ret); + len += ret; + ptr.u += ret; + maxchars -= ret; + } + *(ptr.u) = 0; + return len; + } else { + ptr.s = *buf = pemalloc_rel_orig(maxlen + 1, persistent); + while ((len < maxlen) & !php_stream_eof(src)) { + ret = php_stream_read(src, ptr.s, maxlen - len); + len += ret; + ptr.s += ret; + } + *(ptr.s) = 0; + return len; } - *ptr = '\0'; - return len; } /* avoid many reallocs by allocating a good sized chunk to begin with, if @@ -1632,21 +1653,49 @@ max_len = step; } - ptr = *buf = pemalloc_rel_orig(max_len, persistent); + if (rettype == IS_UNICODE) { + ptr.u = *buf = pemalloc_rel_orig(UBYTES(max_len + 1), persistent); - while((ret = php_stream_read(src, ptr, max_len - len))) { - len += ret; - if (len + min_room >= max_len) { - *buf = perealloc_rel_orig(*buf, max_len + step, persistent); - max_len += step; - ptr = *buf + len; - } else { - ptr += ret; + 
while((ret = php_stream_read_unicode_ex(src, ptr.u, max_len - len, maxchars))) { + int ulen = u_countChar32(ptr.u, ret); + + len += ret; + if (len + min_room >= max_len) { + *buf = perealloc_rel_orig(*buf, UBYTES(max_len + step), persistent); + max_len += step; + ptr.u = ((UChar*)(*buf)) + len; + } else { + ptr.u += ret; + } + maxchars -= ulen; + } + } else { + ptr.s = *buf = pemalloc_rel_orig(max_len + 1, persistent); + + while((ret = php_stream_read(src, ptr.s, max_len - len))) { + len += ret; + if (len + min_room >= max_len) { + *buf = perealloc_rel_orig(*buf, max_len + step, persistent); + max_len += step; + ptr.s = ((char*)(*buf)) + len; + } else { + ptr.s += ret; + } } } + if (len) { - *buf = perealloc_rel_orig(*buf, len + 1, persistent); - (*buf)[len] = '\0'; + if (rettype == IS_UNICODE) { + if ((max_len - len) > (2 * step)) { + *buf = perealloc_rel_orig(*buf, UBYTES(len + 1), persistent); + } + ((UChar*)(*buf))[len] = 0; + } else { + if ((max_len - len) > (2 * step)) { + *buf = perealloc_rel_orig(*buf, len + 1, persistent); + } + ((char*)(*buf))[len] = 0; + } } else { pefree(*buf, persistent); *buf = NULL;
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php