pollita         Thu Mar 30 00:22:51 2006 UTC

  Modified files:              
    /php-src/ext/standard       file.c 
    /php-src/main       php_streams.h 
    /php-src/main/streams       streams.c 
  Log:
  Make php_stream_copy_to_mem() Unicode-aware and
  update the userspace function file_get_contents() accordingly.
  
  Note: the second parameter of file_get_contents() (use_include_path) has
  been changed to a bitmask "flags" parameter instead.
    
  For the most commonly used values (TRUE, 1) this will continue functioning
  as expected since the value of FILE_USE_INCLUDE_PATH is (coincidentally) 1.
  The impact on other values (e.g. 2, which was previously treated as TRUE but
  does not have the FILE_USE_INCLUDE_PATH bit set) should be noted in the
  migration6 guide.
    
  This change allows file_get_contents() to return binary file contents
  (the default) or Unicode-transcoded contents (using the FILE_TEXT flag).
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/file.c?r1=1.432&r2=1.433&diff_format=u
Index: php-src/ext/standard/file.c
diff -u php-src/ext/standard/file.c:1.432 php-src/ext/standard/file.c:1.433
--- php-src/ext/standard/file.c:1.432   Wed Mar 29 22:52:24 2006
+++ php-src/ext/standard/file.c Thu Mar 30 00:22:51 2006
@@ -21,7 +21,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: file.c,v 1.432 2006/03/29 22:52:24 pollita Exp $ */
+/* $Id: file.c,v 1.433 2006/03/30 00:22:51 pollita Exp $ */
 
 /* Synced with php 3.0 revision 1.218 1999-06-16 [ssb] */
 
@@ -497,32 +497,32 @@
 
 /* }}} */
 
-/* {{{ proto string file_get_contents(string filename [, bool use_include_path 
[, resource context [, long offset [, long maxlen]]]])
+/* {{{ proto string file_get_contents(string filename [, long flags [, 
resource context [, long offset [, long maxlen]]]]) U
    Read the entire file into a string */
-/* UTODO: Accept unicode contents -- Maybe? Perhaps a binary fetch leaving the 
script to icu_ucnv_toUnicode() on its own is best? */
 PHP_FUNCTION(file_get_contents)
 {
        char *filename;
        int filename_len;
        char *contents;
+       long flags = 0;
        zend_bool use_include_path = 0;
        php_stream *stream;
        int len;
        long offset = -1;
-       long maxlen = PHP_STREAM_COPY_ALL;
+       long maxlen = PHP_STREAM_COPY_ALL, real_maxlen;
        zval *zcontext = NULL;
        php_stream_context *context = NULL;
 
        /* Parse arguments */
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|br!ll",
-                                 &filename, &filename_len, &use_include_path, 
&zcontext, &offset, &maxlen) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|lr!ll",
+                                 &filename, &filename_len, &flags, &zcontext, 
&offset, &maxlen) == FAILURE) {
                return;
        }
 
        context = php_stream_context_from_zval(zcontext, 0);
 
-       stream = php_stream_open_wrapper_ex(filename, "rb", 
-                               (use_include_path ? USE_PATH : 0) | 
REPORT_ERRORS,
+       stream = php_stream_open_wrapper_ex(filename, (flags & PHP_FILE_TEXT) ? 
"rt" : "rb", 
+                               ((flags & PHP_FILE_USE_INCLUDE_PATH) ? USE_PATH 
: 0) | REPORT_ERRORS,
                                NULL, context);
        if (!stream) {
                RETURN_FALSE;
@@ -533,9 +533,20 @@
                RETURN_FALSE;
        }
 
+       if (maxlen <= 0 || stream->readbuf_type == IS_STRING) {
+               real_maxlen = maxlen;
+       } else {
+               /* Allows worst case scenario of each input char being turned 
into two UChars */
+               real_maxlen = (maxlen * 2);
+       }
+
        /* uses mmap if possible */
-       if ((len = php_stream_copy_to_mem(stream, &contents, maxlen, 0)) > 0) {
+       len = php_stream_copy_to_mem_ex(stream, stream->readbuf_type, 
&contents, real_maxlen, maxlen, 0);
+
+       if (stream->readbuf_type == IS_STRING && len > 0) {
                RETVAL_STRINGL(contents, len, 0);
+       } else if (stream->readbuf_type == IS_UNICODE && len > 0) {
+               RETVAL_UNICODEL(contents, len, 0);
        } else if (len == 0) {
                RETVAL_EMPTY_STRING();
        } else {
http://cvs.php.net/viewcvs.cgi/php-src/main/php_streams.h?r1=1.109&r2=1.110&diff_format=u
Index: php-src/main/php_streams.h
diff -u php-src/main/php_streams.h:1.109 php-src/main/php_streams.h:1.110
--- php-src/main/php_streams.h:1.109    Wed Mar 29 01:20:43 2006
+++ php-src/main/php_streams.h  Thu Mar 30 00:22:51 2006
@@ -16,7 +16,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: php_streams.h,v 1.109 2006/03/29 01:20:43 pollita Exp $ */
+/* $Id: php_streams.h,v 1.110 2006/03/30 00:22:51 pollita Exp $ */
 
 #ifndef PHP_STREAMS_H
 #define PHP_STREAMS_H
@@ -284,6 +284,7 @@
 /* Convert using runtime_encoding if necessary -- return unicode */
 PHPAPI size_t _php_stream_read_unicode(php_stream *stream, UChar *buf, int 
maxlen, int maxchars TSRMLS_DC);
 #define php_stream_read_unicode(stream, buf, maxlen)   
_php_stream_read_unicode((stream), (buf), (maxlen), -1 TSRMLS_CC)
+#define php_stream_read_unicode_ex(stream, buf, maxlen, maxchars)      
_php_stream_read_unicode((stream), (buf), (maxlen), (maxchars) TSRMLS_CC)
 
 PHPAPI UChar *_php_stream_read_unicode_chars(php_stream *stream, int *pchars 
TSRMLS_DC);
 #define  php_stream_read_unicode_chars(stream, pchars) 
_php_stream_read_unicode_chars((stream), (pchars) TSRMLS_CC)
@@ -443,9 +444,12 @@
 
 /* read all data from stream and put into a buffer. Caller must free buffer 
when done.
  * The copy will use mmap if available. */
-PHPAPI size_t _php_stream_copy_to_mem(php_stream *src, char **buf, size_t 
maxlen,
+PHPAPI size_t _php_stream_copy_to_mem_ex(php_stream *src, zend_uchar rettype, 
void **buf, size_t maxlen, size_t maxchars,
                int persistent STREAMS_DC TSRMLS_DC);
-#define php_stream_copy_to_mem(src, buf, maxlen, persistent) 
_php_stream_copy_to_mem((src), (buf), (maxlen), (persistent) STREAMS_CC 
TSRMLS_CC)
+#define php_stream_copy_to_mem(src, buf, maxlen, persistent) \
+               _php_stream_copy_to_mem_ex((src), IS_STRING, (buf), (maxlen), 
-1, (persistent) STREAMS_CC TSRMLS_CC)
+#define php_stream_copy_to_mem_ex(src, rettype, buf, maxlen, maxchars, 
persistent) \
+               _php_stream_copy_to_mem_ex((src), (rettype), (buf), (maxlen), 
(maxchars), (persistent) STREAMS_CC TSRMLS_CC)
 
 /* output all data from a stream */
 PHPAPI size_t _php_stream_passthru(php_stream * src STREAMS_DC TSRMLS_DC);
http://cvs.php.net/viewcvs.cgi/php-src/main/streams/streams.c?r1=1.116&r2=1.117&diff_format=u
Index: php-src/main/streams/streams.c
diff -u php-src/main/streams/streams.c:1.116 
php-src/main/streams/streams.c:1.117
--- php-src/main/streams/streams.c:1.116        Wed Mar 29 22:52:24 2006
+++ php-src/main/streams/streams.c      Thu Mar 30 00:22:51 2006
@@ -19,7 +19,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: streams.c,v 1.116 2006/03/29 22:52:24 pollita Exp $ */
+/* $Id: streams.c,v 1.117 2006/03/30 00:22:51 pollita Exp $ */
 
 #define _GNU_SOURCE
 #include "php.h"
@@ -1568,19 +1568,24 @@
 }
 
 
-PHPAPI size_t _php_stream_copy_to_mem(php_stream *src, char **buf, size_t 
maxlen, int persistent STREAMS_DC TSRMLS_DC)
+PHPAPI size_t _php_stream_copy_to_mem_ex(php_stream *src, zend_uchar rettype, 
void **buf, size_t maxlen, size_t maxchars, int persistent STREAMS_DC TSRMLS_DC)
 {
        size_t ret = 0;
-       char *ptr;
+       zstr ptr;
        size_t len = 0, max_len;
        int step = CHUNK_SIZE;
        int min_room = CHUNK_SIZE / 4;
        php_stream_statbuf ssbuf;
 
-       if (buf) { 
+       if (buf) {
                *buf = NULL;
        }
 
+       if (rettype != src->readbuf_type) {
+               /* UTODO: Introduce sloppy buffer conversion */
+               return 0;
+       }
+
        if (maxlen == 0) { 
                return 0;
        }
@@ -1590,6 +1595,7 @@
        }
 
        if (php_stream_mmap_possible(src)) {
+               /* guarantees src->readbuf_type == IS_STRING */
                char *p;
                size_t mapped;
 
@@ -1600,7 +1606,7 @@
 
                        if (*buf) {
                                memcpy(*buf, p, mapped);
-                               (*buf)[mapped] = '\0';
+                               ((char*)(*buf))[mapped] = 0;
                        }
 
                        php_stream_mmap_unmap(src);
@@ -1610,14 +1616,29 @@
        }
 
        if (maxlen > 0) {
-               ptr = *buf = pemalloc_rel_orig(maxlen + 1, persistent);
-               while ((len < maxlen) & !php_stream_eof(src)) {
-                       ret = php_stream_read(src, ptr, maxlen - len);
-                       len += ret;
-                       ptr += ret;
+               if (rettype == IS_UNICODE) {
+                       ptr.u = *buf = pemalloc_rel_orig(UBYTES(maxlen + 1), 
persistent);
+                       while ((len < maxlen) & !php_stream_eof(src)) {
+                               int ulen;
+
+                               ret = php_stream_read_unicode_ex(src, ptr.u, 
maxlen - len, maxchars);
+                               ulen = u_countChar32(ptr.u, ret);
+                               len += ret;
+                               ptr.u += ret;
+                               maxchars -= ret;
+                       }
+                       *(ptr.u) = 0;
+                       return len;
+               } else {
+                       ptr.s = *buf = pemalloc_rel_orig(maxlen + 1, 
persistent);
+                       while ((len < maxlen) & !php_stream_eof(src)) {
+                               ret = php_stream_read(src, ptr.s, maxlen - len);
+                               len += ret;
+                               ptr.s += ret;
+                       }
+                       *(ptr.s) = 0;
+                       return len;
                }
-               *ptr = '\0';
-               return len;
        }
 
        /* avoid many reallocs by allocating a good sized chunk to begin with, 
if
@@ -1632,21 +1653,49 @@
                max_len = step;
        }
 
-       ptr = *buf = pemalloc_rel_orig(max_len, persistent);
+       if (rettype == IS_UNICODE) {
+               ptr.u = *buf = pemalloc_rel_orig(UBYTES(max_len + 1), 
persistent);
 
-       while((ret = php_stream_read(src, ptr, max_len - len))) {
-               len += ret;
-               if (len + min_room >= max_len) {
-                       *buf = perealloc_rel_orig(*buf, max_len + step, 
persistent);
-                       max_len += step;
-                       ptr = *buf + len;
-               } else {
-                       ptr += ret;
+               while((ret = php_stream_read_unicode_ex(src, ptr.u, max_len - 
len, maxchars)))  {
+                       int ulen = u_countChar32(ptr.u, ret);
+
+                       len += ret;
+                       if (len + min_room >= max_len) {
+                               *buf = perealloc_rel_orig(*buf, UBYTES(max_len 
+ step), persistent);
+                               max_len += step;
+                               ptr.u = ((UChar*)(*buf)) + len;
+                       } else {
+                               ptr.u += ret;
+                       }
+                       maxchars -= ulen;
+               }
+       } else {
+               ptr.s = *buf = pemalloc_rel_orig(max_len + 1, persistent);
+
+               while((ret = php_stream_read(src, ptr.s, max_len - len)))       
{
+                       len += ret;
+                       if (len + min_room >= max_len) {
+                               *buf = perealloc_rel_orig(*buf, max_len + step, 
persistent);
+                               max_len += step;
+                               ptr.s = ((char*)(*buf)) + len;
+                       } else {
+                               ptr.s += ret;
+                       }
                }
        }
+
        if (len) {
-               *buf = perealloc_rel_orig(*buf, len + 1, persistent);
-               (*buf)[len] = '\0';
+               if (rettype == IS_UNICODE) {
+                       if ((max_len - len) > (2 * step)) {
+                               *buf = perealloc_rel_orig(*buf, UBYTES(len + 
1), persistent);
+                       }
+                       ((UChar*)(*buf))[len] = 0;
+               } else {
+                       if ((max_len - len) > (2 * step)) {
+                               *buf = perealloc_rel_orig(*buf, len + 1, 
persistent);
+                       }
+                       ((char*)(*buf))[len] = 0;
+               }
        } else {
                pefree(*buf, persistent);
                *buf = NULL;

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to