moriyoshi               Fri Oct  3 22:51:20 2003 EDT

  Modified files:              
    /php-src/main       config.w32.h 
    /php-src/ext/standard       basic_functions.h config.m4 file.c 
  Log:
  Fix fgetcsv() to correctly support international characters
  # note: mblen() is not an mbstring function; it is part of the ANSI C standard
  # library, which even Microsoft's libc supports.
  
  
Index: php-src/main/config.w32.h
diff -u php-src/main/config.w32.h:1.78 php-src/main/config.w32.h:1.79
--- php-src/main/config.w32.h:1.78      Sun Sep 14 05:12:54 2003
+++ php-src/main/config.w32.h   Fri Oct  3 22:51:19 2003
@@ -2,7 +2,7 @@
        Build Configuration for Win32.
        This has only been tested with MS VisualC++ 6 (and later).
 
-       $Id: config.w32.h,v 1.78 2003/09/14 09:12:54 helly Exp $
+       $Id: config.w32.h,v 1.79 2003/10/04 02:51:19 moriyoshi Exp $
 */
 
 /* Default PHP / PEAR directories */
@@ -192,3 +192,5 @@
 
 /* Win32 support proc_open */
 #define PHP_CAN_SUPPORT_PROC_OPEN 1
+
+#define HAVE_MBLEN /* mblen() is ANSI C and is provided by Microsoft's C runtime */
Index: php-src/ext/standard/basic_functions.h
diff -u php-src/ext/standard/basic_functions.h:1.123 
php-src/ext/standard/basic_functions.h:1.124
--- php-src/ext/standard/basic_functions.h:1.123        Thu Aug  7 15:53:31 2003
+++ php-src/ext/standard/basic_functions.h      Fri Oct  3 22:51:19 2003
@@ -17,13 +17,17 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: basic_functions.h,v 1.123 2003/08/07 19:53:31 moriyoshi Exp $ */
+/* $Id: basic_functions.h,v 1.124 2003/10/04 02:51:19 moriyoshi Exp $ */
 
 #ifndef BASIC_FUNCTIONS_H
 #define BASIC_FUNCTIONS_H
 
 #include <sys/stat.h>
 
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
+
 #include "zend_highlight.h"
 
 #include "url_scanner.h"
@@ -199,6 +203,11 @@
 #endif
 
        HashTable *user_filter_map;
+
+       /* file.c */
+#if defined(_REENTRANT) && defined(HAVE_MBRLEN) && defined(HAVE_MBSTATE_T)
+       mbstate_t mblen_state;
+#endif
 } php_basic_globals;
 
 #ifdef ZTS
Index: php-src/ext/standard/config.m4
diff -u php-src/ext/standard/config.m4:1.64 php-src/ext/standard/config.m4:1.65
--- php-src/ext/standard/config.m4:1.64 Fri Sep  5 20:35:21 2003
+++ php-src/ext/standard/config.m4      Fri Oct  3 22:51:19 2003
@@ -1,4 +1,4 @@
-dnl $Id: config.m4,v 1.64 2003/09/06 00:35:21 pollita Exp $ -*- sh -*-
+dnl $Id: config.m4,v 1.65 2003/10/04 02:51:19 moriyoshi Exp $ -*- sh -*-
 
 divert(3)dnl
 
@@ -296,6 +296,19 @@
 PHP_CHECK_FUNC(res_nsend, resolv, bind, socket)
 PHP_CHECK_FUNC(dn_expand, resolv, bind, socket)
 dnl already done PHP_CHECK_FUNC(dn_skipname, resolv, bind, socket)
+
+AC_CHECK_HEADERS([wchar.h])
+AC_CHECK_FUNCS([mblen])
+AC_CHECK_FUNCS([mbrlen mbsinit],,,[
+#ifdef HAVE_WCHAR_H
+# include <wchar.h>
+#endif
+])
+AC_CHECK_TYPES([mbstate_t],,,[
+#ifdef HAVE_WCHAR_H
+# include <wchar.h>
+#endif
+])
 
 PHP_NEW_EXTENSION(standard, array.c base64.c basic_functions.c browscap.c crc32.c 
crypt.c \
                             cyr_convert.c datetime.c dir.c dl.c dns.c exec.c file.c 
filestat.c \
Index: php-src/ext/standard/file.c
diff -u php-src/ext/standard/file.c:1.359 php-src/ext/standard/file.c:1.360
--- php-src/ext/standard/file.c:1.359   Tue Sep 30 05:52:10 2003
+++ php-src/ext/standard/file.c Fri Oct  3 22:51:19 2003
@@ -21,7 +21,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: file.c,v 1.359 2003/09/30 09:52:10 stas Exp $ */
+/* $Id: file.c,v 1.360 2003/10/04 02:51:19 moriyoshi Exp $ */
 
 /* Synced with php 3.0 revision 1.218 1999-06-16 [ssb] */
 
@@ -114,6 +114,10 @@
 #include <fnmatch.h>
 #endif
 
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
+
 /* }}} */
 /* {{{ ZTS-stuff / Globals / Prototypes */
 
@@ -1706,18 +1710,59 @@
 }
 /* }}} */
 
+#ifndef HAVE_MBLEN /* no mblen() available: every byte is treated as one character */
+# define _php_mblen(ptr, len) 1
+#else
+# if defined(_REENTRANT) && defined(HAVE_MBRLEN) && defined(HAVE_MBSTATE_T) /* restartable variant, state kept in BG(mblen_state) */
+#  define _php_mblen(ptr, len) ((ptr) == NULL ? (memset(&BG(mblen_state), 0, sizeof(BG(mblen_state))), 0) : (int)mbrlen((ptr), (len), &BG(mblen_state))) /* NULL ptr resets the state; mbsinit() would only query it */
+# else /* plain mblen() keeps its own internal state; mblen(NULL, 0) resets it */
+#  define _php_mblen(ptr, len) mblen((ptr), (len))
+# endif
+#endif
+
+/* Scan up to len bytes at ptr, stopping early when _php_mblen() reports 0 (a
+ * NUL byte), and return a pointer to the start of the final run of single-byte
+ * whitespace characters, or to the end of the scanned data when there is no
+ * such run. The delimiter is never counted as whitespace. */
+static const char *php_fgetcsv_lookup_trailing_spaces(const char *ptr, size_t len, const char delimiter TSRMLS_DC)
+{
+       int inc_len;
+       size_t cnt = 0; /* bytes in the current run of trailing whitespace */
+
+       while (len > 0) {
+               inc_len = _php_mblen(ptr, len);
+               if (inc_len == 0) { /* NUL byte: nothing more to scan */
+                       break;
+               }
+               if (inc_len < 0) {
+                       /* invalid (-1) or incomplete (-2) sequence: treat the byte as data */
+                       inc_len = 1;
+                       cnt = 0;
+                       _php_mblen(NULL, 0); /* reset the state like the other scan loops do */
+               } else if (inc_len == 1 && delimiter != *ptr && isspace((int)*(const unsigned char *)ptr)) {
+                       cnt++;
+               } else {
+                       cnt = 0; /* delimiter, non-space or multibyte character ends the run */
+               }
+               ptr += inc_len;
+               len -= inc_len;
+       }
+       return ptr - cnt;
+}
+
 /* {{{ proto array fgetcsv(resource fp, int length [, string delimiter [, string 
enclosure]])
    Get line from file pointer and parse for CSV fields */
 PHP_FUNCTION(fgetcsv)
 {
-       char *temp, *tptr, *bptr, *lineEnd;
+       char *temp, *tptr, *bptr, *line_end, *limit;
        char delimiter = ',';   /* allow this to be set as parameter */
        char enclosure = '"';   /* allow this to be set as parameter */
-
+       const char escape_char = '\\';
        /* first section exactly as php_fgetss */
 
        zval **fd, **bytes, **p_delim, **p_enclosure;
-       int len, temp_len;
+       long len;
+       size_t buf_len, temp_len, line_end_len;
        char *buf;
        php_stream *stream;
 
@@ -1778,34 +1823,27 @@
        }
 
        buf = emalloc(len + 1);
-       /* needed because recv/read/gzread doesnt set null char at end */
-       memset(buf, 0, len + 1);
 
-       if (php_stream_gets(stream, buf, len) == NULL) {
+       if (php_stream_get_line(stream, buf, len, &buf_len) == NULL) {
                efree(buf);
                RETURN_FALSE;
        }
 
+       /* initialize internal state */
+       _php_mblen(NULL, 0);
+
        /* Now into new section that parses buf for delimiter/enclosure fields */
 
        /* Strip trailing space from buf, saving end of line in case required for 
enclosure field */
 
-       lineEnd = emalloc(len + 1);
        bptr = buf;
-       tptr = buf + strlen(buf) -1;
-       while ( isspace((int)*(unsigned char *)tptr) && (*tptr!=delimiter) && (tptr > 
bptr) ) tptr--;
-       tptr++;
-       strcpy(lineEnd, tptr);
-
-       /* add single space - makes it easier to parse trailing null field */
-       *tptr++ = ' ';
-       *tptr = 0;
+       tptr = (char *)php_fgetcsv_lookup_trailing_spaces(buf, buf_len, delimiter 
TSRMLS_CC);
+       line_end_len = buf_len - (size_t)(tptr - buf);
+       line_end = limit = tptr;
 
        /* reserve workspace for building each individual field */
-
-       temp_len = len;
-       temp = emalloc(temp_len + 1);   /* unlikely but possible! */
-       tptr = temp;
+       temp_len = buf_len;
+       temp = emalloc(temp_len + line_end_len + 1);
 
        /* Initialize return array */
        array_init(return_value);
@@ -1813,113 +1851,229 @@
        /* Main loop to read CSV fields */
        /* NB this routine will return a single null entry for a blank line */
 
-       do {
+       for (;;) {
+               int inc_len;
+               char *comp_end, *hunk_begin;
+
+               tptr = temp;
+
                /* 1. Strip any leading space */
-               while(isspace((int)*(unsigned char *)bptr) && (*bptr!=delimiter)) 
bptr++;
+               for (;;) {
+                       inc_len = (bptr < limit ? _php_mblen(bptr, limit - bptr): 0);
+                       switch (inc_len) {
+                               case -2:
+                               case -1:
+                                       inc_len = 1;
+                                       _php_mblen(NULL, 0);
+                                       break;
+                               case 0:
+                                       goto quit_loop_0;
+                               case 1:
+                                       if (!isspace((int)*(unsigned char *)bptr) || 
*bptr == delimiter) {
+                                               goto quit_loop_1;
+                                       }
+                                       break;
+                               default:
+                                       goto quit_loop_1;
+                       }
+                       bptr += inc_len;
+               }
+       quit_loop_1:
                /* 2. Read field, leaving bptr pointing at start of next field */
-               if (enclosure && *bptr == enclosure) {
+               if (*bptr == enclosure) {
+                       int state = 0;
+
                        bptr++; /* move on to first character in field */
+                       hunk_begin = bptr;
 
                        /* 2A. handle enclosure delimited field */
-                       while (*bptr) {
-                               /* we need to determine if the enclosure is 'real' or 
is it escaped */
-                               if (*(bptr - 1) == '\\') {
-                                       int escape_cnt = 0;
-                                       char *bptr_p = bptr - 2;
-                               
-                                       while (bptr_p > buf && *bptr_p == '\\') {
-                                               escape_cnt++;
-                                               bptr_p--;
-                                       }
-                                       if (!(escape_cnt % 2)) {
-                                               goto normal_char;
-                                               continue;
-                                       }
-                               }
-                       
-                               if (*bptr == enclosure) {
-                                       /* handle the enclosure */
-                                       if ( *(bptr+1) == enclosure) {
-                                       /* embedded enclosure */
-                                               *tptr++ = *bptr; bptr +=2;
-                                       } else {
-                                       /* must be end of string - skip to start of 
next field or end */
-                                               while ( (*bptr != delimiter) && *bptr 
) bptr++;
-                                               if (*bptr == delimiter) bptr++;
-                                               *tptr=0;        /* terminate temporary 
string */
-                                               break;  /* .. from handling this field 
- resumes at 3. */
-                                       }
-                               } else {
-normal_char:
-                               /* normal character */
-                                       *tptr++ = *bptr++;
-
-                                       if (*bptr == 0) {               /* embedded 
line end? */
-                                               *(tptr-1)=0;            /* remove 
space character added on reading line */
-                                               strcat(temp, lineEnd);  /* add the 
embedded line end to the field */
-
-                                               /* read a new line from input, as at 
start of routine */
-                                               memset(buf, 0, len+1);
-
-                                               if (php_stream_gets(stream, buf, len) 
== NULL) {
-                                                       /* we've got an unterminated 
enclosure, assign all the data
-                                                        * from the start of the 
enclosure to end of data to the last element
-                                                        */
-                                                       if (temp_len > len) { 
+                       for (;;) {
+                               inc_len = (bptr < limit ? _php_mblen(bptr, limit - 
bptr): 0);
+                               switch (inc_len) {
+                                       case 0:
+                                               switch (state) {
+                                                       case 2:
+                                                               memcpy(tptr, 
hunk_begin, bptr - hunk_begin - 1);
+                                                               tptr += (bptr - 
hunk_begin - 1);
+                                                               hunk_begin = bptr;
+                                                               goto quit_loop_2;
+
+                                                       case 1:
+                                                               memcpy(tptr, 
hunk_begin, bptr - hunk_begin);
+                                                               tptr += (bptr - 
hunk_begin);
+                                                               hunk_begin = bptr;
+                                                               /* break is omitted 
intentionally */
+
+                                                       case 0: {
+                                                               char *new_buf;
+                                                               size_t new_len;
+                                                               char *new_temp;
+
+                                                               memcpy(tptr, 
hunk_begin, bptr - hunk_begin);
+                                                               tptr += (bptr - 
hunk_begin);
+                                                               hunk_begin = bptr;
+
+                                                               /* add the embedded 
line end to the field */
+                                                               memcpy(tptr, line_end, 
line_end_len);
+                                                               tptr += line_end_len;
+
+                                                               if ((new_buf = 
php_stream_get_line(stream, NULL, 0, &new_len)) == NULL) {
+                                                                       /* we've got 
an unterminated enclosure,
+                                                                        * assign all 
the data from the start of
+                                                                        * the 
enclosure to end of data to the
+                                                                        * last 
element */
+                                                                       if 
((size_t)temp_len > (size_t)(limit - buf)) { 
+                                                                               goto 
quit_loop_2;
+                                                                       }
+                                                                       
zval_dtor(return_value);
+                                                                       RETVAL_FALSE;
+                                                                       goto out;
+                                                               }
+                                                               temp_len += new_len;
+                                                               new_temp = 
erealloc(temp, temp_len);
+                                                               tptr = new_temp + 
(size_t)(tptr - temp);
+                                                               temp = new_temp;
+
+                                                               efree(buf);
+                                                               buf_len = new_len;
+                                                               bptr = buf = new_buf;
+                                                               hunk_begin = buf;
+
+                                                               line_end = limit = 
(char *)php_fgetcsv_lookup_trailing_spaces(buf, buf_len, delimiter TSRMLS_CC);
+                                                               line_end_len = buf_len 
- (size_t)(limit - buf); 
+
+                                                               state = 0;
+                                                       } break;
+                                               }
+                                               break;
+
+                                       case -2:
+                                       case -1:
+                                               _php_mblen(NULL, 0);
+                                               /* break is omitted intentionally */
+                                       case 1:
+                                               /* we need to determine if the 
enclosure is
+                                                * 'real' or is it escaped */
+                                               switch (state) {
+                                                       case 1: /* escaped */
+                                                               bptr++;
+                                                               state = 0;
+                                                               break;
+                                                       case 2: /* embedded enclosure 
? let's check it */
+                                                               if (*bptr != 
enclosure) {
+                                                                       /* real 
enclosure */
+                                                                       memcpy(tptr, 
hunk_begin, bptr - hunk_begin - 1);
+                                                                       tptr += (bptr 
- hunk_begin - 1);
+                                                                       goto 
quit_loop_2;
+                                                               }
+                                                               memcpy(tptr, 
hunk_begin, bptr - hunk_begin);
+                                                               tptr += (bptr - 
hunk_begin);
+                                                               bptr++;
+                                                               hunk_begin = bptr;
+                                                               state = 0;
+                                                               break;
+                                                       default:
+                                                               if (*bptr == 
escape_char) {
+                                                                       state = 1;
+                                                               } else if (*bptr == 
enclosure) {
+                                                                       state = 2;
+                                                               } else {
+                                                               }
+                                                               bptr++;
                                                                break;
-                                                       }
-                                                       
-                                                       efree(lineEnd); 
-                                                       efree(temp); 
-                                                       efree(buf);
-                                                       zval_dtor(return_value);
-                                                       RETURN_FALSE;
                                                }
+                                               break;
 
-                                               temp_len += len;
-                                               temp = erealloc(temp, temp_len+1);
-                                               bptr = buf;
-                                               tptr = buf + strlen(buf) -1;
-                                               while (isspace((int)*(unsigned char 
*)tptr) && (*tptr!=delimiter) && (tptr > bptr)) {
-                                                       tptr--;
+                                       default:
+                                               switch (state) {
+                                                       case 2:
+                                                               /* real enclosure */
+                                                               memcpy(tptr, 
hunk_begin, bptr - hunk_begin - 1);
+                                                               tptr += (bptr - 
hunk_begin - 1);
+                                                               goto quit_loop_2;
+                                                       case 1:
+                                                               bptr += inc_len;
+                                                               memcpy(tptr, 
hunk_begin, bptr - hunk_begin);
+                                                               tptr += (bptr - 
hunk_begin);
+                                                               hunk_begin = bptr;
+                                                               break;
+                                                               /* break is missing 
intentionally */
+                                                       default:
+                                                               bptr += inc_len;
+                                                               break;
                                                }
-                                               tptr++; 
-                                               strcpy(lineEnd, tptr);
-                                               *tptr++ = ' ';
-                                               *tptr = 0;
-
-                                               tptr = temp;    /* reset temp pointer 
to end of field as read so far */
-                                               while (*tptr) {
-                                                       tptr++;
+                                               break;
+                               }
+                       }
+               quit_loop_2:
+                       /* look up for a delimiter */
+                       for (;;) {
+                               switch (inc_len) {
+                                       case 0:
+                                               goto quit_loop_3;
+                                       case -2:
+                                       case -1:
+                                               inc_len = 1;
+                                               _php_mblen(NULL, 0);
+                                               /* break is omitted intentionally */
+                                       case 1:
+                                               if (*bptr == delimiter) {
+                                                       goto quit_loop_3;
                                                }
-                                       }
+                                               break;
+                                       default:
+                                               break;
                                }
+                               bptr += inc_len;
+                               inc_len = (bptr < limit ? _php_mblen(bptr, limit - 
bptr): 0);
+                       }
+               quit_loop_3:
+                       comp_end = tptr;
+
+                       if (*bptr == delimiter) {
+                               bptr++;
                        }
                } else {
                        /* 2B. Handle non-enclosure field */
-                       while ((*bptr != delimiter) && *bptr) {
-                               *tptr++ = *bptr++;
-                       }
-                       *tptr=0;        /* terminate temporary string */
 
-                       if (strlen(temp)) {
-                               tptr--;
-                               while (isspace((int)*(unsigned char *)tptr) && 
(*tptr!=delimiter)) {
-                                       *tptr-- = 0;    /* strip any trailing spaces */
+                       hunk_begin = bptr;
+
+                       for (;;) {
+                               inc_len = (bptr < limit ? _php_mblen(bptr, limit - 
bptr): 0);
+                               switch (inc_len) {
+                                       case 0:
+                                               goto quit_loop_4;
+                                       case -2:
+                                       case -1:
+                                               inc_len = 1;
+                                               _php_mblen(NULL, 0);
+                                               /* break is omitted intentionally */
+                                       case 1:
+                                               if (*bptr == delimiter) {
+                                                       goto quit_loop_4;
+                                               }
+                                               break;
+                                       default:
+                                               break;
                                }
+                               bptr += inc_len;
                        }
-                       
+               quit_loop_4:
+                       memcpy(tptr, hunk_begin, bptr - hunk_begin);
+                       tptr += (bptr - hunk_begin);
+
+                       comp_end = (char *)php_fgetcsv_lookup_trailing_spaces(temp, 
tptr - temp, delimiter TSRMLS_CC);
                        if (*bptr == delimiter) {
                                bptr++;
                        }
                }
 
                /* 3. Now pass our field back to php */
-               add_next_index_string(return_value, temp, 1);
-               tptr = temp;
-       } while (*bptr);
-
-       efree(lineEnd);
+               *comp_end = '\0';
+               add_next_index_stringl(return_value, temp, comp_end - temp, 1);
+       }
+quit_loop_0:
+out:
        efree(temp);
        efree(buf);
 }

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to