iliaa Sun Jan 18 13:46:33 2004 EDT
Modified files: (Branch: PHP_4_3)
/php-src/ext/standard file.c
/php-src/ext/standard/tests/file bug26634.phpt
Log:
Corrections for various fgetcsv() issues.
Index: php-src/ext/standard/file.c
diff -u php-src/ext/standard/file.c:1.279.2.53 php-src/ext/standard/file.c:1.279.2.54
--- php-src/ext/standard/file.c:1.279.2.53 Wed Jan 14 09:31:27 2004
+++ php-src/ext/standard/file.c Sun Jan 18 13:46:31 2004
@@ -21,7 +21,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: file.c,v 1.279.2.53 2004/01/14 14:31:27 wez Exp $ */
+/* $Id: file.c,v 1.279.2.54 2004/01/18 18:46:31 iliaa Exp $ */
/* Synced with php 3.0 revision 1.218 1999-06-16 [ssb] */
@@ -2159,7 +2159,7 @@
}
/* }}} */
-static char *_php_fgetcsv_find_enclosure(char *start, int len, char enclosure)
+static char *_php_fgetcsv_find_enclosure(char *start, int len, char enclosure, int end)
{
char *s=start, *p, *e=start+len;
@@ -2177,12 +2177,60 @@
continue;
}
}
+ if (end) {
+ int i = 0;
+ while (e > p && *p == enclosure) {
+ s = ++p;
+ i++;
+ }
+ if (!(i % 2)) {
+ continue;
+ } else {
+ p--;
+ }
+ }
return p;
}
return NULL;
}
+static void _php_fgetcsv_trim_enclosed(char *buf2, int *buf2_len, char enclosure)
+{
+ if (memchr(buf2, enclosure, *buf2_len)) {
+ int esc = 0, enc_c = 0, pos = 0;
+ while (pos < *buf2_len) {
+ if (*(buf2 + pos) == '\\') {
+ esc = !esc;
+ enc_c = 0;
+ } else if (*(buf2 + pos) == enclosure) {
+ if (esc) {
+ esc = 0;
+ } else if (enc_c) {
+ enc_c = 0;
+ memmove(buf2 + pos, buf2 + pos + 1, *buf2_len - pos - 1);
+ (*buf2_len)--;
+ continue;
+ } else if (!esc) {
+ enc_c = 2;
+ }
+ } else {
+ if (enc_c == 2) {
+ memmove(buf2 + pos - 1, buf2 + pos, *buf2_len - pos);
+ (*buf2_len)--;
+ enc_c--;
+ }
+ esc = 0;
+ }
+ pos++;
+ }
+ if (enc_c && *(buf2 + pos - 1) == enclosure) {
+ (*buf2_len)--;
+ }
+ }
+ buf2[*buf2_len] = '\0';
+}
+
/* {{{ proto array fgetcsv(resource fp, int length [, string delimiter [, string enclosure]])
Get line from file pointer and parse for CSV fields */
PHP_FUNCTION(fgetcsv)
@@ -2264,29 +2312,23 @@
while (isspace((int)*(unsigned char *)s) && *s != delimiter && s < re) {
s++;
}
+
/* strip trailing spaces */
- while (--e >= s && isspace((int)*(unsigned char *)(e)) && *e != delimiter);
+ while (--e >= s && (*e == '\n' || *e == '\r') && *e != delimiter);
e++;
array_init(return_value);
-#define CSV_ADD_ENTRY(os, es, st) { \
- int tmp_sl = es - st; \
- char *tmp_s=os; \
- if (tmp_sl) { \
- while (isspace((int)*(unsigned char *)tmp_s)) { \
- tmp_s++; \
- tmp_sl--; \
- } \
- } \
- if (tmp_sl) { \
- add_next_index_stringl(return_value, tmp_s, tmp_sl, 1); \
+#define CSV_ADD_ENTRY(os, es, st) { \
+ if (es - st) { \
+ add_next_index_stringl(return_value, os, es - st, 1); \
} else { \
add_next_index_string(return_value, "", 1); \
} \
-} \
+}
- if (!enclosure || !(p = _php_fgetcsv_find_enclosure(s, (e - s), enclosure))) {
+csv_start:
+ if (!enclosure || !(p = _php_fgetcsv_find_enclosure(s, (e - s), enclosure, 0))) {
no_enclosure:
while ((p = memchr(s, delimiter, (e - s)))) {
CSV_ADD_ENTRY(s, p, s);
@@ -2303,12 +2345,23 @@
}
p++;
- if (*s == enclosure) {
+ /* strip leading spaces */
+ while (isspace((int)*(unsigned char *)s) && *s != delimiter && s < re) {
s++;
}
+ if (*s != enclosure) {
+ if ((p = memchr(s, delimiter, (e - s)))) {
+ CSV_ADD_ENTRY(s, p, s);
+ s = p + 1;
+ goto csv_start;
+ } else {
+ goto no_enclosure;
+ }
+ }
+ s++;
/* try to find end of enclosure */
- while (!(p2 = _php_fgetcsv_find_enclosure(p, (e - p), enclosure))) {
+ while (!(p2 = _php_fgetcsv_find_enclosure(p, (e - p), enclosure, 1))) {
buf2 = erealloc(buf2, buf2_len + (re - p) + 1);
memcpy(buf2 + buf2_len, p, (re - p));
buf2_len += (re - p);
@@ -2318,8 +2371,9 @@
}
s = p = buf;
re = e = buf + buf_len;
+
/* strip trailing spaces */
- while (isspace((int)*(unsigned char *)(--e)) && *e != delimiter);
+ while (--e >= s && (*e == '\n' || *e == '\r') && *e != delimiter);
e++;
}
@@ -2327,18 +2381,20 @@
if ((p = memchr(p2, delimiter, (e - p2)))) {
p2 = s;
s = p + 1;
- if (p > p2 && *(p - 1) == enclosure) {
- p--;
- }
- if (p - p2) {
- buf2 = erealloc(buf2, buf2_len + (p - p2) + 1);
- memcpy(buf2 + buf2_len, p2, (p - p2));
- buf2_len += (p - p2);
+ if (*p2 == enclosure) {
+ p2++;
}
+
+ /* copy data to buffer */
+ buf2 = erealloc(buf2, buf2_len + (p - p2) + 1);
+ memcpy(buf2 + buf2_len, p2, (p - p2));
+ buf2_len += p - p2;
+
+ _php_fgetcsv_trim_enclosed(buf2, &buf2_len, enclosure);
CSV_ADD_ENTRY(buf2, buf2_len, 0);
buf2_len = 0;
- if (!(p = _php_fgetcsv_find_enclosure(s, (e - s), enclosure))) {
+ if (!(p = _php_fgetcsv_find_enclosure(s, (e - s), enclosure, 0))) {
goto no_enclosure;
} else {
goto enclosure;
@@ -2351,16 +2407,17 @@
if (e - s) {
buf2 = erealloc(buf2, buf2_len + (e - s) + 1);
memcpy(buf2 + buf2_len, s, (e - s));
- buf2_len += (e - s);
+ buf2_len += e - s;
}
enclosure_done:
+ _php_fgetcsv_trim_enclosed(buf2, &buf2_len, enclosure);
CSV_ADD_ENTRY(buf2, buf2_len, 0);
goto done;
}
}
- if (s < e) {
+ if (s < e || (s > buf && *(s - 1) == delimiter)) {
CSV_ADD_ENTRY(s, e, s);
}
done:
Index: php-src/ext/standard/tests/file/bug26634.phpt
diff -u php-src/ext/standard/tests/file/bug26634.phpt:1.1.2.1 php-src/ext/standard/tests/file/bug26634.phpt:1.1.2.2
--- php-src/ext/standard/tests/file/bug26634.phpt:1.1.2.1 Mon Dec 15 13:53:26 2003
+++ php-src/ext/standard/tests/file/bug26634.phpt Sun Jan 18 13:46:32 2004
@@ -10,14 +10,14 @@
--EXPECT--
Array
(
- [0] =>
+ [0] =>
[1] =>
[2] => 971221
[3] => 1
)
Array
(
- [0] =>
+ [0] =>
[1] =>
[2] => 950707
[3] => \\
@@ -25,7 +25,7 @@
)
Array
(
- [0] =>
+ [0] =>
[1] =>
[2] => 999637
[3] => ERYC 250
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php