andrei Fri Sep 22 17:47:10 2006 UTC Modified files: /php-src unicode-progress.txt /php-src/ext/standard string.c Log: Unicode support in count_chars(). Note that due to the size of the Unicode character set only mode=1 is supported. http://cvs.php.net/viewvc.cgi/php-src/unicode-progress.txt?r1=1.47&r2=1.48&diff_format=u Index: php-src/unicode-progress.txt diff -u php-src/unicode-progress.txt:1.47 php-src/unicode-progress.txt:1.48 --- php-src/unicode-progress.txt:1.47 Wed Sep 20 20:30:19 2006 +++ php-src/unicode-progress.txt Fri Sep 22 17:47:09 2006 @@ -16,10 +16,6 @@ string.c -------- - count_chars() - Params API. Do we really want to go through the whole Unicode table? - May need to use hashtable instead of array. - hebrev(), hebrevc() Figure out if this is something we can use ICU for, internally. Check with Zeev. @@ -122,7 +118,6 @@ max() range() shuffle() - strrchr() end(), prev(), next(), reset(), current(), key() @@ -150,6 +145,7 @@ bin2hex() chr() chunk_split() + count_chars() dirname() explode() implode() http://cvs.php.net/viewvc.cgi/php-src/ext/standard/string.c?r1=1.588&r2=1.589&diff_format=u Index: php-src/ext/standard/string.c diff -u php-src/ext/standard/string.c:1.588 php-src/ext/standard/string.c:1.589 --- php-src/ext/standard/string.c:1.588 Wed Sep 20 20:30:19 2006 +++ php-src/ext/standard/string.c Fri Sep 22 17:47:09 2006 @@ -18,7 +18,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: string.c,v 1.588 2006/09/20 20:30:19 andrei Exp $ */ +/* $Id: string.c,v 1.589 2006/09/22 17:47:09 andrei Exp $ */ /* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */ @@ -6295,79 +6295,115 @@ } /* }}} */ -/* {{{ proto mixed count_chars(string input [, int mode]) +/* {{{ proto mixed count_chars(string input [, int mode]) U Returns info about what characters are used in input */ PHP_FUNCTION(count_chars) { - zval **input, **mode; + zstr input; + int input_len; + zend_uchar type; + long mode = 0; int chars[256]; - int 
ac=ZEND_NUM_ARGS(); - int mymode=0; + HashTable uchars; + UChar32 cp; + int *uchar_cnt_ptr, uchar_cnt; unsigned char *buf; - int len, inx; + int inx; char retstr[256]; int retlen=0; - if (ac < 1 || ac > 2 || zend_get_parameters_ex(ac, &input, &mode) == FAILURE) { - WRONG_PARAM_COUNT; + if (zend_parse_parameters(ZEND_NUM_ARGS(), "t|l", &input, &input_len, + &type, &mode) == FAILURE) { + return; } - convert_to_string_ex(input); - - if (ac == 2) { - convert_to_long_ex(mode); - mymode = Z_LVAL_PP(mode); - - if (mymode < 0 || mymode > 4) { + if (ZEND_NUM_ARGS() > 1) { + if (mode < 0 || mode > 4) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown mode."); RETURN_FALSE; } + if (UG(unicode) && mode != 1) { + php_error_docref(NULL TSRMLS_DC, E_WARNING, "Only mode=1 is supported with Unicode strings"); + } } - len = Z_STRLEN_PP(input); - buf = (unsigned char *) Z_STRVAL_PP(input); - memset((void*) chars, 0, sizeof(chars)); + if (type == IS_UNICODE) { + UChar buf[3]; + int buf_len; - while (len > 0) { - chars[*buf]++; - buf++; - len--; - } + zend_hash_init(&uchars, 0, NULL, NULL, 0); - if (mymode < 3) { - array_init(return_value); - } + inx = 0; + while (inx < input_len) { + U16_NEXT_UNSAFE(input.u, inx, cp); + if (zend_hash_index_find(&uchars, cp, (void**)&uchar_cnt_ptr) == SUCCESS) { + (*uchar_cnt_ptr)++; + } else { + uchar_cnt = 1; + zend_hash_index_update(&uchars, cp, &uchar_cnt, sizeof(int), NULL); + } + } - for (inx = 0; inx < 256; inx++) { - switch (mymode) { - case 0: - add_index_long(return_value, inx, chars[inx]); - break; - case 1: - if (chars[inx] != 0) { - add_index_long(return_value, inx, chars[inx]); - } - break; - case 2: - if (chars[inx] == 0) { - add_index_long(return_value, inx, chars[inx]); - } - break; - case 3: - if (chars[inx] != 0) { - retstr[retlen++] = inx; - } - break; - case 4: - if (chars[inx] == 0) { - retstr[retlen++] = inx; - } - break; + if (mode < 3) { + array_init(return_value); } - } - if (mymode >= 3 && mymode <= 4) { - 
RETURN_STRINGL(retstr, retlen, 1); + for (zend_hash_internal_pointer_reset(&uchars); + zend_hash_get_current_data(&uchars, (void**)&uchar_cnt_ptr) == SUCCESS; + zend_hash_move_forward(&uchars)) { + + zend_hash_get_current_key(&uchars, NULL, (ulong*)&cp, 0); + + buf_len = zend_codepoint_to_uchar(cp, buf); + buf[buf_len] = 0; + add_u_assoc_long_ex(return_value, IS_UNICODE, ZSTR(buf), buf_len+1, *uchar_cnt_ptr); + } + + zend_hash_destroy(&uchars); + } else { + buf = (unsigned char *) input.s; + memset((void*) chars, 0, sizeof(chars)); + + while (input_len > 0) { + chars[*buf]++; + buf++; + input_len--; + } + + if (mode < 3) { + array_init(return_value); + } + + for (inx = 0; inx < 256; inx++) { + switch (mode) { + case 0: + add_index_long(return_value, inx, chars[inx]); + break; + case 1: + if (chars[inx] != 0) { + add_index_long(return_value, inx, chars[inx]); + } + break; + case 2: + if (chars[inx] == 0) { + add_index_long(return_value, inx, chars[inx]); + } + break; + case 3: + if (chars[inx] != 0) { + retstr[retlen++] = inx; + } + break; + case 4: + if (chars[inx] == 0) { + retstr[retlen++] = inx; + } + break; + } + } + if (mode >= 3 && mode <= 4) { + RETURN_STRINGL(retstr, retlen, 1); + } } } /* }}} */
-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php