dmitry Fri Jun 29 13:58:34 2007 UTC Added files: /php-src/ext/dom/tests dom002u.phpt
Modified files: /php-src/ext/libxml libxml.c php_libxml.h /php-src/ext/simplexml simplexml.c /php-src/ext/dom document.c /php-src/ext/soap soap.c /php-src/ext/soap/tests/bugs bug39815.phpt Log: Allowed loading XML from unicode strings
http://cvs.php.net/viewvc.cgi/php-src/ext/libxml/libxml.c?r1=1.59&r2=1.60&diff_format=u Index: php-src/ext/libxml/libxml.c diff -u php-src/ext/libxml/libxml.c:1.59 php-src/ext/libxml/libxml.c:1.60 --- php-src/ext/libxml/libxml.c:1.59 Mon Jun 18 16:46:40 2007 +++ php-src/ext/libxml/libxml.c Fri Jun 29 13:58:33 2007 @@ -17,7 +17,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: libxml.c,v 1.59 2007/06/18 16:46:40 iliaa Exp $ */ +/* $Id: libxml.c,v 1.60 2007/06/29 13:58:33 dmitry Exp $ */ #define IS_EXT_MODULE @@ -1059,6 +1059,107 @@ } /* }}} */ +PHP_LIBXML_API char* php_libxml_unicode_to_string(UChar *ustr, int ustr_len, int *str_len TSRMLS_DC) +{ + UErrorCode errCode = 0; + char *tmp; + int tmp_len; + + zend_unicode_to_string_ex(UG(utf8_conv), &tmp, &tmp_len, ustr, ustr_len, &errCode); + *str_len = tmp_len; + + /* Substitute uncoding with "utf8" */ + if (tmp[0] == '<' && + tmp[1] == '?' && + tmp[2] == 'x' && + tmp[3] == 'm' && + tmp[4] == 'l') { + char *s = tmp + sizeof("<?xml")-1; + + while (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n') { + ++s; + } + while (*s != 0 && *s != '?' 
&& *s != '>') { + if ((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z')) { + char *attr = s; + char *val; + int attr_len, val_len; + + while ((*s >= 'a' && *s <= 'z') || + (*s >= 'A' && *s <= 'Z') || + (*s >= '0' && *s <= '9') || + (*s == '_')) { + ++s; + } + attr_len = s - attr; + while (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n') { + ++s; + } + if (*s == '=') { + ++s; + } else { + break; + } + while (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n') { + ++s; + } + if (*s == '"') { + ++s; + } else { + break; + } + val = s; + while (*s != 0 && *s != '"') { + ++s; + } + if (*s == '"') { + val_len = s - val; + ++s; + } else { + break; + } + + while (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n') { + ++s; + } + + if (attr_len == sizeof("encoding")-1 && + strncasecmp(attr, "encoding", sizeof("encoding")-1) == 0) { + if (val_len >= sizeof("utf-8")-1) { + val[0] = 'u'; + val[1] = 't'; + val[2] = 'f'; + val[3] = '-'; + val[4] = '8'; + val[5] = '"'; + while (val_len > sizeof("utf-8")-1) { + val[val_len] = ' '; + --val_len; + } + }else if (val_len >= sizeof("utf8")-1) { + val[0] = 'u'; + val[1] = 't'; + val[2] = 'f'; + val[3] = '8'; + val[4] = '"'; + while (val_len > sizeof("utf8")-1) { + val[val_len] = ' '; + --val_len; + } + } else { + /* Encoding name too short */ + break; + } + } + + } else { + break; + } + } + } + return tmp; +} + #ifdef PHP_WIN32 PHP_LIBXML_API BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { http://cvs.php.net/viewvc.cgi/php-src/ext/libxml/php_libxml.h?r1=1.24&r2=1.25&diff_format=u Index: php-src/ext/libxml/php_libxml.h diff -u php-src/ext/libxml/php_libxml.h:1.24 php-src/ext/libxml/php_libxml.h:1.25 --- php-src/ext/libxml/php_libxml.h:1.24 Mon Jan 8 20:01:23 2007 +++ php-src/ext/libxml/php_libxml.h Fri Jun 29 13:58:33 2007 @@ -17,7 +17,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: php_libxml.h,v 1.24 2007/01/08 20:01:23 pollita Exp $ */ +/* $Id: 
php_libxml.h,v 1.25 2007/06/29 13:58:33 dmitry Exp $ */ #ifndef PHP_LIBXML_H #define PHP_LIBXML_H @@ -93,6 +93,7 @@ PHP_LIBXML_API int php_libxml_xmlCheckUTF8(const unsigned char *s); PHP_LIBXML_API zval *php_libxml_switch_context(zval *context TSRMLS_DC); PHP_LIBXML_API void php_libxml_issue_error(int level, const char *msg TSRMLS_DC); +PHP_LIBXML_API char* php_libxml_unicode_to_string(UChar *ustr, int ustr_len, int *str_len TSRMLS_DC); /* Init/shutdown functions*/ PHP_LIBXML_API void php_libxml_initialize(); http://cvs.php.net/viewvc.cgi/php-src/ext/simplexml/simplexml.c?r1=1.235&r2=1.236&diff_format=u Index: php-src/ext/simplexml/simplexml.c diff -u php-src/ext/simplexml/simplexml.c:1.235 php-src/ext/simplexml/simplexml.c:1.236 --- php-src/ext/simplexml/simplexml.c:1.235 Sun Jun 24 11:43:34 2007 +++ php-src/ext/simplexml/simplexml.c Fri Jun 29 13:58:34 2007 @@ -18,7 +18,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: simplexml.c,v 1.235 2007/06/24 11:43:34 nlopess Exp $ */ +/* $Id: simplexml.c,v 1.236 2007/06/29 13:58:34 dmitry Exp $ */ #ifdef HAVE_CONFIG_H #include "config.h" @@ -30,6 +30,7 @@ #include "php_ini.h" #include "ext/standard/info.h" #include "ext/standard/php_string.h" +#include "ext/libxml/php_libxml.h" #include "php_simplexml.h" #include "php_simplexml_exports.h" #include "zend_exceptions.h" @@ -2059,8 +2060,9 @@ PHP_FUNCTION(simplexml_load_string) { php_sxe_object *sxe; - char *data; + zstr data; int data_len; + zend_uchar data_type; xmlDocPtr docp; char *ns = NULL; int ns_len = 0; @@ -2068,11 +2070,17 @@ zend_class_entry *ce= sxe_class_entry; zend_bool isprefix = 0; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "S|C!ls&b", &data, &data_len, &ce, &options, &ns, &ns_len, UG(utf8_conv), &isprefix) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t|C!ls&b", &data, &data_len, &data_type, &ce, &options, &ns, &ns_len, UG(utf8_conv), &isprefix) == FAILURE) { return; } - docp = 
xmlReadMemory(data, data_len, NULL, NULL, options); + if (data_type == IS_UNICODE) { + data.s = php_libxml_unicode_to_string(data.u, data_len, &data_len TSRMLS_CC); + } + docp = xmlReadMemory(data.s, data_len, NULL, NULL, options); + if (data_type == IS_UNICODE) { + efree(data.s); + } if (! docp) { RETURN_FALSE; @@ -2474,7 +2482,7 @@ { php_info_print_table_start(); php_info_print_table_header(2, "Simplexml support", "enabled"); - php_info_print_table_row(2, "Revision", "$Revision: 1.235 $"); + php_info_print_table_row(2, "Revision", "$Revision: 1.236 $"); php_info_print_table_row(2, "Schema support", #ifdef LIBXML_SCHEMAS_ENABLED "enabled"); http://cvs.php.net/viewvc.cgi/php-src/ext/dom/document.c?r1=1.85&r2=1.86&diff_format=u Index: php-src/ext/dom/document.c diff -u php-src/ext/dom/document.c:1.85 php-src/ext/dom/document.c:1.86 --- php-src/ext/dom/document.c:1.85 Mon Jan 1 09:29:23 2007 +++ php-src/ext/dom/document.c Fri Jun 29 13:58:34 2007 @@ -17,7 +17,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: document.c,v 1.85 2007/01/01 09:29:23 sebastian Exp $ */ +/* $Id: document.c,v 1.86 2007/06/29 13:58:34 dmitry Exp $ */ #ifdef HAVE_CONFIG_H #include "config.h" @@ -32,6 +32,8 @@ #include <libxml/xmlschemas.h> #endif +#include "ext/libxml/php_libxml.h" + typedef struct _idsIterator idsIterator; struct _idsIterator { xmlChar *elementId; @@ -1532,7 +1534,7 @@ xmlDoc *docp = NULL, *newdoc; dom_doc_propsptr doc_prop; dom_object *intern; - char *source; + zstr source; int source_len, refcount, ret; zend_uchar source_type = IS_STRING; long options = 0; @@ -1542,14 +1544,8 @@ id = NULL; } - if (mode == DOM_LOAD_FILE) { - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t|l", &source, &source_len, &source_type, &options) == FAILURE) { - return; - } - } else { - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "S|l", &source, &source_len, &options) == FAILURE) { - return; - } + if (zend_parse_parameters(ZEND_NUM_ARGS() 
TSRMLS_CC, "t|l", &source, &source_len, &source_type, &options) == FAILURE) { + return; } if (!source_len) { @@ -1558,15 +1554,19 @@ } if (source_type == IS_UNICODE) { - if (php_stream_path_encode(NULL, &source, &source_len, (UChar*)source, source_len, REPORT_ERRORS, NULL) == FAILURE) { - RETURN_FALSE; + if (mode == DOM_LOAD_FILE) { + if (php_stream_path_encode(NULL, &source.s, &source_len, source.u, source_len, REPORT_ERRORS, NULL) == FAILURE) { + RETURN_FALSE; + } + } else { + source.s = php_libxml_unicode_to_string(source.u, source_len, &source_len TSRMLS_CC); } } - newdoc = dom_document_parser(id, mode, source, options TSRMLS_CC); + newdoc = dom_document_parser(id, mode, source.s, options TSRMLS_CC); if (source_type == IS_UNICODE) { - efree(source); + efree(source.s); } if (!newdoc) @@ -1860,13 +1860,13 @@ #if defined(LIBXML_SCHEMAS_ENABLED) -static void -_dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type) +static void _dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type) { zval *id; xmlDoc *docp; dom_object *intern; - char *source = NULL, *valid_file = NULL; + zstr source = NULL_ZSTR; + char *valid_file = NULL; int source_len = 0; xmlSchemaParserCtxtPtr parser; xmlSchemaPtr sptr; @@ -1875,14 +1875,8 @@ char resolved_path[MAXPATHLEN + 1]; zend_uchar source_type = IS_STRING; - if (type == DOM_LOAD_FILE) { - if (zend_parse_method_parameters(ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Ot", &id, dom_document_class_entry, &source, &source_len, &source_type) == FAILURE) { - return; - } - } else { - if (zend_parse_method_parameters(ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "OS", &id, dom_document_class_entry, &source, &source_len) == FAILURE) { - return; - } + if (zend_parse_method_parameters(ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Ot", &id, dom_document_class_entry, &source, &source_len, &source_type) == FAILURE) { + return; } if (source_len == 0) { @@ -1895,15 +1889,15 @@ switch (type) { case DOM_LOAD_FILE: if (source_type == IS_UNICODE) { - 
if (php_stream_path_encode(NULL, &source, &source_len, (UChar*)source, source_len, REPORT_ERRORS, NULL) == FAILURE) { + if (php_stream_path_encode(NULL, &source.s, &source_len, source.u, source_len, REPORT_ERRORS, NULL) == FAILURE) { RETURN_FALSE; } } - valid_file = _dom_get_valid_file_path(source, resolved_path, MAXPATHLEN TSRMLS_CC); + valid_file = _dom_get_valid_file_path(source.s, resolved_path, MAXPATHLEN TSRMLS_CC); if (!valid_file) { if (source_type == IS_UNICODE) { - efree(source); + efree(source.s); } php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid Schema file source"); RETURN_FALSE; @@ -1911,13 +1905,19 @@ parser = xmlSchemaNewParserCtxt(valid_file); if (source_type == IS_UNICODE) { - efree(source); + efree(source.s); } break; case DOM_LOAD_STRING: - parser = xmlSchemaNewMemParserCtxt(source, source_len); + if (source_type == IS_UNICODE) { + source.s = php_libxml_unicode_to_string(source.u, source_len, &source_len TSRMLS_CC); + } + parser = xmlSchemaNewMemParserCtxt(source.s, source_len); /* If loading from memory, we need to set the base directory for the document but it is not apparent how to do that for schema's */ + if (source_type == IS_UNICODE) { + efree(source.s); + } break; default: return; @@ -1976,7 +1976,8 @@ zval *id; xmlDoc *docp; dom_object *intern; - char *source = NULL, *valid_file = NULL; + zstr source = NULL_ZSTR; + char *valid_file = NULL; int source_len = 0; xmlRelaxNGParserCtxtPtr parser; xmlRelaxNGPtr sptr; @@ -1985,14 +1986,8 @@ char resolved_path[MAXPATHLEN + 1]; zend_uchar source_type = IS_STRING; - if (type == DOM_LOAD_FILE) { - if (zend_parse_method_parameters(ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Ot", &id, dom_document_class_entry, &source, &source_len, &source_type) == FAILURE) { - return; - } - } else { - if (zend_parse_method_parameters(ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "OS", &id, dom_document_class_entry, &source, &source_len) == FAILURE) { - return; - } + if (zend_parse_method_parameters(ZEND_NUM_ARGS() 
TSRMLS_CC, getThis(), "Ot", &id, dom_document_class_entry, &source, &source_len, &source_type) == FAILURE) { + return; } if (source_len == 0) { @@ -2005,27 +2000,33 @@ switch (type) { case DOM_LOAD_FILE: if (source_type == IS_UNICODE) { - if (php_stream_path_encode(NULL, &source, &source_len, (UChar*)source, source_len, REPORT_ERRORS, NULL) == FAILURE) { + if (php_stream_path_encode(NULL, &source.s, &source_len, source.u, source_len, REPORT_ERRORS, NULL) == FAILURE) { RETURN_FALSE; } } - valid_file = _dom_get_valid_file_path(source, resolved_path, MAXPATHLEN TSRMLS_CC); + valid_file = _dom_get_valid_file_path(source.s, resolved_path, MAXPATHLEN TSRMLS_CC); if (!valid_file) { if (source_type == IS_UNICODE) { - efree(source); + efree(source.s); } php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid RelaxNG file source"); RETURN_FALSE; } parser = xmlRelaxNGNewParserCtxt(valid_file); if (source_type == IS_UNICODE) { - efree(source); + efree(source.s); } break; case DOM_LOAD_STRING: - parser = xmlRelaxNGNewMemParserCtxt(source, source_len); + if (source_type == IS_UNICODE) { + source.s = php_libxml_unicode_to_string(source.u, source_len, &source_len TSRMLS_CC); + } + parser = xmlRelaxNGNewMemParserCtxt(source.s, source_len); /* If loading from memory, we need to set the base directory for the document but it is not apparent how to do that for schema's */ + if (source_type == IS_UNICODE) { + efree(source.s); + } break; default: return; @@ -2087,21 +2088,15 @@ xmlDoc *docp = NULL, *newdoc; dom_object *intern; dom_doc_propsptr doc_prop; - char *source; + zstr source; int source_len, refcount, ret; htmlParserCtxtPtr ctxt; zend_uchar source_type = IS_STRING; id = getThis(); - if (mode == DOM_LOAD_FILE) { - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t", &source, &source_len, &source_type) == FAILURE) { - return; - } - } else { - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "S", &source, &source_len) == FAILURE) { - return; - } + if 
(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t", &source, &source_len, &source_type) == FAILURE) { + return; } if (!source_len) { @@ -2111,19 +2106,26 @@ if (mode == DOM_LOAD_FILE) { if (source_type == IS_UNICODE) { - if (php_stream_path_encode(NULL, &source, &source_len, (UChar*)source, source_len, REPORT_ERRORS, NULL) == FAILURE) { + if (php_stream_path_encode(NULL, &source.s, &source_len, source.u, source_len, REPORT_ERRORS, NULL) == FAILURE) { RETURN_FALSE; } } - ctxt = htmlCreateFileParserCtxt(source, NULL); + ctxt = htmlCreateFileParserCtxt(source.s, NULL); if (source_type == IS_UNICODE) { - efree(source); + efree(source.s); } } else { - source_len = xmlStrlen(source); - ctxt = htmlCreateMemoryParserCtxt(source, source_len); + if (source_type == IS_UNICODE) { + source.s = php_libxml_unicode_to_string(source.u, source_len, &source_len TSRMLS_CC); + } + + ctxt = htmlCreateMemoryParserCtxt(source.s, source_len); + + if (source_type == IS_UNICODE) { + efree(source.s); + } } if (!ctxt) { http://cvs.php.net/viewvc.cgi/php-src/ext/soap/soap.c?r1=1.222&r2=1.223&diff_format=u Index: php-src/ext/soap/soap.c diff -u php-src/ext/soap/soap.c:1.222 php-src/ext/soap/soap.c:1.223 --- php-src/ext/soap/soap.c:1.222 Wed May 2 08:22:32 2007 +++ php-src/ext/soap/soap.c Fri Jun 29 13:58:34 2007 @@ -17,7 +17,7 @@ | Dmitry Stogov <[EMAIL PROTECTED]> | +----------------------------------------------------------------------+ */ -/* $Id: soap.c,v 1.222 2007/05/02 08:22:32 dmitry Exp $ */ +/* $Id: soap.c,v 1.223 2007/06/29 13:58:34 dmitry Exp $ */ #ifdef HAVE_CONFIG_H #include "config.h" @@ -296,12 +296,9 @@ char* soap_unicode_to_string(UChar *ustr, int ustr_len TSRMLS_DC) { - UErrorCode errCode = 0; - char *tmp; - int tmp_len; + int dummy; - zend_unicode_to_string_ex(UG(utf8_conv), &tmp, &tmp_len, ustr, ustr_len, &errCode); - return tmp; + return php_libxml_unicode_to_string(ustr, ustr_len, &dummy TSRMLS_CC); } void soap_decode_string(zval *ret, char* str TSRMLS_DC) 
http://cvs.php.net/viewvc.cgi/php-src/ext/soap/tests/bugs/bug39815.phpt?r1=1.4&r2=1.5&diff_format=u Index: php-src/ext/soap/tests/bugs/bug39815.phpt diff -u php-src/ext/soap/tests/bugs/bug39815.phpt:1.4 php-src/ext/soap/tests/bugs/bug39815.phpt:1.5 --- php-src/ext/soap/tests/bugs/bug39815.phpt:1.4 Tue Dec 19 21:40:59 2006 +++ php-src/ext/soap/tests/bugs/bug39815.phpt Fri Jun 29 13:58:34 2007 @@ -32,10 +32,10 @@ $x = new LocalSoapClient(NULL,array('location'=>'test://', 'uri'=>'http://testuri.org', "trace"=>1)); -setlocale(LC_ALL,"sv_SE","sv_SE.ISO8859-1"); +@setlocale(LC_ALL,"sv_SE","sv_SE.ISO8859-1"); var_dump($x->test()); echo $x->__getLastResponse(); -setlocale(LC_ALL,"en_US","en_US.ISO8859-1"); +@setlocale(LC_ALL,"en_US","en_US.ISO8859-1"); var_dump($x->test()); echo $x->__getLastResponse(); --EXPECT-- http://cvs.php.net/viewvc.cgi/php-src/ext/dom/tests/dom002u.phpt?view=markup&rev=1.1 Index: php-src/ext/dom/tests/dom002u.phpt +++ php-src/ext/dom/tests/dom002u.phpt --TEST-- Test 2u: getElementsByTagName() / getElementsByTagNameNS() --SKIPIF-- <?php require_once('skipif.inc'); ?> --FILE-- <?php $xml = <<<HERE <?xml version="1.0" encoding="ISO-8859-1" ?> <foo xmlns="http://www.example.com/ns/foo" xmlns:fubar="http://www.example.com/ns/fubar"> <bar><test1 /></bar> <bar><test2 /></bar> <fubar:bar><test3 /></fubar:bar> <fubar:bar><test4 /></fubar:bar> </foo> HERE; function dump($elems) { foreach ($elems as $elem) { var_dump($elem->nodeName); dump($elem->childNodes); } } $dom = new DOMDocument(); $dom->loadXML($xml); $doc = $dom->documentElement; dump($dom->getElementsByTagName('bar')); dump($doc->getElementsByTagName('bar')); dump($dom->getElementsByTagNameNS('http://www.example.com/ns/fubar', 'bar')); dump($doc->getElementsByTagNameNS('http://www.example.com/ns/fubar', 'bar')); ?> --EXPECT-- string(3) "bar" string(5) "test1" string(3) "bar" string(5) "test2" string(9) "fubar:bar" string(5) "test3" string(9) "fubar:bar" string(5) "test4" string(3) "bar" 
string(5) "test1" string(3) "bar" string(5) "test2" string(9) "fubar:bar" string(5) "test3" string(9) "fubar:bar" string(5) "test4" string(9) "fubar:bar" string(5) "test3" string(9) "fubar:bar" string(5) "test4" string(9) "fubar:bar" string(5) "test3" string(9) "fubar:bar" string(5) "test4" --UEXPECT-- unicode(3) "bar" unicode(5) "test1" unicode(3) "bar" unicode(5) "test2" unicode(9) "fubar:bar" unicode(5) "test3" unicode(9) "fubar:bar" unicode(5) "test4" unicode(3) "bar" unicode(5) "test1" unicode(3) "bar" unicode(5) "test2" unicode(9) "fubar:bar" unicode(5) "test3" unicode(9) "fubar:bar" unicode(5) "test4" unicode(9) "fubar:bar" unicode(5) "test3" unicode(9) "fubar:bar" unicode(5) "test4" unicode(9) "fubar:bar" unicode(5) "test3" unicode(9) "fubar:bar" unicode(5) "test4"
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php