pollita Wed Apr 12 22:40:56 2006 UTC
Modified files:
/php-src/ext/standard http_fopen_wrapper.c
/php-src/main/streams filter.c php_stream_filter_api.h streams.c
Log:
Allow http:// wrapper to automatically apply correct unicode.from.* filter
based on content-type header
http://cvs.php.net/viewcvs.cgi/php-src/ext/standard/http_fopen_wrapper.c?r1=1.112&r2=1.113&diff_format=u
Index: php-src/ext/standard/http_fopen_wrapper.c
diff -u php-src/ext/standard/http_fopen_wrapper.c:1.112
php-src/ext/standard/http_fopen_wrapper.c:1.113
--- php-src/ext/standard/http_fopen_wrapper.c:1.112 Sun Mar 26 17:12:26 2006
+++ php-src/ext/standard/http_fopen_wrapper.c Wed Apr 12 22:40:56 2006
@@ -19,7 +19,7 @@
| Sara Golemon <[EMAIL PROTECTED]> |
+----------------------------------------------------------------------+
*/
-/* $Id: http_fopen_wrapper.c,v 1.112 2006/03/26 17:12:26 iliaa Exp $ */
+/* $Id: http_fopen_wrapper.c,v 1.113 2006/04/12 22:40:56 pollita Exp $ */
#include "php.h"
#include "php_globals.h"
@@ -81,6 +81,47 @@
#define HTTP_HEADER_CONTENT_LENGTH 16
#define HTTP_HEADER_TYPE 32
+/* Extract the value of the "charset" parameter from a Content-Type header.
+ *
+ * http_header_line: NUL-terminated header text to scan. Only parameters
+ *                   introduced by a ';' are examined.
+ *
+ * Returns a copy of the charset value made with estrndup() (the caller in
+ * this file releases it with efree()), or NULL when there is no charset
+ * parameter, when a quoted value has no closing quote, or when the value is
+ * empty.
+ *
+ * Note: This is a fairly rudimentary parser which could easily be confused by
+ * invalid data. The worst case scenario from such confusion should only result
+ * in the unicode filter not being applied. While unfortunate, it's more an
+ * issue of the server sending a bad header.
+ *
+ * NOTE(review): the caller in this file passes
+ * http_header_line + sizeof("Content-type: "), and sizeof counts the
+ * terminating NUL, so the scan starts one character into the header value
+ * (or one past the terminator when the value is empty) - confirm whether
+ * sizeof(...) - 1 was intended there.
+ */
+static inline char *php_http_detect_charset(char *http_header_line)
+{
+	char *s;
+
+	for (s = strchr(http_header_line, ';'); s; s = strchr(s + 1, ';')) {
+		char *p = s;
+		char *value;
+
+		/* Step over the ';' and any spaces following it */
+		while (*(++p) == ' ');
+
+		/* Parameter names are case-insensitive: accept Charset=, CHARSET=, ... */
+		if (strncasecmp(p, "charset", sizeof("charset") - 1) != 0) {
+			continue;
+		}
+		p += sizeof("charset") - 1;
+
+		while (*p == ' ') p++;
+		if (*p != '=') {
+			/* A different parameter whose name merely starts with "charset" */
+			continue;
+		}
+
+		/* Step over the '=' and any spaces following it */
+		while (*(++p) == ' ');
+
+		if (*p == '"') {
+			/* Quoted value: runs up to the matching quote */
+			value = p + 1;
+			p = strchr(value, '"');
+			if (!p) {
+				/* Bad things, unmatched quote */
+				return NULL;
+			}
+		} else {
+			/* Unquoted value: runs up to a space, a ';' or the end of the line */
+			value = p;
+			while (*p && *p != ' ' && *p != ';') p++;
+		}
+
+		if (p == value) {
+			/* An empty charset names no encoding; report it as absent */
+			return NULL;
+		}
+		return estrndup(value, p - value);
+	}
+
+	return NULL;
+}
+
php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, char
*path, char *mode, int options, char **opened_path, php_stream_context
*context, int redirect_max, int header_init STREAMS_DC TSRMLS_DC)
{
php_stream *stream = NULL;
@@ -104,6 +145,7 @@
int transport_len, have_header = 0, request_fulluri = 0;
char *protocol_version = NULL;
int protocol_version_len = 3; /* Default: "1.0" */
+ char *charset = NULL;
tmp_line[0] = '\0';
@@ -543,6 +585,11 @@
if (!strncasecmp(http_header_line, "Location: ", 10)) {
strlcpy(location, http_header_line + 10,
sizeof(location));
} else if (!strncasecmp(http_header_line,
"Content-Type: ", 14)) {
+
+ if (UG(unicode) && strchr(mode, 't')) {
+ charset =
php_http_detect_charset(http_header_line + sizeof("Content-type: "));
+ }
+
php_stream_notify_info(context,
PHP_STREAM_NOTIFY_MIME_TYPE_IS, http_header_line + 14, 0);
} else if (!strncasecmp(http_header_line,
"Content-Length: ", 16)) {
file_size = atoi(http_header_line + 16);
@@ -572,6 +619,11 @@
php_stream_close(stream);
stream = NULL;
+ if (charset) {
+ efree(charset);
+ charset = NULL;
+ }
+
if (location[0] != '\0') {
char new_path[HTTP_HEADER_BLOCK_SIZE];
@@ -684,6 +736,13 @@
}
+ if (charset) {
+ if (stream && UG(unicode) && strchr(mode, 't')) {
+ php_stream_encoding_apply(stream, 0, charset,
UG(to_error_mode), NULL);
+ }
+ efree(charset);
+ }
+
return stream;
}
http://cvs.php.net/viewcvs.cgi/php-src/main/streams/filter.c?r1=1.28&r2=1.29&diff_format=u
Index: php-src/main/streams/filter.c
diff -u php-src/main/streams/filter.c:1.28 php-src/main/streams/filter.c:1.29
--- php-src/main/streams/filter.c:1.28 Wed Mar 29 01:20:43 2006
+++ php-src/main/streams/filter.c Wed Apr 12 22:40:56 2006
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: filter.c,v 1.28 2006/03/29 01:20:43 pollita Exp $ */
+/* $Id: filter.c,v 1.29 2006/04/12 22:40:56 pollita Exp $ */
#include "php.h"
#include "php_globals.h"
@@ -550,6 +550,35 @@
return preferred ^ inverted;
}
+/* Work out which data type comes out of a filter chain when data of the
+ * given type (IS_STRING or IS_UNICODE) is fed into its head.
+ *
+ * Each filter is checked against its PSFO_FLAG_ACCEPTS_* flags for the type
+ * reaching it, then the type is updated from its PSFO_FLAG_OUTPUTS_* flags.
+ *
+ * Returns the type left after the last filter, or 0 as soon as a filter does
+ * not accept the type that reaches it.
+ */
+PHPAPI int _php_stream_filter_product(php_stream_filter_chain *chain, int type TSRMLS_DC)
+{
+	php_stream_filter *filter = chain->head;
+
+	while (filter) {
+		/* Input side: the filter must accept the type produced so far */
+		if (type == IS_STRING) {
+			if (!(filter->fops->flags & PSFO_FLAG_ACCEPTS_STRING)) {
+				/* At some point, the type produced conflicts with the type accepted */
+				return 0;
+			}
+		} else if (type == IS_UNICODE) {
+			if (!(filter->fops->flags & PSFO_FLAG_ACCEPTS_UNICODE)) {
+				/* At some point, the type produced conflicts with the type accepted */
+				return 0;
+			}
+		}
+
+		/* Output side: the first matching flag decides the next type */
+		if (filter->fops->flags & PSFO_FLAG_OUTPUTS_OPPOSITE) {
+			type = (type == IS_STRING) ? IS_UNICODE : IS_STRING;
+		} else if (filter->fops->flags & (PSFO_FLAG_OUTPUTS_SAME | PSFO_FLAG_OUTPUTS_ANY)) {
+			/* Type passes through unchanged */
+		} else if (filter->fops->flags & PSFO_FLAG_OUTPUTS_UNICODE) {
+			type = IS_UNICODE;
+		} else {
+			type = IS_STRING;
+		}
+
+		filter = filter->next;
+	}
+
+	return type;
+}
+
PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish
TSRMLS_DC)
{
php_stream_bucket_brigade brig_a = { NULL, NULL }, brig_b = { NULL,
NULL }, *inp = &brig_a, *outp = &brig_b, *brig_temp;
http://cvs.php.net/viewcvs.cgi/php-src/main/streams/php_stream_filter_api.h?r1=1.17&r2=1.18&diff_format=u
Index: php-src/main/streams/php_stream_filter_api.h
diff -u php-src/main/streams/php_stream_filter_api.h:1.17
php-src/main/streams/php_stream_filter_api.h:1.18
--- php-src/main/streams/php_stream_filter_api.h:1.17 Wed Mar 29 01:20:43 2006
+++ php-src/main/streams/php_stream_filter_api.h Wed Apr 12 22:40:56 2006
@@ -19,7 +19,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: php_stream_filter_api.h,v 1.17 2006/03/29 01:20:43 pollita Exp $ */
+/* $Id: php_stream_filter_api.h,v 1.18 2006/04/12 22:40:56 pollita Exp $ */
/* The filter API works on the principle of "Bucket-Brigades". This is
* partially inspired by the Apache 2 method of doing things, although
@@ -153,6 +153,7 @@
PHPAPI void _php_stream_filter_append(php_stream_filter_chain *chain,
php_stream_filter *filter TSRMLS_DC);
PHPAPI int _php_stream_filter_check_chain(php_stream_filter_chain *chain
TSRMLS_DC);
PHPAPI int _php_stream_filter_output_prefer_unicode(php_stream_filter *filter
TSRMLS_DC);
+PHPAPI int _php_stream_filter_product(php_stream_filter_chain *chain, int type
TSRMLS_DC);
PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish
TSRMLS_DC);
PHPAPI php_stream_filter *php_stream_filter_remove(php_stream_filter *filter,
int call_dtor TSRMLS_DC);
PHPAPI void php_stream_filter_free(php_stream_filter *filter TSRMLS_DC);
@@ -166,6 +167,7 @@
#define php_stream_filter_flush(filter, finish)
_php_stream_filter_flush((filter), (finish) TSRMLS_CC)
#define php_stream_filter_check_chain(chain)
_php_stream_filter_check_chain((chain) TSRMLS_CC)
#define php_stream_filter_output_prefer_unicode(filter)
_php_stream_filter_output_prefer_unicode((filter) TSRMLS_CC)
+#define php_stream_filter_product(chain, type)
_php_stream_filter_product((chain), (type) TSRMLS_CC)
#define php_stream_encoding_apply(stream, writechain, encoding, error_mode,
subst) \
_php_stream_encoding_apply((stream), (writechain), (encoding),
(error_mode), (subst) TSRMLS_CC)
http://cvs.php.net/viewcvs.cgi/php-src/main/streams/streams.c?r1=1.119&r2=1.120&diff_format=u
Index: php-src/main/streams/streams.c
diff -u php-src/main/streams/streams.c:1.119
php-src/main/streams/streams.c:1.120
--- php-src/main/streams/streams.c:1.119 Thu Apr 6 19:39:11 2006
+++ php-src/main/streams/streams.c Wed Apr 12 22:40:56 2006
@@ -19,7 +19,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: streams.c,v 1.119 2006/04/06 19:39:11 pollita Exp $ */
+/* $Id: streams.c,v 1.120 2006/04/12 22:40:56 pollita Exp $ */
#define _GNU_SOURCE
#include "php.h"
@@ -2305,13 +2305,17 @@
/* Output encoding on text mode streams defaults to utf8 unless
specified in context parameter */
if (stream && strchr(implicit_mode, 't') && UG(unicode)) {
- if (strchr(implicit_mode, 'w') || strchr(implicit_mode, 'a') ||
strchr(implicit_mode, '+')) {
+ /* Only apply implicit unicode.to. filter if the wrapper didn't
do it for us */
+ if ((php_stream_filter_product(&stream->writefilters,
IS_UNICODE) == IS_STRING) &&
+ (strchr(implicit_mode, 'w') || strchr(implicit_mode,
'a') || strchr(implicit_mode, '+'))) {
char *encoding = (context && context->output_encoding)
? context->output_encoding : "utf8";
/* UTODO: (Maybe?) Allow overriding the default error
handlers on a per-stream basis via context params */
php_stream_encoding_apply(stream, 1, encoding,
UG(from_error_mode), UG(from_subst_char));
}
- if (strchr(implicit_mode, 'r') || strchr(implicit_mode, '+')) {
+
+ /* Only apply implicit unicode.from. filter if the wrapper
didn't do it for us */
+ if ((stream->readbuf_type == IS_STRING) &&
(strchr(implicit_mode, 'r') || strchr(implicit_mode, '+'))) {
char *encoding = (context && context->input_encoding) ?
context->input_encoding : "utf8";
/* UTODO: (Maybe?) Allow overriding the default error
handlers on a per-stream basis via context params */
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php