Author: bhofmann
Date: Mon Aug 23 09:35:26 2010
New Revision: 988044
URL: http://svn.apache.org/viewvc?rev=988044&view=rev
Log:
PHP BasicRemoteContentFetcher: Only set the charset to something other than
UTF-8 if a valid, non-empty charset was matched, rather than an empty string
captured from an incidental match in the response body.
Modified:
shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php
Modified: shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php
URL:
http://svn.apache.org/viewvc/shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php?rev=988044&r1=988043&r2=988044&view=diff
==============================================================================
--- shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php (original)
+++ shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php Mon Aug
23 09:35:26 2010
@@ -103,14 +103,16 @@ class BasicRemoteContentFetcher extends
}
if ($isTextType && function_exists('mb_convert_encoding')) {
// try to retrieve content type out of
- if (0 == preg_match("/charset\s*=\s*([^\"' >]*)/ix",$content, $charset)
&& //http header or html meta tags
- 0 == preg_match("/encoding\s*=\s*[\'\"]([^\"' >]*)/ix",$content,
$charset)) { //xml declaration
- $charset = 'UTF-8';
- } else {
- $charset = trim($charset[1]);
- if (($pos = strpos($charset, "\n")) !== false) {
- $charset = trim(substr($charset, 0, $pos));
- }
+ $charset = 'UTF-8';
+ $matchedCharset = array();
+ if (0 != preg_match("/charset\s*=\s*([^\"' >]*)/ix",$content,
$matchedCharset) || //http header or html meta tags
+ 0 != preg_match("/encoding\s*=\s*[\'\"]([^\"' >]*)/ix",$content,
$matchedCharset)) { //xml declaration
+ if (trim($matchedCharset[1])) {
+ $charset = trim($matchedCharset[1]);
+ if (($pos = strpos($charset, "\n")) !== false) {
+ $charset = trim(substr($charset, 0, $pos));
+ }
+ }
}
// the xml and json parsers get very upset if there are invalid UTF8
sequences in the string, by recoding it any bad chars will be filtered out
$content = mb_convert_encoding($content, 'UTF-8', $charset);