Author: bhofmann
Date: Mon Aug 23 09:35:26 2010
New Revision: 988044

URL: http://svn.apache.org/viewvc?rev=988044&view=rev
Log:
PHP BasicRemoteContentFetcher: Only override the default UTF-8 charset when a 
valid, non-empty charset was actually matched. Previously, a stray match in the 
response body could leave the charset set to an empty string.

Modified:
    shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php

Modified: shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php
URL: 
http://svn.apache.org/viewvc/shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php?rev=988044&r1=988043&r2=988044&view=diff
==============================================================================
--- shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php (original)
+++ shindig/trunk/php/src/common/sample/BasicRemoteContentFetcher.php Mon Aug 
23 09:35:26 2010
@@ -103,14 +103,16 @@ class BasicRemoteContentFetcher extends 
     }
     if ($isTextType && function_exists('mb_convert_encoding')) {
       // try to retrieve content type out of
-      if (0 == preg_match("/charset\s*=\s*([^\"' >]*)/ix",$content, $charset) 
&& //http header or html meta tags
-          0 == preg_match("/encoding\s*=\s*[\'\"]([^\"' >]*)/ix",$content, 
$charset)) { //xml declaration
-        $charset = 'UTF-8';
-      } else {
-               $charset = trim($charset[1]);
-               if (($pos = strpos($charset, "\n")) !== false) {
-                 $charset = trim(substr($charset, 0, $pos));
-               }
+      $charset = 'UTF-8';
+      $matchedCharset = array();
+      if (0 != preg_match("/charset\s*=\s*([^\"' >]*)/ix",$content, 
$matchedCharset) || //http header or html meta tags
+          0 != preg_match("/encoding\s*=\s*[\'\"]([^\"' >]*)/ix",$content, 
$matchedCharset)) { //xml declaration
+        if (trim($matchedCharset[1])) {
+                 $charset = trim($matchedCharset[1]);
+                 if (($pos = strpos($charset, "\n")) !== false) {
+                   $charset = trim(substr($charset, 0, $pos));
+                 }
+        }
          }
          // the xml and json parsers get very upset if there are invalid UTF8 
sequences in the string, by recoding it any bad chars will be filtered out
       $content = mb_convert_encoding($content, 'UTF-8', $charset);


Reply via email to