Martineznovo has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/237601

Change subject: Provide fallbacks for use of mb_convert_encoding() in HtmlFormatter
......................................................................

Provide fallbacks for use of mb_convert_encoding() in HtmlFormatter

Since we don't strictly require mbstring in core, provide fallbacks for
the use of mb_convert_encoding() to go to/from 'HTML-ENTITIES' in
HtmlFormatter.

Bug: T62174
Change-Id: I2dcde96e0e68a7d141f2ba79558b20e1d9c799ec
(cherry picked from commit 21ae7bdb3a48be60f1ce75979ccd31111c256af3)
---
M includes/HtmlFormatter.php
1 file changed, 17 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/01/237601/1

diff --git a/includes/HtmlFormatter.php b/includes/HtmlFormatter.php
index b2926d1..221cefb 100644
--- a/includes/HtmlFormatter.php
+++ b/includes/HtmlFormatter.php
@@ -63,7 +63,15 @@
         */
        public function getDoc() {
                if ( !$this->doc ) {
-                       $html = mb_convert_encoding( $this->html, 
'HTML-ENTITIES', 'UTF-8' );
+                       // DOMDocument::loadHTML apparently isn't very good 
with encodings, so
+                       // convert input to ASCII by encoding everything above 
128 as entities.
+                       if ( function_exists( 'mb_convert_encoding' ) ) {
+                               $html = mb_convert_encoding( $this->html, 
'HTML-ENTITIES', 'UTF-8' );
+                       } else {
+                               $html = preg_replace_callback( 
'/[\x{80}-\x{10ffff}]/u', function ( $m ) {
+                                       return '&#' . 
UtfNormal\Utils::utf8ToCodepoint( $m[0] ) . ';';
+                               }, $this->html );
+                       }
 
                        // Workaround for bug that caused spaces before 
references
                        // to disappear during processing:
@@ -244,7 +252,14 @@
                        ) );
                }
                $html = $replacements->replace( $html );
-               $html = mb_convert_encoding( $html, 'UTF-8', 'HTML-ENTITIES' );
+
+               if ( function_exists( 'mb_convert_encoding' ) ) {
+                       // Just in case the conversion in getDoc() above used 
named
+                       // entities that aren't known to html_entity_decode().
+                       $html = mb_convert_encoding( $html, 'UTF-8', 
'HTML-ENTITIES' );
+               } else {
+                       $html = html_entity_decode( $html, ENT_COMPAT, 'utf-8' 
);
+               }
                return $html;
        }
 

-- 
To view, visit https://gerrit.wikimedia.org/r/237601
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I2dcde96e0e68a7d141f2ba79558b20e1d9c799ec
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: REL1_25
Gerrit-Owner: Martineznovo <martinezn...@gmail.com>
Gerrit-Reviewer: Anomie <bjor...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to