https://www.mediawiki.org/wiki/Special:Code/MediaWiki/113937
Revision: 113937
Author: maxsem
Date: 2012-03-15 17:36:36 +0000 (Thu, 15 Mar 2012)
Log Message:
-----------
Avoid a DOM parse when it's not needed
Modified Paths:
--------------
trunk/extensions/MobileFrontend/HtmlFormatter.php
trunk/extensions/MobileFrontend/MobileFormatter.php
Modified: trunk/extensions/MobileFrontend/HtmlFormatter.php
===================================================================
--- trunk/extensions/MobileFrontend/HtmlFormatter.php 2012-03-15 17:35:08 UTC
(rev 113936)
+++ trunk/extensions/MobileFrontend/HtmlFormatter.php 2012-03-15 17:36:36 UTC
(rev 113937)
@@ -7,8 +7,9 @@
/**
* @var DOMDocument
*/
- protected $doc;
+ private $doc;
+ private $html;
private $itemsToRemove = array();
private $elementsToFlatten = array();
private $removeImages = false;
@@ -23,14 +24,7 @@
public function __construct( $html ) {
wfProfileIn( __METHOD__ );
- $html = mb_convert_encoding( $html, 'HTML-ENTITIES', "UTF-8" );
- libxml_use_internal_errors( true );
- $this->doc = new DOMDocument();
- $this->doc->loadHTML( '<?xml encoding="UTF-8">' . $html );
- libxml_use_internal_errors( false );
- $this->doc->preserveWhiteSpace = false;
- $this->doc->strictErrorChecking = false;
- $this->doc->encoding = 'UTF-8';
+ $this->html = $html;
wfProfileOut( __METHOD__ );
}
@@ -57,6 +51,16 @@
* @return DOMDocument: DOM to manipulate
*/
public function getDoc() {
+ if ( !$this->doc ) {
+ $html = mb_convert_encoding( $this->html,
'HTML-ENTITIES', "UTF-8" );
+ libxml_use_internal_errors( true );
+ $this->doc = new DOMDocument();
+ $this->doc->loadHTML( '<?xml encoding="UTF-8">' . $html
);
+ libxml_use_internal_errors( false );
+ $this->doc->preserveWhiteSpace = false;
+ $this->doc->strictErrorChecking = false;
+ $this->doc->encoding = 'UTF-8';
+ }
return $this->doc;
}
@@ -112,6 +116,12 @@
public function filterContent() {
$removals = $this->parseItemsToRemove();
+ if ( !$removals ) {
+ return;
+ }
+
+ $doc = $this->getDoc();
+
// Remove tags
// You can't remove DOMNodes from a DOMNodeList as you're
iterating
@@ -122,7 +132,7 @@
$domElemsToRemove = array();
foreach ( $removals['TAG'] as $tagToRemove ) {
- $tagToRemoveNodes = $this->doc->getElementsByTagName(
$tagToRemove );
+ $tagToRemoveNodes = $doc->getElementsByTagName(
$tagToRemove );
foreach ( $tagToRemoveNodes as $tagToRemoveNode ) {
$tagToRemoveNodeIdAttributeValue = '';
if ( $tagToRemoveNode ) {
@@ -143,14 +153,14 @@
// Elements with named IDs
foreach ( $removals['ID'] as $itemToRemove ) {
- $itemToRemoveNode = $this->doc->getElementById(
$itemToRemove );
+ $itemToRemoveNode = $doc->getElementById( $itemToRemove
);
if ( $itemToRemoveNode ) {
$itemToRemoveNode->parentNode->removeChild(
$itemToRemoveNode );
}
}
// CSS Classes
- $xpath = new DOMXpath( $this->doc );
+ $xpath = new DOMXpath( $doc );
foreach ( $removals['CLASS'] as $classToRemove ) {
$elements = $xpath->query( '//*[@class="' .
$classToRemove . '"]' );
@@ -177,7 +187,7 @@
$redLinks = $xpath->query( '//a[@class="new"]' );
foreach ( $redLinks as $redLink ) {
// PHP Bug #36795 — Inappropriate "unterminated
entity reference"
- $spanNode = $this->doc->createElement( "span",
str_replace( "&", "&amp;", $redLink->nodeValue ) );
+ $spanNode = $doc->createElement( "span",
str_replace( "&", "&amp;", $redLink->nodeValue ) );
if ( $redLink->hasAttributes() ) {
$attributes = $redLink->attributes;
@@ -203,10 +213,14 @@
public function getText( $element = null ) {
wfProfileIn( __METHOD__ );
- if ( $element !== null && !( $element instanceof DOMElement ) )
{
- $element = $this->doc->getElementById( $element );
+ if ( $this->doc ) {
+ if ( $element !== null && !( $element instanceof
DOMElement ) ) {
+ $element = $this->doc->getElementById( $element
);
+ }
+ $html = $this->doc->saveXML( $element,
LIBXML_NOEMPTYTAG );
+ } else {
+ $html = $this->html;
}
- $html = $this->doc->saveXML( $element, LIBXML_NOEMPTYTAG );
if ( !$element ) {
$html = preg_replace(
'/<!--.*?-->|^.*?<body>|<\/body>.*$/s', '', $html );
}
Modified: trunk/extensions/MobileFrontend/MobileFormatter.php
===================================================================
--- trunk/extensions/MobileFrontend/MobileFormatter.php 2012-03-15 17:35:08 UTC
(rev 113936)
+++ trunk/extensions/MobileFrontend/MobileFormatter.php 2012-03-15 17:36:36 UTC
(rev 113937)
@@ -131,10 +131,10 @@
* @return string: Processed HTML
*/
- public function getText( $element = false ) {
+ public function getText( $element = null ) {
wfProfileIn( __METHOD__ );
if ( $this->mainPage ) {
- $element = $this->parseMainPage( $this->doc );
+ $element = $this->parseMainPage( $this->getDoc() );
}
$html = parent::getText( $element );
wfProfileOut( __METHOD__ );
_______________________________________________
MediaWiki-CVS mailing list
mediawiki-cvs@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs