Ori.livneh has uploaded a new change for review.
https://gerrit.wikimedia.org/r/244586
Change subject: Consolidate common Preprocessor caching code
......................................................................
Consolidate common Preprocessor caching code
* Consolidate nearly-identical caching code in Preprocessor_DOM and
Preprocessor_Hash by making Preprocessor an abstract class rather than an
interface and by implementing Preprocessor::cacheSetTree() and
Preprocessor::cacheGetTree().
* Cache trees for wikitext blobs that have length equal or greater to
PreprocessorCacheThreshold. Previously they needed to be greater than
PreprocessorCacheThreshold, so this changes the requirement by one character.
I did it because it seems more natural.
* Modernize the code to use singleton service objects rather than globals.
We spend a lot of time in the Preprocessor, so it would be nice for this code
to be well-factored and clear.
Change-Id: Ib71c29f14a28445a505e12c774a24ad964330b95
---
M includes/parser/Preprocessor.php
M includes/parser/Preprocessor_DOM.php
M includes/parser/Preprocessor_Hash.php
3 files changed, 84 insertions(+), 70 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/86/244586/1
diff --git a/includes/parser/Preprocessor.php b/includes/parser/Preprocessor.php
index b32593c..76ceb31 100644
--- a/includes/parser/Preprocessor.php
+++ b/includes/parser/Preprocessor.php
@@ -21,23 +21,83 @@
* @ingroup Parser
*/
+use MediaWiki\Logger\LoggerFactory;
+
/**
* @ingroup Parser
*/
-interface Preprocessor {
+abstract class Preprocessor {
+
+ const CACHE_VERSION = 1;
+
/**
- * Create a new preprocessor object based on an initialised Parser
object
+ * Store a document tree in the cache.
*
- * @param Parser $parser
+ * @param string $text
+ * @param int $flags
*/
- public function __construct( $parser );
+ protected function cacheSetTree( $text, $flags, $tree ) {
+ $config = RequestContext::getMain()->getConfig();
+
+ $length = strlen( $text );
+ $threshold = $config->get( 'PreprocessorCacheThreshold' );
+ if ( $threshold === false || $length < $threshold || $length >
1e6 ) {
+ return false;
+ }
+
+ $key = wfMemcKey(
+ defined( 'self::CACHE_PREFIX' ) ? self::CACHE_PREFIX :
__CLASS__,
+ md5( $text ), $flags );
+
+ $value = sprintf( "%08d", self::CACHE_VERSION ) . serialize(
$tree );
+
+ $cache = ObjectCache::getInstance( $config->get(
'MainCacheType' ) );
+ $cache->set( $key, $value, 86400 );
+
+ LoggerFactory::getInstance( 'Preprocessor' )
+ ->info( "Saved preprocessor XML to memcached (key
$key)" );
+ }
+
+ /**
+ * Attempt to load a precomputed document tree for some given wikitext
+ * from the cache.
+ *
+ * @param string $text
+ * @param int $flags
+ * @return PPNode_Hash_Tree|bool
+ */
+ protected function cacheGetTree( $text, $flags ) {
+ $config = RequestContext::getMain()->getConfig();
+
+ $length = strlen( $text );
+ $threshold = $config->get( 'PreprocessorCacheThreshold' );
+ if ( $threshold === false || $length < $threshold || $length >
1e6 ) {
+ return false;
+ }
+
+ $cache = ObjectCache::getInstance( $config->get(
'MainCacheType' ) );
+ $value = $cache->get( $key );
+ if ( !$value ) {
+ return false;
+ }
+
+ $version = intval( substr( $value, 0, 8 ) );
+ if ( $version !== self::CACHE_VERSION ) {
+ return false;
+ }
+
+ LoggerFactory::getInstance( 'Preprocessor' )
+ ->info( "Loaded preprocessor hash from memcached (key
$key)" );
+
+ return unserialize( substr( $value, 8 ) );
+ }
/**
* Create a new top-level frame for expansion of a page
*
* @return PPFrame
*/
- public function newFrame();
+ abstract public function newFrame();
/**
* Create a new custom frame for programmatic use of parameter
replacement
@@ -47,7 +107,7 @@
*
* @return PPFrame
*/
- public function newCustomFrame( $args );
+ abstract public function newCustomFrame( $args );
/**
* Create a new custom node for programmatic use of parameter
replacement
@@ -55,7 +115,7 @@
*
* @param array $values
*/
- public function newPartNodeArray( $values );
+ abstract public function newPartNodeArray( $values );
/**
* Preprocess text to a PPNode
@@ -65,7 +125,7 @@
*
* @return PPNode
*/
- public function preprocessToObj( $text, $flags = 0 );
+ abstract public function preprocessToObj( $text, $flags = 0 );
}
/**
diff --git a/includes/parser/Preprocessor_DOM.php
b/includes/parser/Preprocessor_DOM.php
index 4e359a6..7438b05 100644
--- a/includes/parser/Preprocessor_DOM.php
+++ b/includes/parser/Preprocessor_DOM.php
@@ -25,7 +25,7 @@
* @ingroup Parser
* @codingStandardsIgnoreStart
*/
-class Preprocessor_DOM implements Preprocessor {
+class Preprocessor_DOM extends Preprocessor {
// @codingStandardsIgnoreEnd
/**
@@ -35,7 +35,7 @@
public $memoryLimit;
- const CACHE_VERSION = 1;
+ const CACHE_PREFIX = 'preprocess-xml';
public function __construct( $parser ) {
$this->parser = $parser;
@@ -148,30 +148,11 @@
* @return PPNode_DOM
*/
public function preprocessToObj( $text, $flags = 0 ) {
- global $wgMemc, $wgPreprocessorCacheThreshold;
- $xml = false;
- $cacheable = ( $wgPreprocessorCacheThreshold !== false
- && strlen( $text ) > $wgPreprocessorCacheThreshold );
- if ( $cacheable ) {
- $cacheKey = wfMemcKey( 'preprocess-xml', md5( $text ),
$flags );
- $cacheValue = $wgMemc->get( $cacheKey );
- if ( $cacheValue ) {
- $version = substr( $cacheValue, 0, 8 );
- if ( intval( $version ) == self::CACHE_VERSION
) {
- $xml = substr( $cacheValue, 8 );
- // From the cache
- wfDebugLog( "Preprocessor", "Loaded
preprocessor XML from memcached (key $cacheKey)" );
- }
- }
- if ( $xml === false ) {
- $xml = $this->preprocessToXml( $text, $flags );
- $cacheValue = sprintf( "%08d",
self::CACHE_VERSION ) . $xml;
- $wgMemc->set( $cacheKey, $cacheValue, 86400 );
- wfDebugLog( "Preprocessor", "Saved preprocessor
XML to memcached (key $cacheKey)" );
- }
- } else {
+ $xml = $this->cacheGetTree( $text, $flags );
+ if ( $xml === false ) {
$xml = $this->preprocessToXml( $text, $flags );
+ $this->cacheSetTree( $text, $flags, $xml );
}
// Fail if the number of elements exceeds acceptable limits
@@ -179,8 +160,7 @@
$this->parser->mGeneratedPPNodeCount += substr_count( $xml, '<'
);
$max = $this->parser->mOptions->getMaxGeneratedPPNodeCount();
if ( $this->parser->mGeneratedPPNodeCount > $max ) {
- if ( $cacheable ) {
- }
+ // if ( $cacheable ) { ... }
throw new MWException( __METHOD__ . ': generated node
count limit exceeded' );
}
@@ -199,8 +179,7 @@
$obj = new PPNode_DOM( $dom->documentElement );
}
- if ( $cacheable ) {
- }
+ // if ( $cacheable ) { ... }
if ( !$result ) {
throw new MWException( __METHOD__ . ' generated invalid
XML' );
diff --git a/includes/parser/Preprocessor_Hash.php
b/includes/parser/Preprocessor_Hash.php
index f536d82..8528dc4 100644
--- a/includes/parser/Preprocessor_Hash.php
+++ b/includes/parser/Preprocessor_Hash.php
@@ -28,7 +28,7 @@
* @ingroup Parser
* @codingStandardsIgnoreStart
*/
-class Preprocessor_Hash implements Preprocessor {
+class Preprocessor_Hash extends Preprocessor {
// @codingStandardsIgnoreEnd
/**
@@ -36,7 +36,7 @@
*/
public $parser;
- const CACHE_VERSION = 1;
+ const CACHE_PREFIX = 'preprocess-hash';
public function __construct( $parser ) {
$this->parser = $parser;
@@ -88,6 +88,7 @@
return $node;
}
+
/**
* Preprocess some wikitext and return the document tree.
* This is the ghost of Parser::replace_variables().
@@ -112,25 +113,9 @@
* @return PPNode_Hash_Tree
*/
public function preprocessToObj( $text, $flags = 0 ) {
- // Check cache.
- global $wgMemc, $wgPreprocessorCacheThreshold;
-
- $cacheable = $wgPreprocessorCacheThreshold !== false
- && strlen( $text ) > $wgPreprocessorCacheThreshold;
-
- if ( $cacheable ) {
- $cacheKey = wfMemcKey( 'preprocess-hash', md5( $text ),
$flags );
- $cacheValue = $wgMemc->get( $cacheKey );
- if ( $cacheValue ) {
- $version = substr( $cacheValue, 0, 8 );
- if ( intval( $version ) == self::CACHE_VERSION
) {
- $hash = unserialize( substr(
$cacheValue, 8 ) );
- // From the cache
- wfDebugLog( "Preprocessor",
- "Loaded preprocessor hash from
memcached (key $cacheKey)" );
- return $hash;
- }
- }
+ $tree = $this->cacheGetTree( $text, $flags );
+ if ( $tree !== false ) {
+ return $tree;
}
$rules = array(
@@ -629,13 +614,11 @@
$lastNode =
$node;
}
if ( !$node ) {
- if ( $cacheable
) {
- }
+ // if (
$cacheable ) { ... }
throw new
MWException( __METHOD__ . ': eqpos not found' );
}
if ( $node->name !==
'equals' ) {
- if ( $cacheable
) {
- }
+ // if (
$cacheable ) { ... }
throw new
MWException( __METHOD__ . ': eqpos is not equals' );
}
$equalsNode = $node;
@@ -732,15 +715,7 @@
$rootNode->lastChild = $stack->rootAccum->lastNode;
// Cache
- if ( $cacheable ) {
- $cacheValue = sprintf( "%08d", self::CACHE_VERSION ) .
serialize( $rootNode );
-
- // T111289: Cache values should not exceed 1 Mb, but
they do.
- if ( strlen( $cacheValue ) <= 1e6 ) {
- $wgMemc->set( $cacheKey, $cacheValue, 86400 );
- wfDebugLog( "Preprocessor", "Saved preprocessor
Hash to memcached (key $cacheKey)" );
- }
- }
+ $this->cacheSetTree( $text, $flags, $rootNode );
return $rootNode;
}
--
To view, visit https://gerrit.wikimedia.org/r/244586
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib71c29f14a28445a505e12c774a24ad964330b95
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Ori.livneh <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits