jenkins-bot has submitted this change and it was merged.
Change subject: Improve performance of UpdateMath.php
......................................................................
Improve performance of UpdateMath.php
* Speed up hook
* Skip MathML verification, when purge == false
Change-Id: I722d596b3785b273c85d5b5892f83ab6efb05b35
---
M MathSearch.hooks.php
M maintenance/GenerateFeatureTable.php
M maintenance/UpdateMath.php
3 files changed, 91 insertions(+), 73 deletions(-)
Approvals:
Physikerwelt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/MathSearch.hooks.php b/MathSearch.hooks.php
index 9892e4a..d1196e3 100644
--- a/MathSearch.hooks.php
+++ b/MathSearch.hooks.php
@@ -91,19 +91,7 @@
if ( $exists ) {
wfDebugLog( "MathSearch", 'Index $' . $tex . '$
already in database.' );
} else {
- wfDebugLog( "MathSearch", 'Store index for $' .
$tex . '$ in database' );
- $dbw = wfGetDB( DB_MASTER );
- $dbw->onTransactionIdle(
- function () use ( $oldID, $eid,
$inputHash, $dbw ) {
- $dbw->replace( 'mathindex',
- array(
'mathindex_revision_id', 'mathindex_anchor' ),
- array(
-
'mathindex_revision_id' => $oldID,
-
'mathindex_anchor' => $eid ,
-
'mathindex_inputhash' => $inputHash
- ) );
- }
- );
+ self::writeMathIndex( $oldID, $eid, $inputHash,
$tex );
}
} catch ( Exception $e ) {
wfDebugLog( "MathSearch", 'Problem writing to math
index!'
@@ -197,9 +185,6 @@
* @param null $Result
* @param int $pid
* @param int $eid
- * @internal param $content
- * @internal param $attributes
- * @internal param \Parser $parser
* @return boolean (true)
*/
static function onMathFormulaRenderedNoLink( $Renderer, &$Result =
null, $pid = 0, $eid = 0 ) {
@@ -230,4 +215,22 @@
static function generateMathAnchorString($pageID, $anchorID, $prefix =
"#"){
return "{$prefix}math.$pageID.$anchorID";
}
+
+ /**
+ * @param int $oldID
+ * @param int $eid
+ * @param string $inputHash
+ * @param string $tex
+ */
+ public static function writeMathIndex( $oldID, $eid, $inputHash, $tex )
{
+ wfDebugLog( "MathSearch", 'Store index for $' . $tex . '$ in
database' );
+ $dbw = wfGetDB( DB_MASTER );
+ $dbw->onTransactionIdle( function () use ( $oldID, $eid,
$inputHash, $dbw ) {
+ $dbw->replace( 'mathindex', array(
'mathindex_revision_id', 'mathindex_anchor' ), array(
+ 'mathindex_revision_id' => $oldID,
+ 'mathindex_anchor' => $eid,
+ 'mathindex_inputhash' => $inputHash
+ ) );
+ } );
+ }
}
diff --git a/maintenance/GenerateFeatureTable.php
b/maintenance/GenerateFeatureTable.php
index 59c75ad..5a6f471 100644
--- a/maintenance/GenerateFeatureTable.php
+++ b/maintenance/GenerateFeatureTable.php
@@ -21,7 +21,7 @@
require_once( dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' );
-class UpdateMath extends Maintenance {
+class GenerateFeatureTable extends Maintenance {
const RTI_CHUNK_SIZE = 100000;
public $purge = false;
public $dbw = null;
@@ -74,13 +74,7 @@
}
// $this->output( "Updated {$fcount} formulae!\n" );
}
- /**
- * @param unknown $pId
- * @param unknown $pText
- * @param string $pTitle
- * @param string $purge
- * @return number
- */
+
private function doUpdate( $pid ) {
// TODO: fix link id problem
$anchorID = 0;
@@ -107,5 +101,6 @@
}
}
-$maintClass = "UpdateMath";
+$maintClass = "GenerateFeatureTable";
+/** @noinspection PhpIncludeInspection */
require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/UpdateMath.php b/maintenance/UpdateMath.php
index e31926c..a0461fd 100644
--- a/maintenance/UpdateMath.php
+++ b/maintenance/UpdateMath.php
@@ -21,28 +21,28 @@
require_once( dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' );
+/**
+ * Class UpdateMath
+ */
class UpdateMath extends Maintenance {
const RTI_CHUNK_SIZE = 100;
public $purge = false;
/** @var boolean */
private $verbose;
/** @var DatabaseBase */
- public $dbw = null;
+ public $dbw;
+ /** @var DatabaseBase */
+ private $db;
/** @var MathRenderer */
private $current;
- private $time = 0;//microtime( true );
+ private $time = 0.0; // microtime( true );
private $performance = array();
- private $renderingMode = MW_MATH_LATEXML;
+ private $renderingMode = 7; // MW_MATH_LATEXML
- /**
- * @var DatabaseBase
- */
- private $db;
/**
*
*/
public function __construct() {
- $this->verbose = $this->verbose;
parent::__construct();
$this->mDescription = 'Updates the index of Mathematical
formulae.';
$this->addOption( 'purge', "If set all formulae are rendered
again without using caches. (Very time consuming!)", false, false, "f" );
@@ -50,13 +50,21 @@
$this->addArg( 'max', "If set processing is stopped at the page
with rank(pageID)<=max", false );
$this->addOption( 'verbose', "If set output for successful
rendering will produced",false,false,'v' );
$this->addOption( 'SVG', "If set SVG images will be produced",
false, false );
- $this->addOption( 'hoooks', "If set hooks will be skipped",
false, false );
+ $this->addOption( 'hoooks', "If set hooks will be skipped, but
index will be updated.", false, false );
$this->addOption( 'texvccheck', "If set texvccheck will be
skipped", false, false );
$this->addOption( 'mode' , 'Rendering mode to be used (0 = PNG,
5= MathML, 7=MathML)',false,true,'m');
}
- private function time($category='default'){
+
+ /**
+ * Measures time in ms.
+ * In order to have a formula-centric evaluation, we cannot just use the
built-in profiler
+ * @param string $category
+ *
+ * @return int
+ */
+ private function time( $category = 'default' ){
global $wgMathDebug;
- $delta = (microtime(true) - $this->time)*1000;
+ $delta = ( microtime( true ) - $this->time ) * 1000;
if (isset ($this->performance[$category] ))
$this->performance[$category] += $delta;
else
@@ -74,28 +82,34 @@
return (int) $delta;
}
+
/**
* Populates the search index with content from all pages
+ *
+ * @param int $n
+ * @param int $cMax
+ *
+ * @throws DBUnexpectedError
*/
- protected function populateSearchIndex( $n = 0, $cmax = -1 ) {
+ protected function populateSearchIndex( $n = 0, $cMax = -1 ) {
$res = $this->db->select( 'page', 'MAX(page_id) AS count' );
$s = $this->db->fetchObject( $res );
$count = $s->count;
- if ( $cmax > 0 && $count > $cmax ) {
- $count = $cmax;
+ if ( $cMax > 0 && $count > $cMax ) {
+ $count = $cMax;
}
$this->output( "Rebuilding index fields for {$count} pages with
option {$this->purge}...\n" );
- $fcount = 0;
-
+ $fCount = 0;
+ //return;
while ( $n < $count ) {
if ( $n ) {
$this->output( $n . " of $count \n" );
}
- $end = $n + self::RTI_CHUNK_SIZE - 1;
+ $end = min( $n + self::RTI_CHUNK_SIZE - 1, $count );
$res = $this->db->select( array( 'page', 'revision',
'text' ),
- array( 'page_id', 'page_namespace',
'page_title', 'old_flags', 'old_text' ),
- array( "page_id BETWEEN $n AND $end",
'page_latest = rev_id', 'rev_text_id = old_id' ),
+ array( 'page_id', 'page_namespace',
'page_title', 'old_flags', 'old_text', 'rev_id' ),
+ array( "rev_id BETWEEN $n AND $end",
'page_latest = rev_id', 'rev_text_id = old_id' ),
__METHOD__
);
$this->dbw->begin();
@@ -103,33 +117,32 @@
$i = $n;
foreach ( $res as $s ) {
echo "\np$i:";
- $revtext = Revision::getRevisionText( $s );
- $fcount += $this->doUpdate( $s->page_id,
$revtext, $s->page_title);
+ $revText = Revision::getRevisionText( $s );
+ $fCount += $this->doUpdate( $s->page_id,
$revText, $s->page_title, $s->rev_id );
$i++;
}
// echo "before" +$this->dbw->selectField('mathindex',
'count(*)')."\n";
$start = microtime( true );
$this->dbw->commit();
echo " committed in " . ( microtime( true ) -$start ) .
"s\n\n";
- var_export($this->performance);
+ var_dump($this->performance);
// echo "after" +$this->dbw->selectField('mathindex',
'count(*)')."\n";
$n += self::RTI_CHUNK_SIZE;
}
- $this->output( "Updated {$fcount} formulae!\n" );
+ $this->output( "Updated {$fCount} formulae!\n" );
}
/**
- * @param $pid
- * @param unknown $pText
- * @param string $pTitle
- * @internal param unknown $pId
- * @internal param string $purge
+ * @param int $pid
+ * @param string $pText
+ * @param string $pTitle
+ * @param int $revId
+ *
* @return number
*/
- private function doUpdate( $pid, $pText, $pTitle = "") {
+ private function doUpdate( $pid, $pText, $pTitle = "", $revId = 0) {
$notused = '';
- // TODO: fix link id problem
- $anchorID = 0;
+ $eId = 0;
$math = MathObject::extractMathTagsFromWikiText( $pText );
$matches = sizeof( $math );
if ( $matches ) {
@@ -146,19 +159,23 @@
$this->time("checkTex");
}
if ( $checked ) {
- $renderer->render( $this->purge );
- if( $renderer->getMathml() ){
- $this->time("Rendering");
- } else {
- $this->time("Failing");
- }
- if ( $this->getOption( "SVG", false ) )
{
- $svg = $renderer->getSvg();
- if ( $svg ) {
- $this->time(
"SVG-Rendering" );
+ if( ! $renderer->isInDatabase() ||
$this->purge ) {
+ $renderer->render( $this->purge
);
+ if( $renderer->getMathml() ){
+ $this->time("render");
} else {
- $this->time( "SVG-Fail"
);
+ $this->time("Failing");
}
+ if ( $this->getOption( "SVG",
false ) ) {
+ $svg =
$renderer->getSvg();
+ if ( $svg ) {
+ $this->time(
"SVG-Rendering" );
+ } else {
+ $this->time(
"SVG-Fail" );
+ }
+ }
+ } else {
+ $this->time('checkInDB');
}
} else {
$this->time("checkTex-Fail");
@@ -166,15 +183,19 @@
continue;
}
if ( ! $this->getOption( "hooks", false ) ) {
- wfRunHooks( 'MathFormulaRendered',
array( &$renderer, &$notused, $pid, $anchorID ) );
+ wfRunHooks( 'MathFormulaRendered',
array( &$renderer, &$notused, $pid, $eId ) );
$this->time( "hooks" );
- $anchorID++;
+ $eId++;
+ } else {
+ MathSearchHooks::writeMathIndex(
$revId, $eId, $renderer->getInputHash(), '' );
+ $this->time( "index" );
+ $eId++;
}
$renderer->writeCache($this->dbw);
$this->time("write Cache");
if ( $renderer->getLastError() ) {
echo "\n\t\t".
$renderer->getLastError() ;
- echo "\nF:\t\t".$renderer->getMd5()."
equation " . ( $anchorID -1 ) .
+ echo "\nF:\t\t".$renderer->getMd5()."
equation " . ( $eId -1 ) .
"-failed beginning with\n\t\t'"
. substr( $formula, 0, 100 )
. "'\n\t\tmathml:" .
substr($renderer->getMathml(),0,10) ."\n ";
} else{
@@ -187,9 +208,7 @@
}
return 0;
}
- /**
- *
- */
+
public function execute() {
global $wgMathValidModes;
$this->dbw = wfGetDB( DB_MASTER );
@@ -205,4 +224,5 @@
}
$maintClass = "UpdateMath";
+/** @noinspection PhpIncludeInspection */
require_once( RUN_MAINTENANCE_IF_MAIN );
--
To view, visit https://gerrit.wikimedia.org/r/185676
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I722d596b3785b273c85d5b5892f83ab6efb05b35
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/extensions/MathSearch
Gerrit-Branch: master
Gerrit-Owner: Physikerwelt <[email protected]>
Gerrit-Reviewer: Physikerwelt <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits