jenkins-bot has submitted this change and it was merged.

Change subject: Improve performance of UpdateMath.php
......................................................................


Improve performance of UpdateMath.php

* Speed up hook
* Skip MathML verification when purge == false

Change-Id: I722d596b3785b273c85d5b5892f83ab6efb05b35
---
M MathSearch.hooks.php
M maintenance/GenerateFeatureTable.php
M maintenance/UpdateMath.php
3 files changed, 91 insertions(+), 73 deletions(-)

Approvals:
  Physikerwelt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/MathSearch.hooks.php b/MathSearch.hooks.php
index 9892e4a..d1196e3 100644
--- a/MathSearch.hooks.php
+++ b/MathSearch.hooks.php
@@ -91,19 +91,7 @@
                        if ( $exists ) {
                                wfDebugLog( "MathSearch", 'Index $' . $tex . '$ 
already in database.' );
                        } else {
-                               wfDebugLog( "MathSearch", 'Store index for $' . 
$tex . '$ in database' );
-                               $dbw = wfGetDB( DB_MASTER );
-                               $dbw->onTransactionIdle(
-                                       function () use ( $oldID, $eid, 
$inputHash, $dbw ) {
-                                               $dbw->replace( 'mathindex',
-                                                       array( 
'mathindex_revision_id', 'mathindex_anchor' ),
-                                                       array(
-                                                               
'mathindex_revision_id' => $oldID,
-                                                               
'mathindex_anchor' =>  $eid ,
-                                                               
'mathindex_inputhash' => $inputHash
-                                                       ) );
-                                       }
-                               );
+                               self::writeMathIndex( $oldID, $eid, $inputHash, 
$tex );
                        }
                } catch ( Exception $e ) {
                        wfDebugLog( "MathSearch", 'Problem writing to math 
index!'
@@ -197,9 +185,6 @@
         * @param null $Result
         * @param int $pid
         * @param int $eid
-        * @internal param $content
-        * @internal param $attributes
-        * @internal param \Parser $parser
         * @return boolean (true)
         */
        static function onMathFormulaRenderedNoLink( $Renderer, &$Result = 
null, $pid = 0, $eid = 0 ) {
@@ -230,4 +215,22 @@
        static function generateMathAnchorString($pageID, $anchorID, $prefix = 
"#"){
                return "{$prefix}math.$pageID.$anchorID";
        }
+
+       /**
+        * @param int    $oldID
+        * @param int    $eid
+        * @param string $inputHash
+        * @param string $tex
+        */
+       public static function writeMathIndex( $oldID, $eid, $inputHash, $tex ) 
{
+               wfDebugLog( "MathSearch", 'Store index for $' . $tex . '$ in 
database' );
+               $dbw = wfGetDB( DB_MASTER );
+               $dbw->onTransactionIdle( function () use ( $oldID, $eid, 
$inputHash, $dbw ) {
+                       $dbw->replace( 'mathindex', array( 
'mathindex_revision_id', 'mathindex_anchor' ), array(
+                                       'mathindex_revision_id' => $oldID,
+                                       'mathindex_anchor' => $eid,
+                                       'mathindex_inputhash' => $inputHash
+                               ) );
+               } );
+       }
 }
diff --git a/maintenance/GenerateFeatureTable.php 
b/maintenance/GenerateFeatureTable.php
index 59c75ad..5a6f471 100644
--- a/maintenance/GenerateFeatureTable.php
+++ b/maintenance/GenerateFeatureTable.php
@@ -21,7 +21,7 @@
 
 require_once( dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' );
 
-class UpdateMath extends Maintenance {
+class GenerateFeatureTable extends Maintenance {
        const RTI_CHUNK_SIZE = 100000;
        public $purge = false;
        public $dbw = null;
@@ -74,13 +74,7 @@
                }
                // $this->output( "Updated {$fcount} formulae!\n" );
        }
-       /**
-        * @param unknown $pId
-        * @param unknown $pText
-        * @param string $pTitle
-        * @param string $purge
-        * @return number
-        */
+
        private function doUpdate( $pid ) {
                // TODO: fix link id problem
                $anchorID = 0;
@@ -107,5 +101,6 @@
        }
 }
 
-$maintClass = "UpdateMath";
+$maintClass = "GenerateFeatureTable";
+/** @noinspection PhpIncludeInspection */
 require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/UpdateMath.php b/maintenance/UpdateMath.php
index e31926c..a0461fd 100644
--- a/maintenance/UpdateMath.php
+++ b/maintenance/UpdateMath.php
@@ -21,28 +21,28 @@
 
 require_once( dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' );
 
+/**
+ * Class UpdateMath
+ */
 class UpdateMath extends Maintenance {
        const RTI_CHUNK_SIZE = 100;
        public $purge = false;
        /** @var boolean */
        private $verbose;
        /** @var DatabaseBase */
-       public $dbw = null;
+       public $dbw;
+       /** @var DatabaseBase */
+       private $db;
        /** @var MathRenderer  */
        private $current;
-       private $time = 0;//microtime( true );
+       private $time = 0.0; // microtime( true );
        private $performance = array();
-       private $renderingMode =  MW_MATH_LATEXML;
+       private $renderingMode = 7; // MW_MATH_LATEXML
 
-       /**
-        * @var DatabaseBase
-        */
-       private $db;
        /**
         *
         */
        public function __construct() {
-               $this->verbose = $this->verbose;
                parent::__construct();
                $this->mDescription = 'Updates the index of Mathematical 
formulae.';
                $this->addOption( 'purge', "If set all formulae are rendered 
again without using caches. (Very time consuming!)", false, false, "f" );
@@ -50,13 +50,21 @@
                $this->addArg( 'max', "If set processing is stopped at the page 
with rank(pageID)<=max", false );
                $this->addOption( 'verbose', "If set output for successful 
rendering will produced",false,false,'v' );
                $this->addOption( 'SVG', "If set SVG images will be produced", 
false, false );
-               $this->addOption( 'hoooks', "If set hooks will be skipped", 
false, false );
+               $this->addOption( 'hoooks', "If set hooks will be skipped, but 
index will be updated.", false, false );
                $this->addOption( 'texvccheck', "If set texvccheck will be 
skipped", false, false );
                $this->addOption( 'mode' , 'Rendering mode to be used (0 = PNG, 
5= MathML, 7=MathML)',false,true,'m');
        }
-       private function time($category='default'){
+
+       /**
+        * Measures time in ms.
+        * In order to have a formula-centric evaluation, we cannot just use the 
built-in profiler.
+        * @param string $category
+        *
+        * @return int
+        */
+       private function time( $category = 'default' ){
                global $wgMathDebug;
-               $delta = (microtime(true) - $this->time)*1000;
+               $delta = ( microtime( true ) - $this->time ) * 1000;
                if (isset ($this->performance[$category] ))
                        $this->performance[$category] += $delta;
                else
@@ -74,28 +82,34 @@
 
                return (int) $delta;
        }
+
        /**
         * Populates the search index with content from all pages
+        *
+        * @param int $n
+        * @param int $cMax
+        *
+        * @throws DBUnexpectedError
         */
-       protected function populateSearchIndex( $n = 0, $cmax = -1 ) {
+       protected function populateSearchIndex( $n = 0, $cMax = -1 ) {
                $res = $this->db->select( 'page', 'MAX(page_id) AS count' );
                $s = $this->db->fetchObject( $res );
                $count = $s->count;
-               if ( $cmax > 0 && $count > $cmax ) {
-                       $count = $cmax;
+               if ( $cMax > 0 && $count > $cMax ) {
+                       $count = $cMax;
                }
                $this->output( "Rebuilding index fields for {$count} pages with 
option {$this->purge}...\n" );
-               $fcount = 0;
-
+               $fCount = 0;
+               //return;
                while ( $n < $count ) {
                        if ( $n ) {
                                $this->output( $n . " of $count \n" );
                        }
-                       $end = $n + self::RTI_CHUNK_SIZE - 1;
+                       $end = min( $n + self::RTI_CHUNK_SIZE - 1, $count );
 
                        $res = $this->db->select( array( 'page', 'revision', 
'text' ),
-                                       array( 'page_id', 'page_namespace', 
'page_title', 'old_flags', 'old_text' ),
-                                       array( "page_id BETWEEN $n AND $end", 
'page_latest = rev_id', 'rev_text_id = old_id' ),
+                                       array( 'page_id', 'page_namespace', 
'page_title', 'old_flags', 'old_text', 'rev_id' ),
+                                       array( "rev_id BETWEEN $n AND $end", 
'page_latest = rev_id', 'rev_text_id = old_id' ),
                                        __METHOD__
                        );
                        $this->dbw->begin();
@@ -103,33 +117,32 @@
                        $i = $n;
                        foreach ( $res as $s ) {
                                echo "\np$i:";
-                               $revtext = Revision::getRevisionText( $s );
-                               $fcount += $this->doUpdate( $s->page_id, 
$revtext, $s->page_title);
+                               $revText = Revision::getRevisionText( $s );
+                               $fCount += $this->doUpdate( $s->page_id, 
$revText, $s->page_title, $s->rev_id );
                                $i++;
                        }
                        // echo "before" +$this->dbw->selectField('mathindex', 
'count(*)')."\n";
                        $start = microtime( true );
                        $this->dbw->commit();
                        echo " committed in " . ( microtime( true ) -$start ) . 
"s\n\n";
-                       var_export($this->performance);
+                       var_dump($this->performance);
                        // echo "after" +$this->dbw->selectField('mathindex', 
'count(*)')."\n";
                        $n += self::RTI_CHUNK_SIZE;
                }
-               $this->output( "Updated {$fcount} formulae!\n" );
+               $this->output( "Updated {$fCount} formulae!\n" );
        }
 
        /**
-        * @param $pid
-        * @param unknown $pText
-        * @param string $pTitle
-        * @internal param unknown $pId
-        * @internal param string $purge
+        * @param int     $pid
+        * @param string  $pText
+        * @param string  $pTitle
+        * @param int     $revId
+        *
         * @return number
         */
-       private function doUpdate( $pid, $pText, $pTitle = "") {
+       private function doUpdate( $pid, $pText, $pTitle = "", $revId = 0) {
                $notused = '';
-               // TODO: fix link id problem
-               $anchorID = 0;
+               $eId = 0;
                $math = MathObject::extractMathTagsFromWikiText( $pText );
                $matches = sizeof( $math );
                if ( $matches ) {
@@ -146,19 +159,23 @@
                                        $this->time("checkTex");
                                }
                                if ( $checked ) {
-                                       $renderer->render( $this->purge );
-                                       if( $renderer->getMathml() ){
-                                               $this->time("Rendering");
-                                       } else {
-                                               $this->time("Failing");
-                                       }
-                                       if ( $this->getOption( "SVG", false ) ) 
{
-                                               $svg = $renderer->getSvg();
-                                               if ( $svg ) {
-                                                       $this->time( 
"SVG-Rendering" );
+                                       if( ! $renderer->isInDatabase() || 
$this->purge ) {
+                                               $renderer->render( $this->purge 
);
+                                               if( $renderer->getMathml() ){
+                                                       $this->time("render");
                                                } else {
-                                                       $this->time( "SVG-Fail" 
);
+                                                       $this->time("Failing");
                                                }
+                                               if ( $this->getOption( "SVG", 
false ) ) {
+                                                       $svg = 
$renderer->getSvg();
+                                                       if ( $svg ) {
+                                                               $this->time( 
"SVG-Rendering" );
+                                                       } else {
+                                                               $this->time( 
"SVG-Fail" );
+                                                       }
+                                               }
+                                       } else {
+                                               $this->time('checkInDB');
                                        }
                                } else {
                                        $this->time("checkTex-Fail");
@@ -166,15 +183,19 @@
                                        continue;
                                }
                                if ( ! $this->getOption( "hooks", false ) ) {
-                                       wfRunHooks( 'MathFormulaRendered', 
array( &$renderer, &$notused, $pid, $anchorID ) );
+                                       wfRunHooks( 'MathFormulaRendered', 
array( &$renderer, &$notused, $pid, $eId ) );
                                        $this->time( "hooks" );
-                                       $anchorID++;
+                                       $eId++;
+                               } else {
+                                       MathSearchHooks::writeMathIndex( 
$revId, $eId, $renderer->getInputHash(), '' );
+                                       $this->time( "index" );
+                                       $eId++;
                                }
                                $renderer->writeCache($this->dbw);
                                $this->time("write Cache");
                                if ( $renderer->getLastError() ) {
                                        echo "\n\t\t". 
$renderer->getLastError() ;
-                                       echo "\nF:\t\t".$renderer->getMd5()." 
equation " . ( $anchorID -1 ) .
+                                       echo "\nF:\t\t".$renderer->getMd5()." 
equation " . ( $eId -1 ) .
                                                "-failed beginning with\n\t\t'" 
. substr( $formula, 0, 100 )
                                                . "'\n\t\tmathml:" . 
substr($renderer->getMathml(),0,10) ."\n ";
                                } else{
@@ -187,9 +208,7 @@
                }
                return 0;
        }
-       /**
-        *
-        */
+
        public function execute() {
                global $wgMathValidModes;
                $this->dbw = wfGetDB( DB_MASTER );
@@ -205,4 +224,5 @@
 }
 
 $maintClass = "UpdateMath";
+/** @noinspection PhpIncludeInspection */
 require_once( RUN_MAINTENANCE_IF_MAIN );

-- 
To view, visit https://gerrit.wikimedia.org/r/185676
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I722d596b3785b273c85d5b5892f83ab6efb05b35
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/extensions/MathSearch
Gerrit-Branch: master
Gerrit-Owner: Physikerwelt <[email protected]>
Gerrit-Reviewer: Physikerwelt <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to