Physikerwelt has uploaded a new change for review.
https://gerrit.wikimedia.org/r/186633
Change subject: Reformat maintenance scripts
......................................................................
Reformat maintenance scripts
Change-Id: I39cc729f79b7045e712b4079d8497d065dc5fdad
---
M maintenance/BatchExport.php
M maintenance/BatchImport.php
M maintenance/CalculateDistances.php
M maintenance/CleanMathTable.php
M maintenance/CreateBaseXMathTable.php
M maintenance/CreateDB2MathTable.php
M maintenance/CreateMWSHarvest.php
M maintenance/ExtractFeatures.php
M maintenance/GenerateFeatureTable.php
M maintenance/GenerateWorkload.php
M maintenance/IndexBase.php
M maintenance/MathMLFilter.php
M maintenance/UpdateMath.php
M maintenance/batch.sh
M maintenance/ibm_driver.sh
15 files changed, 452 insertions(+), 266 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/MathSearch
refs/changes/33/186633/1
diff --git a/maintenance/BatchExport.php b/maintenance/BatchExport.php
index 2305a1f..1ef82cb 100644
--- a/maintenance/BatchExport.php
+++ b/maintenance/BatchExport.php
@@ -21,13 +21,17 @@
require_once( __DIR__ . '/../../../maintenance/Maintenance.php' );
+/**
+ * Class BatchExport
+ */
class BatchExport extends Maintenance {
/**
*
*/
public function __construct() {
parent::__construct();
- $this->mDescription = "Exports submissions to a folder. \n
Each run is named after the following convention: \n
\$userName-\$runName-\$runId.csv";
+ $this->mDescription =
+ "Exports submissions to a folder. \n Each run is named
after the following convention: \n \$userName-\$runName-\$runId.csv";
$this->addArg( "dir", "The output directory", true );
}
@@ -36,9 +40,9 @@
*/
public function execute() {
$dir = $this->getArg( 0 );
- if ( ! is_dir($dir) ){
- $this->output("{$dir} is not a directory.\n");
- exit(1);
+ if ( !is_dir( $dir ) ) {
+ $this->output( "{$dir} is not a directory.\n" );
+ exit( 1 );
}
$dbr = wfGetDB( DB_SLAVE );
//runId INT PRIMARY KEY AUTO_INCREMENT NOT NULL,
@@ -50,10 +54,10 @@
foreach ( $res as $row ) {
$user = User::newFromId( $row->userId );
$username = $user->getName();
- $runName = preg_replace( "#/#","_", escapeSingleString(
$row->runName ));
+ $runName = preg_replace( "#/#", "_",
escapeSingleString( $row->runName ) );
$fn = "$dir/$username-$runName-{$row->runId}.csv";
- $this->output("Export to file $fn.\n");
- $fh = fopen( $fn, 'w' );
+ $this->output( "Export to file $fn.\n" );
+ $fh = fopen( $fn, 'w' );
fwrite( $fh, SpecialMathDownloadResult::run2CSV(
$row->runId ) );
fclose( $fh );
}
@@ -61,4 +65,5 @@
}
$maintClass = "BatchExport";
+/** @noinspection PhpIncludeInspection */
require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/BatchImport.php b/maintenance/BatchImport.php
index 63fb79f..7b641a8 100644
--- a/maintenance/BatchImport.php
+++ b/maintenance/BatchImport.php
@@ -21,22 +21,23 @@
require_once( __DIR__ . '/../../../maintenance/Maintenance.php' );
+/**
+ * Class BatchImport
+ */
class BatchImport extends Maintenance {
private $dir;
private $overwrite;
/**
- * @var DatabaseBase
- */
- private $db;
- /**
*
*/
public function __construct() {
parent::__construct();
- $this->mDescription = "Batch imports submissions from a folder.
\n Processes CSV files that follow the naming convention: \n
\$userName-\$runName.csv";
+ $this->mDescription =
+ "Batch imports submissions from a folder. \n Processes
CSV files that follow the naming convention: \n \$userName-\$runName.csv";
$this->addArg( "dir", "The directory to be read", true );
- $this->addOption( "overwrite" , "Overwrite existing runs with
the same name.", false, false, "o" );
+ $this->addOption( "overwrite", "Overwrite existing runs with
the same name.", false, false,
+ "o" );
}
/**
@@ -45,32 +46,34 @@
public function execute() {
$this->dir = $this->getArg( 0 );
$this->overwrite = $this->getOption( 'overwrite' );
- if( $this->overwrite ){
+ if ( $this->overwrite ) {
$this->output( "Loaded with option overwrite enabled
.\n" );
}
- if ( ! is_dir($this->dir) ){
- $this->output("{$this->dir} is not a directory.\n");
- exit(1);
+ if ( !is_dir( $this->dir ) ) {
+ $this->output( "{$this->dir} is not a directory.\n" );
+ exit( 1 );
}
- $files = new GlobIterator($this->dir."/*-*.csv");
+ $files = new GlobIterator( $this->dir . "/*-*.csv" );
foreach ( $files as $file ) {
$fn = $file->getFilename();
- if ( preg_match(
"/(?P<user>.*?)-(?P<runName>.*?)\\.csv/", $fn,$matches) ){
+ if ( preg_match(
"/(?P<user>.*?)-(?P<runName>.*?)\\.csv/", $fn, $matches ) ) {
$user = User::newFromName( $matches['user'] );
- if( $user->getId() > 0 ){
- $this->output("Importing filename $fn
for userId {$user->getId()}.\n");
- $importer = new ImportCsv($user);
- $result = $importer->execute(
fopen($file,'r'), $matches['runName'], $this->overwrite );
- foreach( $importer->getWarnings() as
$warning){
- $this->output("warning:
$warning \n");
+ if ( $user->getId() > 0 ) {
+ $this->output( "Importing filename $fn
for userId {$user->getId()}.\n" );
+ $importer = new ImportCsv( $user );
+ $result =
+ $importer->execute( fopen(
$file, 'r' ), $matches['runName'],
+ $this->overwrite );
+ foreach ( $importer->getWarnings() as
$warning ) {
+ $this->output( "warning:
$warning \n" );
}
- if ( $result !== true ){
- $this->output("$result\n");
+ if ( $result !== true ) {
+ $this->output( "$result\n" );
} else {
$this->output( "File $fn
imported as {$importer->getRunId()} \n" );
}
} else {
- $this->output("User {$matches['user']}
is invalid. Skipping file $fn.\n");
+ $this->output( "User {$matches['user']}
is invalid. Skipping file $fn.\n" );
}
}
}
@@ -78,4 +81,5 @@
}
$maintClass = "BatchImport";
+/** @noinspection PhpIncludeInspection */
require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/CalculateDistances.php
b/maintenance/CalculateDistances.php
index 20aa23d..2477a67 100644
--- a/maintenance/CalculateDistances.php
+++ b/maintenance/CalculateDistances.php
@@ -21,6 +21,9 @@
require_once( dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' );
+/**
+ * Class CalculateDistances
+ */
class CalculateDistances extends Maintenance {
const RTI_CHUNK_SIZE = 100;
/**@var DatabaseBase $dbw */
@@ -38,7 +41,8 @@
public function __construct() {
parent::__construct();
$this->mDescription = 'Outputs page text to stdout';
- $this->addOption( 'page9', 'Ignore pages with only 9 equations
or less.', false, false, "9" );
+ $this->addOption( 'page9', 'Ignore pages with only 9 equations
or less.', false, false,
+ "9" );
$this->addArg( 'min', "If set processing is started at the page
with curid>min", false );
$this->addArg( 'max', "If set processing is stopped at the page
with curid<=max", false );
}
@@ -57,10 +61,14 @@
$conds .= " AND pagestat_pageid <= $max";
}
if ( $this->getOption( 'page9', false ) ) {
- $res = $this->db->select( array( 'mathpage9' ,
'mathpagestat'), array( 'page_id' ,'pagestat_pageid') ,
- $conds . ' AND pagestat_pageid = page_id',
__METHOD__, array( 'DISTINCT' ) );
+ $res =
+ $this->db->select( array( 'mathpage9',
'mathpagestat' ),
+ array( 'page_id', 'pagestat_pageid' ),
+ $conds . ' AND pagestat_pageid =
page_id', __METHOD__, array( 'DISTINCT' ) );
} else {
- $res = $this->db->select( 'mathpagestat',
'pagestat_pageid', $conds, __METHOD__, array( 'DISTINCT' ) );
+ $res =
+ $this->db->select( 'mathpagestat',
'pagestat_pageid', $conds, __METHOD__,
+ array( 'DISTINCT' ) );
}
foreach ( $res as $row ) {
array_push( $this->pagelist, $row->pagestat_pageid );
@@ -72,22 +80,23 @@
/**
* Populates the search index with content from all pages
*/
- protected function populateSearchIndex( ) {
+ protected function populateSearchIndex() {
$n = 0;
- $count = sizeof($this->pagelist);
+ $count = sizeof( $this->pagelist );
$this->output( "Rebuilding index fields for $count pages...\n"
);
while ( $n < $count ) {
if ( $n ) {
$this->output( $n . " of $count \n" );
}
- $this->dbw->begin();
- for($j=0;$j<self::RTI_CHUNK_SIZE;$j++){
+ $this->dbw->begin();
+ for ( $j = 0; $j < self::RTI_CHUNK_SIZE; $j ++ ) {
//TODO: USE PREPARED STATEMENTS
$pid = $this->pagelist[$n];
- $sql = "INSERT IGNORE INTO
mathpagesimilarity(pagesimilarity_A,pagesimilarity_B,pagesimilarity_Value)\n"
- . "SELECT DISTINCT
$pid,`pagestat_pageid`,\n"
- . "CosProd( $pid,`pagestat_pageid`)
FROM `mathpagestat` m ";
- if ( $this->getOption( 'page9', false ) ){
+ $sql =
+ "INSERT IGNORE INTO
mathpagesimilarity(pagesimilarity_A,pagesimilarity_B,pagesimilarity_Value)\n" .
+ "SELECT DISTINCT
$pid,`pagestat_pageid`,\n" .
+ "CosProd( $pid,`pagestat_pageid`) FROM
`mathpagestat` m ";
+ if ( $this->getOption( 'page9', false ) ) {
$sql .= " JOIN (SELECT page_id from
mathpage9) as r WHERE m.pagestat_pageid=r.page_id AND ";
} else {
$sql .= " WHERE ";
@@ -97,7 +106,7 @@
$start = microtime( true );
$this->dbw->query( $sql );
echo 'done in ' . ( microtime( true ) - $start
) . "\n";
- $n++;
+ $n ++;
}
$start = microtime( true );
$this->dbw->commit();
@@ -107,4 +116,5 @@
}
$maintClass = "CalculateDistances";
+/** @noinspection PhpIncludeInspection */
require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/CleanMathTable.php b/maintenance/CleanMathTable.php
index b80f200..10e28a7 100644
--- a/maintenance/CleanMathTable.php
+++ b/maintenance/CleanMathTable.php
@@ -21,6 +21,9 @@
require_once( dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' );
+/**
+ * Class CleanMathTable
+ */
class CleanMathTable extends Maintenance {
const RTI_CHUNK_SIZE = 10;
public $purge = false;
@@ -29,14 +32,18 @@
* @var DatabaseBase
*/
private $db;
+
/**
*
*/
public function __construct() {
parent::__construct();
$this->mDescription = 'Outputs page text to stdout';
- $this->addOption( 'purge', "If set all formulae are rendered
again from strech. (Very time consuming!)", false, false, "f" );
+ $this->addOption( 'purge',
+ "If set all formulae are rendered again from scratch.
(Very time consuming!)", false,
+ false, "f" );
}
+
/**
* The idea is basically to select the math elements that do not have a
corresponding mathindex entry.
* Basically that means:
@@ -52,4 +59,5 @@
}
$maintClass = "CleanMathTable";
+/** @noinspection PhpIncludeInspection */
require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/CreateBaseXMathTable.php
b/maintenance/CreateBaseXMathTable.php
index 6e92ca0..ca13235 100644
--- a/maintenance/CreateBaseXMathTable.php
+++ b/maintenance/CreateBaseXMathTable.php
@@ -31,7 +31,7 @@
private static $mwsns = "mws:";
private static $XMLHead;
private static $XMLFooter;
- /** @var \BaseXSession */
+ /** @var \BaseXSession */
private $session;
/**
@@ -39,28 +39,31 @@
*/
public function __construct() {
parent::__construct();
- $this->mDescription = 'Generates harvest files for the
MathWebSearch Deamon.';
+ $this->mDescription = 'Generates harvest files for the
MathWebSearch Daemon.';
$this->addOption( 'mwsns', 'The namespace or mws normally
"mws:"', false );
$this->addOption( 'truncate', 'If set the database will be
recreated.' );
}
/**
- * @param unknown $row
+ * @param stdClass $row
+ *
* @return string
*/
protected function generateIndexString( $row ) {
$out = "";
- $xml = simplexml_load_string( utf8_decode($row->math_mathml) );
+ $xml = simplexml_load_string( utf8_decode( $row->math_mathml )
);
if ( !$xml ) {
echo "ERROR while converting:\n " . var_export(
$row->math_mathml, true ) . "\n";
- foreach ( libxml_get_errors() as $error )
+ foreach ( libxml_get_errors() as $error ) {
echo "\t", $error->message;
+ }
libxml_clear_errors();
return "";
}
$out .= "\n<" . self::$mwsns . "expr url=\"" .
- MathSearchHooks::generateMathAnchorString(
$row->mathindex_revision_id, $row->mathindex_anchor, '' ) . "\">\n\t";
- $out .= utf8_decode( $row->math_mathml );//
$xml->math->children()->asXML();
+ MathSearchHooks::generateMathAnchorString(
$row->mathindex_revision_id,
+ $row->mathindex_anchor, '' ) .
"\">\n\t";
+ $out .= utf8_decode( $row->math_mathml );//
$xml->math->children()->asXML();
$out .= "\n</" . self::$mwsns . "expr>\n";
// TODO: This does not work yet.
// Find out how to insert new data without to write it into a
temporary file
@@ -68,43 +71,51 @@
return $out;
}
- protected function getHead(){
+ protected function getHead() {
return self::$XMLHead;
}
- protected function getFooter(){
+
+ protected function getFooter() {
return self::$XMLFooter;
}
+
/**
* @param string $fn
- * @param int $min
- * @param int $inc
+ * @param int $min
+ * @param int $inc
+ *
* @return boolean
*/
protected function wFile( $fn, $min, $inc ) {
- $retval = parent::wFile($fn,$min,$inc);
- $this->session->execute("add $fn");
+ $retval = parent::wFile( $fn, $min, $inc );
+ $this->session->execute( "add $fn" );
return $retval;
}
+
/**
*
*/
public function execute() {
global $wgMathSearchBaseXDatabaseName;
self::$mwsns = $this->getOption( 'mwsns', '' );
- self::$XMLHead = "<?xml version=\"1.0\"?>\n<" . self::$mwsns .
"harvest xmlns:mws=\"http://search.mathweb.org/ns\"
xmlns:m=\"http://www.w3.org/1998/Math/MathML\">";
+ self::$XMLHead =
+ "<?xml version=\"1.0\"?>\n<" . self::$mwsns .
+ "harvest xmlns:mws=\"http://search.mathweb.org/ns\"
xmlns:m=\"http://www.w3.org/1998/Math/MathML\">";
self::$XMLFooter = "</" . self::$mwsns . "harvest>";
$this->session = new BaseXSession();
- if( $this->getOption('truncate',false) ){
- $this->session->execute("open
".$wgMathSearchBaseXDatabaseName);
+ if ( $this->getOption( 'truncate', false ) ) {
+ $this->session->execute( "open " .
$wgMathSearchBaseXDatabaseName );
} else {
- $this->session->execute("create db
".$wgMathSearchBaseXDatabaseName);
+ $this->session->execute( "create db " .
$wgMathSearchBaseXDatabaseName );
}
parent::execute();
}
- public function __destruct(){
+
+ public function __destruct() {
$this->session->close();
}
}
$maintClass = "CreateBaseXMathTable";
+/** @noinspection PhpIncludeInspection */
require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/CreateDB2MathTable.php
b/maintenance/CreateDB2MathTable.php
index 3b3f008..11bd92f 100644
--- a/maintenance/CreateDB2MathTable.php
+++ b/maintenance/CreateDB2MathTable.php
@@ -30,6 +30,9 @@
private $conn;
private $time;
+ /**
+ *
+ */
public function __construct() {
parent::__construct();
$this->mDescription = 'Exports a db2 compatible math index
table.';
@@ -37,44 +40,62 @@
}
/**
- * @param unknown $row
+ * @param stdClass $row
+ *
* @return string
*/
protected function generateIndexString( $row ) {
- $mo = MathObject::constructformpagerow($row);
- $out = '"'. $mo->getMd5().'"';
- $out .= ',"'. $mo->getTex().'"';
- $out .= ','. $row->mathindex_revision_id .'';
- $out .= ','. $row->mathindex_anchor.'';
- $out .= ',"'.str_replace(array('"',"\n"),array('"',' '),
$mo->getMathml()).'"';
- $res = db2_execute($this->statment,
array($mo->getMd5(),$mo->getTex(),$row->mathindex_revision_id,$row->mathindex_anchor,$mo->getMathml()));
- if ( ! $res ){
+ $mo = MathObject::constructformpagerow( $row );
+ $out = '"' . $mo->getMd5() . '"';
+ $out .= ',"' . $mo->getTex() . '"';
+ $out .= ',' . $row->mathindex_revision_id . '';
+ $out .= ',' . $row->mathindex_anchor . '';
+ $out .= ',"' . str_replace( array( '"', "\n" ), array( '"', ' '
), $mo->getMathml() ) . '"';
+ $res =
+ db2_execute( $this->statment, array(
+ $mo->getMd5(),
+ $mo->getTex(),
+ $row->mathindex_revision_id,
+ $row->mathindex_anchor,
+ $mo->getMathml()
+ ) );
+ if ( !$res ) {
echo db2_stmt_errormsg();
}
- return $out."\n";
+ return $out . "\n";
}
+ /**
+ * @param string $fn
+ * @param int $min
+ * @param int $inc
+ *
+ * @return bool
+ */
protected function wFile( $fn, $min, $inc ) {
- $res = db2_commit($this->conn);
- if ( $res ){
- echo db2_stmt_errormsg();
+ $res = db2_commit( $this->conn );
+ if ( $res ) {
+ echo db2_stmt_errormsg();
+ }
+ $delta = microtime( true ) - $this->time;
+ $this->time = microtime( true );
+ echo 'took ' . number_format( $delta, 1 ) . "s \n";
+ return parent::wFile( $fn, $min, $inc );
}
- $delta = microtime(true) - $this->time ;
- $this->time = microtime(true);
- echo 'took '. number_format($delta ,1) ."s \n";
- return parent::wFile( $fn, $min, $inc );
-}
public function execute() {
global $wgMathSearchDB2ConnStr;
- $this->time = microtime(true);
- $this->conn = db2_connect($wgMathSearchDB2ConnStr, '', '');
- if ( $this->conn ){
- if ( $this->getOption('truncate' , false ) ){
- db2_exec( $this->conn , 'DROP TABLE "math"');
- db2_exec( $this->conn , 'CREATE TABLE "math"
("math_md5" CHAR(32), "math_tex" VARCHAR(1000), "mathindex_revision_id"
INTEGER, "mathindex_anchord" INTEGER, "math_mathml" XML)');
+ $this->time = microtime( true );
+ $this->conn = db2_connect( $wgMathSearchDB2ConnStr, '', '' );
+ if ( $this->conn ) {
+ if ( $this->getOption( 'truncate', false ) ) {
+ db2_exec( $this->conn, 'DROP TABLE "math"' );
+ db2_exec( $this->conn,
+ 'CREATE TABLE "math" ("math_md5"
CHAR(32), "math_tex" VARCHAR(1000), "mathindex_revision_id" INTEGER,
"mathindex_anchord" INTEGER, "math_mathml" XML)' );
}
- $this->statment = db2_prepare( $this->conn ,'insert
into "math" ("math_md5", "math_tex", "mathindex_revision_id",
"mathindex_anchord", "math_mathml") values(?, ?, ?, ?, ?)');
+ $this->statment =
+ db2_prepare( $this->conn,
+ 'INSERT INTO "math" ("math_md5",
"math_tex", "mathindex_revision_id", "mathindex_anchord", "math_mathml")
VALUES(?, ?, ?, ?, ?)' );
//db2_autocommit($this->conn , DB2_AUTOCOMMIT_OFF);
}
parent::execute();
@@ -82,4 +103,5 @@
}
$maintClass = "CreateDB2MathTable";
+/** @noinspection PhpIncludeInspection */
require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/CreateMWSHarvest.php b/maintenance/CreateMWSHarvest.php
index 38e5ac1..ed9d262 100644
--- a/maintenance/CreateMWSHarvest.php
+++ b/maintenance/CreateMWSHarvest.php
@@ -37,32 +37,34 @@
*/
public function __construct() {
parent::__construct();
- $this->mDescription = 'Generates harvest files for the
MathWebSearch Deamon.';
+ $this->mDescription = 'Generates harvest files for the
MathWebSearch Daemon.';
$this->addOption( 'mwsns', 'The namespace or mws normally
"mws:"', false );
}
/**
- * @param unknown $row
+ * @param stdClass $row
+ *
* @return string
*/
protected function generateIndexString( $row ) {
$out = "";
- $xml = simplexml_load_string( utf8_decode($row->math_mathml) );
+ $xml = simplexml_load_string( utf8_decode( $row->math_mathml )
);
if ( !$xml ) {
echo "ERROR while converting:\n " . var_export(
$row->math_mathml, true ) . "\n";
- foreach ( libxml_get_errors() as $error )
+ foreach ( libxml_get_errors() as $error ) {
echo "\t", $error->message;
+ }
libxml_clear_errors();
return "";
}
// if ( $xml->math ) {
- // $smath = $xml->math->semantics-> { 'annotation-xml'
} ->children()->asXML();
+ // $smath = $xml->math->semantics-> { 'annotation-xml' }
->children()->asXML();
$out .= "\n<" . self::$mwsns . "expr url=\"" .
- MathSearchHooks::generateMathAnchorString(
$row->mathindex_revision_id, $row->mathindex_anchor, '' ) .
- "\">\n\t";
- $out .= utf8_decode( $row->math_mathml );//
$xml->math->children()->asXML();
- $out .= "\n</" . self::$mwsns . "expr>\n";
- return $out;
+ MathSearchHooks::generateMathAnchorString(
$row->mathindex_revision_id,
+ $row->mathindex_anchor, '' ) .
"\">\n\t";
+ $out .= utf8_decode( $row->math_mathml );//
$xml->math->children()->asXML();
+ $out .= "\n</" . self::$mwsns . "expr>\n";
+ return $out;
/*} else {
var_dump($xml);
die("nomath");
@@ -70,22 +72,27 @@
}
- protected function getHead(){
+ protected function getHead() {
return self::$XMLHead;
}
- protected function getFooter(){
+
+ protected function getFooter() {
return self::$XMLFooter;
}
+
/**
*
*/
public function execute() {
self::$mwsns = $this->getOption( 'mwsns', '' );
- self::$XMLHead = "<?xml version=\"1.0\"?>\n<" . self::$mwsns .
"harvest xmlns:mws=\"http://search.mathweb.org/ns\"
xmlns:m=\"http://www.w3.org/1998/Math/MathML\">";
+ self::$XMLHead =
+ "<?xml version=\"1.0\"?>\n<" . self::$mwsns .
+ "harvest xmlns:mws=\"http://search.mathweb.org/ns\"
xmlns:m=\"http://www.w3.org/1998/Math/MathML\">";
self::$XMLFooter = "</" . self::$mwsns . "harvest>";
parent::execute();
}
}
$maintClass = "CreateMWSHarvest";
+/** @noinspection PhpIncludeInspection */
require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/ExtractFeatures.php b/maintenance/ExtractFeatures.php
index 838d775..17aca4c 100644
--- a/maintenance/ExtractFeatures.php
+++ b/maintenance/ExtractFeatures.php
@@ -21,29 +21,42 @@
require_once( dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' );
+/**
+ * Class ExtractFeatures
+ */
class ExtractFeatures extends Maintenance {
const RTI_CHUNK_SIZE = 100;
public $purge = false;
+ /** @type DatabaseMysql */
public $dbw = null;
/**
* @var DatabaseBase
*/
private $db;
+
/**
*
*/
public function __construct() {
parent::__construct();
$this->mDescription = 'Outputs page text to stdout';
- $this->addOption( 'purge', "If set all formulae are rendered
again from strech. (Very time consuming!)", false, false, "f" );
- $this->addArg( 'min', "If set processing is started at the page
with rank(pageID)>min", false );
- $this->addArg( 'max', "If set processing is stopped at the page
with rank(pageID)<=max", false );
+ $this->addOption( 'purge',
+ "If set all formulae are rendered again from scratch.
(Very time consuming!)", false,
+ false, "f" );
+ $this->addArg( 'min', "If set processing is started at the page
with rank(pageID)>min",
+ false );
+ $this->addArg( 'max', "If set processing is stopped at the page
with rank(pageID)<=max",
+ false );
}
+
/**
* Populates the search index with content from all pages
+ *
+ * @param int $n
+ * @param int $cmax
*/
- protected function populateSearchIndex( $n = 0, $cmax = -1 ) {
+ protected function populateSearchIndex( $n = 0, $cmax = - 1 ) {
$res = $this->db->select( 'page', 'MAX(page_id) AS count' );
$s = $this->db->fetchObject( $res );
$count = $s->count;
@@ -59,21 +72,25 @@
}
$end = $n + self::RTI_CHUNK_SIZE - 1;
- $res = $this->db->select( array( 'page', 'revision',
'text' ),
+ $res =
+ $this->db->select( array( 'page', 'revision',
'text' ),
array( 'page_id', 'page_namespace',
'page_title', 'old_flags', 'old_text' ),
- array( "page_id BETWEEN $n AND $end",
'page_latest = rev_id', 'rev_text_id = old_id' ),
- __METHOD__
- );
+ array(
+ "page_id BETWEEN $n AND $end",
+ 'page_latest = rev_id',
+ 'rev_text_id = old_id'
+ ), __METHOD__ );
$this->dbw->begin();
// echo "before" +$this->dbw->selectField('mathindex',
'count(*)')."\n";
foreach ( $res as $s ) {
$revtext = Revision::getRevisionText( $s );
- $fcount += self::doUpdate( $s->page_id,
$revtext, $s->page_title, $this->purge, $this->dbw );
+ $fcount += self::doUpdate( $s->page_id,
$revtext, $s->page_title, $this->purge,
+ $this->dbw );
}
// echo "before" +$this->dbw->selectField('mathindex',
'count(*)')."\n";
$start = microtime( true );
$this->dbw->commit();
- echo " committed in " . ( microtime( true ) -$start ) .
"s\n\n";
+ echo " committed in " . ( microtime( true ) - $start )
. "s\n\n";
// echo "after" +$this->dbw->selectField('mathindex',
'count(*)')."\n";
$n += self::RTI_CHUNK_SIZE;
}
@@ -81,35 +98,40 @@
$sql = "TRUNCATE TABLE `mathvarstat`";
$this->dbw->query( $sql );
$this->output( "Generate mathvarstat\n" );
- $sql = "INSERT INTO `mathvarstat` (`varstat_featurename` ,
`varstat_featuretype`, `varstat_featurecount`)\n"
- . "SELECT `mathobservation_featurename` ,
`mathobservation_featuretype` , count( * ) AS CNT\n"
- . "FROM `mathobservation`\n"
- . "JOIN mathindex ON
`mathobservation_inputhash` = mathindex_inputhash\n"
- . "GROUP BY
`mathobservation_featurename` , `mathobservation_featuretype`\n"
-
. "ORDER BY CNT DESC";
+ $sql =
+ "INSERT INTO `mathvarstat` (`varstat_featurename` ,
`varstat_featuretype`, `varstat_featurecount`)\n" .
+ "SELECT `mathobservation_featurename` ,
`mathobservation_featuretype` , count( * ) AS CNT\n" .
+ "FROM `mathobservation`\n" .
+ "JOIN mathindex ON `mathobservation_inputhash` =
mathindex_inputhash\n" .
+ "GROUP BY `mathobservation_featurename` ,
`mathobservation_featuretype`\n" .
+ "ORDER BY CNT DESC";
$this->dbw->query( $sql );
$this->output( "Clear mathpagestat\n" );
$sql = "TRUNCATE TABLE `mathpagestat`";
$this->dbw->query( $sql );
$this->output( "Generate mathpagestat\n" );
- $sql = "INSERT INTO
`mathpagestat`(`pagestat_featureid`,`pagestat_pageid`,`pagestat_featurecount`)\n"
- . "SELECT varstat_id, mathindex_revision_id,
count(*) as CNT FROM `mathobservation` JOIN mathindex on
`mathobservation_inputhash` =mathindex_inputhash\n"
- . "JOIN mathvarstat on varstat_featurename =
`mathobservation_featurename` and varstat_featuretype =
`mathobservation_featuretype`\n"
- . " GROUP by
`mathobservation_featurename`,
`mathobservation_featuretype`,mathindex_revision_id ORDER BY CNT DESC";
+ $sql =
+ "INSERT INTO
`mathpagestat`(`pagestat_featureid`,`pagestat_pageid`,`pagestat_featurecount`)\n"
.
+ "SELECT varstat_id, mathindex_revision_id, count(*) AS
CNT FROM `mathobservation` JOIN mathindex ON `mathobservation_inputhash`
=mathindex_inputhash\n" .
+ "JOIN mathvarstat ON varstat_featurename =
`mathobservation_featurename` AND varstat_featuretype =
`mathobservation_featuretype`\n" .
+ " GROUP BY `mathobservation_featurename`,
`mathobservation_featuretype`,mathindex_revision_id ORDER BY CNT DESC";
$this->dbw->query( $sql );
$this->output( "Updated {$fcount} formulae!\n" );
}
+
/**
- * @param unknown $pId
- * @param unknown $pText
- * @param string $pTitle
- * @param string $purge
+ * @param $pid
+ * @param string $pText
+ * @param string $pTitle
+ * @param bool|string $purge
+ * @param DatabaseBase $dbw
+ *
* @return number
+ * @internal param unknown $pId
*/
- private static function doUpdate( $pid, $pText, $pTitle = "", $purge =
false , $dbw ) {
+ private static function doUpdate( $pid, $pText, $pTitle = "", $purge =
false, $dbw ) {
// TODO: fix link id problem
$anchorID = 0;
- $res = "";
$math = MathObject::extractMathTagsFromWikiText( $pText );
$matches = sizeof( $math );
if ( $matches ) {
@@ -118,12 +140,13 @@
$mo = new MathObject( $formula[1] );
$mo->updateObservations( $dbw );
// Enable indexing of math formula
- $anchorID++;
+ $anchorID ++;
}
return $matches;
}
return 0;
}
+
/**
*
*/
@@ -132,9 +155,10 @@
$this->purge = $this->getOption( "purge", false );
$this->db = wfGetDB( DB_MASTER );
$this->output( "Done.\n" );
- $this->populateSearchIndex( $this->getArg( 0, 0 ),
$this->getArg( 1, -1 ) );
+ $this->populateSearchIndex( $this->getArg( 0, 0 ),
$this->getArg( 1, - 1 ) );
}
}
$maintClass = "ExtractFeatures";
+/** @noinspection PhpIncludeInspection */
require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/GenerateFeatureTable.php
b/maintenance/GenerateFeatureTable.php
index 59c75ad..ff72f37 100644
--- a/maintenance/GenerateFeatureTable.php
+++ b/maintenance/GenerateFeatureTable.php
@@ -21,29 +21,42 @@
require_once( dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' );
-class UpdateMath extends Maintenance {
+/**
+ * Class GenerateFeatureTable
+ */
+class GenerateFeatureTable extends Maintenance {
const RTI_CHUNK_SIZE = 100000;
public $purge = false;
+ /** @type DatabaseMysql */
public $dbw = null;
/**
* @var DatabaseBase
*/
private $db;
+
/**
*
*/
public function __construct() {
parent::__construct();
$this->mDescription = 'Outputs page text to stdout';
- $this->addOption( 'purge', "If set all formulae are rendered
again from strech. (Very time consuming!)", false, false, "f" );
- $this->addArg( 'min', "If set processing is started at the page
with rank(pageID)>min", false );
- $this->addArg( 'max', "If set processing is stopped at the page
with rank(pageID)<=max", false );
+ $this->addOption( 'purge',
+ "If set all formulae are rendered again from strech.
(Very time consuming!)", false,
+ false, "f" );
+ $this->addArg( 'min', "If set processing is started at the page
with rank(pageID)>min",
+ false );
+ $this->addArg( 'max', "If set processing is stopped at the page
with rank(pageID)<=max",
+ false );
}
+
/**
* Populates the search index with content from all pages
+ *
+ * @param int $n
+ * @param int $cmax
*/
- protected function populateSearchIndex( $n = 0, $cmax = -1 ) {
+ protected function populateSearchIndex( $n = 0, $cmax = - 1 ) {
$res = $this->db->select( 'page', 'MAX(page_id) AS count' );
$s = $this->db->fetchObject( $res );
$count = $s->count;
@@ -59,11 +72,12 @@
}
$end = $n + self::RTI_CHUNK_SIZE - 1;
- $res = $this->db->select( array( 'page', 'revision',
'text' ),
- array( 'page_id' ),
- array( "page_id BETWEEN $n AND $end",
'page_latest = rev_id', 'rev_text_id = old_id' ),
- __METHOD__
- );
+ $res =
+ $this->db->select( array( 'page', 'revision',
'text' ), array( 'page_id' ), array(
+ "page_id BETWEEN $n AND $end",
+ 'page_latest = rev_id',
+ 'rev_text_id = old_id'
+ ), __METHOD__ );
$this->dbw->begin();
// echo "before" +$this->dbw->selectField('mathindex',
'count(*)')."\n";
foreach ( $res as $s ) {
@@ -74,28 +88,40 @@
}
// $this->output( "Updated {$fcount} formulae!\n" );
}
+
/**
- * @param unknown $pId
- * @param unknown $pText
- * @param string $pTitle
- * @param string $purge
+ * @param $pid
+ *
* @return number
+ * @internal param unknown $pId
+ * @internal param unknown $pText
+ * @internal param string $pTitle
+ * @internal param string $purge
*/
private function doUpdate( $pid ) {
// TODO: fix link id problem
- $anchorID = 0;
- $res = $this->db->select( array( 'mathpagestat', 'mathvarstat'
),
- array( 'pagestat_pageid',
'pagestat_featurename', 'pagestat_featuretype', 'pagestat_featurecount',
'varstat_id', 'varstat_featurecount' ),
- array( 'pagestat_pageid' => $pid,
'pagestat_featurename = varstat_featurename',
'pagestat_featuretype=varstat_featuretype' ),
- __METHOD__
- );
+ $res =
+ $this->db->select( array( 'mathpagestat', 'mathvarstat'
), array(
+ 'pagestat_pageid',
+ 'pagestat_featurename',
+ 'pagestat_featuretype',
+ 'pagestat_featurecount',
+ 'varstat_id',
+ 'varstat_featurecount'
+ ), array(
+ 'pagestat_pageid' => $pid,
+ 'pagestat_featurename =
varstat_featurename',
+
'pagestat_featuretype=varstat_featuretype'
+ ), __METHOD__ );
foreach ( $res as $row ) {
$this->output( $pid . ',' . $row->varstat_id . ',' .
$row->pagestat_featurecount
- /// $row->varstat_featurecount
- . "\n" );//
.';'.$row->pagestat_featuretype.utf8_decode($row->pagestat_featurename)."\n");
+ ///
$row->varstat_featurecount
+ .
+ "\n" );//
.';'.$row->pagestat_featuretype.utf8_decode($row->pagestat_featurename)."\n");
}
return 0;
}
+
/**
*
*/
@@ -103,9 +129,10 @@
$this->dbw = wfGetDB( DB_MASTER );
$this->purge = $this->getOption( "purge", false );
$this->db = wfGetDB( DB_MASTER );
- $this->populateSearchIndex( $this->getArg( 0, 0 ),
$this->getArg( 1, -1 ) );
+ $this->populateSearchIndex( $this->getArg( 0, 0 ),
$this->getArg( 1, - 1 ) );
}
}
-$maintClass = "UpdateMath";
+$maintClass = "GenerateFeatureTable";
+/** @noinspection PhpIncludeInspection */
require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/GenerateWorkload.php b/maintenance/GenerateWorkload.php
index abaef0c..0445827 100644
--- a/maintenance/GenerateWorkload.php
+++ b/maintenance/GenerateWorkload.php
@@ -31,29 +31,38 @@
private $id = 0;
private $selectivity = PHP_INT_MAX;
+ /**
+ *
+ */
public function __construct() {
parent::__construct();
$this->mDescription = 'Generates a workload of sample queries.';
- $this->addOption( 'selectivity' , "Specifies the selectivity
for each individual equation", false, true, "S");
- $this->addOption ( 'lastId', "Specifies to start the ID counter
after the given id. For example '-l 1' would start with id 2.", false, true,
"l" );
- $this->addOption ( 'overwrite', "Overwrite existing draft
queries ", false, false, "o" );
+ $this->addOption( 'selectivity', "Specifies the selectivity for
each individual equation",
+ false, true, "S" );
+ $this->addOption( 'lastId',
+ "Specifies to start the ID counter after the given id.
For example '-l 1' would start with id 2.",
+ false, true, "l" );
+ $this->addOption( 'overwrite', "Overwrite existing draft
queries ", false, false, "o" );
}
/**
* @param ResultWrapper $row
+ *
* @return string
*/
- protected function generateIndexString( $row ){
- if ( mt_rand() <= $this->selectivity ){
- $q = MathQueryObject::newQueryFromEquationRow($row,
++$this->id );
- $q->saveToDatabase( $this->getOption("overwrite",
false) );
+ protected function generateIndexString( $row ) {
+ if ( mt_rand() <= $this->selectivity ) {
+ $q = MathQueryObject::newQueryFromEquationRow( $row, ++
$this->id );
+ $q->saveToDatabase( $this->getOption( "overwrite",
false ) );
$out = $q->exportTexDocument();
- if( $out == false ){
- echo 'problem with ' . var_export($q,true) .
"\n";
+ if ( $out == false ) {
+ echo 'problem with ' . var_export( $q, true ) .
"\n";
$out = '';
}
return $out;
- } else return '';
+ } else {
+ return '';
+ }
}
@@ -61,18 +70,17 @@
$i = 0;
$inc = $this->getArg( 1, 100 );
$this->id = $this->getOption( 'lastId', 0 );
- $sel = $this->getOption( "selectivity", .1 );
- $this->selectivity = (int) ($sel * mt_getrandmax()) ;
+ $sel = $this->getOption( "selectivity", .1 );
+ $this->selectivity = (int)( $sel * mt_getrandmax() );
$db = wfGetDB( DB_SLAVE );
echo "getting list of all equations from the database\n";
- $this->res = $db->select(
- array( 'mathindex' ),
- array( 'mathindex_revision_id', 'mathindex_anchor',
'mathindex_inputhash' ),
- true
- , __METHOD__
- ,array('LIMIT' => $this->getOption( 'limit',
(int) (100/$sel) ) ,
- 'ORDER BY' => 'mathindex_inputhash' )
- );
+ $this->res =
+ $db->select( array( 'mathindex' ),
+ array( 'mathindex_revision_id',
'mathindex_anchor', 'mathindex_inputhash' ), true,
+ __METHOD__, array(
+ 'LIMIT' => $this->getOption( 'limit',
(int)( 100 / $sel ) ),
+ 'ORDER BY' => 'mathindex_inputhash'
+ ) );
do {
$fn = $this->getArg( 0 ) . '/math' . sprintf( '%012d',
$i ) . '.tex';
$res = $this->wFile( $fn, $i, $inc );
@@ -82,5 +90,7 @@
echo( "done" );
}
}
+
$maintClass = "GenerateWorkload";
+/** @noinspection PhpIncludeInspection */
require_once( RUN_MAINTENANCE_IF_MAIN );
\ No newline at end of file
diff --git a/maintenance/IndexBase.php b/maintenance/IndexBase.php
index e87bca9..9583f96 100644
--- a/maintenance/IndexBase.php
+++ b/maintenance/IndexBase.php
@@ -1,6 +1,6 @@
<?php
/**
- * Generates harvest files for the MathWebSearch Deamon.
+ * Generates harvest files for the MathWebSearch Daemon.
* Example: php CreateMathIndex.php ~/mws_harvest_files
*
* This program is free software; you can redistribute it and/or modify
@@ -28,6 +28,7 @@
*
*/
abstract class IndexBase extends Maintenance {
+ /** @type ResultWrapper */
protected $res;
/**
@@ -38,30 +39,35 @@
$this->mDescription = 'Exports data';
$this->addArg( 'dir', 'The directory where the harvest files go
to.' );
$this->addArg( 'ffmax', "The maximal number of formula per
file.", false );
- $this->addArg( 'min', "If set processing is started at the page
with rank(pageID)>min", false );
- $this->addArg( 'max', "If set processing is stopped at the page
with rank(pageID)<=max", false );
- $this->addOption( 'limit', 'The maximal number of database
entries to be considered', false ,true , "L");
+ $this->addArg( 'min', "If set processing is started at the page
with rank(pageID)>min",
+ false );
+ $this->addArg( 'max', "If set processing is stopped at the page
with rank(pageID)<=max",
+ false );
+ $this->addOption( 'limit', 'The maximal number of database
entries to be considered', false,
+ true, "L" );
}
/**
- * @param unknown $row
+ * @param stdClass $row
+ *
* @return string
*/
protected abstract function generateIndexString( $row );
/**
- * @param unknown $fn
- * @param unknown $min
- * @param unknown $inc
+ * @param string $fn
+ * @param int $min
+ * @param int $inc
+ *
* @return boolean
*/
protected function wFile( $fn, $min, $inc ) {
$out = $this->getHead();
$max = min( $min + $inc, $this->res->numRows() );
- for ( $i = $min; $i < $max; $i++ ) {
+ for ( $i = $min; $i < $max; $i ++ ) {
$this->res->seek( $i );
$out .= $this->generateIndexString(
$this->res->fetchObject() );
- restore_error_handler ( );
+ restore_error_handler();
}
$out .= "\n" . $this->getFooter();
$fh = fopen( $fn, 'w' );
@@ -70,10 +76,11 @@
fwrite( $fh, $out );
fclose( $fh );
echo "written file $fn with entries($min ... $max)\n";
- if ( $max < $this->res->numRows() -1 )
+ if ( $max < $this->res->numRows() - 1 ) {
return true;
- else
+ } else {
return false;
+ }
}
/**
@@ -85,17 +92,21 @@
$inc = $this->getArg( 1, 100 );
$db = wfGetDB( DB_SLAVE );
echo "getting list of all equations from the database\n";
- $this->res = $db->select(
- array( 'mathindex', 'mathlatexml' ),
- array( 'mathindex_revision_id', 'mathindex_anchor',
'math_mathml', 'math_inputhash', 'mathindex_inputhash' ),
- array( 'math_inputhash = mathindex_inputhash',
- 'mathindex_revision_id >= '. $this->getArg( 2,
0),
- 'mathindex_revision_id <= '. $this->getArg( 3,
PHP_INT_MAX))
- , __METHOD__
- ,array(
- 'LIMIT' => $this->getOption(
'limit', PHP_INT_MAX ) ,
- 'ORDER BY' => 'mathindex_revision_id' )
- );
+ $this->res =
+ $db->select( array( 'mathindex', 'mathlatexml' ), array(
+ 'mathindex_revision_id',
+ 'mathindex_anchor',
+ 'math_mathml',
+ 'math_inputhash',
+ 'mathindex_inputhash'
+ ), array(
+ 'math_inputhash = mathindex_inputhash',
+ 'mathindex_revision_id >= ' .
$this->getArg( 2, 0 ),
+ 'mathindex_revision_id <= ' .
$this->getArg( 3, PHP_INT_MAX )
+ ), __METHOD__, array(
+ 'LIMIT' => $this->getOption( 'limit',
PHP_INT_MAX ),
+ 'ORDER BY' => 'mathindex_revision_id'
+ ) );
echo "write " . $this->res->numRows() . " results to index\n";
do {
$fn = $this->getArg( 0 ) . '/math' . sprintf( '%012d',
$i ) . '.xml';
@@ -104,10 +115,18 @@
} while ( $res );
echo( "done" );
}
- protected function getHead(){
+
+ /**
+ * @return string
+ */
+ protected function getHead() {
return "";
}
- protected function getFooter(){
+
+ /**
+ * @return string
+ */
+ protected function getFooter() {
return "";
}
}
diff --git a/maintenance/MathMLFilter.php b/maintenance/MathMLFilter.php
index 9abf1c9..44e807c 100644
--- a/maintenance/MathMLFilter.php
+++ b/maintenance/MathMLFilter.php
@@ -20,17 +20,20 @@
*/
# Alert the user that this is not a valid entry point to MediaWiki if they try
to access the special pages file directly.
if ( !defined( 'MEDIAWIKI' ) ) {
- die( "This is not a valid entry point to MediaWiki.\n"
- . "To run the script use:\n"
- . 'php ../../../maintenance/dumpBackup.php --current
--plugin=MathMLFilter:./MathDump.php --filter=mathml'
- . "\n" );
+ die( "This is not a valid entry point to MediaWiki.\n" . "To run the
script use:\n" .
+ 'php ../../../maintenance/dumpBackup.php --current
--plugin=MathMLFilter:./MathDump.php --filter=mathml' .
+ "\n" );
}
/**
* Dump output filter that replaces the TeX in math tags with rendered MathML.
+ *
* @ingroup Dump
*/
class MathMLFilter extends DumpFilter {
+ /**
+ * @param $backupDumper
+ */
public static function register( $backupDumper ) {
$backupDumper->registerFilter( 'mathml', 'MathMLFilter' );
@@ -38,7 +41,9 @@
/**
* Callback function that replaces TeX by MathML
+ *
* @param array $match
+ *
* @return string
*/
private static function renderMath( $match ) {
@@ -51,19 +56,22 @@
}
/**
- * Replaces the math tags with rendered Mathml
- * @param unknown $pText
+ * Replaces the math tags with rendered MathML
+ *
+ * @param string $pText
+ *
* @return string
*/
private static function replaceMath( $pText ) {
$pText = Sanitizer::removeHTMLcomments( $pText );
- return preg_replace_callback(
"#<math>(.*?)</math>#s", 'self::renderMath', $pText );
+ return preg_replace_callback(
"#<math>(.*?)</math>#s", 'self::renderMath',
+ $pText );
}
/**
- * @param $rev
- * @param $string the revision text
+ * @param object $rev
+ * @param string $string the revision text
*/
function writeRevision( $rev, $string ) {
if ( $this->sendingThisPage ) {
diff --git a/maintenance/UpdateMath.php b/maintenance/UpdateMath.php
index e31926c..6f6e809 100644
--- a/maintenance/UpdateMath.php
+++ b/maintenance/UpdateMath.php
@@ -21,6 +21,9 @@
require_once( dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' );
+/**
+ * Class UpdateMath
+ */
class UpdateMath extends Maintenance {
const RTI_CHUNK_SIZE = 100;
public $purge = false;
@@ -28,56 +31,75 @@
private $verbose;
/** @var DatabaseBase */
public $dbw = null;
- /** @var MathRenderer */
+ /** @var MathRenderer */
private $current;
private $time = 0;//microtime( true );
private $performance = array();
- private $renderingMode = MW_MATH_LATEXML;
+ private $renderingMode = MW_MATH_LATEXML;
/**
* @var DatabaseBase
*/
private $db;
+
/**
*
*/
public function __construct() {
- $this->verbose = $this->verbose;
parent::__construct();
$this->mDescription = 'Updates the index of Mathematical
formulae.';
- $this->addOption( 'purge', "If set all formulae are rendered
again without using caches. (Very time consuming!)", false, false, "f" );
- $this->addArg( 'min', "If set processing is started at the page
with rank(pageID)>min", false );
- $this->addArg( 'max', "If set processing is stopped at the page
with rank(pageID)<=max", false );
- $this->addOption( 'verbose', "If set output for successful
rendering will produced",false,false,'v' );
+ $this->addOption( 'purge',
+ "If set all formulae are rendered again without using
caches. (Very time consuming!)",
+ false, false, "f" );
+ $this->addArg( 'min', "If set processing is started at the page
with rank(pageID)>min",
+ false );
+ $this->addArg( 'max', "If set processing is stopped at the page
with rank(pageID)<=max",
+ false );
+ $this->addOption( 'verbose', "If set output for successful
rendering will produced", false,
+ false, 'v' );
$this->addOption( 'SVG', "If set SVG images will be produced",
false, false );
$this->addOption( 'hoooks', "If set hooks will be skipped",
false, false );
$this->addOption( 'texvccheck', "If set texvccheck will be
skipped", false, false );
- $this->addOption( 'mode' , 'Rendering mode to be used (0 = PNG,
5= MathML, 7=MathML)',false,true,'m');
+ $this->addOption( 'mode', 'Rendering mode to be used (0 = PNG,
5= MathML, 7=MathML)', false,
+ true, 'm' );
}
- private function time($category='default'){
+
+ /**
+ * @param string $category
+ *
+ * @return int
+ */
+ private function time( $category = 'default' ) {
global $wgMathDebug;
- $delta = (microtime(true) - $this->time)*1000;
- if (isset ($this->performance[$category] ))
+ $delta = ( microtime( true ) - $this->time ) * 1000;
+ if ( isset ( $this->performance[$category] ) ) {
$this->performance[$category] += $delta;
- else
+ } else {
$this->performance[$category] = $delta;
- if($wgMathDebug){
- $this->db->insert('mathperformance',array(
+ }
+ if ( $wgMathDebug ) {
+ $this->db->insert( 'mathperformance', array(
'math_inputhash' =>
$this->current->getInputHash(),
- 'mathperformance_name' =>
substr($category,0,10),
+ 'mathperformance_name' => substr( $category, 0,
10 ),
'mathperformance_time' => $delta,
- 'mathperformance_mode' => $this->renderingMode
- ));
+ 'mathperformance_mode' => $this->renderingMode
+ ) );
}
- $this->time = microtime(true);
+ $this->time = microtime( true );
- return (int) $delta;
+ return (int)$delta;
}
+
/**
* Populates the search index with content from all pages
+ *
+ * @param int $n
+ * @param int $cmax
+ *
+ * @throws DBUnexpectedError
*/
- protected function populateSearchIndex( $n = 0, $cmax = -1 ) {
+ protected function populateSearchIndex( $n = 0, $cmax = - 1 ) {
$res = $this->db->select( 'page', 'MAX(page_id) AS count' );
$s = $this->db->fetchObject( $res );
$count = $s->count;
@@ -93,25 +115,28 @@
}
$end = $n + self::RTI_CHUNK_SIZE - 1;
- $res = $this->db->select( array( 'page', 'revision',
'text' ),
+ $res =
+ $this->db->select( array( 'page', 'revision',
'text' ),
array( 'page_id', 'page_namespace',
'page_title', 'old_flags', 'old_text' ),
- array( "page_id BETWEEN $n AND $end",
'page_latest = rev_id', 'rev_text_id = old_id' ),
- __METHOD__
- );
+ array(
+ "page_id BETWEEN $n AND $end",
+ 'page_latest = rev_id',
+ 'rev_text_id = old_id'
+ ), __METHOD__ );
$this->dbw->begin();
// echo "before" +$this->dbw->selectField('mathindex',
'count(*)')."\n";
$i = $n;
foreach ( $res as $s ) {
echo "\np$i:";
$revtext = Revision::getRevisionText( $s );
- $fcount += $this->doUpdate( $s->page_id,
$revtext, $s->page_title);
- $i++;
+ $fcount += $this->doUpdate( $s->page_id,
$revtext, $s->page_title );
+ $i ++;
}
// echo "before" +$this->dbw->selectField('mathindex',
'count(*)')."\n";
$start = microtime( true );
$this->dbw->commit();
- echo " committed in " . ( microtime( true ) -$start ) .
"s\n\n";
- var_export($this->performance);
+ echo " committed in " . ( microtime( true ) - $start )
. "s\n\n";
+ var_export( $this->performance );
// echo "after" +$this->dbw->selectField('mathindex',
'count(*)')."\n";
$n += self::RTI_CHUNK_SIZE;
}
@@ -119,14 +144,15 @@
}
/**
- * @param $pid
- * @param unknown $pText
+ * @param $pid
+ * @param string $pText
* @param string $pTitle
+ *
* @internal param unknown $pId
* @internal param string $purge
* @return number
*/
- private function doUpdate( $pid, $pText, $pTitle = "") {
+ private function doUpdate( $pid, $pText, $pTitle = "" ) {
$notused = '';
// TODO: fix link id problem
$anchorID = 0;
@@ -135,22 +161,23 @@
if ( $matches ) {
echo( "\t processing $matches math fields for {$pTitle}
page\n" );
foreach ( $math as $formula ) {
- $this->time = microtime(true);
- $renderer = MathRenderer::getRenderer(
$formula[1], $formula[2], $this->renderingMode );
+ $this->time = microtime( true );
+ $renderer =
+ MathRenderer::getRenderer( $formula[1],
$formula[2], $this->renderingMode );
$this->current = $renderer;
- $this->time("loadClass");
+ $this->time( "loadClass" );
if ( $this->getOption( "texvccheck", false ) ) {
$checked = true;
} else {
$checked = $renderer->checkTex();
- $this->time("checkTex");
+ $this->time( "checkTex" );
}
if ( $checked ) {
$renderer->render( $this->purge );
- if( $renderer->getMathml() ){
- $this->time("Rendering");
+ if ( $renderer->getMathml() ) {
+ $this->time( "Rendering" );
} else {
- $this->time("Failing");
+ $this->time( "Failing" );
}
if ( $this->getOption( "SVG", false ) )
{
$svg = $renderer->getSvg();
@@ -161,25 +188,27 @@
}
}
} else {
- $this->time("checkTex-Fail");
- echo "\nF:\t\t".$renderer->getMd5()."
texvccheck error:" . $renderer->getLastError();
+ $this->time( "checkTex-Fail" );
+ echo "\nF:\t\t" . $renderer->getMd5() .
" texvccheck error:" .
+ $renderer->getLastError();
continue;
}
- if ( ! $this->getOption( "hooks", false ) ) {
- wfRunHooks( 'MathFormulaRendered',
array( &$renderer, &$notused, $pid, $anchorID ) );
+ if ( !$this->getOption( "hooks", false ) ) {
+ wfRunHooks( 'MathFormulaRendered',
+ array( &$renderer, &$notused,
$pid, $anchorID ) );
$this->time( "hooks" );
- $anchorID++;
+ $anchorID ++;
}
- $renderer->writeCache($this->dbw);
- $this->time("write Cache");
+ $renderer->writeCache( $this->dbw );
+ $this->time( "write Cache" );
if ( $renderer->getLastError() ) {
- echo "\n\t\t".
$renderer->getLastError() ;
- echo "\nF:\t\t".$renderer->getMd5()."
equation " . ( $anchorID -1 ) .
- "-failed beginning with\n\t\t'"
. substr( $formula, 0, 100 )
- . "'\n\t\tmathml:" .
substr($renderer->getMathml(),0,10) ."\n ";
- } else{
- if($this->verbose){
- echo
"\nS:\t\t".$renderer->getMd5();
+ echo "\n\t\t" .
$renderer->getLastError();
+ echo "\nF:\t\t" . $renderer->getMd5() .
" equation " . ( $anchorID - 1 ) .
+ "-failed beginning
with\n\t\t'" . substr( $formula, 0, 100 ) .
+ "'\n\t\tmathml:" . substr(
$renderer->getMathml(), 0, 10 ) . "\n ";
+ } else {
+ if ( $this->verbose ) {
+ echo "\nS:\t\t" .
$renderer->getMd5();
}
}
}
@@ -187,6 +216,7 @@
}
return 0;
}
+
/**
*
*/
@@ -194,15 +224,16 @@
global $wgMathValidModes;
$this->dbw = wfGetDB( DB_MASTER );
$this->purge = $this->getOption( "purge", false );
- $this->verbose = $this->getOption("verbose",false);
- $this->renderingMode = $this->getOption( "mode" , 7);
+ $this->verbose = $this->getOption( "verbose", false );
+ $this->renderingMode = $this->getOption( "mode", 7 );
$this->db = wfGetDB( DB_MASTER );
$wgMathValidModes[] = $this->renderingMode;
$this->output( "Loaded.\n" );
$this->time = microtime( true );
- $this->populateSearchIndex( $this->getArg( 0, 0 ),
$this->getArg( 1, -1 ) );
+ $this->populateSearchIndex( $this->getArg( 0, 0 ),
$this->getArg( 1, - 1 ) );
}
}
$maintClass = "UpdateMath";
+/** @noinspection PhpIncludeInspection */
require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/maintenance/batch.sh b/maintenance/batch.sh
index ece28dd..6cbce26 100644
--- a/maintenance/batch.sh
+++ b/maintenance/batch.sh
@@ -1,9 +1,9 @@
#!/bin/sh
i=0
-while [ $i -le 28 ]
+while [ ${i} -le 28 ]
do
- j=`expr $i + 1`
- echo $i
- php UpdateMath.php ${i}000 ${j}000 -f >$i&
- i=$j
+ j=`expr ${i} + 1`
+ echo ${i}
+ php UpdateMath.php ${i}000 ${j}000 -f >${i}&
+ i=${j}
done
\ No newline at end of file
diff --git a/maintenance/ibm_driver.sh b/maintenance/ibm_driver.sh
index 097860a..5cf8b2e 100755
--- a/maintenance/ibm_driver.sh
+++ b/maintenance/ibm_driver.sh
@@ -1,4 +1,4 @@
-#/bin/bash
+#!/bin/bash
echo "Warning: Experimental don't use in any kind of production environment."
echo "Make sure that you have downloaded and extracted the"
echo "Data Server Driver Package (dsdriver) to /vagrant/ibm/dsdriver"
@@ -6,7 +6,7 @@
echo "Are the drivers downloaded and extracted?"
select yn in "Yes" "No"
do
- case $yn in
+ case ${yn} in
Yes ) break;;
No ) exit;;
esac
--
To view, visit https://gerrit.wikimedia.org/r/186633
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I39cc729f79b7045e712b4079d8497d065dc5fdad
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/MathSearch
Gerrit-Branch: master
Gerrit-Owner: Physikerwelt <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits