jenkins-bot has submitted this change and it was merged.
Change subject: Replace pageIds by revisionIds
......................................................................
Replace pageIds by revisionIds
Change-Id: I1515abf2a01ec0153bb67cf00ec72cc56ca82142
---
M MathObject.php
M MathSearch.hooks.php
M db/mathobservation.sql
D db/mathpagestat.sql
A db/mathrevisionstat.sql
M db/snippets/CosProd.sql
M db/snippets/getNorm.sql
M maintenance/CalculateDistances.php
M maintenance/ExtractFeatures.php
M maintenance/GenerateFeatureTable.php
10 files changed, 51 insertions(+), 46 deletions(-)
Approvals:
Physikerwelt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/MathObject.php b/MathObject.php
index f00f7f5..bc2670c 100644
--- a/MathObject.php
+++ b/MathObject.php
@@ -104,14 +104,14 @@
global $wgOut;
$dbr = wfGetDB( DB_SLAVE );
try {
- $res = $dbr->select( array( "mathobservation",
"mathvarstat", 'mathpagestat' )
+ $res = $dbr->select( array( "mathobservation",
"mathvarstat", 'mathrevisionstat' )
, array( "mathobservation_featurename",
"mathobservation_featuretype", 'varstat_featurecount',
- 'pagestat_featurecount', "count(*) as
localcnt" ),
+ 'revstat_featurecount', "count(*) as
localcnt" ),
array( "mathobservation_inputhash" =>
$this->getInputHash(),
'varstat_featurename =
mathobservation_featurename',
'varstat_featuretype =
mathobservation_featuretype',
- 'pagestat_pageid' =>
$this->getRevisionID(),
- 'pagestat_featureid = varstat_id'
+ 'revstat_revid' =>
$this->getRevisionID(),
+ 'revstat_featureid = varstat_id'
)
, __METHOD__, array( 'GROUP BY' =>
'mathobservation_featurename',
'ORDER BY' => 'varstat_featurecount' )
@@ -180,12 +180,15 @@
$dbgiven = true;
}
$dbw->delete( "mathobservation", array(
"mathobservation_inputhash" => $this->getInputHash() ) );
+ wfDebugLog('MathSearch', 'delete obervations for
'.bin2hex($this->getInputHash()));
foreach ( $rule as $feature ) {
$dbw->insert( "mathobservation", array(
"mathobservation_inputhash" =>
$this->getInputHash(),
"mathobservation_featurename" => utf8_encode(
trim( $feature[ 4 ] ) ),
"mathobservation_featuretype" => utf8_encode(
$feature[ 1 ] ),
) );
+ wfDebugLog('MathSearch', 'insert observation for
'.bin2hex($this->getInputHash())
+ . utf8_encode( trim( $feature[ 4 ] ) ));
}
if ( !$dbgiven ) {
$dbw->commit();
@@ -339,8 +342,8 @@
. "JOIN mathindex ON `mathobservation_inputhash` =
mathindex_inputhash\n"
. "GROUP BY `mathobservation_featurename` ,
`mathobservation_featuretype`\n"
. "ORDER BY CNT DESC");
- $dbw->query( 'TRUNCATE TABLE `mathpagestat`' );
- $dbw->query( 'INSERT INTO
`mathpagestat`(`pagestat_featureid`,`pagestat_pageid`,`pagestat_featurecount`) '
+ $dbw->query( 'TRUNCATE TABLE `mathrevisionstat`' );
+ $dbw->query( 'INSERT INTO
`mathrevisionstat`(`revstat_featureid`,`revstat_revid`,`revstat_featurecount`) '
. 'SELECT varstat_id, mathindex_revision_id, count(*)
AS CNT FROM `mathobservation` '
. 'JOIN mathindex ON `mathobservation_inputhash` =
mathindex_inputhash '
. 'JOIN mathvarstat ON varstat_featurename =
`mathobservation_featurename` AND varstat_featuretype =
`mathobservation_featuretype` '
diff --git a/MathSearch.hooks.php b/MathSearch.hooks.php
index 559a91a..169c1c2 100644
--- a/MathSearch.hooks.php
+++ b/MathSearch.hooks.php
@@ -26,7 +26,7 @@
$updater->addExtensionTable( 'mathindex', $dir .
'mathindex.sql' );
$updater->addExtensionTable( 'mathobservation', $dir .
'mathobservation.sql' );
$updater->addExtensionTable( 'mathvarstat', $dir .
'mathvarstat.sql' );
- $updater->addExtensionTable( 'mathpagestat', $dir .
'mathpagestat.sql' );
+ $updater->addExtensionTable( 'mathrevisionstat', $dir .
'mathrevisionstat.sql' );
$updater->addExtensionTable( 'mathsemantics', $dir .
'mathsemantics.sql' );
$updater->addExtensionTable( 'mathperformance', $dir .
'mathperformance.sql' );
$updater->addExtensionTable( 'mathidentifier', $dir .
'mathidentifier.sql' );
@@ -85,7 +85,8 @@
'mathindex_inputhash' => $inputHash)
) ;
if ( $exists ) {
- wfDebugLog( "MathSearch", 'Index $' . $tex . '$
already in database.' );
+ wfDebugLog( 'MathSearch', 'Index $' . $tex . '$
already in database.');
+ wfDebugLog( 'MathSearch', "$revId-$eid with
hash ". bin2hex($inputHash) );
} else {
self::writeMathIndex( $revId, $eid, $inputHash,
$tex );
}
diff --git a/db/mathobservation.sql b/db/mathobservation.sql
index 21db97e..3be2af2 100644
--- a/db/mathobservation.sql
+++ b/db/mathobservation.sql
@@ -7,10 +7,10 @@
-- Binary MD5 hash of the latex fragment, used as an identifier key.
mathobservation_inputhash varbinary(16) NOT NULL,
- --Type of the feature e.g. mo, mi
+ -- Type of the feature e.g. mo, mi
mathobservation_featuretype varchar(10) NOT NULL,
- --Name of the feature. eg name of the variable
+ -- Name of the feature. eg name of the variable
mathobservation_featurename varchar(10) NOT NULL,
-- Timestamp. Is set by the database automatically
diff --git a/db/mathpagestat.sql b/db/mathpagestat.sql
deleted file mode 100644
index d54e779..0000000
--- a/db/mathpagestat.sql
+++ /dev/null
@@ -1,11 +0,0 @@
---
--- Used by the math search module to analyse the variables in the equations.
---
-CREATE TABLE /*_*/mathpagestat (
- pagestat_pageid int(10) NOT NULL,
- pagestat_featureid int(6) NOT NULL,
- pagestat_featurecount int(11) NOT NULL,
- PRIMARY KEY (pagestat_pageid,pagestat_featureid),
- KEY `pagestat_pageid` (`pagestat_pageid`)
-) /*$wgDBTableOptions*/;
-
diff --git a/db/mathrevisionstat.sql b/db/mathrevisionstat.sql
new file mode 100644
index 0000000..33574eb
--- /dev/null
+++ b/db/mathrevisionstat.sql
@@ -0,0 +1,12 @@
+-- Used by the math search module to analyse the variables in the equations.
+-- One row per (revision, feature): revstat_featurecount counts the feature's
+-- observations in that revision; revstat_featureid is a mathvarstat.varstat_id.
+CREATE TABLE /*_*/mathrevisionstat (
+ revstat_revid int(10) NOT NULL,
+ revstat_featureid int(6) NOT NULL,
+ revstat_featurecount int(11) NOT NULL,
+ PRIMARY KEY (revstat_revid,revstat_featureid),
+ FOREIGN KEY `revision` ( revstat_revid ) REFERENCES revision( rev_id ),
+ FOREIGN KEY `featureID` ( revstat_featureid ) REFERENCES mathvarstat ( varstat_id )
+) /*$wgDBTableOptions*/;
+
diff --git a/db/snippets/CosProd.sql b/db/snippets/CosProd.sql
index b9add42..fa52f33 100644
--- a/db/snippets/CosProd.sql
+++ b/db/snippets/CosProd.sql
@@ -8,10 +8,10 @@
return (
SELECT SUM(CAST(LOG( a.`pagestat_featurecount`)*
LOG(b.`pagestat_featurecount`) as DECIMAL(20,10))
/(LOG(varstat_featurecount)*LOG(varstat_featurecount))
)/(getNorm(IDA)* getNorm(IDB))
- from mathpagestat as a, mathpagestat as b, mathvarstat as s
- WHERE (b.`pagestat_pageid`= IDA and a.`pagestat_pageid`=IDB
- and a.`pagestat_featureid`=b.`pagestat_featureid`
- and a.`pagestat_featureid`=s.varstat_id)
+ from mathrevisionstat as a, mathrevisionstat as b, mathvarstat as s
+ WHERE (b.revstat_revid= IDA and a.revstat_revid=IDB
+ and a.revstat_featureid=b.revstat_featureid
+ and a.revstat_featureid=s.varstat_id)
);
END$$
diff --git a/db/snippets/getNorm.sql b/db/snippets/getNorm.sql
index b0581c2..3b0120c 100644
--- a/db/snippets/getNorm.sql
+++ b/db/snippets/getNorm.sql
@@ -7,9 +7,9 @@
DECLARE output DECIMAL(20,10);
SELECT SUM(POW(LOG(CAST(`pagestat_featurecount`as
decimal(20,10)))/LOG(varstat_featurecount),2)) as norm
INTO output
-FROM `mathpagestat`
-JOIN mathvarstat on `pagestat_featureid` = varstat_id
-WHERE `pagestat_pageid` =pid order by norm desc;
+FROM mathrevisionstat
+JOIN mathvarstat on revstat_featureid = varstat_id
+WHERE revstat_revid =pid order by norm desc;
return POW(output,1/2);
END$$
diff --git a/maintenance/CalculateDistances.php
b/maintenance/CalculateDistances.php
index d377aa8..87afb42 100644
--- a/maintenance/CalculateDistances.php
+++ b/maintenance/CalculateDistances.php
@@ -56,18 +56,18 @@
$this->pagelist = array();
$min = $this->getArg( 0, 0 );
$max = $this->getArg( 1, PHP_INT_MAX );
- $conds = "pagestat_pageid >= $min";
+ $conds = "revstat_revid >= $min";
if ( $max < PHP_INT_MAX ) {
- $conds .= " AND pagestat_pageid <= $max";
+ $conds .= " AND revstat_revid <= $max";
}
if ( $this->getOption( 'page9', false ) ) {
$res =
- $this->db->select( array( 'mathpage9',
'mathpagestat' ),
- array( 'page_id', 'pagestat_pageid' ),
- $conds . ' AND pagestat_pageid =
page_id', __METHOD__, array( 'DISTINCT' ) );
+ $this->db->select( array( 'mathpage9',
'mathrevisionstat' ),
+ array( 'page_id', 'revstat_revid' ),
+ $conds . ' AND revstat_revid =
page_id', __METHOD__, array( 'DISTINCT' ) );
} else {
$res =
- $this->db->select( 'mathpagestat',
'pagestat_pageid', $conds, __METHOD__,
+ $this->db->select( 'mathrevisionstat',
'revstat_revid', $conds, __METHOD__,
array( 'DISTINCT' ) );
}
foreach ( $res as $row ) {
@@ -94,14 +94,14 @@
$pid = $this->pagelist[$n];
$sql =
"INSERT IGNORE INTO
mathpagesimilarity(pagesimilarity_A,pagesimilarity_B,pagesimilarity_Value)\n" .
- "SELECT DISTINCT
$pid,`pagestat_pageid`,\n" .
- "CosProd( $pid,`pagestat_pageid`) FROM
`mathpagestat` m ";
+ "SELECT DISTINCT
$pid,`revstat_revid`,\n" .
+ "CosProd( $pid,`revstat_revid`) FROM
`mathrevisionstat` m ";
if ( $this->getOption( 'page9', false ) ) {
- $sql .= " JOIN (SELECT page_id from
mathpage9) as r WHERE m.pagestat_pageid=r.page_id AND ";
+ $sql .= " JOIN (SELECT page_id from
mathpage9) as r WHERE m.revstat_revid=r.page_id AND ";
} else {
$sql .= " WHERE ";
}
- $sql .= "m.pagestat_pageid < $pid ";
+ $sql .= "m.revstat_revid < $pid ";
echo "writing entries for page $pid...";
$start = microtime( true );
$this->dbw->query( $sql );
diff --git a/maintenance/ExtractFeatures.php b/maintenance/ExtractFeatures.php
index 1e55e97..ffe12f7 100644
--- a/maintenance/ExtractFeatures.php
+++ b/maintenance/ExtractFeatures.php
@@ -106,12 +106,12 @@
'GROUP BY `mathobservation_featurename` ,
`mathobservation_featuretype`\n' .
'ORDER BY CNT DESC';
$this->dbw->query( $sql );
- $this->output( "Clear mathpagestat\n" );
- $sql = 'TRUNCATE TABLE `mathpagestat`';
+ $this->output( "Clear mathrevisionstat\n" );
+ $sql = 'TRUNCATE TABLE `mathrevisionstat`';
$this->dbw->query( $sql );
- $this->output( "Generate mathpagestat\n" );
+ $this->output( "Generate mathrevisionstat\n" );
$sql =
- 'INSERT INTO
`mathpagestat`(`pagestat_featureid`,`pagestat_pageid`,`pagestat_featurecount`)\n'
.
+ 'INSERT INTO
`mathrevisionstat`(`revstat_featureid`,`revstat_revid`,`revstat_featurecount`)\n'
.
'SELECT varstat_id, mathindex_revision_id, count(*) AS
CNT FROM `mathobservation` JOIN mathindex ON `mathobservation_inputhash`
=mathindex_inputhash\n' .
'JOIN mathvarstat ON varstat_featurename =
`mathobservation_featurename` AND varstat_featuretype =
`mathobservation_featuretype`\n' .
' GROUP BY `mathobservation_featurename`,
`mathobservation_featuretype`,mathindex_revision_id ORDER BY CNT DESC';
diff --git a/maintenance/GenerateFeatureTable.php
b/maintenance/GenerateFeatureTable.php
index c719afb..035f2f5 100644
--- a/maintenance/GenerateFeatureTable.php
+++ b/maintenance/GenerateFeatureTable.php
@@ -92,15 +92,15 @@
private function doUpdate( $pid ) {
// TODO: fix link id problem
$res =
- $this->db->select( array( 'mathpagestat', 'mathvarstat'
), array(
- 'pagestat_pageid',
+ $this->db->select( array( 'mathrevisionstat',
'mathvarstat' ), array(
+ 'revstat_revid',
'pagestat_featurename',
'pagestat_featuretype',
- 'pagestat_featurecount',
+ 'revstat_featurecount',
'varstat_id',
'varstat_featurecount'
), array(
- 'pagestat_pageid' => $pid,
+ 'revstat_revid' => $pid,
'pagestat_featurename =
varstat_featurename',
'pagestat_featuretype=varstat_featuretype'
), __METHOD__ );
--
To view, visit https://gerrit.wikimedia.org/r/193101
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I1515abf2a01ec0153bb67cf00ec72cc56ca82142
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/MathSearch
Gerrit-Branch: master
Gerrit-Owner: Physikerwelt <[email protected]>
Gerrit-Reviewer: Physikerwelt <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits