Tim Starling has uploaded a new change for review.
https://gerrit.wikimedia.org/r/190416
Change subject: In namespaceDupes.php, fix link tables
......................................................................
In namespaceDupes.php, fix link tables
* Fix link destinations where a link was previously made to the
pseudo-namespace. Don't do this for --source-pseudo-namespace since it
only makes sense when the prefixed DB key (PDBK) is essentially unchanged.
* Update pl_from_namespace, il_from_namespace and tl_from_namespace when
moving a page.
* Run LinksDeletionUpdate::doUpdate() when deleting a page, so that
referential integrity is preserved.
Change-Id: I584ead93d6267d1a2928ecbcdf8a4cd8e5aeef94
---
M maintenance/namespaceDupes.php
1 file changed, 153 insertions(+), 23 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/16/190416/1
diff --git a/maintenance/namespaceDupes.php b/maintenance/namespaceDupes.php
index 96e01fe..7600adb 100644
--- a/maintenance/namespaceDupes.php
+++ b/maintenance/namespaceDupes.php
@@ -39,8 +39,11 @@
*/
protected $db;
- private $resolvableCount = 0;
+ private $resolvablePages = 0;
private $totalPages = 0;
+
+ private $resolvableLinks = 0;
+ private $totalLinks = 0;
public function __construct() {
parent::__construct();
@@ -172,7 +175,33 @@
}
$this->output( "{$this->totalPages} pages to fix, " .
- "{$this->resolvableCount} were resolvable.\n" );
+ "{$this->resolvablePages} were resolvable.\n\n" );
+
+ foreach ( $spaces as $name => $ns ) {
+ if ( $ns != 0 ) {
+ // Fix up link destinations for non-interwiki
links only.
+ //
+ // For example if a page has [[Foo:Bar]] and
then a Foo namespace
+ // is introduced, pagelinks needs to be updated
to have
+ // page_namespace = NS_FOO.
+ //
+ // If instead an interwiki prefix was
introduced called "Foo",
+ // the link should instead be moved to the
iwlinks table. If a new
+ // language is introduced called "Foo", or if
there is a pagelink
+ // [[fr:Bar]] when interlanguage magic links
are turned on, the
+ // link would have to be moved to the langlinks
table. Let's put
+ // those cases in the too-hard basket for now.
The consequences are
+ // not especially severe.
+ //
+ // @fixme Handle interwiki links.
+
+ $this->checkLinkTable( 'pagelinks', 'pl', $ns,
$name, $options );
+ $this->checkLinkTable( 'templatelinks', 'tl',
$ns, $name, $options );
+ }
+ }
+
+ $this->output( "{$this->totalLinks} links to fix, " .
+ "{$this->resolvableLinks} were resolvable.\n" );
return $ok;
}
@@ -215,7 +244,8 @@
// Find the new title and determine the action to take
- $newTitle = $this->getDestinationTitle( $ns, $name,
$row, $options );
+ $newTitle = $this->getDestinationTitle( $ns, $name,
+ $row->page_namespace, $row->page_title,
$options );
$logStatus = false;
if ( !$newTitle ) {
$logStatus = 'invalid title';
@@ -271,24 +301,96 @@
$newTitle->getPrefixedDBkey() .
" (merge)$dryRunNote\n" );
if ( $options['fix'] ) {
- $pageOK = $this->mergePage(
$row->page_id, $newTitle );
+ $pageOK = $this->mergePage(
$row, $newTitle );
}
break;
}
if ( $pageOK ) {
- $this->resolvableCount++;
+ $this->resolvablePages++;
} else {
$ok = false;
}
}
- // @fixme Also needs to do like self::getTargetList() on the
- // *_namespace and *_title fields of pagelinks, templatelinks,
and
- // redirects, and schedule a LinksUpdate job or similar for
each found
- // *_from.
-
return $ok;
+ }
+
+ /**
+ * Check and repair the destination fields in a link table
+ * @param string $table The link table name
+ * @param string $fieldPrefix The field prefix in the link table
+ * @param int $ns Destination namespace id
+ * @param string $name
+ * @param array $options Associative array of validated command-line
options
+ */
+ private function checkLinkTable( $table, $fieldPrefix, $ns, $name,
$options ) {
+ $batchConds = array();
+ $fromField = "{$fieldPrefix}_from";
+ $namespaceField = "{$fieldPrefix}_namespace";
+ $titleField = "{$fieldPrefix}_title";
+ $batchSize = 500;
+ while ( true ) {
+ $res = $this->db->select(
+ $table,
+ array( $fromField, $namespaceField, $titleField
),
+ array_merge( $batchConds, array(
+ $namespaceField => 0,
+ $titleField . $this->db->buildLike(
"$name:", $this->db->anyString() )
+ ) ),
+ __METHOD__,
+ array(
+ 'ORDER BY' => array( $titleField,
$fromField ),
+ 'LIMIT' => $batchSize
+ )
+ );
+
+ if ( $res->numRows() == 0 ) {
+ break;
+ }
+ foreach ( $res as $row ) {
+ $logTitle = "from={$row->$fromField}
ns={$row->$namespaceField} " .
+ "dbk={$row->$titleField}";
+ $destTitle = $this->getDestinationTitle( $ns,
$name,
+ $row->$namespaceField,
$row->$titleField, $options );
+ $this->totalLinks++;
+ if ( !$destTitle ) {
+ $this->output( "$table $logTitle ***
INVALID\n" );
+ continue;
+ }
+ $this->resolvableLinks++;
+ if ( !$options['fix'] ) {
+ $this->output( "$table $logTitle -> " .
+ $destTitle->getPrefixedDBkey()
. " DRY RUN\n" );
+ continue;
+ }
+
+ $this->db->update( $table,
+ // SET
+ array(
+ $namespaceField =>
$destTitle->getNamespace(),
+ $titleField =>
$destTitle->getDBkey()
+ ),
+ // WHERE
+ array(
+ $namespaceField => 0,
+ $titleField =>
$row->$titleField,
+ $fromField => $row->$fromField
+ ),
+ __METHOD__
+ );
+ $this->output( "$table $logTitle -> " .
+ $destTitle->getPrefixedDBkey() . "\n" );
+ }
+ $encLastTitle = $this->db->addQuotes( $row->$titleField
);
+ $encLastFrom = $this->db->addQuotes( $row->$fromField );
+
+ $batchConds = array(
+ "$titleField > $encLastTitle " .
+ "OR ($titleField = $encLastTitle AND $fromField
> $encLastFrom)" );
+
+ wfWaitForSlaves();
+ }
}
/**
@@ -338,21 +440,22 @@
}
/**
- * Get the preferred destination title for a given target page row.
+ * Get the preferred destination title for a given target page.
* @param integer $ns The destination namespace ID
* @param string $name The conflicting prefix
- * @param stdClass $row
+ * @param integer $sourceNs The source namespace
+ * @param string $sourceDbk The source DB key (i.e. page_title)
* @param array $options Associative array of validated command-line
options
* @return Title|false
*/
- private function getDestinationTitle( $ns, $name, $row, $options ) {
- $dbk = substr( $row->page_title, strlen( "$name:" ) );
+ private function getDestinationTitle( $ns, $name, $sourceNs,
$sourceDbk, $options ) {
+ $dbk = substr( $sourceDbk, strlen( "$name:" ) );
if ( $ns == 0 ) {
// An interwiki; try an alternate encoding with '-' for
':'
$dbk = "$name-" . $dbk;
}
$destNS = $ns;
- if ( $row->page_namespace == NS_TALK && MWNamespace::isSubject(
$ns ) ) {
+ if ( $sourceNs == NS_TALK && MWNamespace::isSubject( $ns ) ) {
// This is an associated talk page moved with the
--move-talk feature.
$destNS = MWNamespace::getTalk( $destNS );
}
@@ -392,8 +495,6 @@
/**
* Move a page
*
- * @fixme Update pl_from_namespace etc.
- *
* @param integer $id The page_id
* @param Title $newTitle The new title
* @return bool
@@ -409,8 +510,20 @@
),
__METHOD__ );
- // @fixme Needs updating the *_from_namespace fields in
categorylinks,
- // pagelinks, templatelinks and imagelinks.
+ // Update *_from_namespace in links tables
+ $fromNamespaceTables = array(
+ array( 'pagelinks', 'pl' ),
+ array( 'templatelinks', 'tl' ),
+ array( 'imagelinks', 'il' ) );
+ foreach ( $fromNamespaceTables as $tableInfo ) {
+ list( $table, $fieldPrefix ) = $tableInfo;
+ $this->db->update( $table,
+ // SET
+ array( "{$fieldPrefix}_from_namespace" =>
$newTitle->getNamespace() ),
+ // WHERE
+ array( "{$fieldPrefix}_from" => $id ),
+ __METHOD__ );
+ }
return true;
}
@@ -444,7 +557,17 @@
* @param integer $id The page_id
* @param Title $newTitle The new title
*/
- private function mergePage( $id, Title $newTitle ) {
+ private function mergePage( $row, Title $newTitle ) {
+ $id = $row->page_id;
+
+ // Construct the WikiPage object we will need later, while the
+ // page_id still exists. Note that this cannot use
makeTitleSafe(),
+ // we are deliberately constructing an invalid title.
+ $sourceTitle = Title::makeTitle( $row->page_namespace,
$row->page_title );
+ $sourceTitle->resetArticleID( $id );
+ $wikiPage = new WikiPage( $sourceTitle );
+ $wikiPage->loadPageData( 'fromdbmaster' );
+
$destId = $newTitle->getArticleId();
$this->db->begin( __METHOD__ );
$this->db->update( 'revision',
@@ -456,10 +579,17 @@
$this->db->delete( 'page', array( 'page_id' => $id ),
__METHOD__ );
- // @fixme Need WikiPage::doDeleteUpdates() or similar to avoid
orphan
- // rows in the links tables.
+ // Call LinksDeletionUpdate to delete outgoing links from the
old title,
+ // and update category counts.
+ //
+ // Calling external code with a fake broken Title is a fairly
dubious
+ // idea. It's necessary because it's quite a lot of code to
duplicate,
+ // but that also makes it fragile since it would be easy for
someone to
+ // accidentally introduce an assumption of title validity to
the code we
+ // are calling.
+ $update = new LinksDeletionUpdate( $wikiPage );
+ $update->doUpdate();
- $this->db->commit( __METHOD__ );
return true;
}
}
--
To view, visit https://gerrit.wikimedia.org/r/190416
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I584ead93d6267d1a2928ecbcdf8a4cd8e5aeef94
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Tim Starling <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits