jenkins-bot has submitted this change and it was merged.
Change subject: Add Special:ListDuplicatedFiles expensive query special page.
......................................................................
Add Special:ListDuplicatedFiles expensive query special page.
I saw some comments recently on Commons suggesting that
better ways are needed to manage duplicate files (there are tools
for checking whether a specific file is a duplicate, but no backlog
of outstanding duplicate files).
This seems like a fairly easy first step in that direction.
I wasn't sure whether this should be an image-gallery-type
query page or just a list. I think in this case a plain
list is more useful.
Change-Id: Ibe4b9da71ca6451ec4e6b0050feaf3ca70e1b888
---
M includes/AutoLoader.php
M includes/QueryPage.php
M includes/specialpage/SpecialPageFactory.php
A includes/specials/SpecialListDuplicatedFiles.php
M languages/messages/MessagesEn.php
M languages/messages/MessagesQqq.php
M maintenance/language/messages.inc
7 files changed, 131 insertions(+), 0 deletions(-)
Approvals:
Aaron Schulz: Looks good to me, approved
jenkins-bot: Verified
diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php
index 9d764e1..1a364c9 100644
--- a/includes/AutoLoader.php
+++ b/includes/AutoLoader.php
@@ -953,6 +953,7 @@
'IPBlockForm' => 'includes/specials/SpecialBlock.php',
'LinkSearchPage' => 'includes/specials/SpecialLinkSearch.php',
'ListredirectsPage' => 'includes/specials/SpecialListredirects.php',
+ 'ListDuplicatedFilesPage' =>
'includes/specials/SpecialListDuplicatedFiles.php',
'LoginForm' => 'includes/specials/SpecialUserlogin.php',
'LonelyPagesPage' => 'includes/specials/SpecialLonelypages.php',
'LongPagesPage' => 'includes/specials/SpecialLongpages.php',
diff --git a/includes/QueryPage.php b/includes/QueryPage.php
index 69629e0..29bbd52 100644
--- a/includes/QueryPage.php
+++ b/includes/QueryPage.php
@@ -77,6 +77,7 @@
array( 'DeadendPagesPage', 'Deadendpages' ),
array( 'DoubleRedirectsPage', 'DoubleRedirects'
),
array( 'FileDuplicateSearchPage',
'FileDuplicateSearch' ),
+ array( 'ListDuplicatedFilesPage',
'ListDuplicatedFiles'),
array( 'LinkSearchPage', 'LinkSearch' ),
array( 'ListredirectsPage', 'Listredirects' ),
array( 'LonelyPagesPage', 'Lonelypages' ),
diff --git a/includes/specialpage/SpecialPageFactory.php
b/includes/specialpage/SpecialPageFactory.php
index c6735e6..654e7ea 100644
--- a/includes/specialpage/SpecialPageFactory.php
+++ b/includes/specialpage/SpecialPageFactory.php
@@ -117,6 +117,7 @@
'FileDuplicateSearch' => 'FileDuplicateSearchPage',
'Upload' => 'SpecialUpload',
'UploadStash' => 'SpecialUploadStash',
+ 'ListDuplicatedFiles' => 'ListDuplicatedFilesPage',
// Data and tools
'Statistics' => 'SpecialStatistics',
diff --git a/includes/specials/SpecialListDuplicatedFiles.php
b/includes/specials/SpecialListDuplicatedFiles.php
new file mode 100644
index 0000000..9401fca
--- /dev/null
+++ b/includes/specials/SpecialListDuplicatedFiles.php
@@ -0,0 +1,112 @@
+<?php
+/**
+ * Implements Special:ListDuplicatedFiles
+ *
+ * Copyright © 2013 Brian Wolff
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup SpecialPage
+ * @author Brian Wolff
+ */
+
+/**
+ * Special:ListDuplicatedFiles Lists all files where the current version is
+ * a duplicate of the current version of some other file.
+ * @ingroup SpecialPage
+ */
+class ListDuplicatedFilesPage extends QueryPage {
+	/**
+	 * @param string $name Special page name handed to the parent constructor
+	 */
+	public function __construct( $name = 'ListDuplicatedFiles' ) {
+		parent::__construct( $name );
+	}
+
+	/**
+	 * Flag this query page as expensive.
+	 *
+	 * @return bool Always true
+	 */
+	public function isExpensive() {
+		return true;
+	}
+
+	/**
+	 * @return bool Always false
+	 */
+	public function isSyndicated() {
+		return false;
+	}
+
+	/**
+	 * Get all the duplicates by grouping on sha1s.
+	 *
+	 * A cheaper (but less useful) version of this
+	 * query would be to not care how many duplicates a
+	 * particular file has, and do a self-join on image table.
+	 * However this version should be no more expensive than
+	 * Special:MostLinked, which seems to get handled fine
+	 * with however we are doing cached special pages.
+	 *
+	 * Each result row stands for one group of image rows sharing an
+	 * img_sha1: 'title' is the smallest img_name in the group and
+	 * 'value' is the number of rows in the group. Only groups with
+	 * more than one row are kept.
+	 *
+	 * @return array Query description with 'tables', 'fields' and 'options' keys
+	 */
+	public function getQueryInfo() {
+		return array(
+			'tables' => array( 'image' ),
+			'fields' => array(
+				'namespace' => NS_FILE,
+				'title' => 'MIN(img_name)',
+				'value' => 'count(*)'
+			),
+			'options' => array(
+				'GROUP BY' => 'img_sha1',
+				'HAVING' => 'count(*) > 1',
+			),
+		);
+	}
+
+	/**
+	 * Pre-fill the link cache
+	 *
+	 * @param DatabaseBase $db
+	 * @param ResultWrapper $res
+	 */
+	public function preprocessResults( $db, $res ) {
+		if ( $res->numRows() > 0 ) {
+			$linkBatch = new LinkBatch();
+
+			foreach ( $res as $row ) {
+				$linkBatch->add( $row->namespace, $row->title );
+			}
+
+			// Rewind the result set; the loop above has consumed it.
+			$res->seek( 0 );
+			$linkBatch->execute();
+		}
+	}
+
+	/**
+	 * Build the list entry for one result row.
+	 *
+	 * @param Skin $skin Not used by this implementation
+	 * @param object $result Result row with namespace, title and value fields
+	 * @return string Parsed 'listduplicatedfiles-entry' message
+	 */
+	public function formatResult( $skin, $result ) {
+		// Future version might include a list of the first 5 duplicates
+		// perhaps separated by an "↔".
+		$image1 = Title::makeTitle( $result->namespace, $result->title );
+		$dupeSearch = SpecialPage::getTitleFor( 'FileDuplicateSearch', $image1->getDBKey() );
+
+		// Use the page's own message context rather than the global
+		// wfMessage(). 'value' counts every file in the sha1 group,
+		// including the one being listed, hence the "- 1".
+		$msg = $this->msg( 'listduplicatedfiles-entry' )
+			->params( $image1->getText() )
+			->numParams( $result->value - 1 )
+			->params( $dupeSearch->getPrefixedDBKey() );
+
+		return $msg->parse();
+	}
+
+	/**
+	 * @return string The group name 'media'
+	 */
+	protected function getGroupName() {
+		return 'media';
+	}
+}
diff --git a/languages/messages/MessagesEn.php
b/languages/messages/MessagesEn.php
index 222333b..192ef8d 100644
--- a/languages/messages/MessagesEn.php
+++ b/languages/messages/MessagesEn.php
@@ -423,6 +423,7 @@
'Listfiles' => array( 'ListFiles', 'FileList',
'ImageList' ),
'Listgrouprights' => array( 'ListGroupRights',
'UserGroupRights' ),
'Listredirects' => array( 'ListRedirects' ),
+ 'ListDuplicatedFiles' => array( 'ListDuplicatedFiles',
'ListFileDuplicates' ),
'Listusers' => array( 'ListUsers', 'UserList' ),
'Lockdb' => array( 'LockDB' ),
'Log' => array( 'Log', 'Logs' ),
@@ -2641,6 +2642,10 @@
'listredirects' => 'List of redirects',
'listredirects-summary' => '', # do not translate or duplicate this message to
other languages
+'listduplicatedfiles' => 'List of files with duplicates',
+'listduplicatedfiles-summary' => 'This is a list of files where the most
recent version of the file is a duplicate of the most recent version of some
other file. Only local files are considered.',
+'listduplicatedfiles-entry' => '[[:File:$1|$1]] has [[$3|{{PLURAL:$2|a
duplicate|$2 duplicates}}]].',
+
# Unused templates
'unusedtemplates' => 'Unused templates',
'unusedtemplates-summary' => '', # do not translate or duplicate this message
to other languages
diff --git a/languages/messages/MessagesQqq.php
b/languages/messages/MessagesQqq.php
index 83383c1..520f6c6 100644
--- a/languages/messages/MessagesQqq.php
+++ b/languages/messages/MessagesQqq.php
@@ -4895,6 +4895,11 @@
# List redirects
'listredirects' => '{{doc-special|ListRedirects}}',
+# List duplicates
+'listduplicatedfiles' => '{{doc-special|ListDuplicatedFiles}}',
+'listduplicatedfiles-summary' => 'Summary at top of
Special:ListDuplicatedFiles',
+'listduplicatedfiles-entry' => 'A list item on Special:ListDuplicatedFiles. $1
is the file name (no namespace prefix). $2 is the number of duplicates this
file has. $3 is the name of the duplicate search page (aka
"Special:FileDuplicateSearch/Foo.png" or
"Spécial:Recherche_fichier_en_double/Firefox.png")',
+
# Unused templates
'unusedtemplates' => '{{doc-special|UnusedTemplates}}',
'unusedtemplatestext' => 'Shown on top of [[Special:Unusedtemplates]]',
diff --git a/maintenance/language/messages.inc
b/maintenance/language/messages.inc
index 746c7d7..6b5506e 100644
--- a/maintenance/language/messages.inc
+++ b/maintenance/language/messages.inc
@@ -1690,6 +1690,11 @@
'listredirects',
'listredirects-summary',
),
+ 'listduplicatedfiles' => array(
+ 'listduplicatedfiles',
+ 'listduplicatedfiles-summary',
+ 'listduplicatedfiles-entry',
+ ),
'unusedtemplates' => array(
'unusedtemplates',
'unusedtemplates-summary',
@@ -4103,6 +4108,7 @@
'filedelete' => 'File deletion',
'mimesearch' => 'MIME search',
'unwatchedpages' => 'Unwatched pages',
+ 'listduplicatedfiles' => 'List duplicated files special page',
'listredirects' => 'List redirects',
'unusedtemplates' => 'Unused templates',
'randompage' => 'Random page',
--
To view, visit https://gerrit.wikimedia.org/r/85446
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ibe4b9da71ca6451ec4e6b0050feaf3ca70e1b888
Gerrit-PatchSet: 7
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Brian Wolff <[email protected]>
Gerrit-Reviewer: Aaron Schulz <[email protected]>
Gerrit-Reviewer: Brian Wolff <[email protected]>
Gerrit-Reviewer: Brion VIBBER <[email protected]>
Gerrit-Reviewer: Gergő Tisza <[email protected]>
Gerrit-Reviewer: IAlex <[email protected]>
Gerrit-Reviewer: MarkTraceur <[email protected]>
Gerrit-Reviewer: Parent5446 <[email protected]>
Gerrit-Reviewer: Reedy <[email protected]>
Gerrit-Reviewer: Rillke <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits