Polybuildr has uploaded a new change for review.
https://gerrit.wikimedia.org/r/214303
Change subject: [WIP] Initial work on SmiteSpam extension
......................................................................
[WIP] Initial work on SmiteSpam extension
Change-Id: Iac3a53277a81eea5b13a73cce7b40eba04829b58
---
M SmiteSpam.php
M SpecialSmiteSpam.php
A autoload.php
A checks/ExternalLinksCheck.php
A checks/RepeatedExternalLinksCheck.php
A generate-autoloads.php
M i18n/en.json
M i18n/qqq.json
A includes/SpamSmiter.php
9 files changed, 147 insertions(+), 6 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/SmiteSpam refs/changes/03/214303/1
diff --git a/SmiteSpam.php b/SmiteSpam.php
index 6ea274d..a4390fa 100644
--- a/SmiteSpam.php
+++ b/SmiteSpam.php
@@ -6,7 +6,8 @@
$ssRoot = __DIR__;
-$wgAutoloadClasses['SpecialSmiteSpam'] = "$ssRoot/SpecialSmiteSpam.php";
+require_once "$ssRoot/autoload.php";
+
$wgMessagesDirs['SmiteSpam'] = "$ssRoot/i18n";
$wgExtensionMessagesFiles['SmiteSpamAlias'] = "$ssRoot/SmiteSpam.alias.php";
$wgSpecialPages['SmiteSpam'] = 'SpecialSmiteSpam';
diff --git a/SpecialSmiteSpam.php b/SpecialSmiteSpam.php
index aedd3e6..b038618 100644
--- a/SpecialSmiteSpam.php
+++ b/SpecialSmiteSpam.php
@@ -7,11 +7,33 @@
}
+	/**
+	 * Render a heading followed by a linked list of the pages that
+	 * SpamSmiter::run() reports.
+	 */
public function execute() {
- $output = $this->getOutput();
- $output->setPageTitle( $this->msg('smitespam') );
+		$out = $this->getOutput();
+		$out->setPageTitle( $this->msg( 'smitespam' ) );
+
+		$ss = new SpamSmiter();
+		$data = $ss->run();
+
+		$out->addHTML( '<h2>' . $this->msg( 'page-list-title' ) . '</h2>' );
+		$out->addHTML( Html::openElement( 'ul' ) );
+
+		foreach ( $data as $row ) {
+			// Resolve the page by ID rather than by its title text, which
+			// does not include the namespace.
+			$title = Title::newFromID( $row['id'] );
+			if ( !$title ) {
+				// No Title could be built for this row; skip it instead of
+				// calling a method on null.
+				continue;
+			}
+
+			// Html::element() escapes the label and emits a balanced tag.
+			$link = Html::element(
+				'a',
+				array( 'href' => $title->getLocalURL() ),
+				$title->getPrefixedText()
+			);
+			$out->addHTML( Html::rawElement( 'li', array(), $link ) );
+		}
+
+		$out->addHTML( Html::closeElement( 'ul' ) );
}
+	/**
+	 * Visibility is now spelled out; the method was implicitly public before.
+	 *
+	 * @return string Always 'maintenance'
+	 */
- function getGroupName() {
+	public function getGroupName() {
- return 'maintenance';
+		return 'maintenance';
}
}
diff --git a/autoload.php b/autoload.php
new file mode 100644
index 0000000..e0250e0
--- /dev/null
+++ b/autoload.php
@@ -0,0 +1,11 @@
+<?php
+// This file is generated by AutoloadGenerator, do not adjust manually
+// @codingStandardsIgnoreFile
+global $wgAutoloadClasses;
+
+$wgAutoloadClasses += array(
+ 'ExternalLinksCheck' => __DIR__ . '/checks/ExternalLinksCheck.php',
+ 'RepeatedExternalLinksCheck' => __DIR__ . '/checks/RepeatedExternalLinksCheck.php',
+ 'SpamSmiter' => __DIR__ . '/includes/SpamSmiter.php',
+ 'SpecialSmiteSpam' => __DIR__ . '/SpecialSmiteSpam.php',
+);
diff --git a/checks/ExternalLinksCheck.php b/checks/ExternalLinksCheck.php
new file mode 100644
index 0000000..f4d9acf
--- /dev/null
+++ b/checks/ExternalLinksCheck.php
@@ -0,0 +1,15 @@
+<?php
+
+/**
+ * Spam check that counts URL-protocol occurrences in the latest revision
+ * text of a page.
+ */
+class ExternalLinksCheck {
+	/**
+	 * Count URL-protocol matches in the current text of a page.
+	 *
+	 * @param int $pageID Value of page.page_id to inspect
+	 * @return int Number of matches; 0 when no text could be loaded
+	 */
+	public function getValue( $pageID ) {
+		$dbr = wfGetDB( DB_SLAVE );
+
+		// A key => value condition is quoted by the database layer, so
+		// $pageID is never spliced into the SQL text directly.
+		$row = $dbr->selectRow(
+			array( 'text', 'revision', 'page' ),
+			array( 'old_text', 'old_flags' ),
+			array(
+				'page_id' => $pageID,
+				'rev_id = page_latest',
+				'old_id = rev_text_id',
+			),
+			__METHOD__
+		);
+		if ( $row === false ) {
+			return 0;
+		}
+
+		// old_text is not always plain text: depending on old_flags it can be
+		// compressed or a pointer to external storage. Let core decode it.
+		$wikitext = Revision::getRevisionText( $row );
+		if ( $wikitext === false ) {
+			return 0;
+		}
+
+		// $matches is passed explicitly because it only became optional in
+		// PHP 5.4.
+		$matches = array();
+		return (int)preg_match_all( '/' . wfUrlProtocols() . '/', $wikitext, $matches );
+	}
+}
diff --git a/checks/RepeatedExternalLinksCheck.php b/checks/RepeatedExternalLinksCheck.php
new file mode 100644
index 0000000..3339d64
--- /dev/null
+++ b/checks/RepeatedExternalLinksCheck.php
@@ -0,0 +1,29 @@
+<?php
+
+/**
+ * Spam check that counts how often external link targets are repeated in
+ * the latest revision text of a page.
+ */
+class RepeatedExternalLinksCheck {
+	/**
+	 * Count repeated link targets in the current text of a page.
+	 *
+	 * Every occurrence of a target beyond its first adds one to the result.
+	 * Targets are compared without their protocol prefix.
+	 *
+	 * @param int $pageID Value of page.page_id to inspect
+	 * @return int Number of repeated occurrences; 0 when no text could be
+	 *  loaded or nothing matched
+	 */
+	public function getValue( $pageID ) {
+		$dbr = wfGetDB( DB_SLAVE );
+
+		// A key => value condition is quoted by the database layer, so
+		// $pageID is never spliced into the SQL text directly.
+		$row = $dbr->selectRow(
+			array( 'text', 'revision', 'page' ),
+			array( 'old_text', 'old_flags' ),
+			array(
+				'page_id' => $pageID,
+				'rev_id = page_latest',
+				'old_id = rev_text_id',
+			),
+			__METHOD__
+		);
+		if ( $row === false ) {
+			return 0;
+		}
+
+		// old_text is not always plain text: depending on old_flags it can be
+		// compressed or a pointer to external storage. Let core decode it.
+		$wikitext = Revision::getRevisionText( $row );
+		if ( $wikitext === false ) {
+			return 0;
+		}
+
+		$matches = array();
+		$found = preg_match_all( '/(' . wfUrlProtocols() . ')([^\s\]\"]*)/', $wikitext, $matches );
+		if ( !$found ) {
+			// Nothing matched or the regex engine failed; $matches[2] cannot
+			// be relied on in either case.
+			return 0;
+		}
+
+		// $matches[2] holds what follows the protocol in each match. A target
+		// seen N times contributes N - 1 repeats, so single hits add nothing.
+		$repeats = 0;
+		foreach ( array_count_values( $matches[2] ) as $occurrences ) {
+			$repeats += $occurrences - 1;
+		}
+		return $repeats;
+	}
+}
diff --git a/generate-autoloads.php b/generate-autoloads.php
new file mode 100644
index 0000000..745528f
--- /dev/null
+++ b/generate-autoloads.php
@@ -0,0 +1,11 @@
+<?php
+
+// Locate MediaWiki core relative to this file rather than the current
+// working directory, so the script behaves the same wherever it is run from.
+require_once __DIR__ . '/../../includes/utils/AutoloadGenerator.php';
+
+$gen = new AutoloadGenerator( __DIR__ );
+
+// Feed the special page file plus the includes/ and checks/ directories to
+// the generator.
+$gen->readFile( __DIR__ . '/SpecialSmiteSpam.php' );
+$gen->readDir( __DIR__ . '/includes' );
+$gen->readDir( __DIR__ . '/checks' );
+
+// NOTE(review): presumably this (re)writes autoload.php in the directory
+// passed to the constructor - confirm against AutoloadGenerator.
+$gen->generateAutoload();
diff --git a/i18n/en.json b/i18n/en.json
index e7143e3..ef0e156 100644
--- a/i18n/en.json
+++ b/i18n/en.json
@@ -5,5 +5,6 @@
]
},
"smitespam": "Smite Spam",
- "smitespam-desc": "Provides a [[Special:SmiteSpam|special page]] to help administrators identify and delete spam wiki pages."
+ "smitespam-desc": "Provides a [[Special:SmiteSpam|special page]] to help administrators identify and delete spam wiki pages.",
+ "page-list-title": "Possible spam pages"
}
diff --git a/i18n/qqq.json b/i18n/qqq.json
index ee98eba..bc92e1e 100644
--- a/i18n/qqq.json
+++ b/i18n/qqq.json
@@ -5,5 +5,6 @@
]
},
"smitespam": "The name of the extension's entry in Special:SpecialPages.",
- "smitespam-desc": "Description of what SmiteSpam does."
+ "smitespam-desc": "Description of what SmiteSpam does.",
+ "page-list-title": "Title of the list of possible spam pages."
}
diff --git a/includes/SpamSmiter.php b/includes/SpamSmiter.php
new file mode 100644
index 0000000..ede1f62
--- /dev/null
+++ b/includes/SpamSmiter.php
@@ -0,0 +1,50 @@
+<?php
+
+/**
+ * Scores every page with a set of weighted checks and reports the pages
+ * whose total score exceeds a threshold.
+ */
+class SpamSmiter {
+
+	/**
+	 * @var array 'checks' maps a check class name to its weight;
+	 *  'threshold' is the score a page has to exceed to be reported
+	 */
+	protected $config;
+
+	/**
+	 * Set up the built-in check weights and the reporting threshold.
+	 */
+	public function __construct() {
+		$this->config = array(
+			'checks' => array(
+				'ExternalLinksCheck' => 1,
+				'RepeatedExternalLinksCheck' => 5,
+			),
+			'threshold' => 10,
+		);
+	}
+
+	/**
+	 * Run all configured checks against every row of the page table.
+	 *
+	 * A page's score is the sum of each check's getValue() result multiplied
+	 * by that check's weight.
+	 *
+	 * @return array[] One entry per page scoring above the threshold, with
+	 *  keys 'id' (page_id), 'title' (page_title) and 'namespace'
+	 *  (page_namespace)
+	 */
+	public function run() {
+		$dbr = wfGetDB( DB_SLAVE );
+		$res = $dbr->select(
+			array( 'page' ),
+			array( 'page_title', 'page_id', 'page_namespace' ),
+			array(),
+			__METHOD__
+		);
+
+		$pages = array();
+		foreach ( $res as $row ) {
+			$pages[] = array(
+				'id' => $row->page_id,
+				'title' => $row->page_title,
+				// page_title alone does not identify a page, so hand the
+				// namespace to callers as well.
+				'namespace' => $row->page_namespace,
+			);
+		}
+
+		// Build each checker once instead of once per page.
+		$checkers = array();
+		foreach ( $this->config['checks'] as $check => $weight ) {
+			$checkers[] = array( new $check, $weight );
+		}
+
+		$spamPages = array();
+		foreach ( $pages as $page ) {
+			$value = 0;
+			foreach ( $checkers as $entry ) {
+				list( $checker, $weight ) = $entry;
+				$value += $checker->getValue( $page['id'] ) * $weight;
+			}
+
+			if ( $value > $this->config['threshold'] ) {
+				$spamPages[] = $page;
+			}
+		}
+
+		return $spamPages;
+	}
+}
--
To view, visit https://gerrit.wikimedia.org/r/214303
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Iac3a53277a81eea5b13a73cce7b40eba04829b58
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/SmiteSpam
Gerrit-Branch: master
Gerrit-Owner: Polybuildr <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits