Polybuildr has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/214303

Change subject: [WIP] Initial work on SmiteSpam extension
......................................................................

[WIP] Initial work on SmiteSpam extension

Change-Id: Iac3a53277a81eea5b13a73cce7b40eba04829b58
---
M SmiteSpam.php
M SpecialSmiteSpam.php
A autoload.php
A checks/ExternalLinksCheck.php
A checks/RepeatedExternalLinksCheck.php
A generate-autoloads.php
M i18n/en.json
M i18n/qqq.json
A includes/SpamSmiter.php
9 files changed, 147 insertions(+), 6 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/SmiteSpam refs/changes/03/214303/1

diff --git a/SmiteSpam.php b/SmiteSpam.php
index 6ea274d..a4390fa 100644
--- a/SmiteSpam.php
+++ b/SmiteSpam.php
@@ -6,7 +6,8 @@
 
 $ssRoot = __DIR__;
 
-$wgAutoloadClasses['SpecialSmiteSpam'] = "$ssRoot/SpecialSmiteSpam.php";
+require_once "$ssRoot/autoload.php";
+
 $wgMessagesDirs['SmiteSpam'] = "$ssRoot/i18n";
 $wgExtensionMessagesFiles['SmiteSpamAlias'] = "$ssRoot/SmiteSpam.alias.php";
 $wgSpecialPages['SmiteSpam'] = 'SpecialSmiteSpam';
diff --git a/SpecialSmiteSpam.php b/SpecialSmiteSpam.php
index aedd3e6..b038618 100644
--- a/SpecialSmiteSpam.php
+++ b/SpecialSmiteSpam.php
@@ -7,11 +7,33 @@
        }
 
        public function execute() {
-               $output = $this->getOutput();
-               $output->setPageTitle( $this->msg('smitespam') );
+               $out = $this->getOutput();
+               $out->setPageTitle( $this->msg( 'smitespam' ) );
+
+               $ss = new SpamSmiter();
+               $data = $ss->run();
+
+               $out->addHTML( '<h2>' . $this->msg( 'page-list-title' ) . '</h2>' );
+               $out->addHTML( Html::openElement( 'ul' ) );
+
+               foreach ( $data as $row ) {
+                       $title = Title::newFromText( $row['title'] );
+                       $out->addHTML( Html::openElement( 'li' ) );
+                       $out->addHTML(
+                               Html::openElement( 'a', array(
+                                               'href' => $title->getLocalUrl()
+                                       )
+                               )
+                       );
+                       $out->addHTML( htmlspecialchars( $row['title'] ) );
+                       $out->addHTML( Html::closeElement( 'a' ) );
+                       $out->addHTML( Html::closeElement( 'li' ) );
+               }
+
+               $out->addHTML( Html::closeElement( 'ul' ) );
        }
 
        function getGroupName() {
-                  return 'maintenance';
+               return 'maintenance';
        }
 }
diff --git a/autoload.php b/autoload.php
new file mode 100644
index 0000000..e0250e0
--- /dev/null
+++ b/autoload.php
@@ -0,0 +1,11 @@
+<?php
+// This file is generated by AutoloadGenerator, do not adjust manually
+// @codingStandardsIgnoreFile
+global $wgAutoloadClasses;
+
+$wgAutoloadClasses += array(
+       'ExternalLinksCheck' => __DIR__ . '/checks/ExternalLinksCheck.php',
+       'RepeatedExternalLinksCheck' => __DIR__ . '/checks/RepeatedExternalLinksCheck.php',
+       'SpamSmiter' => __DIR__ . '/includes/SpamSmiter.php',
+       'SpecialSmiteSpam' => __DIR__ . '/SpecialSmiteSpam.php',
+);
diff --git a/checks/ExternalLinksCheck.php b/checks/ExternalLinksCheck.php
new file mode 100644
index 0000000..f4d9acf
--- /dev/null
+++ b/checks/ExternalLinksCheck.php
@@ -0,0 +1,15 @@
+<?php
+
+class ExternalLinksCheck {
+       public function getValue( $pageID ) {
+               $dbr = wfGetDB( DB_SLAVE );
+
+               $wikitext = $dbr->selectField(
+                       array( 'text', 'revision', 'page' ),
+                       array( 'old_text' ),
+                       array( "page_id = $pageID", 'rev_id = page_latest', 'old_id = rev_text_id' )
+               );
+
+               return preg_match_all( '/' . wfUrlProtocols() . '/', $wikitext );
+       }
+}
diff --git a/checks/RepeatedExternalLinksCheck.php b/checks/RepeatedExternalLinksCheck.php
new file mode 100644
index 0000000..3339d64
--- /dev/null
+++ b/checks/RepeatedExternalLinksCheck.php
@@ -0,0 +1,29 @@
+<?php
+
+class RepeatedExternalLinksCheck {
+       public function getValue( $pageID ) {
+               $dbr = wfGetDB( DB_SLAVE );
+
+               $wikitext = $dbr->selectField(
+                       array( 'text', 'revision', 'page' ),
+                       array( 'old_text' ),
+                       array( "page_id = $pageID", 'rev_id = page_latest', 'old_id = rev_text_id' )
+               );
+               $matches = array();
+               preg_match_all( '/(' . wfUrlProtocols() . ')([^\s\]\"]*)/', $wikitext, $matches );
+
+               $linkFrequencies = array_count_values( $matches[2] );
+               $duplicateLinks = array_filter( $linkFrequencies,
+                       function ( $value ) {
+                               return $value > 1;
+                       }
+               );
+               $numDuplicates = array_map(
+                       function ( $value ) {
+                               return $value - 1;
+                       },
+                       $duplicateLinks
+               );
+               return array_sum( $numDuplicates );
+       }
+}
diff --git a/generate-autoloads.php b/generate-autoloads.php
new file mode 100644
index 0000000..745528f
--- /dev/null
+++ b/generate-autoloads.php
@@ -0,0 +1,11 @@
+<?php
+
+require_once '../../includes/utils/AutoloadGenerator.php';
+
+$gen = new AutoloadGenerator( __DIR__ );
+
+$gen->readFile( __DIR__ . '/SpecialSmiteSpam.php' );
+$gen->readDir( __DIR__ . '/includes' );
+$gen->readDir( __DIR__ . '/checks' );
+
+$gen->generateAutoload();
diff --git a/i18n/en.json b/i18n/en.json
index e7143e3..ef0e156 100644
--- a/i18n/en.json
+++ b/i18n/en.json
@@ -5,5 +5,6 @@
                ]
        },
        "smitespam": "Smite Spam",
-       "smitespam-desc": "Provides a [[Special:SmiteSpam|special page]] to help administrators identify and delete spam wiki pages."
+       "smitespam-desc": "Provides a [[Special:SmiteSpam|special page]] to help administrators identify and delete spam wiki pages.",
+       "page-list-title": "Possible spam pages"
 }
diff --git a/i18n/qqq.json b/i18n/qqq.json
index ee98eba..bc92e1e 100644
--- a/i18n/qqq.json
+++ b/i18n/qqq.json
@@ -5,5 +5,6 @@
                ]
        },
        "smitespam": "The name of the extension's entry in Special:SpecialPages.",
-       "smitespam-desc": "Description of what SmiteSpam does."
+       "smitespam-desc": "Description of what SmiteSpam does.",
+       "page-list-title": "Title of the list of possible spam pages."
 }
diff --git a/includes/SpamSmiter.php b/includes/SpamSmiter.php
new file mode 100644
index 0000000..ede1f62
--- /dev/null
+++ b/includes/SpamSmiter.php
@@ -0,0 +1,50 @@
+<?php
+
+class SpamSmiter {
+
+       protected $config;
+
+       public function __construct() {
+               $this->config = array(
+                       'checks' => array(
+                               'ExternalLinksCheck' => 1,
+                               'RepeatedExternalLinksCheck' => 5,
+                       ),
+                       'threshold' => 10,
+               );
+       }
+
+       public function run() {
+               $dbr = wfGetDB( DB_SLAVE );
+               $res = $dbr->select(
+                       array( 'page' ),
+                       array( 'page_title', 'page_id' )
+               );
+
+               $pages = array();
+               foreach ( $res as $row ) {
+                       $pages[] = array(
+                               'id' => $row->page_id,
+                               'title' => $row->page_title,
+                       );
+               }
+
+               $checks = $this->config['checks'];
+
+               $spamPages = array();
+               foreach ( $pages as $page ) {
+                       $value = 0;
+                       foreach ( $checks as $check => $weight ) {
+                               $checker = new $check;
+                               $checkvalue = $checker->getValue( $page['id'] ) * $weight;
+                               $value += $checkvalue;
+                       }
+
+                       if ( $value > $this->config['threshold'] ) {
+                               $spamPages[] = $page;
+                       }
+               }
+
+               return $spamPages;
+       }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/214303
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iac3a53277a81eea5b13a73cce7b40eba04829b58
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/SmiteSpam
Gerrit-Branch: master
Gerrit-Owner: Polybuildr <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to