Daniel Kinzler has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/75867


Change subject: (bug 51621) Make SBL aware of ContentHandler.
......................................................................

(bug 51621) Make SBL aware of ContentHandler.

This changes SpamBlacklist to make use of the new ContentHandler-aware
hooks.

This change also includes some refactoring and cleanup which made
the migration to the new hooks easier.

Change-Id: I21e9cc8479f2b95fb53c502f6e279c8a1ea378a5
---
M SpamBlacklist.php
M SpamBlacklistHooks.php
M SpamBlacklist_body.php
3 files changed, 96 insertions(+), 84 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/SpamBlacklist 
refs/changes/67/75867/1

diff --git a/SpamBlacklist.php b/SpamBlacklist.php
index f3e27f3..1b54d28 100644
--- a/SpamBlacklist.php
+++ b/SpamBlacklist.php
@@ -10,7 +10,7 @@
 $wgExtensionCredits['antispam'][] = array(
        'path'           => __FILE__,
        'name'           => 'SpamBlacklist',
-       'author'         => array( 'Tim Starling', 'John Du Hart' ),
+       'author'         => array( 'Tim Starling', 'John Du Hart', 'Daniel 
Kinzler' ),
        'url'            => 
'https://www.mediawiki.org/wiki/Extension:SpamBlacklist',
        'descriptionmsg' => 'spam-blacklist-desc',
 );
@@ -33,10 +33,18 @@
  */
 $wgSpamBlacklistSettings =& $wgBlacklistSettings['spam'];
 
-$wgHooks['EditFilterMerged'][] = 'SpamBlacklistHooks::filterMerged';
-$wgHooks['APIEditBeforeSave'][] = 
'SpamBlacklistHooks::filterAPIEditBeforeSave';
+if ( !defined( 'MW_SUPPORTS_CONTENTHANDLER' ) ) {
+       die( "This version of SpamBlacklist requires a version of MediaWiki 
that supports the ContentHandler facility (supported since MW 1.21)." );
+}
+
+// filter pages on save
+$wgHooks['EditFilterMergedContent'][] = 
'SpamBlacklistHooks::filterMergedContent';
+
+// editing filter rules
 $wgHooks['EditFilter'][] = 'SpamBlacklistHooks::validate';
-$wgHooks['ArticleSaveComplete'][] = 'SpamBlacklistHooks::articleSave';
+$wgHooks['PageContentSaveComplete'][] = 'SpamBlacklistHooks::pageSaveContent';
+
+// email filters
 $wgHooks['UserCanSendEmail'][] = 'SpamBlacklistHooks::userCanSendEmail';
 $wgHooks['AbortNewAccount'][] = 'SpamBlacklistHooks::abortNewAccount';
 
diff --git a/SpamBlacklistHooks.php b/SpamBlacklistHooks.php
index 530df16..5eff624 100644
--- a/SpamBlacklistHooks.php
+++ b/SpamBlacklistHooks.php
@@ -5,49 +5,42 @@
  */
 class SpamBlacklistHooks {
        /**
-        * Hook function for EditFilterMerged
+        * Hook function for EditFilterMergedContent
         *
-        * @param $editPage EditPage
-        * @param $text string
-        * @param $hookErr string
-        * @param $editSummary string
+        * @param IContextSource $context
+        * @param Content        $content
+        * @param Status         $status
+        * @param string         $summary
+        * @param User           $user
+        * @param bool           $minoredit
+        *
         * @return bool
         */
-       static function filterMerged( $editPage, $text, &$hookErr, $editSummary 
) {
-               global $wgTitle;
-               if( is_null( $wgTitle ) ) {
-                       # API mode
-                       # wfSpamBlacklistFilterAPIEditBeforeSave already 
checked the blacklist
-                       return true;
+       static function filterMergedContent( IContextSource $context, Content 
$content, Status $status, $summary, User $user, $minoredit ) {
+               $title = $context->getTitle();
+
+               // get the link from the not-yet-saved page content.
+               $pout = $content->getParserOutput( $title );
+               $links = array_keys( $pout->getExternalLinks() );
+
+               // HACK: treat the edit summary as a link
+               if ( $summary !== '' ) {
+                       $links[] = $summary;
                }
 
                $spamObj = BaseBlacklist::getInstance( 'spam' );
-               $title = $editPage->mArticle->getTitle();
-               $ret = $spamObj->filter( $title, $text, '', $editSummary, 
$editPage );
-               if ( $ret !== false ) {
-                       $editPage->spamPageWithContent( $ret );
-               }
-               // Return convention for hooks is the inverse of 
$wgFilterCallback
-               return ( $ret === false );
-       }
+               $matches = $spamObj->filter( $links, $title );
 
-       /**
-        * Hook function for APIEditBeforeSave
-        *
-        * @param $editPage EditPage
-        * @param $text string
-        * @param $resultArr array
-        * @return bool
-        */
-       static function filterAPIEditBeforeSave( $editPage, $text, &$resultArr 
) {
-               $spamObj = BaseBlacklist::getInstance( 'spam' );
-               $title = $editPage->mArticle->getTitle();
-               $ret = $spamObj->filter( $title, $text, '', '', $editPage );
-               if ( $ret!==false ) {
-                       $resultArr['spamblacklist'] = implode( '|', $ret );
+               if ( $matches !== false ) {
+                       $status->fatal( 'spamprotectiontext' );
+
+                       foreach ( $matches as $match ) {
+                               $status->fatal( 'spamprotectionmatch', $match );
+                       }
                }
-               // Return convention for hooks is the inverse of 
$wgFilterCallback
-               return ( $ret === false );
+
+               // Always return true, EditPage will look at $status->isOk().
+               return true;
        }
 
        /**
@@ -136,20 +129,37 @@
        }
 
        /**
-        * Hook function for ArticleSaveComplete
+        * Hook function for PageContentSaveComplete
         * Clear local spam blacklist caches on page save.
         *
-        * @param $article Article
-        * @param $user User
-        * @param $text string
-        * @param $summary string
-        * @param $isminor
-        * @param $iswatch
-        * @param $section
+        * @param Page $wikiPage
+        * @param User     $user
+        * @param Content  $content
+        * @param string   $summary
+        * @param bool     $isMinor
+        * @param bool     $isWatch
+        * @param string   $section
+        * @param int      $flags
+        * @param int      $revision
+        * @param Status   $status
+        * @param int      $baseRevId
+        *
         * @return bool
         */
-       static function articleSave( &$article, &$user, $text, $summary, 
$isminor, $iswatch, $section ) {
-               if( !BaseBlacklist::isLocalSource( $article->getTitle() ) ) {
+       static function pageSaveContent(
+               Page $wikiPage,
+               User $user,
+               Content $content,
+               $summary,
+               $isMinor,
+               $isWatch,
+               $section,
+               $flags,
+               $revision,
+               Status $status,
+               $baseRevId
+       ) {
+               if( !BaseBlacklist::isLocalSource( $wikiPage->getTitle() ) ) {
                        return true;
                }
                global $wgMemc, $wgDBname;
diff --git a/SpamBlacklist_body.php b/SpamBlacklist_body.php
index 7279e20..d3de841 100644
--- a/SpamBlacklist_body.php
+++ b/SpamBlacklist_body.php
@@ -5,8 +5,7 @@
 }
 
 class SpamBlacklist extends BaseBlacklist {
-       var $files = array( 
"http://meta.wikimedia.org/w/index.php?title=Spam_blacklist&action=raw&sb_ver=1";
 );
-       var $ignoreEditSummary = false;
+       //var $files = array( 
"http://meta.wikimedia.org/w/index.php?title=Spam_blacklist&action=raw&sb_ver=1";
 );
 
        /**
         * Returns the code for the blacklist implementation
@@ -18,49 +17,44 @@
        }
 
        /**
-        * @param Title $title
-        * @param string $text Text of section, or entire text if 
$editPage!=false
-        * @param string $section Section number or name
-        * @param string $editsummary Edit summary if one exists, some people 
use urls there too
-        * @param EditPage $editPage EditPage if EditFilterMerged was called, 
null otherwise
+        * Apply some basic anti-spoofing to the links before they get filtered,
+        * see @bug 12896
+        *
+        * @param string $text
+        *
+        * @return string
+        */
+       protected function antiSpoof( $text ) {
+               $text = str_replace( '．', '.', $text );
+               return $text;
+       }
+
+       /**
+        * @param string[] $links An array of links to check against the 
blacklist
+        * @param Title  $title The title of the page to which the filter shall 
be applied.
+        *               This is used to load the old links already on the 
page, so
+        *               the filter is only applied to links that got added. If 
not given,
+        *               the filter is applied to all $links.
+        *
         * @return Array Matched text(s) if the edit should not be allowed, 
false otherwise
         */
-       function filter( &$title, $text, $section, $editsummary = '', EditPage 
&$editPage = null ) {
-               /**
-                * @var $wgParser Parser
-                */
-               global $wgParser, $wgUser;
-
+       function filter( array $links, Title $title = null ) {
                $fname = 'wfSpamBlacklistFilter';
                wfProfileIn( $fname );
-
-               # These don't do anything, commenting out...
-               #$this->title = $title;
-               #$this->text = $text;
-               #$this->section = $section;
-               $text = str_replace( '．', '.', $text ); //@bug 12896
 
                $blacklists = $this->getBlacklists();
                $whitelists = $this->getWhitelists();
 
                if ( count( $blacklists ) ) {
-                       # Run parser to strip SGML comments and such out of the 
markup
-                       # This was being used to circumvent the filter (see bug 
5185)
-                       if ( $editPage ) {
-                               $editInfo = 
$editPage->mArticle->prepareTextForEdit( $text );
-                               $out = $editInfo->output;
-                       } else {
-                               $options = new ParserOptions();
-                               $text = $wgParser->preSaveTransform( $text, 
$title, $wgUser, $options );
-                               $out = $wgParser->parse( $text, $title, 
$options );
-                       }
-                       $newLinks = array_keys( $out->getExternalLinks() );
-                       $oldLinks = $this->getCurrentLinks( $title );
-                       $addedLinks = array_diff( $newLinks, $oldLinks );
+                       // poor man's anti-spoof, see bug 12896
+                       $newLinks = array_map( array( $this, 'antiSpoof' ), 
$links );
 
-                       // We add the edit summary if one exists
-                       if ( !$this->ignoreEditSummary && !empty( $editsummary 
) ) {
-                               $addedLinks[] = $editsummary;
+                       if ( $title !== null ) {
+                               $oldLinks = $this->getCurrentLinks( $title );
+                               $addedLinks = array_diff( $newLinks, $oldLinks 
);
+                       } else {
+                               // can't load old links, so treat all links as 
added.
+                               $addedLinks = $newLinks;
                        }
 
                        wfDebugLog( 'SpamBlacklist', "Old URLs: " . implode( ', 
', $oldLinks ) );

-- 
To view, visit https://gerrit.wikimedia.org/r/75867
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I21e9cc8479f2b95fb53c502f6e279c8a1ea378a5
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/SpamBlacklist
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to