jenkins-bot has submitted this change and it was merged.

Change subject: General cleanup
......................................................................


General cleanup

Removed the $dir variable, made one function return true (the pre-existing
"true;" line likely did nothing), moved one hard-coded English string into
the i18n file, updated/added function documentation, added a meaningful
version number into extension credits.

Change-Id: I2d7255fb3c44853c45c2ed5b3cee8a2e7b16f897
---
M BayesianFilter.Body.php
M BayesianFilter.DBHandler.php
M BayesianFilter.Hooks.php
M BayesianFilter.PageView.php
M BayesianFilter.Tokenizer.php
M BayesianFilter.php
M i18n/en.json
M i18n/qqq.json
8 files changed, 288 insertions(+), 331 deletions(-)

Approvals:
  Legoktm: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/BayesianFilter.Body.php b/BayesianFilter.Body.php
index 6d5ed92..f886626 100644
--- a/BayesianFilter.Body.php
+++ b/BayesianFilter.Body.php
@@ -2,28 +2,26 @@
 
 class BayesianFilter {
 
-       public function __construct()
-       {
+       public function __construct() {
                // this is written here instead of autoloader, cause
                // tokenizer is written in a way it can be reused by
                // other scripts which are not neccesarily part of mediawiki
-               if( !class_exists( 'BayesianFilterTokenizer' ) )
-               {
+               if ( !class_exists( 'BayesianFilterTokenizer' ) ) {
                        include( __DIR__ . '/BayesianFilter.Tokenizer.php' );
                }
        }
 
        /**
-       * This function users $wgParser and returns the external links
-       * in the content of the page. As of now it is not used anywhere
-       * but in the later releases links would have a very big role to
-       * play in spam detection
-       * @text content of the page which we are inspecting
-       * return an array of links
-       */
-
-       public function getLinks( $text, $title )
-       {
+        * This function uses $wgParser and returns the external links
+        * in the content of the page.
+        * As of now it is not used anywhere but in the later releases links 
would
+        * have a very big role to play in spam detection.
+        *
+        * @param string $text Content of the page which we are inspecting
+        * @param Title $title
+        * @return array An array of links
+        */
+       public function getLinks( $text, $title ) {
                global $wgParser, $wgUser;
                $options = new ParserOptions();
                $modifiedText = $wgParser->preSaveTransform( $text, $title, 
$wgUser, $options );
@@ -33,15 +31,14 @@
        }
 
        /**
-       * This function contains the main logic for spam detection
-       * it evaulated whether content of a WikiPage is
-       * spam or not.
-       * @text content of the page which we are inspecting
-       * returns true if the content is spam, false otherwise
-       */
-
-       public function checkSpam( $text, $title )
-       {
+        * This function contains the main logic for spam detection.
+        * It evaluates whether the content of a WikiPage is spam or not.
+        *
+        * @param string $text Content of the page which we are inspecting
+        * @param Title $title
+        * @return bool True if the content is spam, false otherwise
+        */
+       public function checkSpam( $text, $title ) {
                $links = $this->getLinks( $text, $title );
 
                $tokenizer = new BayesianFilterTokenizer;
@@ -50,18 +47,18 @@
                $words = array();
 
                $token = $tokenizer->tokenize( $text );
-               while( $token )
-               {
+               while ( $token ) {
                        $token = strtolower( $token );
-                       if( !$tokenizer->isStopWord( $token ) )
+                       if ( !$tokenizer->isStopWord( $token ) ) {
                                $words[] = $tokenizer->stem( $token );
+                       }
                        $token = $tokenizer->tokenize();
                }
 
                $filterDbHandler = new BayesianFilterDBHandler;
                global $wgBayesianFilterWordsChunkSize;
                $wordsFrequency;
-               $wordsFrequency = $filterDbHandler->getFrequency( $words , 
$wgBayesianFilterWordsChunkSize );
+               $wordsFrequency = $filterDbHandler->getFrequency( $words, 
$wgBayesianFilterWordsChunkSize );
                $probMsgGivenSpam = 1.0;
                $probMsgGivenHam = 1.0;
 
@@ -69,12 +66,10 @@
                $hamCount = isset( $wordsFrequency['ham_count'] ) ? 
$wordsFrequency['ham_count'] : 0;
                $wordCount = count( $words );
 
-               foreach ($words as $word ) 
-               {
-                       if( isset( $wordsFrequency[$word] ) )
-                       {
-                               $probMsgGivenSpam = $probMsgGivenSpam * ( 
$wordsFrequency[$word]['spam'] + 1);
-                               $probMsgGivenHam = $probMsgGivenHam * ( 
$wordsFrequency[$word]['ham'] + 1);
+               foreach ( $words as $word ) {
+                       if ( isset( $wordsFrequency[$word] ) ) {
+                               $probMsgGivenSpam = $probMsgGivenSpam * ( 
$wordsFrequency[$word]['spam'] + 1 );
+                               $probMsgGivenHam = $probMsgGivenHam * ( 
$wordsFrequency[$word]['ham'] + 1 );
                        }
 
                        $probMsgGivenSpam = $probMsgGivenSpam / ( $spamCount + 
$wordCount );
@@ -85,12 +80,9 @@
                $spamHamCount = $filterDbHandler->getSpamHamCount();
 
                // this if condition is added to prevent divisiob by zero.
-               if( !$spamHamCount['spam'] || !$spamHamCount['ham'] )
-               {
+               if ( !$spamHamCount['spam'] || !$spamHamCount['ham'] ) {
                        $spamProb = ( $spamHamCount['spam'] ) / ( 
$spamHamCount['spam'] + $spamHamCount['ham'] );
-               }
-               else
-               {
+               } else {
                        $spamProb = 0.5;
                }
                $hamProb = 1.0 - $spamProb;
@@ -98,40 +90,35 @@
                $probMsgGivenSpam = $probMsgGivenSpam * $spamProb;
                $probMsgGivenHam = $probMsgGivenHam * $hamProb;
 
-               if( $probMsgGivenSpam > $probMsgGivenHam )
-               {
+               if ( $probMsgGivenSpam > $probMsgGivenHam ) {
                        return true;
-               }
-               else
-               {
-                       $filterDbHandler->insertFrequencyTable( $words, "ham" );
+               } else {
+                       $filterDbHandler->insertFrequencyTable( $words, 'ham' );
                        return false;
                }
        }
 
        /**
-       * this function trains the DB for ham and spam
-       * @text the string to train
-       * @category can have two values ham or spam
-       * returns nothing
-       */
-
-       public function train( $text, $category )
-       {
-
+        * Trains the database for ham and spam.
+        *
+        * @param string $text The string to train
+        * @param string $category Can have two values, ham or spam
+        * @return void
+        */
+       public function train( $text, $category ) {
                $tokenizer = new BayesianFilterTokenizer;
                $text = $tokenizer->normalize( $text );
 
                $token = $tokenizer->tokenize( $text );
                $words = array();
 
-               while( $token )
-               {
+               while ( $token ) {
                        $token = strtolower( $token );
-                       if( !$tokenizer->isStopWord( $token ) )
-                       {
+
+                       if ( !$tokenizer->isStopWord( $token ) ) {
                                $words[] = $tokenizer->stem( $token );
                        }
+
                        $token = $tokenizer->tokenize();
                }
 
diff --git a/BayesianFilter.DBHandler.php b/BayesianFilter.DBHandler.php
index e602ed7..d0178d7 100644
--- a/BayesianFilter.DBHandler.php
+++ b/BayesianFilter.DBHandler.php
@@ -3,200 +3,187 @@
 class BayesianFilterDBHandler {
 
        /**
-       * returns the array of frequency of each element is an array
-       * @itemArray an array of items whose frequencies to be returned
-       */
-
-       private function itemFrequency( $itemArray )
-       {
+        * @param array $itemArray Items whose frequencies to be returned
+        * @return array Map of each item to the number of times it occurs
+        */
+       private function itemFrequency( $itemArray ) {
                $itemFrequencyArray = array();
+
                foreach ( $itemArray as $item ) {
-                       if( isset( $itemFrequencyArray[$item] ) )
+                       if ( isset( $itemFrequencyArray[$item] ) ) {
                                $itemFrequencyArray[$item] += 1;
-                       else
+                       } else {
                                $itemFrequencyArray[$item] = 1;
+                       }
                }
+
                return $itemFrequencyArray;
        }
 
        /**
-       * returns the spam and ham frequecy of the words in $words array
-       * @chunksize defines the size of the chunk that should be queried from 
a db in
-       * a single db_query. If 0 it means get all the words from words array.
-       * @words is the array of words whose frequency are required
-       */
-
-       public function getFrequency( $words , $chunksize )
-       {
+        * @param array $words Array of words whose frequency are required
+        * @param int $chunkSize Defines the size of the chunk that should be
+        *                       queried from a DB in a single DB query.
+        *                       If 0, it means get all the words from words 
array.
+        * @return array The spam and ham frequency of the words in $words array
+        */
+       public function getFrequency( $words, $chunkSize ) {
                $wordsFrequency = $this->itemFrequency( $words );
                $words = array_keys( $wordsFrequency );
                $dbr = wfGetDB( DB_SLAVE );
 
                $wordsFrequency = array();
 
-               if( $chunksize )
-               {
-                       //array_chunk returns a multidimensional array, where 
is each array
-                       //is of size  = $chunksize
-                       $wordsMultiArray = array_chunk( $words , $chunksize );
+               if ( $chunkSize ) {
+                       // array_chunk returns a multidimensional array, where 
is each array
+                       // is of size  = $chunkSize
+                       $wordsMultiArray = array_chunk( $words, $chunkSize );
                        foreach ( $wordsMultiArray as $words ) {
                                $res = $dbr->select(
-                                               "word_frequency",
-                                               array( "wf_word", "wf_spam", 
"wf_ham" ),
-                                               array( "wf_word" => $words ),
-                                               __METHOD__,
-                                               array()
-                                       );
-                               foreach ($res as $row ) {
+                                       'word_frequency',
+                                       array( 'wf_word', 'wf_spam', 'wf_ham' ),
+                                       array( 'wf_word' => $words ),
+                                       __METHOD__
+                               );
+                               foreach ( $res as $row ) {
                                        $wordsFrequency[$row->wf_word] = 
array();
                                        $wordsFrequency[$row->wf_word]['spam'] 
= $row->wf_spam;
                                        $wordsFrequency[$row->wf_word]['spam'] 
= $row->wf_ham;
                                }
                        }
-               }
-               else
-               {
+               } else {
                        $res = $dbr->select(
-                                       "word_frequency",
-                                       array( "wf_word", "wf_spam", "wf_ham" ),
-                                       array( "wf_word" => $words ),
-                                       __METHOD__,
-                                       array()
-                               );
-                       foreach ($res as $row ) {
+                               'word_frequency',
+                               array( 'wf_word', 'wf_spam', 'wf_ham' ),
+                               array( 'wf_word' => $words ),
+                               __METHOD__
+                       );
+                       foreach ( $res as $row ) {
                                $wordsFrequency[$row->wf_word] = array();
                                $wordsFrequency[$row->wf_word]['spam'] = 
$row->wf_spam;
                                $wordsFrequency[$row->wf_word]['ham'] = 
$row->wf_ham;
                        }
                }
+
                $res = $dbr->selectRow(
-                               "word_frequency",
-                               array('spam_count' => 'SUM(wf_spam)', 
"ham_count" => 'SUM(wf_ham)'),
-                               array(),
-                               __METHOD__,
-                               array()
-                       );
+                       'word_frequency',
+                       array( 'spam_count' => 'SUM(wf_spam)', 'ham_count' => 
'SUM(wf_ham)' ),
+                       array(),
+                       __METHOD__
+               );
+
                $wordsFrequency['spam_count'] = $res->spam_count;
                $wordsFrequency['ham_count'] = $res->ham_count;
+
                return $wordsFrequency;
        }
 
-
        /**
-       * updates the frequency of words in words array in ham or spam column 
depending
-       * upon the value of $category
-       * @words whose frequency is to be updated
-       * @category in which category it should be updated
-       */
-
-       public function insertFrequencyTable( $words, $category )
-       {
+        * Updates the frequency of words in words array in ham or spam column 
depending
+        * upon the value of $category
+        *
+        * @param array $words Words whose frequency is to be updated
+        * @param string $category In which category it should be updated
+        */
+       public function insertFrequencyTable( $words, $category ) {
                $wordsFrequency = $this->itemFrequency( $words );
                $words = array_keys( $wordsFrequency );
 
                $dbr = wfGetDB( DB_SLAVE );
                $res = $dbr->select(
-                               "word_frequency",
-                               array( "wf_word", "wf_spam", "wf_ham" ),
-                               array( "wf_word" => $words ),
-                               __METHOD__,
-                               array()
-                       );
+                       'word_frequency',
+                       array( 'wf_word', 'wf_spam', 'wf_ham' ),
+                       array( 'wf_word' => $words ),
+                       __METHOD__
+               );
+
                $exists = array();
+
                foreach ( $res as $row ) {
                        $exists[$row->wf_word] = array();
                        $exists[$row->wf_word]['spam'] = $row->wf_spam;
                        $exists[$row->wf_word]['ham'] = $row->wf_ham;
                }
+
                $dbw = wfGetDB( DB_MASTER );
-               $fieldName = "wf_" . $category;
-               foreach ( $words as $word )
-               {
-                       if( isset( $exists[$word] ) )
-                       {
+               $fieldName = 'wf_' . $category;
+
+               foreach ( $words as $word ) {
+                       if ( isset( $exists[$word] ) ) {
                                $dbw->update(
-                                       "word_frequency",
+                                       'word_frequency',
                                        array( $fieldName => ( 
$exists[$word][$category] + $wordsFrequency[$word] ) ),
                                        array( 'wf_word' => $word ),
-                                       __METHOD__,
-                                       array()
+                                       __METHOD__
                                );
-                       }
-                       else
-                       {
+                       } else {
                                $dbw->insert(
-                                       "word_frequency",
+                                       'word_frequency',
                                        array(
                                                $fieldName => 
$wordsFrequency[$word],
                                                'wf_word' => $word
-                                               ),
-                                       __METHOD__,
-                                       array()
+                                       ),
+                                       __METHOD__
                                );
                        }
                }
        }
 
        /**
-       * returns the revision text from the revison table
-       * which has the revision id same as the undidRevision
-       * @undidRevision id of the revision to be returned.
-       */
-
-       public function getRevertedText( $undidRevision )
-       {
+        * Gets the revision text from the revision table
+        * which has the revision ID same as the undidRevision
+        *
+        * @param int $undidRevision ID of the revision to be returned
+        * @return string Revision text
+        */
+       public function getRevertedText( $undidRevision ) {
                $dbr = wfGetDB( DB_SLAVE );
                $res = $dbr->selectRow(
-                               array( 'rev' => "revision", 'txt' => "text" ),
-                               array( 'text' => "old_text" ),
-                               array( 'rev.rev_id' => $undidRevision, 
"txt.old_id = rev.rev_id" ),
-                               __METHOD__,
-                               array()
-                       );
+                       array( 'rev' => 'revision', 'txt' => 'text' ),
+                       array( 'text' => 'old_text' ),
+                       array( 'rev.rev_id' => $undidRevision, 'txt.old_id = 
rev.rev_id' ),
+                       __METHOD__
+               );
                return $res->text;
        }
 
        /**
-       * insert the text into spam_ham_text table
-       * @content the text to be inserted
-       * @spam, if true, then the spam field is set as 1, 0 otherwise
-       */
-
-       public function insertSpamText( $content, $spam=true )
-       {
-               $dbw = wfGetDB( DB_MASTER ) ;
+        * Insert the text into spam_ham_text table
+        *
+        * @param string $content The text to be inserted
+        * @param bool $spam If true, then the spam field is set as 1, 0 
otherwise
+        */
+       public function insertSpamText( $content, $spam = true ) {
+               $dbw = wfGetDB( DB_MASTER );
                $dbw->insert(
-                               "spam_ham_texts",
-                               array(
-                                       'sht_spam' => $spam,
-                                       'sht_text' => $content
-                                       ),
-                               __METHOD__,
-                               array()
-                       );
+                       'spam_ham_texts',
+                       array(
+                               'sht_spam' => $spam,
+                               'sht_text' => $content
+                       ),
+                       __METHOD__
+               );
        }
 
        /**
-       * returns the number of spam texts as result['spam']
-       * and the number or ham texts as result['ham']
-       */
-
-       public function getSpamHamCount()
-       {
+        * @return array The number of spam texts as result['spam']
+        *               and the number of ham texts as result['ham']
+        */
+       public function getSpamHamCount() {
                $result = array( 'spam' => 0, 'ham' => 0 );
-               $dbr = wfGetDB( DB_SLAVE ) ;
+               $dbr = wfGetDB( DB_SLAVE );
                $res = $dbr->select(
-                               "spam_ham_texts",
-                               array( "sht_id", "sht_spam" ),
-                               array(),
-                               __METHOD__,
-                               array()
-                       );
-               foreach ($res as $row ) {
-                       if($row->sht_spam == 1)
+                       'spam_ham_texts',
+                       array( 'sht_id', 'sht_spam' ),
+                       array(),
+                       __METHOD__
+               );
+               foreach ( $res as $row ) {
+                       if ( $row->sht_spam == 1 ) {
                                $result['spam']++;
-                       else
+                       } else {
                                $result['ham']++;
+                       }
                }
                return $result;
        }
diff --git a/BayesianFilter.Hooks.php b/BayesianFilter.Hooks.php
index b10eb7c..3e0deb0 100644
--- a/BayesianFilter.Hooks.php
+++ b/BayesianFilter.Hooks.php
@@ -1,26 +1,20 @@
 <?php
-
-if ( !defined( 'MEDIAWIKI' ) ) { exit; }
-
 /**
-* Hooks for Bayesian Filter Extension
-*/
-
+ * Hooks for Bayesian Filter extension
+ */
 class BayesianFilterHooks {
 
        /**
-       * Hook function for EditFilterMerged
-       * This function runs the content through our filter
-       * @editpage is the instance of Editpage, the page is which was edited
-       * @content is the content of the page as str
-       * @hookErr: error message to return in case the edit is prohibited
-       * @summary is the summary entered by the user while editing the page
-       * returns true if the article is not spam
-       * returns false otherwise, aslo sets the error message accordingly
-       */
-
+        * Hook function for EditFilterMerged
+        * This function runs the content through our filter
+        *
+        * @param EditPage $editPage
+        * @param string $content The content of the page
+        * @param string $hookErr Error message to return in case the edit is 
prohibited
+        * @param string $summary The edit summary entered by the user while 
editing the page
+        * @return bool True if the article is not spam, otherwise false
+        */
        public static function filterMerged( EditPage $editPage, $content, 
&$hookErr, $summary ) {
-
                $context = $editPage->mArticle->getContext();
                $request = $context->getRequest();
                $filter = new BayesianFilter;
@@ -28,48 +22,39 @@
 
                $undidRevision = $request->getVal( 'wpUndidRevision' );
 
-               if( isset( $undidRevision ) && !empty( $undidRevision ) )
-               {
+               if ( isset( $undidRevision ) && !empty( $undidRevision ) ) {
                        $wpSpam = $request->getVal( 'wpSpam' );
-                       if( isset( $wpSpam ) )
-                       {
+                       if ( isset( $wpSpam ) ) {
                                $text = $filterDbHandler->getRevertedText( 
$undidRevision );
-                               $filter->train( $text, "spam" );
-                               $filterDbHandler->insertSpamText( $text ) ;
+                               $filter->train( $text, 'spam' );
+                               $filterDbHandler->insertSpamText( $text );
                        }
-               }
-               else
-               {
+               } else {
                        $result = $filter->checkSpam( $content, 
$editPage->getContextTitle() );
-                       if( $result )
-                       {
+                       if ( $result ) {
                                $editPage->spamPageWithContent( $result );
-                               $hookErr = "Sorry the content on this page is 
spam. It cannot be saved";
+                               $hookErr = wfMessage( 
'bayesianfilter-content-is-spam' )->escaped();
                                return false;
-                       }
-                       else
-                       {
+                       } else {
                                $filterDbHandler->insertSpamText( $content, 
false );
                        }
                }
+
                return true;
        }
 
-
        /**
-       * Hook function for EditPageBeforeEditChecks
-       *
-       * This hook is run whenever an article is opened for edit. It adds the 
"Mark as Spam" checkbox
-       * besides "Watch this Page" and "This is a minor edit"
-       * @editPage is passed by reference to this function.
-       * @checks is an array that is passed by reference to this function. It 
is an array of checkboxes
-       * @tabindex is the index of current tab.
-       * return true in each case
-       */
-
-
+        * Hook function for EditPageBeforeEditChecks
+        *
+        * This hook is run whenever an article is opened for edit. It adds the 
"Mark as Spam" checkbox
+        * besides "Watch this Page" and "This is a minor edit"
+        *
+        * @param EditPage $editPage
+        * @param array $checks Array of checkboxes
+        * @param int $tabindex The index of current tab
+        * @return bool True in each case
+        */
        public static function addFlagSpamCheckbox( &$editPage, &$checks, 
&$tabindex ) {
-
                $context = $editPage->mArticle->getContext();
                $view = new BayesianFilterPageView( $context );
                $view->addFlagSpamCheckbox( $checks, $tabindex );
@@ -78,37 +63,40 @@
        }
 
        /**
-       * This hooks is run whenever any sysop deletes a page
-       * We use it for training out Spam Database. We assume,
-       * that the article being deleted is done for spam purposes
-       * which is true in most cases
-       * @artcile is the Wikipage for which the deletion request is made.
-       * It is an instance of the WikiPage Class
-       * @user is the currently logged in user
-       * @reason is the reason(str) for which the article is being deleted.
-       * @error error message to be displayed if the article deletion was 
prohibited.
-       * return true in each case
-       */
-
+        * This hook is run whenever any sysop deletes a page.
+        * We use it for training our spam database.
+        * We assume that the article being deleted is done for spam purposes,
+        * which is true in most cases.
+        *
+        * @param WikiPage $article The page being deleted
+        * @param User $user The currently logged in user
+        * @param string $reason The reason for which the page is being deleted
+        * @param string $error Error message to be displayed if the page deletion 
was prohibited.
+        * @return bool True in each case
+        */
        public static function onArticleDelete( &$article, User &$user, 
&$reason, &$error ) {
                $content = $article->getContent();
                $text = $content->mText;
                $filter = new BayesianFilter;
-               $filter->train( $text, "spam" );
+               $filter->train( $text, 'spam' );
                $filterDbHandler = new BayesianFilterDBHandler;
-               $filterDbHandler->insertSpamText( $text ) ;
-               true;
+               $filterDbHandler->insertSpamText( $text );
+               return true;
        }
 
        /**
+        * Adds the new, required database tables when the user runs
+        * maintenance/update.php, MediaWiki's core updater script from the 
command
+        * line.
+        *
         * @param $updater DatabaseUpdater
         * @throws MWException
         * @return bool
         */
        public static function onLoadExtensionSchemaUpdates( $updater = null ) {
-               $dir = dirname( __FILE__ );
-               $updater-> addExtensionTable( "word_frequency", 
"$dir/db_patches/word_frequency.sql" );
-               $updater-> addExtensionTable( "spam_ham_texts", 
"$dir/db_patches/spam_ham_texts.sql" );
+               $dir = __DIR__;
+               $updater->addExtensionTable( 'word_frequency', 
"$dir/db_patches/word_frequency.sql" );
+               $updater->addExtensionTable( 'spam_ham_texts', 
"$dir/db_patches/spam_ham_texts.sql" );
                return true;
        }
 }
diff --git a/BayesianFilter.PageView.php b/BayesianFilter.PageView.php
index 86a8251..164fc4b 100644
--- a/BayesianFilter.PageView.php
+++ b/BayesianFilter.PageView.php
@@ -6,25 +6,22 @@
 
        protected $context;
 
-       public function __construct( $context )
-       {
+       public function __construct( $context ) {
                $this->context = $context;
        }
 
        /**
-       * It adds the "Mark as Spam" checkbox
-       * besides "Watch this Page" and "This is a minor edit"
-       * @checkboxes is an array that is passed by reference to this function. 
It is an array of checkboxes
-       * @tabindex is the index of current tab.
-       */
-
-       public function addFlagSpamCheckbox( array &$checkboxes, &$tabindex ){
-
+        * It adds the "Mark as Spam" checkbox
+        * besides "Watch this Page" and "This is a minor edit"
+        *
+        * @param array $checkboxes Array of checkboxes
+        * @param int $tabindex Index of current tab
+        */
+       public function addFlagSpamCheckbox( array &$checkboxes, &$tabindex ) {
                $request = $this->context->getRequest();
                $undo = $request->getVal( 'undo' );
 
-               if( isset( $undo ) )
-               {
+               if ( isset( $undo ) ) {
                        $checkbox = Xml::check(
                                'wpSpam',
                                false,
diff --git a/BayesianFilter.Tokenizer.php b/BayesianFilter.Tokenizer.php
index 5128e25..2aafbd9 100644
--- a/BayesianFilter.Tokenizer.php
+++ b/BayesianFilter.Tokenizer.php
@@ -1,44 +1,38 @@
 <?php
-
 /**
  * This class contains the definitions of all the functions used for 
tokenizing the input
  */
-
-class BayesianFilterTokenizer{
+class BayesianFilterTokenizer {
 
        /**
-       * A wiki text consists of square brackets and ~, == section headling, 
signatures.
-       * sanitizes removes all such transformations that wiki does.
-       */
-       public function normalize( $text ){
+        * A wiki text consists of square brackets and ~, == section headings, 
signatures.
+        * Sanitizes the text by removing all such transformations that the wiki does.
+        */
+       public function normalize( $text ) {
+               $text = strip_tags( $text ); // strips the HTML tags like <br 
/> and <nowiki>
 
-               $text = strip_tags( $text );   //strips the html tags like <br 
/> and <nowiki>
+               // remove the special characters which hold significance in 
wiki formatting
+               $specialChars = array( "'", "\"", '=', '--', '*', '|' );
+               $text = str_replace( $specialChars, '', $text );
 
-               //remove the special characters which hold significance in wiki 
formatting
-               $specialChars = array( "'", "\"", "=", "--", "*", "|" );
-               $text = str_replace( $specialChars, "", $text );
-
-               //remove the [[]] types of text
+               // remove the [[]] types of text
                $pattern = "/\[\[.*?\]\]|{{.*?}}/";
-               $text = preg_replace( $pattern, "", $text );
+               $text = preg_replace( $pattern, '', $text );
 
-               //remove links
-               $pattern = 
"/\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|$!:,.;]*[A-Z0-9+&@#\/%=~_|$]/i" ;
-               $text = preg_replace( $pattern, "", $text );
+               // remove links
+               $pattern = 
"/\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|$!:,.;]*[A-Z0-9+&@#\/%=~_|$]/i";
+               $text = preg_replace( $pattern, '', $text );
 
-               //remove the other special charachters
-               $specialChars = array( "[", "]", "{", "}", ":", "/" , ";" , 
"?", "-", "$", "\\");
-               $text = str_replace( $specialChars, "", $text );
+               // remove the other special characters
+               $specialChars = array( '[', ']', '{', '}', ':', '/', ';', '?', 
'-', '$', "\\" );
+               $text = str_replace( $specialChars, '', $text );
 
-               /*
-               * this is done instead of using a autoloader
-               * cause then this script can be called from
-               * other places
-               */
-               if( class_exists('Sanitizer') )
-               {
-                       if( method_exists( 'Sanitizer' ,  
'decodeCharReferencesAndNormalize' ) )
-                       {
+               /**
+                * This is done instead of using an autoloader
+                * because then this script can be called from other places
+                */
+               if ( class_exists( 'Sanitizer' ) ) {
+                       if ( method_exists( 'Sanitizer', 
'decodeCharReferencesAndNormalize' ) ) {
                                $text = 
Sanitizer::decodeCharReferencesAndNormalize( $text );
                        }
                }
@@ -46,51 +40,43 @@
                return $text;
        }
 
-
        /**
-       * returns an iterator to the next token in the text;
-       */
-       public function tokenize($text = null){
-
+        * @param string|null $text
+        * @return string|bool|null The next token in the text, false when no tokens remain, or null on calls after that
+        */
+       public function tokenize( $text = null ) {
                static $tok = true;
+
                $delimiters = " \n\t\r,.";
 
-               if( $tok == false )
-               {
+               if ( $tok == false ) {
                        return null;
-               }
-               elseif( $text )
-               {
+               } elseif ( $text ) {
                        $tok = strtok( $text, $delimiters );
-               }
-               else
-               {
+               } else {
                        $tok = strtok( $delimiters );
                }
+
                return $tok;
        }
 
        /**
-       * returns if a word is present in stopWords or not
-       */
-       public function isStopWord( $word ){
-
+        * @param string $word Word to check
+        * @return bool Whether a word is present in stopWords (true) or not 
(false)
+        */
+       public function isStopWord( $word ) {
                static $stopWordDict = false;
-               if( !$stopWordDict )
-               {
+               if ( !$stopWordDict ) {
                        $stopWords = array();
-                       $handle = fopen( __DIR__ . "/StopWords.txt", "r");
-                       if( $handle )
-                       {
-                               while( ( $buffer = fgets( $handle ) ) != false )
-                               {
+                       $handle = fopen( __DIR__ . '/StopWords.txt', 'r' );
+                       if ( $handle ) {
+                               while ( ( $buffer = fgets( $handle ) ) != false 
) {
                                        $stopWord = trim( $buffer );
-                                       $stopWords[] = str_replace("'", "", 
$stopWord);
+                                       $stopWords[] = str_replace( "'", '', 
$stopWord );
                                }
                        }
-                       fclose($handle);
-                       foreach( $stopWords  as $stopWord )
-                       {
+                       fclose( $handle );
+                       foreach ( $stopWords as $stopWord ) {
                                $stopWordDict[$stopWord] = 1;
                        }
                }
@@ -99,16 +85,18 @@
        }
 
        /**
-       * stems a word to its root
-       */
-       public function stem( $word ){
-               /*
-               * this is done instead of using a autoloader
-               * cause then this script can be called from
-               * other places
-               */
-               if( !class_exists('PorterStemmer') )
-               {
+        * Stems a word to its root
+        *
+        * @param string $word Word to stem
+        * @return string Root word
+        */
+       public function stem( $word ) {
+               /**
+                * This is done instead of using an autoloader
+                * because then this script can be called from
+                * other places
+                */
+               if ( !class_exists( 'PorterStemmer' ) ) {
                        require_once(  __DIR__ . '/Stemmer.php' );
                }
 
diff --git a/BayesianFilter.php b/BayesianFilter.php
index f41c182..5e59000 100644
--- a/BayesianFilter.php
+++ b/BayesianFilter.php
@@ -1,7 +1,14 @@
 <?php
-
-# Loader for bayesian filter feature
-# Include this from LocalSettings.php
+/**
+ * Loader for Bayesian filter feature
+ * Include this from LocalSettings.php
+ *
+ * @file
+ * @ingroup Extensions
+ * @version 0.1
+ * @author Anbhav Agarwal
+ * @link https://www.mediawiki.org/wiki/Extension:BayesianFilter Documentation
+ */
 
 if ( !defined( 'MEDIAWIKI' ) ) {
        exit;
@@ -10,14 +17,14 @@
 $wgExtensionCredits['antispam'][] = array(
        'path'           => __FILE__,
        'name'           => 'BayesianFilter',
-       'author'         => array( 'Anbhav Agarwal'),
+       'author'         => array( 'Anbhav Agarwal' ),
+       'version'        => '0.1',
        'url'            => 
'https://www.mediawiki.org/wiki/Extension:BayesianFilter',
        'descriptionmsg' => 'bayesianfilter-desc',
 );
 
-$dir = __DIR__ . '/';
 $wgMessagesDirs['BayesianFilter'] = __DIR__ . '/i18n';
-$wgExtensionMessagesFiles['BayesianFilter'] = $dir . 'BayesianFilter.i18n.php';
+$wgExtensionMessagesFiles['BayesianFilter'] = __DIR__ . 
'/BayesianFilter.i18n.php';
 
 /**
  * Array of settings for filter classes
@@ -32,9 +39,9 @@
 $wgHooks['LoadExtensionSchemaUpdates'][] = 
'BayesianFilterHooks::onLoadExtensionSchemaUpdates';
 
 
-$wgAutoloadClasses['BayesianFilterHooks'] = $dir . 'BayesianFilter.Hooks.php';
-$wgAutoloadClasses['BayesianFilterPageView'] = $dir . 
'BayesianFilter.PageView.php';
-$wgAutoloadClasses['BayesianFilterDBHandler'] = $dir . 
'BayesianFilter.DBHandler.php';
-$wgAutoloadClasses['BayesianFilter'] = $dir . 'BayesianFilter.Body.php';
-$wgAutoloadClasses['BayesianFilterTokenizer'] = $dir . 
'BayesianFilter.Tokenizer.php';
-$wgAutoloadClasses['PorterStemmer'] = $dir . 'Stemmer.php';
+$wgAutoloadClasses['BayesianFilterHooks'] = __DIR__ . 
'/BayesianFilter.Hooks.php';
+$wgAutoloadClasses['BayesianFilterPageView'] = __DIR__ . 
'/BayesianFilter.PageView.php';
+$wgAutoloadClasses['BayesianFilterDBHandler'] = __DIR__ . 
'/BayesianFilter.DBHandler.php';
+$wgAutoloadClasses['BayesianFilter'] = __DIR__ . '/BayesianFilter.Body.php';
+$wgAutoloadClasses['BayesianFilterTokenizer'] = __DIR__ . 
'/BayesianFilter.Tokenizer.php';
+$wgAutoloadClasses['PorterStemmer'] = __DIR__ . '/Stemmer.php';
diff --git a/i18n/en.json b/i18n/en.json
index 1792d1b..e18b94d 100644
--- a/i18n/en.json
+++ b/i18n/en.json
@@ -4,7 +4,8 @@
             "Anbhav Agarwal"
         ]
     },
+    "bayesianfilter-content-is-spam": "Sorry, the content on this page 
triggered the spam filter. It cannot be saved.",
     "bayesianfilter-desc": "Filters wikitext into spam and hams using Bayesian 
techniques",
     "bayesianfilter-flag-spam-check-title": "Mark this checkbox if you think 
that the edit that you are undoing is a spam edit",
     "bayesianfilter-flag-spam-check": "Mark this as spam"
-}
\ No newline at end of file
+}
diff --git a/i18n/qqq.json b/i18n/qqq.json
index cc45a1e..e4fdf98 100644
--- a/i18n/qqq.json
+++ b/i18n/qqq.json
@@ -1,10 +1,12 @@
 {
        "@metadata": {
                "authors": [
+                       "Lewis Cawte",
                        "Raimond Spekking",
                        "Shirayuki"
                ]
        },
+       "bayesianfilter-content-is-spam": "Message displayed to the user to 
tell them the edit could not be saved as the filter identified it as spam",
        "bayesianfilter-desc": "{{desc|name=Bayesian 
Filter|url=https://www.mediawiki.org/wiki/Extension:BayesianFilter}}\n\nAbout 
\"bayesian\", see [[w:Bayesian spam filtering]].\n\n\"ham(s)\" means \"not 
spam\".",
        "bayesianfilter-flag-spam-check-title": "Title of a checkbox.\n\nSee 
also:\n* {{msg-mw|Bayesianfilter-flag-spam-check}}",
        "bayesianfilter-flag-spam-check": "Label of a checkbox.\n\nSee also:\n* 
{{msg-mw|Bayesianfilter-flag-spam-check-title}}"

-- 
To view, visit https://gerrit.wikimedia.org/r/161785
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I2d7255fb3c44853c45c2ed5b3cee8a2e7b16f897
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/BayesianFilter
Gerrit-Branch: master
Gerrit-Owner: Jack Phoenix <[email protected]>
Gerrit-Reviewer: Legoktm <[email protected]>
Gerrit-Reviewer: Lewis Cawte <[email protected]>
Gerrit-Reviewer: Siebrand <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to