http://www.mediawiki.org/wiki/Special:Code/MediaWiki/88113

Revision: 88113
Author:   ialex
Date:     2011-05-14 17:11:32 +0000 (Sat, 14 May 2011)
Log Message:
-----------
Rewrote the article counting code and related:
* (bug 26033, bug 24754) Added $wgArticleCountMethod as a more flexible way 
to choose the method used to decide whether a page is an article, and 
deprecated $wgUseCommaCount. There is now a new 'any' method that counts any 
page that is in a content namespace and is not a redirect.
* (bug 11868) If using links to count articles, Article::isCountable() will now 
use the ParserOutput to check if there's a link instead of checking for the 
"[[" string. Changed Article::isCountable() to take a stdClass object or false 
as its first parameter. If false is passed, the result will be based on the 
current article's state (i.e. database). The only call outside of the Article 
class (extensions included) is in DeleteAction.
* Removed this horror of Article::$mGoodAdjustment and 
Article::$mTotalAdjustment, replaced by the new $created parameter on 
Article::editUpdates(); simplified Article::createUpdates()
* Updated Import.php to take advantage of the new parameter and make a single 
call to Article::editUpdates()

Modified Paths:
--------------
    trunk/phase3/RELEASE-NOTES-1.19
    trunk/phase3/includes/Article.php
    trunk/phase3/includes/DefaultSettings.php
    trunk/phase3/includes/Import.php
    trunk/phase3/includes/Setup.php
    trunk/phase3/includes/SiteStats.php
    trunk/phase3/includes/actions/DeleteAction.php

Modified: trunk/phase3/RELEASE-NOTES-1.19
===================================================================
--- trunk/phase3/RELEASE-NOTES-1.19     2011-05-14 17:08:42 UTC (rev 88112)
+++ trunk/phase3/RELEASE-NOTES-1.19     2011-05-14 17:11:32 UTC (rev 88113)
@@ -16,11 +16,15 @@
 * $wgAllowUserSkin (deprecated in 1.16) has now been removed
 * $wgExtraRandompageSQL (deprecated in 1.16) has now been removed
 * LogReader and LogViewer classes (deprecated in 1.14) have now been removed
+* (bug 26033) Added $wgArticleCountMethod to select the method to use to say
+  whether a page is an article or not. $wgUseCommaCount is now deprecated.
 
 === New features in 1.19 ===
 * (bug 28916) A way to to toggle mw.config legacy globals settings from
   LocalSettings.php has been created by introducing $wgLegacyJavaScriptGlobals.
 * (bug 28503) Support for ircs:// URL protocols
+* (bug 26033) It is now possible to count all non-redirect pages in content
+  namespaces as articles
 
 === Bug fixes in 1.19 ===
 * (bug 10154) Don't allow user to specify days beyond $wgRCMaxAge.
@@ -38,6 +42,8 @@
 * (bug 27864) Transcluding {{Special:Prefix}} with empty prefix now lists all
   pages.
 * (bug 18803) JPEG2000 images can no longer be uploaded as JPEG image.
+* (bug 11868) If using links to count articles, the checking will now be based
+  on the real presence of an internal link instead of the "[[" string
 
 === API changes in 1.19 ===
 * (bug 27790) add query type for querymodules to action=paraminfo

Modified: trunk/phase3/includes/Article.php
===================================================================
--- trunk/phase3/includes/Article.php   2011-05-14 17:08:42 UTC (rev 88112)
+++ trunk/phase3/includes/Article.php   2011-05-14 17:11:32 UTC (rev 88113)
@@ -27,7 +27,6 @@
        var $mContentLoaded = false;      // !<
        var $mCounter = -1;               // !< Not loaded
        var $mDataLoaded = false;         // !<
-       var $mGoodAdjustment = 0;         // !<
        var $mIsRedirect = false;         // !<
        var $mLatest = false;             // !<
        var $mOldId;                      // !<
@@ -61,7 +60,6 @@
 
        var $mTimestamp = '';             // !<
        var $mTitle;                      // !< Title object
-       var $mTotalAdjustment = 0;        // !<
        var $mTouched = '19700101000000'; // !<
 
        /**
@@ -260,7 +258,6 @@
                $this->mRedirectTarget = null; # Title object if set
                $this->mLastRevision = null; # Latest revision
                $this->mTimestamp = '';
-               $this->mGoodAdjustment = $this->mTotalAdjustment = 0;
                $this->mTouched = '19700101000000';
                $this->mIsRedirect = false;
                $this->mRevIdFetched = 0;
@@ -644,15 +641,43 @@
         * Determine whether a page would be suitable for being counted as an
         * article in the site_stats table based on the title & its content
         *
-        * @param $text String: text to analyze
-        * @return bool
+        * @param $editInfo Object or false: object returned by 
prepareTextForEdit(),
+        *        if false, the current database state will be used
+        * @return Boolean
         */
-       public function isCountable( $text ) {
-               global $wgUseCommaCount;
+       public function isCountable( $editInfo = false ) {
+               global $wgArticleCountMethod;
 
-               $token = $wgUseCommaCount ? ',' : '[[';
+               if ( !$this->mTitle->isContentPage() ) {
+                       return false;
+               }
 
-               return $this->mTitle->isContentPage() && !$this->isRedirect( 
$text ) && in_string( $token, $text );
+               $text = $editInfo ? $editInfo->pst : false;
+
+               if ( $this->isRedirect( $text ) ) {
+                       return false;
+               }
+
+               switch ( $wgArticleCountMethod ) {
+               case 'any':
+                       return true;
+               case 'comma':
+                       if ( $text === false ) {
+                               $text = $this->getRawText();
+                       }
+                       return in_string( ',', $text );
+               case 'link':
+                       if ( $editInfo ) {
+                               // ParserOutput::getLinks() is a 2D array of 
page links, so
+                               // to be really correct we would need to 
recurse in the array
+                               // but the main array should only have items in 
it if there are
+                               // links.
+                               return (bool)count( 
$editInfo->output->getLinks() );
+                       } else {
+                               return (bool)wfGetDB( DB_SLAVE )->selectField( 
'pagelinks', 1,
+                                       array( 'pl_from' => $this->getId() ), 
__METHOD__ );
+                       }
+               }
        }
 
        /**
@@ -2067,10 +2092,6 @@
                        $changed = ( strcmp( $text, $oldtext ) != 0 );
 
                        if ( $changed ) {
-                               $this->mGoodAdjustment = 
(int)$this->isCountable( $text )
-                                 - (int)$this->isCountable( $oldtext );
-                               $this->mTotalAdjustment = 0;
-
                                if ( !$this->mLatest ) {
                                        # Article gone missing
                                        wfDebug( __METHOD__ . ": EDIT_UPDATE 
specified but article doesn't exist\n" );
@@ -2165,12 +2186,6 @@
                        # Create new article
                        $status->value['new'] = true;
 
-                       # Set statistics members
-                       # We work out if it's countable after PST to avoid 
counter drift
-                       # when articles are created with {{subst:}}
-                       $this->mGoodAdjustment = (int)$this->isCountable( $text 
);
-                       $this->mTotalAdjustment = 1;
-
                        $dbw->begin();
 
                        # Add the page record; stake our claim on this title!
@@ -2226,7 +2241,7 @@
                        $dbw->commit();
 
                        # Update links, etc.
-                       $this->editUpdates( $text, $summary, $isminor, $now, 
$revisionId, true, $user );
+                       $this->editUpdates( $text, $summary, $isminor, $now, 
$revisionId, true, $user, true );
 
                        # Clear caches
                        Article::onArticleCreate( $this->mTitle );
@@ -3064,7 +3079,7 @@
                        return false;
                }
 
-               $u = new SiteStatsUpdate( 0, 1, - (int)$this->isCountable( 
$this->getRawText() ), -1 );
+               $u = new SiteStatsUpdate( 0, 1, - (int)$this->isCountable(), -1 
);
                array_push( $wgDeferredUpdateList, $u );
 
                // Bitfields to further suppress the content
@@ -3511,8 +3526,11 @@
         * @param $newid Integer: rev_id value of the new revision
         * @param $changed Boolean: Whether or not the content actually changed
         * @param $user User object: User doing the edit
+        * @param $created Boolean: Whether the edit created the page
         */
-       public function editUpdates( $text, $summary, $minoredit, 
$timestamp_of_pagechange, $newid, $changed = true, User $user = null ) {
+       public function editUpdates( $text, $summary, $minoredit, 
$timestamp_of_pagechange, $newid,
+               $changed = true, User $user = null, $created = false )
+       {
                global $wgDeferredUpdateList, $wgUser, $wgEnableParserCache;
 
                wfProfileIn( __METHOD__ );
@@ -3564,11 +3582,20 @@
                        return;
                }
 
-               $u = new SiteStatsUpdate( 0, 1, $this->mGoodAdjustment, 
$this->mTotalAdjustment );
-               array_push( $wgDeferredUpdateList, $u );
-               $u = new SearchUpdate( $id, $title, $text );
-               array_push( $wgDeferredUpdateList, $u );
+               if ( !$changed ) {
+                       $good = 0;
+                       $total = 0;
+               } elseif ( $created ) {
+                       $good = (int)$this->isCountable( $editInfo );
+                       $total = 1;
+               } else {
+                       $good = (int)$this->isCountable( $editInfo ) - 
(int)$this->isCountable();
+                       $total = 0;
+               }
 
+               $wgDeferredUpdateList[] = new SiteStatsUpdate( 0, 1, $good, 
$total );
+               $wgDeferredUpdateList[] = new SearchUpdate( $id, $title, $text 
);
+
                # If this is another user's talk page, update newtalk
                # Don't do this if $changed = false otherwise some idiot can 
null-edit a
                # load of user talk pages and piss people off, nor if it's a 
minor edit
@@ -3608,10 +3635,8 @@
         * anymore.
         */
        public function createUpdates( $rev ) {
-               $this->mGoodAdjustment = $this->isCountable( $rev->getText() );
-               $this->mTotalAdjustment = 1;
                $this->editUpdates( $rev->getText(), $rev->getComment(),
-                       $rev->isMinor(), wfTimestamp(), $rev->getId(), true );
+                       $rev->isMinor(), wfTimestamp(), $rev->getId(), true, 
null, true );
        }
 
        /**

Modified: trunk/phase3/includes/DefaultSettings.php
===================================================================
--- trunk/phase3/includes/DefaultSettings.php   2011-05-14 17:08:42 UTC (rev 
88112)
+++ trunk/phase3/includes/DefaultSettings.php   2011-05-14 17:11:32 UTC (rev 
88113)
@@ -2974,15 +2974,31 @@
  */
 
 /**
- * Under which condition should a page in the main namespace be counted
- * as a valid article? If $wgUseCommaCount is set to true, it will be
- * counted if it contains at least one comma. If it is set to false
- * (default), it will only be counted if it contains at least one [[wiki
- * link]]. See http://www.mediawiki.org/wiki/Manual:Article_count
+ * Method used to determine if a page in a content namespace should be counted
+ * as a valid article.
  *
- * Retroactively changing this variable will not affect
- * the existing count (cf. maintenance/recount.sql).
+ * Redirect pages will never be counted as valid articles.
+ *
+ * This variable can have the following values:
+ * - 'any': all pages are considered as valid articles
+ * - 'comma': the page must contain a comma to be considered valid
+ * - 'link': the page must contain a [[wiki link]] to be considered valid
+ * - null: the value will be set at run time depending on $wgUseCommaCount:
+ *         if $wgUseCommaCount is false, it will be 'link', if it is true
+ *         it will be 'comma'
+ *
+ * See also http://www.mediawiki.org/wiki/Manual:Article_count
+ *
+ * Retroactively changing this variable will not affect the existing count,
+ * to update it, you will need to run the maintenance/updateArticleCount.php
+ * script.
  */
+$wgArticleCountMethod = null;
+
+/**
+ * Backward compatibility setting, will set $wgArticleCountMethod if it is 
null.
+ * @deprecated in 1.19; use $wgArticleCountMethod instead
+ */
 $wgUseCommaCount = false;
 
 /**

Modified: trunk/phase3/includes/Import.php
===================================================================
--- trunk/phase3/includes/Import.php    2011-05-14 17:08:42 UTC (rev 88112)
+++ trunk/phase3/includes/Import.php    2011-05-14 17:11:32 UTC (rev 88113)
@@ -1027,25 +1027,25 @@
                $tempTitle = $GLOBALS['wgTitle'];
                $GLOBALS['wgTitle'] = $this->title;
 
-               if( $created ) {
+               if ( $created ) {
                        wfDebug( __METHOD__ . ": running onArticleCreate\n" );
                        Article::onArticleCreate( $this->title );
-
-                       wfDebug( __METHOD__ . ": running create updates\n" );
-                       $article->createUpdates( $revision );
-
                } elseif( $changed ) {
                        wfDebug( __METHOD__ . ": running onArticleEdit\n" );
                        Article::onArticleEdit( $this->title );
+               }
 
-                       wfDebug( __METHOD__ . ": running edit updates\n" );
-                       $article->editUpdates(
-                               $this->getText(),
-                               $this->getComment(),
-                               $this->minor,
-                               $this->timestamp,
-                               $revId );
-               }
+               wfDebug( __METHOD__ . ": running updates\n" );
+               $article->editUpdates(
+                       $this->getText(),
+                       $this->getComment(),
+                       $this->minor,
+                       $this->timestamp,
+                       $revId,
+                       true,
+                       null,
+                       $created );
+
                $GLOBALS['wgTitle'] = $tempTitle;
 
                return true;

Modified: trunk/phase3/includes/Setup.php
===================================================================
--- trunk/phase3/includes/Setup.php     2011-05-14 17:08:42 UTC (rev 88112)
+++ trunk/phase3/includes/Setup.php     2011-05-14 17:11:32 UTC (rev 88113)
@@ -293,6 +293,10 @@
 # Blacklisted file extensions shouldn't appear on the "allowed" list
 $wgFileExtensions = array_diff ( $wgFileExtensions, $wgFileBlacklist );
 
+if ( $wgArticleCountMethod === null ) {
+       $wgArticleCountMethod = $wgUseCommaCount ? 'comma' : 'link';
+}
+
 if ( $wgInvalidateCacheOnLocalSettingsChange ) {
        $wgCacheEpoch = max( $wgCacheEpoch, gmdate( 'YmdHis', @filemtime( 
"$IP/LocalSettings.php" ) ) );
 }

Modified: trunk/phase3/includes/SiteStats.php
===================================================================
--- trunk/phase3/includes/SiteStats.php 2011-05-14 17:08:42 UTC (rev 88112)
+++ trunk/phase3/includes/SiteStats.php 2011-05-14 17:11:32 UTC (rev 88113)
@@ -285,18 +285,24 @@
         * @return Integer
         */
        public function articles() {
-               global $wgUseCommaCount;
+               global $wgArticleCountMethod;
 
                $tables = array( 'page' );
                $conds = array(
                        'page_namespace' => MWNamespace::getContentNamespaces(),
                        'page_is_redirect' => 0,
-                       'page_len > 0'
                );
 
-               if ( !$wgUseCommaCount ) {
+               if ( $wgArticleCountMethod == 'link' ) {
                        $tables[] = 'pagelinks';
                        $conds[] = 'pl_from=page_id';
+               } elseif ( $wgArticleCountMethod == 'comma' ) {
+                       // To make a correct check for this, we would need, for 
each page,
+                       // to load the text, maybe uncompress it, maybe decode 
it and then
+                       // check if there's one comma.
+                       // But one thing we are sure is that if the page is 
empty, it can't
+                       // contain a comma :)
+                       $conds[] = 'page_len > 0';
                }
 
                $this->mArticles = $this->db->selectField( $tables, 
'COUNT(DISTINCT page_id)',

Modified: trunk/phase3/includes/actions/DeleteAction.php
===================================================================
--- trunk/phase3/includes/actions/DeleteAction.php      2011-05-14 17:08:42 UTC 
(rev 88112)
+++ trunk/phase3/includes/actions/DeleteAction.php      2011-05-14 17:11:32 UTC 
(rev 88113)
@@ -245,7 +245,7 @@
                        return false;
                }
 
-               $updates = new SiteStatsUpdate( 0, 1, - 
(int)$page->isCountable( $page->getRawText() ), -1 );
+               $updates = new SiteStatsUpdate( 0, 1, - 
(int)$page->isCountable(), -1 );
                array_push( $wgDeferredUpdateList, $updates );
 
                // Bitfields to further suppress the content


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to