http://www.mediawiki.org/wiki/Special:Code/MediaWiki/88113
Revision: 88113
Author: ialex
Date: 2011-05-14 17:11:32 +0000 (Sat, 14 May 2011)
Log Message:
-----------
Rewrote the article counting code and related:
* (bug 26033, bug 24754) Added $wgArticleCountMethod to provide a more flexible
way to choose the method used to decide whether a page is an article, and
deprecated $wgUseCommaCount. There is now a new 'any' method that counts any
page that is in a content namespace and is not a redirect.
* (bug 11868) If using links to count articles, Article::isCountable() will now
use the ParserOutput to check if there's a link instead of checking for the
"[[" string. Changed Article::isCountable() to take a stdClass object or false
as its first parameter. If false is passed, the result will be based on the
current article's state (i.e. database). The only call outside of the Article
class (including extensions) is in DeleteAction.
* Removed this horror of Article::$mGoodAdjustment and
Article::$mTotalAdjustment, replaced by the new $created parameter on
Article::editUpdates(); simplified Article::createUpdates()
* Updated Import.php to take advantage of the new parameter and make a single
call to Article::editUpdates()
Modified Paths:
--------------
trunk/phase3/RELEASE-NOTES-1.19
trunk/phase3/includes/Article.php
trunk/phase3/includes/DefaultSettings.php
trunk/phase3/includes/Import.php
trunk/phase3/includes/Setup.php
trunk/phase3/includes/SiteStats.php
trunk/phase3/includes/actions/DeleteAction.php
Modified: trunk/phase3/RELEASE-NOTES-1.19
===================================================================
--- trunk/phase3/RELEASE-NOTES-1.19 2011-05-14 17:08:42 UTC (rev 88112)
+++ trunk/phase3/RELEASE-NOTES-1.19 2011-05-14 17:11:32 UTC (rev 88113)
@@ -16,11 +16,15 @@
* $wgAllowUserSkin (deprecated in 1.16) has now been removed
* $wgExtraRandompageSQL (deprecated in 1.16) has now been removed
* LogReader and LogViewer classes (deprecated in 1.14) have now been removed
+* (bug 26033) Added $wgArticleCountMethod to select the method to use to say
+ whether a page is an article or not. $wgUseCommaCount is now deprecated.
=== New features in 1.19 ===
* (bug 28916) A way to toggle mw.config legacy globals settings from
LocalSettings.php has been created by introducing $wgLegacyJavaScriptGlobals.
* (bug 28503) Support for ircs:// URL protocols
+* (bug 26033) It is now possible to count all non-redirect pages in content
+ namespaces as articles
=== Bug fixes in 1.19 ===
* (bug 10154) Don't allow user to specify days beyond $wgRCMaxAge.
@@ -38,6 +42,8 @@
* (bug 27864) Transcluding {{Special:Prefix}} with empty prefix now lists all
pages.
* (bug 18803) JPEG2000 images can no longer be uploaded as JPEG image.
+* (bug 11868) If using links to count articles, the checking will now be based
+ on the real presence of an internal link instead of the "[[" string
=== API changes in 1.19 ===
* (bug 27790) add query type for querymodules to action=paraminfo
Modified: trunk/phase3/includes/Article.php
===================================================================
--- trunk/phase3/includes/Article.php 2011-05-14 17:08:42 UTC (rev 88112)
+++ trunk/phase3/includes/Article.php 2011-05-14 17:11:32 UTC (rev 88113)
@@ -27,7 +27,6 @@
var $mContentLoaded = false; // !<
var $mCounter = -1; // !< Not loaded
var $mDataLoaded = false; // !<
- var $mGoodAdjustment = 0; // !<
var $mIsRedirect = false; // !<
var $mLatest = false; // !<
var $mOldId; // !<
@@ -61,7 +60,6 @@
var $mTimestamp = ''; // !<
var $mTitle; // !< Title object
- var $mTotalAdjustment = 0; // !<
var $mTouched = '19700101000000'; // !<
/**
@@ -260,7 +258,6 @@
$this->mRedirectTarget = null; # Title object if set
$this->mLastRevision = null; # Latest revision
$this->mTimestamp = '';
- $this->mGoodAdjustment = $this->mTotalAdjustment = 0;
$this->mTouched = '19700101000000';
$this->mIsRedirect = false;
$this->mRevIdFetched = 0;
@@ -644,15 +641,43 @@
* Determine whether a page would be suitable for being counted as an
* article in the site_stats table based on the title & its content
*
- * @param $text String: text to analyze
- * @return bool
+ * @param $editInfo Object or false: object returned by prepareTextForEdit(),
+ * if false, the current database state will be used
+ * @return Boolean
*/
- public function isCountable( $text ) {
- global $wgUseCommaCount;
+ public function isCountable( $editInfo = false ) {
+ global $wgArticleCountMethod;
- $token = $wgUseCommaCount ? ',' : '[[';
+ if ( !$this->mTitle->isContentPage() ) {
+ return false;
+ }
- return $this->mTitle->isContentPage() && !$this->isRedirect(
$text ) && in_string( $token, $text );
+ $text = $editInfo ? $editInfo->pst : false;
+
+ if ( $this->isRedirect( $text ) ) {
+ return false;
+ }
+
+ switch ( $wgArticleCountMethod ) {
+ case 'any':
+ return true;
+ case 'comma':
+ if ( $text === false ) {
+ $text = $this->getRawText();
+ }
+ return in_string( ',', $text );
+ case 'link':
+ if ( $editInfo ) {
+ // ParserOutput::getLinks() is a 2D array of page links, so
+ // to be really correct we would need to recurse in the array
+ // but the main array should only have items in it if there are
+ // links.
+ return (bool)count(
$editInfo->output->getLinks() );
+ } else {
+ return (bool)wfGetDB( DB_SLAVE )->selectField(
'pagelinks', 1,
+ array( 'pl_from' => $this->getId() ),
__METHOD__ );
+ }
+ }
}
/**
@@ -2067,10 +2092,6 @@
$changed = ( strcmp( $text, $oldtext ) != 0 );
if ( $changed ) {
- $this->mGoodAdjustment =
(int)$this->isCountable( $text )
- - (int)$this->isCountable( $oldtext );
- $this->mTotalAdjustment = 0;
-
if ( !$this->mLatest ) {
# Article gone missing
wfDebug( __METHOD__ . ": EDIT_UPDATE
specified but article doesn't exist\n" );
@@ -2165,12 +2186,6 @@
# Create new article
$status->value['new'] = true;
- # Set statistics members
- # We work out if it's countable after PST to avoid
counter drift
- # when articles are created with {{subst:}}
- $this->mGoodAdjustment = (int)$this->isCountable( $text
);
- $this->mTotalAdjustment = 1;
-
$dbw->begin();
# Add the page record; stake our claim on this title!
@@ -2226,7 +2241,7 @@
$dbw->commit();
# Update links, etc.
- $this->editUpdates( $text, $summary, $isminor, $now,
$revisionId, true, $user );
+ $this->editUpdates( $text, $summary, $isminor, $now,
$revisionId, true, $user, true );
# Clear caches
Article::onArticleCreate( $this->mTitle );
@@ -3064,7 +3079,7 @@
return false;
}
- $u = new SiteStatsUpdate( 0, 1, - (int)$this->isCountable(
$this->getRawText() ), -1 );
+ $u = new SiteStatsUpdate( 0, 1, - (int)$this->isCountable(), -1
);
array_push( $wgDeferredUpdateList, $u );
// Bitfields to further suppress the content
@@ -3511,8 +3526,11 @@
* @param $newid Integer: rev_id value of the new revision
* @param $changed Boolean: Whether or not the content actually changed
* @param $user User object: User doing the edit
+ * @param $created Boolean: Whether the edit created the page
*/
- public function editUpdates( $text, $summary, $minoredit,
$timestamp_of_pagechange, $newid, $changed = true, User $user = null ) {
+ public function editUpdates( $text, $summary, $minoredit,
$timestamp_of_pagechange, $newid,
+ $changed = true, User $user = null, $created = false )
+ {
global $wgDeferredUpdateList, $wgUser, $wgEnableParserCache;
wfProfileIn( __METHOD__ );
@@ -3564,11 +3582,20 @@
return;
}
- $u = new SiteStatsUpdate( 0, 1, $this->mGoodAdjustment,
$this->mTotalAdjustment );
- array_push( $wgDeferredUpdateList, $u );
- $u = new SearchUpdate( $id, $title, $text );
- array_push( $wgDeferredUpdateList, $u );
+ if ( !$changed ) {
+ $good = 0;
+ $total = 0;
+ } elseif ( $created ) {
+ $good = (int)$this->isCountable( $editInfo );
+ $total = 1;
+ } else {
+ $good = (int)$this->isCountable( $editInfo ) -
(int)$this->isCountable();
+ $total = 0;
+ }
+ $wgDeferredUpdateList[] = new SiteStatsUpdate( 0, 1, $good,
$total );
+ $wgDeferredUpdateList[] = new SearchUpdate( $id, $title, $text
);
+
# If this is another user's talk page, update newtalk
# Don't do this if $changed = false otherwise some idiot can
null-edit a
# load of user talk pages and piss people off, nor if it's a
minor edit
@@ -3608,10 +3635,8 @@
* anymore.
*/
public function createUpdates( $rev ) {
- $this->mGoodAdjustment = $this->isCountable( $rev->getText() );
- $this->mTotalAdjustment = 1;
$this->editUpdates( $rev->getText(), $rev->getComment(),
- $rev->isMinor(), wfTimestamp(), $rev->getId(), true );
+ $rev->isMinor(), wfTimestamp(), $rev->getId(), true,
null, true );
}
/**
Modified: trunk/phase3/includes/DefaultSettings.php
===================================================================
--- trunk/phase3/includes/DefaultSettings.php 2011-05-14 17:08:42 UTC (rev
88112)
+++ trunk/phase3/includes/DefaultSettings.php 2011-05-14 17:11:32 UTC (rev
88113)
@@ -2974,15 +2974,31 @@
*/
/**
- * Under which condition should a page in the main namespace be counted
- * as a valid article? If $wgUseCommaCount is set to true, it will be
- * counted if it contains at least one comma. If it is set to false
- * (default), it will only be counted if it contains at least one [[wiki
- * link]]. See http://www.mediawiki.org/wiki/Manual:Article_count
+ * Method used to determine if a page in a content namespace should be counted
+ * as a valid article.
*
- * Retroactively changing this variable will not affect
- * the existing count (cf. maintenance/recount.sql).
+ * Redirect pages will never be counted as valid articles.
+ *
+ * This variable can have the following values:
+ * - 'any': all pages are considered as valid articles
+ * - 'comma': the page must contain a comma to be considered valid
+ * - 'link': the page must contain a [[wiki link]] to be considered valid
+ * - null: the value will be set at run time depending on $wgUseCommaCount:
+ * if $wgUseCommaCount is false, it will be 'link', if it is true
+ * it will be 'comma'
+ *
+ * See also http://www.mediawiki.org/wiki/Manual:Article_count
+ *
+ * Retroactively changing this variable will not affect the existing count;
+ * to update it, you will need to run the maintenance/updateArticleCount.php
+ * script.
*/
+$wgArticleCountMethod = null;
+
+/**
+ * Backward compatibility setting, will set $wgArticleCountMethod if it is null.
+ * @deprecated in 1.19; use $wgArticleCountMethod instead
+ */
$wgUseCommaCount = false;
/**
Modified: trunk/phase3/includes/Import.php
===================================================================
--- trunk/phase3/includes/Import.php 2011-05-14 17:08:42 UTC (rev 88112)
+++ trunk/phase3/includes/Import.php 2011-05-14 17:11:32 UTC (rev 88113)
@@ -1027,25 +1027,25 @@
$tempTitle = $GLOBALS['wgTitle'];
$GLOBALS['wgTitle'] = $this->title;
- if( $created ) {
+ if ( $created ) {
wfDebug( __METHOD__ . ": running onArticleCreate\n" );
Article::onArticleCreate( $this->title );
-
- wfDebug( __METHOD__ . ": running create updates\n" );
- $article->createUpdates( $revision );
-
} elseif( $changed ) {
wfDebug( __METHOD__ . ": running onArticleEdit\n" );
Article::onArticleEdit( $this->title );
+ }
- wfDebug( __METHOD__ . ": running edit updates\n" );
- $article->editUpdates(
- $this->getText(),
- $this->getComment(),
- $this->minor,
- $this->timestamp,
- $revId );
- }
+ wfDebug( __METHOD__ . ": running updates\n" );
+ $article->editUpdates(
+ $this->getText(),
+ $this->getComment(),
+ $this->minor,
+ $this->timestamp,
+ $revId,
+ true,
+ null,
+ $created );
+
$GLOBALS['wgTitle'] = $tempTitle;
return true;
Modified: trunk/phase3/includes/Setup.php
===================================================================
--- trunk/phase3/includes/Setup.php 2011-05-14 17:08:42 UTC (rev 88112)
+++ trunk/phase3/includes/Setup.php 2011-05-14 17:11:32 UTC (rev 88113)
@@ -293,6 +293,10 @@
# Blacklisted file extensions shouldn't appear on the "allowed" list
$wgFileExtensions = array_diff ( $wgFileExtensions, $wgFileBlacklist );
+if ( $wgArticleCountMethod === null ) {
+ $wgArticleCountMethod = $wgUseCommaCount ? 'comma' : 'link';
+}
+
if ( $wgInvalidateCacheOnLocalSettingsChange ) {
$wgCacheEpoch = max( $wgCacheEpoch, gmdate( 'YmdHis', @filemtime(
"$IP/LocalSettings.php" ) ) );
}
Modified: trunk/phase3/includes/SiteStats.php
===================================================================
--- trunk/phase3/includes/SiteStats.php 2011-05-14 17:08:42 UTC (rev 88112)
+++ trunk/phase3/includes/SiteStats.php 2011-05-14 17:11:32 UTC (rev 88113)
@@ -285,18 +285,24 @@
* @return Integer
*/
public function articles() {
- global $wgUseCommaCount;
+ global $wgArticleCountMethod;
$tables = array( 'page' );
$conds = array(
'page_namespace' => MWNamespace::getContentNamespaces(),
'page_is_redirect' => 0,
- 'page_len > 0'
);
- if ( !$wgUseCommaCount ) {
+ if ( $wgArticleCountMethod == 'link' ) {
$tables[] = 'pagelinks';
$conds[] = 'pl_from=page_id';
+ } elseif ( $wgArticleCountMethod == 'comma' ) {
+ // To make a correct check for this, we would need, for each page,
+ // to load the text, maybe uncompress it, maybe decode it and then
+ // check if there's one comma.
+ // But one thing we are sure of is that if the page is empty, it can't
+ // contain a comma :)
+ $conds[] = 'page_len > 0';
}
$this->mArticles = $this->db->selectField( $tables,
'COUNT(DISTINCT page_id)',
Modified: trunk/phase3/includes/actions/DeleteAction.php
===================================================================
--- trunk/phase3/includes/actions/DeleteAction.php 2011-05-14 17:08:42 UTC
(rev 88112)
+++ trunk/phase3/includes/actions/DeleteAction.php 2011-05-14 17:11:32 UTC
(rev 88113)
@@ -245,7 +245,7 @@
return false;
}
- $updates = new SiteStatsUpdate( 0, 1, -
(int)$page->isCountable( $page->getRawText() ), -1 );
+ $updates = new SiteStatsUpdate( 0, 1, -
(int)$page->isCountable(), -1 );
array_push( $wgDeferredUpdateList, $updates );
// Bitfields to further suppress the content
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs