http://www.mediawiki.org/wiki/Special:Code/MediaWiki/89277
Revision: 89277
Author: awjrichards
Date: 2011-06-01 18:32:17 +0000 (Wed, 01 Jun 2011)
Log Message:
-----------
Added 'problem articles' view to dashboard; refactored dashboard code
(populateAFStatistics.php, primarily); Added new schema sql scripts as well as
sql migration script
Modified Paths:
--------------
trunk/extensions/ArticleFeedback/ArticleFeedback.hooks.php
trunk/extensions/ArticleFeedback/ArticleFeedback.php
trunk/extensions/ArticleFeedback/SpecialArticleFeedback.php
trunk/extensions/ArticleFeedback/populateAFStatistics.php
Added Paths:
-----------
trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTable.sql
trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTypeTable.sql
trunk/extensions/ArticleFeedback/sql/MigrateArticleFeedbackStatsHighsLows.sql
Modified: trunk/extensions/ArticleFeedback/ArticleFeedback.hooks.php
===================================================================
--- trunk/extensions/ArticleFeedback/ArticleFeedback.hooks.php 2011-06-01
17:53:05 UTC (rev 89276)
+++ trunk/extensions/ArticleFeedback/ArticleFeedback.hooks.php 2011-06-01
18:32:17 UTC (rev 89277)
@@ -174,13 +174,53 @@
$dir . '/sql/AddRevisionsTable.sql',
true
) );
+
+			if ( $db->tableExists( 'article_feedback_stats_highs_lows' ) ) {
+				// Upgrade path: the legacy highs/lows table is present, so create
+				// the new tables and then migrate the legacy rows into them.
+				if ( !$db->tableExists( 'article_feedback_stats_types' ) ) {
+					// add article_feedback_stats_types if necessary
+					// (the patch file added in this revision is named
+					// AddArticleFeedbackStatsTypeTable.sql, singular "Type")
+					$updater->addExtensionUpdate( array(
+						'addTable',
+						'article_feedback_stats_types',
+						$dir . '/sql/AddArticleFeedbackStatsTypeTable.sql',
+						true
+					) );
+				}
+
+				$updater->addExtensionUpdate( array(
+					'addTable',
+					'article_feedback_stats',
+					$dir . '/sql/AddArticleFeedbackStatsTable.sql',
+					true
+				) );
+
+				// migrate article_feedback_stats_highs_lows to article_feedback_stats
+				$updater->addExtensionUpdate( array(
+					'applyPatch',
+					$dir . '/sql/MigrateArticleFeedbackStatsHighsLows.sql',
+					true
+				) );
+			} else {
+				// No legacy table: add article_feedback_stats_types and
+				// article_feedback_stats where they are missing
+				if ( !$db->tableExists( 'article_feedback_stats_types' ) ) {
+					$updater->addExtensionUpdate( array(
+						'addTable',
+						'article_feedback_stats_types',
+						$dir . '/sql/AddArticleFeedbackStatsTypeTable.sql',
+						true
+					) );
+				}
+
+				if ( !$db->tableExists( 'article_feedback_stats' ) ) {
+					$updater->addExtensionUpdate( array(
+						'addTable',
+						'article_feedback_stats',
+						$dir . '/sql/AddArticleFeedbackStatsTable.sql',
+						true
+					) );
+				}
+			}
+
$updater->addExtensionUpdate( array(
- 'addTable',
- 'article_feedback_stats_highs_lows',
- $dir . '/sql/AddStatsHighsLowsTable.sql',
- true
- ) );
- $updater->addExtensionUpdate( array(
'addIndex',
'article_feedback',
'article_feedback_timestamp',
Modified: trunk/extensions/ArticleFeedback/ArticleFeedback.php
===================================================================
--- trunk/extensions/ArticleFeedback/ArticleFeedback.php 2011-06-01
17:53:05 UTC (rev 89276)
+++ trunk/extensions/ArticleFeedback/ArticleFeedback.php 2011-06-01
18:32:17 UTC (rev 89277)
@@ -11,7 +11,7 @@
*/
/* XXX: Survey setup */
-require_once( dirname( dirname( __FILE__ ) ) .
'/SimpleSurvey/SimpleSurvey.php' );
+require_once( $IP . '/extensions/SimpleSurvey/SimpleSurvey.php' );
/* Configuration */
Modified: trunk/extensions/ArticleFeedback/SpecialArticleFeedback.php
===================================================================
--- trunk/extensions/ArticleFeedback/SpecialArticleFeedback.php 2011-06-01
17:53:05 UTC (rev 89276)
+++ trunk/extensions/ArticleFeedback/SpecialArticleFeedback.php 2011-06-01
18:32:17 UTC (rev 89277)
@@ -43,8 +43,8 @@
/*
This functionality does not exist yet.
- $this->renderWeeklyMostChanged();
- $this->renderRecentLows();*/
+ $this->renderWeeklyMostChanged();*/
+ $this->renderProblems();
} else {
$wgOut->addWikiText( 'This page has been disabled.' );
}
@@ -187,15 +187,15 @@
}
/**
- * Renders recent lows
+ * Renders problem articles table
*
* @return String: HTML table of recent lows
*/
- protected function renderRecentLows() {
+ protected function renderProblems() {
global $wgOut, $wgUser, $wgArticleFeedbackRatings;
$rows = array();
- foreach ( $this->getRecentLows() as $page ) {
+ foreach ( $this->getProblems() as $page ) {
$row = array();
$pageTitle = Title::newFromText( $page['page'] );
$row['page'] = $wgUser->getSkin()->link( $pageTitle,
$pageTitle->getPrefixedText() );
@@ -227,6 +227,54 @@
}
/**
+	 * Gets a list of articles which were rated exceptionally low
+	 *
+	 * The list is served from the object cache when an array is stored under
+	 * the 'article_feedback_stats_problems' key. Otherwise the rows carrying
+	 * the most recent afs_ts and the 'problems' stats type are read from
+	 * article_feedback_stats, ordered by afs_orderable_data, converted with
+	 * buildProblems() and cached for 86400 seconds.
+	 *
+	 * @return array Problem article data; empty when the stats table has no rows
+	 */
+	protected function getProblems() {
+		global $wgMemc;
+		// check if we've got results in the cache
+		$key = wfMemcKey( 'article_feedback_stats_problems' );
+		$cache = $wgMemc->get( $key );
+		if ( is_array( $cache ) ) {
+			// cache hit: hand back exactly what was stored
+			return $cache;
+		}
+
+		$dbr = wfGetDB( DB_SLAVE );
+		// first find the freshest timestamp
+		$row = $dbr->selectRow(
+			'article_feedback_stats',
+			array( 'afs_ts' ),
+			"",
+			__METHOD__,
+			array( "ORDER BY" => "afs_ts DESC", "LIMIT" => 1 )
+		);
+
+		// if we have no results, just return
+		if ( !$row || !$row->afs_ts ) {
+			return array();
+		}
+
+		// select ratings with that ts
+		$result = $dbr->select(
+			'article_feedback_stats',
+			array(
+				'afs_page_id',
+				'afs_orderable_data',
+				'afs_data'
+			),
+			array(
+				'afs_ts' => $row->afs_ts,
+				'afs_stats_type_id' => self::getStatsTypeId( 'problems' )
+			),
+			__METHOD__,
+			array( "ORDER BY" => "afs_orderable_data" )
+		);
+		$problems = self::buildProblems( $result );
+		$wgMemc->set( $key, $problems, 86400 );
+
+		return $problems;
+	}
+
+ /**
* Gets a list of articles which were rated exceptionally high or low.
*
* - Based on average of all rating categories
@@ -237,7 +285,6 @@
*/
protected function getDailyHighsAndLows() {
global $wgMemc;
-
// check if we've got results in the cache
$key = wfMemcKey( 'article_feedback_stats_highs_lows' );
$cache = $wgMemc->get( $key );
@@ -247,29 +294,32 @@
$dbr = wfGetDB( DB_SLAVE );
// first find the freshest timestamp
$row = $dbr->selectRow(
- 'article_feedback_stats_highs_lows',
- array( 'afshl_ts' ),
+ 'article_feedback_stats',
+ array( 'afs_ts' ),
"",
__METHOD__,
- array( "ORDER BY" => "afshl_ts DESC", "LIMIT"
=> 1 )
+ array( "ORDER BY" => "afs_ts DESC", "LIMIT" =>
1 )
);
// if we have no results, just return
- if ( !$row || !$row->afshl_ts ) {
+ if ( !$row || !$row->afs_ts ) {
return array();
}
// select ratings with that ts
$result = $dbr->select(
- 'article_feedback_stats_highs_lows',
+ 'article_feedback_stats',
array(
- 'afshl_page_id',
- 'afshl_avg_overall',
- 'afshl_avg_ratings'
+ 'afs_page_id',
+ 'afs_orderable_data',
+ 'afs_data'
),
- array( 'afshl_ts' => $row->afshl_ts ),
+ array(
+ 'afs_ts' => $row->afs_ts,
+ 'afs_stats_type_id' =>
self::getStatsTypeId( 'highs_and_lows' )
+ ),
__METHOD__,
- array( "ORDER BY" => "afshl_avg_overall" )
+ array( "ORDER BY" => "afs_orderable_data" )
);
$highs_lows = $this->buildHighsAndLows( $result );
$wgMemc->set( $key, $highs_lows, 86400 );
@@ -331,15 +381,56 @@
$highs_lows = array();
foreach ( $result as $row ) {
$highs_lows[] = array(
- 'page' => $row->afshl_page_id,
- 'ratings' => FormatJson::decode(
$row->afshl_avg_ratings ),
- 'average' => $row->afshl_avg_overall
+ 'page' => $row->afs_page_id,
+ 'ratings' => FormatJson::decode( $row->afs_data
),
+ 'average' => $row->afs_orderable_data
);
}
return $highs_lows;
}
/**
+	 * Turn a database result of problem-article stats into the plain array
+	 * structure used when rendering the table
+	 *
+	 * Each row contributes one entry with the keys 'page' (afs_page_id),
+	 * 'ratings' (afs_data run through FormatJson::decode) and 'average'
+	 * (afs_orderable_data), in result order.
+	 *
+	 * @param object $result Database result
+	 * @return array
+	 */
+	public static function buildProblems( $result ) {
+		$list = array();
+		foreach ( $result as $statRow ) {
+			$entry = array();
+			$entry['page'] = $statRow->afs_page_id;
+			$entry['ratings'] = FormatJson::decode( $statRow->afs_data );
+			$entry['average'] = $statRow->afs_orderable_data;
+			$list[] = $entry;
+		}
+		return $list;
+	}
+
+	/**
+	 * Get the stats type id for a given stat type
+	 *
+	 * The id is looked up in the object cache first and in
+	 * article_feedback_stats_types otherwise. Only ids that were actually
+	 * found are cached.
+	 *
+	 * @param string $stats_type
+	 * @return int|bool The afst_id of the stat type, or false when no row
+	 *  exists for it (callers use the falsy value to decide to add the type)
+	 */
+	public static function getStatsTypeId( $stats_type ) {
+		global $wgMemc;
+		$key = wfMemcKey( 'article_feedback_stats_type_' . $stats_type );
+		$cache = $wgMemc->get( $key );
+		if ( $cache ) {
+			return $cache;
+		}
+
+		$dbr = wfGetDB( DB_SLAVE );
+		$row = $dbr->selectRow(
+			'article_feedback_stats_types',
+			array( 'afst_id' ),
+			array( 'afst_type' => $stats_type ),
+			__METHOD__,
+			array( )
+		);
+
+		// no matching row: do not dereference $row and do not cache a miss,
+		// so a type added later is picked up on the next call
+		if ( !$row ) {
+			return false;
+		}
+
+		$wgMemc->set( $key, $row->afst_id );
+		return $row->afst_id;
+	}
+
+ /**
* Gets a list of articles which have quickly changing ratings.
*
* - Based on any rating category
Modified: trunk/extensions/ArticleFeedback/populateAFStatistics.php
===================================================================
--- trunk/extensions/ArticleFeedback/populateAFStatistics.php 2011-06-01
17:53:05 UTC (rev 89276)
+++ trunk/extensions/ArticleFeedback/populateAFStatistics.php 2011-06-01
18:32:17 UTC (rev 89277)
@@ -37,9 +37,38 @@
*/
protected $dbw;
+ /**
+ * Valid operations and their execution methods for this script to
perform
+ *
+ * Operations are passed in as options during run-time - only valid
options,
+ * which are defined here, can be executed. Valid operations are mapped
here
+ * to a corresponding method ( array( 'operation' => 'method' ))
+ * @var array
+ */
+ protected $operation_map = array(
+ 'highslows' => 'populateHighsLows',
+ 'problems' => 'populateProblems',
+ );
+
+ /**
+ * Operations to execute
+ * @var array
+ */
+ public $operations = array();
+
+ /**
+ * The minimum number of rating sets required before taking some action
+ * @var int
+ */
+ public $rating_set_threshold = 10;
+
public function __construct() {
parent::__construct();
$this->mDescription = "Populates the article feedback stats
tables";
+
+ $this->addOption( 'op', 'The ArticleFeedback stats gathering
operation to run (eg "highslows"). Can specify multiple operations, separated
by comma.', true, true );
+ $this->addOption( 'rating_sets', 'The minimum number of rating
sets before taking an action.', false, true );
+ $this->addOption( 'poll_period', 'The polling period for
fetching data, in seconds.', false, true );
}
public function syncDBs() {
@@ -54,72 +83,179 @@
}
}
- public function execute() {
- global $wgMemc;
+ /**
+ * Bootstrap this maintenance script
+ *
+ * Performs operations necessary for this maintenance script to run
which
+ * cannot or do not make sense to run in the constructor.
+ *
+ * Reads the 'op', 'rating_sets' and 'poll_period' options, validates them,
+ * stores them on the object and sets up the database handles.
+ */
+ public function bootstrap() {
+ /**
+ * Set user-specified operations to perform
+ */
+ // 'op' is a comma separated list; entries are not trimmed here
+ $operations = explode( ',', $this->getOption( 'op' ));
+ // check sanity of specified operations
+ if ( !$this->checkOperations( $operations )) {
+ // NOTE(review): the second argument presumably makes error() abort the script - confirm
+ $this->error( 'Invalid operation specified.', true );
+ } else {
+ $this->operations = $operations;
+ }
+
+ /**
+ * Set user-specified rating set threshold
+ */
+ // falls back to the current value of $this->rating_set_threshold
+ $rating_set_threshold = $this->getOption( 'rating_sets',
$this->rating_set_threshold );
+ if ( !is_numeric( $rating_set_threshold )) {
+ $this->error( 'Rating sets must be numeric.', true );
+ } else {
+ $this->rating_set_threshold = $rating_set_threshold;
+ }
+
+ /**
+ * Set user-specified polling period
+ */
+ // NOTE(review): $this->polling_period is not declared in the visible part of this diff - confirm it exists
+ $polling_period = $this->getOption( 'poll_period',
$this->polling_period );
+ if ( !is_numeric( $polling_period )) {
+ $this->error( 'Poll period must be numeric.', true );
+ } else {
+ $this->polling_period = $polling_period;
+ }
+
+ // set db objects
$this->dbr = wfGetDB( DB_SLAVE );
$this->dbw = wfGetDB( DB_MASTER );
+ }
+
+	/**
+	 * Tell whether every requested operation is known to this script.
+	 *
+	 * An operation is known when it is a key of $this->operation_map. The
+	 * first unknown entry makes the check fail; an empty list passes.
+	 *
+	 * @param array $ops An array of operations to check
+	 * @return bool
+	 */
+	public function checkOperations( array $ops ) {
+		$map = $this->operation_map;
+		foreach ( $ops as $name ) {
+			$known = isset( $map[ $name ] );
+			if ( !$known ) {
+				return false;
+			}
+		}
+		return true;
+	}
+
+ public function execute() {
+ // finish bootstrapping the script
+ $this->bootstrap();
- // the data structure to store ratings for a given page
- $ratings = array(); // stores rating-specific info
- $rating_set_count = array(); // keep track of rating sets
- $highs_and_lows = array(); // store highest/lowest rated page
stats
- $averages = array(); // store overall averages for a given page
+ // execute requested operations
+ foreach ( $this->operations as $operation ) {
+ $method = $this->operation_map[ $operation ];
+ $this->$method();
+ }
+ }
+
+	/**
+	 * Populate stats about problem articles
+	 *
+	 * Collects the pages that have at least $this->rating_set_threshold
+	 * rating sets and report themselves as problematic, writes one row per
+	 * page to article_feedback_stats under the 'problems' stats type, and
+	 * finally stores the freshly written data in the object cache.
+	 */
+	public function populateProblems() {
+		global $wgMemc;
-		// fetch the ratings since the lower bound timestamp
-		$this->output( 'Fetching page ratings between now and ' . date('Y-m-d H:i:s', strtotime( $this->getLowerBoundTimestamp())) . "...\n");
-		$res = $this->dbr->select(
-			'article_feedback',
+		/**
+		 * Check to see if we already have a collection of pages to operate on.
+		 * If not, generate the collection of pages and their associated ratings.
+		 */
+		if ( !isset( $this->pages ) ) {
+			$ts = $this->getLowerBoundTimestamp();
+			$this->pages = $this->populatePageRatingsSince( $ts );
+		}
+		$problems = array();
+		// iterate through pages, look for pages that meet criteria for problem articles
+		$this->output( "Finding problem articles ...\n" );
+		foreach ( $this->pages as $page ) {
+			// pages with fewer rating sets than the required threshold do not qualify
+			if ( $page->rating_set_count < $this->rating_set_threshold ) {
+				continue;
+			}
+
+			if ( $page->isProblematic() ) {
+				array_push( $problems, $page->page_id );
+			}
+		}
+		$this->output( "Done.\n" );
+
+		// fetch stats type id - add stat type if it's non-existent
+		$stats_type_id = SpecialArticleFeedback::getStatsTypeId( 'problems' );
+		if ( !$stats_type_id ) {
+			$stats_type_id = $this->addStatType( 'problems' );
+		}
+
+		// prepare data for insert into db
+		$this->output( "Preparing data for db insertion ...\n" );
+		$cur_ts = $this->dbw->timestamp();
+		$rows = array();
+		foreach ( $problems as $page_id ) {
+			$page = $this->pages->getPage( $page_id );
+			// averages are only calculated on request; make sure they are
+			// available before they are written out
+			if ( !count( $page->rating_averages ) ) {
+				$page->calculateRatingAverages();
+			}
+			$rows[] = array(
+				'afs_page_id' => $page_id,
+				'afs_orderable_data' => $page->overall_average,
+				'afs_data' => FormatJson::encode( $page->rating_averages ),
+				'afs_ts' => $cur_ts,
+				'afs_stats_type_id' => $stats_type_id,
+			);
+		}
+		$this->output( "Done.\n" );
+
+		// insert data to db, one batch at a time
+		$this->output( "Writing data to article_feedback_stats ...\n" );
+		while ( $rows ) {
+			$batch = array_splice( $rows, 0, $this->insert_batch_size );
+			$this->dbw->insert(
+				'article_feedback_stats',
+				$batch,
+				__METHOD__
+			);
+			$this->syncDBs();
+		}
+		$this->output( "Done.\n" );
+
+		// populate cache with current problem articles
+		// loading data into cache
+		$this->output( "Caching latest problems (if cache present).\n" );
+		$key = wfMemcKey( 'article_feedback_stats_problems' );
+		$result = $this->dbr->select(
+			'article_feedback_stats',
+			array(
+				'afs_page_id',
+				'afs_orderable_data',
+				'afs_data'
+			),
 			array(
-				'aa_revision',
-				'aa_user_text',
-				'aa_rating_id',
-				'aa_user_anon_token',
-				'aa_page_id',
-				'aa_rating_value',
-			),
-			array( 'aa_timestamp >= ' . $this->dbr->addQuotes( $this->getLowerBoundTimestamp() ) ),
+				'afs_ts' => $cur_ts,
+				'afs_stats_type_id' => $stats_type_id
+			),
 			__METHOD__,
-			array()
+			array( "ORDER BY" => "afs_orderable_data" )
 		);
+		// grab the article feedback special page so we can reuse the data structure building code
+		// FIXME this logic should not be in the special page class
+		$problems = SpecialArticleFeedback::buildProblems( $result );
+		// stash the data structure in the cache
+		$wgMemc->set( $key, $problems, 86400 );
+		$this->output( "Done.\n" );
+	}
+
+ /**
+ * Populate stats about highest/lowest rated articles
+ */
+ public function populateHighsLows() {
+ global $wgMemc;
- // assign the rating data to our data structure
- foreach ( $res as $row ) {
- // determine the unique hash for a given rating set
(page rev + user identifying info)
- $rating_hash = md5( $row->aa_revision .
$row->aa_user_text . $row->aa_user_anon_token );
-
- // keep track of how many rating sets a particular page
has
- if ( !isset( $rating_count[ $row->aa_page_id ][
$rating_hash ] )) {
- // we store the rating hash as a key rather
than value as checking isset( $arr[$hash] ) is way faster
- // than doing something like array_search(
$hash, $arr ) when dealing with large arrays
- $rating_set_count[ $row->aa_page_id ][
$rating_hash ] = 1;
- }
-
- $ratings[ $row->aa_page_id ][ $row->aa_rating_id ][] =
$row->aa_rating_value;
+ $averages = array(); // store overall averages for a given page
+
+ /**
+ * Chck to see if we already have a collection of pages to
operate on.
+ * If not, generate the collection of pages and their
associated ratings.
+ */
+ if ( !isset( $this->pages )) {
+ $ts = $this->getLowerBoundTimestamp();
+ $this->pages = $this->populatePageRatingsSince( $ts );
}
- $this->output( "Done\n" );
// determine the average ratings for a given page
$this->output( "Determining average ratings for articles ...\n"
);
- foreach ( $ratings as $page_id => $data ) {
- // make sure that we have at least 10 rating sets for
this page in order to qualify for ranking
- if ( count( array_keys( $rating_set_count[ $page_id ]
)) < 10 ) {
+ foreach ( $this->pages as $page ) {
+ // make sure that we have more rating sets than the
req'd threshold for this page in order to qualify for ranking
+ if ( $page->rating_set_count <
$this->rating_set_threshold ) {
continue;
}
- // calculate the rating averages for a given page
- foreach( $data as $rating_id => $rating ) {
- $rating_sum = array_sum( $rating );
- $rating_avg = $rating_sum / count( $rating );
- $highs_and_lows[ $page_id ][ 'avg_ratings' ][
$rating_id ] = $rating_avg;
+ // calculate the rating averages if they haven't
already been calculated
+ if ( !count( $page->rating_averages )) {
+ $page->calculateRatingAverages();
}
- // calculate the overall average for a page
- $overall_rating_sum = array_sum( $highs_and_lows[
$page_id ][ 'avg_ratings' ] );
- $overall_rating_average = $overall_rating_sum / count(
$highs_and_lows[ $page_id ][ 'avg_ratings' ] );
- $highs_and_lows[ $page_id ][ 'average' ] =
$overall_rating_average;
-
// store overall average rating seperately so we can
easily sort
- $averages[ $page_id ] = $overall_rating_average;
+ $averages[ $page->page_id ] = $page->overall_average;
}
$this->output( "Done.\n" );
@@ -129,35 +265,42 @@
// take lowest 50 and highest 50
$highest_and_lowest_page_ids = array_slice( $averages, 0, 50,
true );
if ( count( $averages ) > 50 ) {
+ // in the event that we have < 100 $averages total,
this will still
+ // work nicely - it will select duplicate averages, but
the +=
+ // will cause items with the same keys to essentially
be ignored
$highest_and_lowest_page_ids += array_slice( $averages,
-50, 50, true );
}
$this->output( "Done\n" );
+ // fetch stats type id - add stat type if it's non-existant
+ $stats_type_id = SpecialArticleFeedback::getStatsTypeId(
'highs_and_lows' );
+ if ( !$stats_type_id ) {
+ $stats_type_id = $this->addStatType( 'highs_and_lows' );
+ }
+
// prepare data for insert into db
$this->output( "Preparing data for db insertion ...\n");
$cur_ts = $this->dbw->timestamp();
$rows = array();
- foreach( $highs_and_lows as $page_id => $data ) {
- // make sure this is one of the highest/lowest average
ratings
- if ( !isset( $highest_and_lowest_page_ids[ $page_id ]
)) {
- continue;
- }
+ foreach( $highest_and_lowest_page_ids as $page_id =>
$overall_average ) {
+ $page = $this->pages->getPage( $page_id );
$rows[] = array(
- 'afshl_page_id' => $page_id,
- 'afshl_avg_overall' => $data[ 'average' ],
- 'afshl_avg_ratings' => FormatJson::encode(
$data[ 'avg_ratings' ] ),
- 'afshl_ts' => $cur_ts,
+ 'afs_page_id' => $page_id,
+ 'afs_orderable_data' => $page->overall_average,
+ 'afs_data' => FormatJson::encode(
$page->rating_averages ),
+ 'afs_ts' => $cur_ts,
+ 'afs_stats_type_id' => $stats_type_id,
);
}
$this->output( "Done.\n" );
// insert data to db
- $this->output( "Writing data to
article_feedback_stats_highs_lows ...\n" );
+ $this->output( "Writing data to article_feedback_stats ...\n" );
$rowsInserted = 0;
while( $rows ) {
$batch = array_splice( $rows, 0,
$this->insert_batch_size );
$this->dbw->insert(
- 'article_feedback_stats_highs_lows',
+ 'article_feedback_stats',
$batch,
__METHOD__
);
@@ -167,30 +310,96 @@
}
$this->output( "Done.\n" );
- // loading data into caching
+ // loading data into cache
$this->output( "Caching latest highs/lows (if cache
present).\n" );
$key = wfMemcKey( 'article_feedback_stats_highs_lows' );
$result = $this->dbr->select(
- 'article_feedback_stats_highs_lows',
+ 'article_feedback_stats',
array(
- 'afshl_page_id',
- 'afshl_avg_overall',
- 'afshl_avg_ratings'
+ 'afs_page_id',
+ 'afs_orderable_data',
+ 'afs_data'
),
- array( 'afshl_ts' => $cur_ts ),
+ array(
+ 'afs_ts' => $cur_ts,
+ 'afs_stats_type_id' => $stats_type_id
+ ),
__METHOD__,
- array( "ORDER BY" => "afshl_avg_overall" )
+ array( "ORDER BY" => "afs_orderable_data" )
);
// grab the article feedback special page so we can reuse the
data structure building code
// FIXME this logic should not be in the special page class
$highs_lows = SpecialArticleFeedback::buildHighsAndLows(
$result );
// stash the data structure in the cache
$wgMemc->set( $key, $highs_lows, 86400 );
- $this->output( "Done\n" );
+ $this->output( "Done\n" );
}
+	/**
+	 * Select the article_feedback rows whose aa_timestamp is at or after a
+	 * given time stamp.
+	 *
+	 * When $ts is empty the value of getLowerBoundTimestamp() is used instead.
+	 *
+	 * @param numeric $ts
+	 * @return database result object
+	 * @throws InvalidArgumentException when the time stamp is not numeric
+	 */
+	public function fetchRatingsNewerThanTs( $ts=null ) {
+		$since = $ts ? $ts : $this->getLowerBoundTimestamp();
+		if ( !is_numeric( $since ) ) {
+			throw new InvalidArgumentException( 'Timestamp expected to be numeric.' );
+		}
+
+		$fields = array(
+			'aa_revision',
+			'aa_user_text',
+			'aa_rating_id',
+			'aa_user_anon_token',
+			'aa_page_id',
+			'aa_rating_value',
+		);
+		$conds = array( 'aa_timestamp >= ' . $this->dbr->addQuotes( $since ) );
+
+		return $this->dbr->select(
+			'article_feedback',
+			$fields,
+			$conds,
+			__METHOD__,
+			array()
+		);
+	}
/**
+	 * Construct collection of pages and their ratings since a given time stamp
+	 *
+	 * Every fetched rating row is added to the Page object of its aa_page_id,
+	 * together with a key identifying the rating set it belongs to.
+	 *
+	 * @param $ts
+	 * @return object The collection of pages
+	 */
+	public function populatePageRatingsSince( $ts ) {
+		$pages = new Pages();
+		// fetch the ratings since the lower bound timestamp
+		$this->output( 'Fetching page ratings between now and ' . date( 'Y-m-d H:i:s', strtotime( $ts ) ) . "...\n" );
+		$res = $this->fetchRatingsNewerThanTs( $ts );
+		$this->output( "Done.\n" );
+
+		// assign the rating data to our data structure
+		$this->output( "Assigning fetched ratings to internal data structure ...\n" );
+		foreach ( $res as $row ) {
+			// getPage() hands back the Page object held by the collection, so
+			// changes made through $page are kept there; a reference
+			// assignment (=&) is not needed for that and is not valid on a
+			// function that does not return by reference
+			$page = $pages->getPage( $row->aa_page_id );
+
+			// determine the unique key for a given rating set (page rev + user identifying info)
+			$rating_hash = $row->aa_revision . "|" . $row->aa_user_text . "|" . $row->aa_user_anon_token;
+
+			// add rating data for this page
+			$page->addRating( $row->aa_rating_id, $row->aa_rating_value, $rating_hash );
+		}
+		$this->output( "Done.\n" );
+		return $pages;
+	}
+
+ /**
* Set $this->timestamp
* @param int $ts
*/
@@ -216,7 +425,177 @@
}
return $this->lowerBoundTimestamp;
}
+
+	/**
+	 * Add stat type record to article_feedback_stats_types
+	 *
+	 * @param string $stat_type The identifying name of the stat type (eg 'highs_and_lows')
+	 * @return int The insert id reported by the master connection for the new row
+	 */
+	public function addStatType( $stat_type ) {
+		// afst_type is a column of the types table, not of article_feedback_stats
+		$this->dbw->insert(
+			'article_feedback_stats_types',
+			array( 'afst_type' => $stat_type ),
+			__METHOD__
+		);
+		return $this->dbw->insertId();
+	}
}
+/**
+ * A class to represent a page and data about its ratings
+ */
+class Page {
+	/**
+	 * Id of the page the ratings belong to
+	 * @var int
+	 */
+	public $page_id;
+
+	/**
+	 * The number of rating sets recorded for this page
+	 * @var int
+	 */
+	public $rating_set_count = 0;
+
+	/**
+	 * An array of ratings for this page, keyed by rating type id; every
+	 * entry is a list of the rating values seen for that type
+	 * @var array
+	 */
+	public $ratings = array();
+
+	/**
+	 * An array to hold mean ratings by rating type id
+	 * @var array
+	 */
+	public $rating_averages = array();
+
+	/**
+	 * Mean of the per-type rating averages for this page
+	 * @var float
+	 */
+	public $overall_average;
+
+	/**
+	 * Whether the page is considered problematic; null until
+	 * determineProblematicStatus() has run
+	 * @var bool|null
+	 */
+	public $problematic;
+
+	/**
+	 * An array of rating set hashes, which are used to identify unique sets of
+	 * ratings
+	 * @var array
+	 */
+	protected $rating_set_hashes = array();
+
+	/**
+	 * @param int $page_id
+	 * @throws Exception when the page id is not numeric
+	 */
+	public function __construct( $page_id ) {
+		if ( !is_numeric( $page_id ) ) {
+			throw new Exception( 'Page id must be numeric.' );
+		}
+		$this->page_id = $page_id;
+	}
+
+	/**
+	 * Add a new rating for this particular page
+	 * @param int $rating_id
+	 * @param int $rating_value
+	 * @param string $rating_set_hash
+	 */
+	public function addRating( $rating_id, $rating_value, $rating_set_hash = null ) {
+		$this->ratings[ $rating_id ][] = $rating_value;
+
+		if ( $rating_set_hash ) {
+			$this->trackRatingSet( $rating_set_hash );
+		}
+	}
+
+	/**
+	 * Keep track of rating sets
+	 *
+	 * Record when we see a new rating set and increment the set count
+	 * @param string $rating_set_hash
+	 */
+	protected function trackRatingSet( $rating_set_hash ) {
+		if ( isset( $this->rating_set_hashes[ $rating_set_hash ] ) ) {
+			return;
+		}
+
+		// the hash is stored as a key so the lookup above stays cheap
+		$this->rating_set_hashes[ $rating_set_hash ] = 1;
+		$this->rating_set_count += 1;
+	}
+
+	/**
+	 * Calculate the mean rating per rating type and the overall average
+	 *
+	 * Fills $this->rating_averages and sets $this->overall_average to the
+	 * mean of those averages, or to 0 when there are no ratings.
+	 */
+	public function calculateRatingAverages() {
+		// determine averages for each rating type
+		foreach ( $this->ratings as $rating_id => $rating ) {
+			$rating_sum = array_sum( $rating );
+			$rating_avg = $rating_sum / count( $rating );
+			$this->rating_averages[ $rating_id ] = $rating_avg;
+		}
+
+		// determine overall rating average for this page
+		if ( count( $this->rating_averages ) ) {
+			$overall_rating_sum = array_sum( $this->rating_averages );
+			$overall_rating_average = $overall_rating_sum / count( $this->rating_averages );
+		} else {
+			$overall_rating_average = 0;
+		}
+		$this->overall_average = $overall_rating_average;
+	}
+
+	/**
+	 * Returns whether or not this page is considered problematic
+	 *
+	 * The status is determined on first use and remembered afterwards.
+	 * @return bool
+	 */
+	public function isProblematic() {
+		if ( !isset( $this->problematic ) ) {
+			$this->determineProblematicStatus();
+		}
+		return $this->problematic;
+	}
+
+	/**
+	 * Determine whether this article is 'problematic'
+	 *
+	 * If a page has one or more rating categories where the number of
+	 * ratings <= 2 reaches 70% of the ratings (rounded to the nearest whole
+	 * number), it is considered problematic.
+	 */
+	public function determineProblematicStatus() {
+		foreach ( $this->ratings as $rating_id => $ratings ) {
+			$count = 0;
+			foreach ( $ratings as $rating ) {
+				if ( $rating <= 2 ) {
+					$count += 1;
+				}
+			}
+
+			$threshold = round( 0.7 * count( $ratings ) );
+			if ( $count >= $threshold ) {
+				// one qualifying category is enough
+				$this->problematic = true;
+				return;
+			}
+		}
+
+		$this->problematic = false;
+		return;
+	}
+}
+
+/**
+ * Collection of Page objects, indexed by page id
+ *
+ * Iterating over the collection iterates over the stored pages.
+ */
+class Pages implements IteratorAggregate {
+	/**
+	 * The stored Page objects, keyed by page id
+	 * @var array
+	 */
+	public $pages = array();
+
+	/**
+	 * Fetch the Page stored for an id, creating it first when it is missing
+	 * @param int $page_id
+	 * @return Page
+	 */
+	public function getPage( $page_id ) {
+		if ( isset( $this->pages[ $page_id ] ) ) {
+			return $this->pages[ $page_id ];
+		}
+		$this->addPage( $page_id );
+		return $this->pages[ $page_id ];
+	}
+
+	/**
+	 * Store a new Page under the given id, replacing any existing entry
+	 * @param int $page_id
+	 */
+	public function addPage( $page_id ) {
+		$fresh = new Page( $page_id );
+		$this->pages[ $page_id ] = $fresh;
+	}
+
+	/**
+	 * @return ArrayIterator Iterator over the stored pages
+	 */
+	public function getIterator() {
+		$stored = $this->pages;
+		return new ArrayIterator( $stored );
+	}
+}
+
$maintClass = "PopulateAFStatistics";
require_once( DO_MAINTENANCE );
Added: trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTable.sql
===================================================================
--- trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTable.sql
(rev 0)
+++ trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTable.sql
2011-06-01 18:32:17 UTC (rev 89277)
@@ -0,0 +1,13 @@
+-- Generic store for computed ArticleFeedback statistics; one row per page,
+-- stats type and insertion job timestamp.
+-- No DROP TABLE here: the table is only created when it is missing, so
+-- existing stats are never discarded by re-running this script.
+CREATE TABLE IF NOT EXISTS /*_*/article_feedback_stats (
+	afs_page_id integer unsigned NOT NULL,
+	-- data point to be used for ordering this data
+	afs_orderable_data double unsigned NOT NULL,
+	-- json object of stat data
+	afs_data varbinary(255) NOT NULL,
+	-- refers to article_feedback_stats_types.afst_id
+	afs_stats_type_id integer unsigned NOT NULL,
+	-- timestamp of insertion job
+	afs_ts binary(14) NOT NULL
+) /*$wgDBTableOptions*/;
+-- the prefix markers must be directly attached to the names they prefix
+CREATE UNIQUE INDEX /*i*/afs_page_ts_type ON /*_*/article_feedback_stats( afs_page_id, afs_ts, afs_stats_type_id );
+CREATE INDEX /*i*/afs_ts_avg_overall ON /*_*/article_feedback_stats (afs_ts, afs_orderable_data);
Added: trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTypeTable.sql
===================================================================
--- trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTypeTable.sql
(rev 0)
+++ trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTypeTable.sql
2011-06-01 18:32:17 UTC (rev 89277)
@@ -0,0 +1,9 @@
+-- Lookup table of the stat types stored in article_feedback_stats
+CREATE TABLE IF NOT EXISTS /*_*/article_feedback_stats_types (
+	afst_id integer unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
+	afst_type varbinary(255) NOT NULL
+) /*$wgDBTableOptions*/;
+-- the prefix markers must be directly attached to the names they prefix
+CREATE UNIQUE INDEX /*i*/afst_type ON /*_*/article_feedback_stats_types( afst_type );
+
+-- Pre-populate table with stat types
+INSERT INTO /*_*/article_feedback_stats_types ( afst_type ) VALUES ( 'highs_and_lows' );
+INSERT INTO /*_*/article_feedback_stats_types ( afst_type ) VALUES ( 'problems' );
\ No newline at end of file
Added:
trunk/extensions/ArticleFeedback/sql/MigrateArticleFeedbackStatsHighsLows.sql
===================================================================
---
trunk/extensions/ArticleFeedback/sql/MigrateArticleFeedbackStatsHighsLows.sql
(rev 0)
+++
trunk/extensions/ArticleFeedback/sql/MigrateArticleFeedbackStatsHighsLows.sql
2011-06-01 18:32:17 UTC (rev 89277)
@@ -0,0 +1,22 @@
+-- migrate data from article_feedback_stats_highs_lows into
article_feedback_stats
+-- Copy every legacy highs/lows row into the generic stats table, tagging it
+-- with the id of the 'highs_and_lows' stat type.
+INSERT INTO /*_*/article_feedback_stats (
+ afs_page_id,
+ afs_orderable_data,
+ afs_data,
+ afs_ts,
+ afs_stats_type_id
+)
+SELECT
+ afshl_page_id,
+ afshl_avg_overall,
+ afshl_avg_ratings,
+ afshl_ts,
+ afst_id
+FROM
+ /*_*/article_feedback_stats_highs_lows,
+ /*_*/article_feedback_stats_types
+-- the two tables are joined without a join condition; the filter below
+-- restricts the types side to the 'highs_and_lows' entry
+WHERE
+ /*_*/article_feedback_stats_types.afst_type='highs_and_lows';
+
+-- get rid of article_feedback_stats_highs_lows as it is no longer necessary
+DROP TABLE /*_*/article_feedback_stats_highs_lows;
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs