http://www.mediawiki.org/wiki/Special:Code/MediaWiki/89277

Revision: 89277
Author:   awjrichards
Date:     2011-06-01 18:32:17 +0000 (Wed, 01 Jun 2011)
Log Message:
-----------
Added 'problem articles' view to dashboard; refactored dashboard code 
(populateAFStatistics.php, primarily); Added new schema sql scripts as well as 
sql migration script

Modified Paths:
--------------
    trunk/extensions/ArticleFeedback/ArticleFeedback.hooks.php
    trunk/extensions/ArticleFeedback/ArticleFeedback.php
    trunk/extensions/ArticleFeedback/SpecialArticleFeedback.php
    trunk/extensions/ArticleFeedback/populateAFStatistics.php

Added Paths:
-----------
    trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTable.sql
    trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTypeTable.sql
    
trunk/extensions/ArticleFeedback/sql/MigrateArticleFeedbackStatsHighsLows.sql

Modified: trunk/extensions/ArticleFeedback/ArticleFeedback.hooks.php
===================================================================
--- trunk/extensions/ArticleFeedback/ArticleFeedback.hooks.php  2011-06-01 
17:53:05 UTC (rev 89276)
+++ trunk/extensions/ArticleFeedback/ArticleFeedback.hooks.php  2011-06-01 
18:32:17 UTC (rev 89277)
@@ -174,13 +174,53 @@
                                $dir . '/sql/AddRevisionsTable.sql',
                                true
                        ) );
+                       
+                       if ( $db->tableExists( 
'article_feedback_stats_highs_lows') ) {
+                               if ( !$db->tableExists( 
'article_feedback_stats_types' )) {
+                                       // add article_feedback_stats_type if 
necessary
+                                       $updater->addExtensionUpdate( array(
+                                               'addTable',
+                                               'article_feedback_stats_types',
+                                               $dir . 
'/sql/AddArticleFeedbackStatsTypesTable.sql',
+                                               true
+                                       ) );
+                               }
+                               
+                               $updater->addExtensionUpdate( array(
+                                       'addTable',
+                                       'article_feedback_stats',
+                                       $dir . 
'/sql/AddArticleFeedbackStatsTable.sql',
+                                       true
+                               ) );
+                               
+                               // migrate article_feedback_stats_highs_lows to 
article_feedback_stats
+                               $updater->addExtensionUpdate( array(
+                                       'applyPatch',
+                                       $dir . 
'/sql/MigrateArticleFeedbackStatsHighsLows.sql',
+                                       true
+                               ) );
+                       } else {
+                               // add article_feedback_stats and 
article_feedback_stats_type
+                               if ( !$db->tableExists( 
'article_feedback_stats_type' )) {
+                                       $updater->addExtensionUpdate( array(
+                                               'addTable',
+                                               'article_feedback_stats_types',
+                                               $dir . 
'/sql/AddArticleFeedbackStatsTypesTable.sql',
+                                               true
+                                       ) );
+                               }
+                               
+                               if ( !$db->tableExists( 
'article_feedback_stats' )) {
+                                       $updater->addExtensionUpdate( array(
+                                               'addTable',
+                                               'article_feedback_stats',
+                                               $dir . 
'/sql/AddArticleFeedbackStatsTable.sql',
+                                               true
+                                       ) );
+                               }
+                       }
+                       
                        $updater->addExtensionUpdate( array(
-                               'addTable',
-                               'article_feedback_stats_highs_lows',
-                               $dir . '/sql/AddStatsHighsLowsTable.sql',
-                               true
-                       ) );
-                       $updater->addExtensionUpdate( array(
                                'addIndex',
                                'article_feedback',
                                'article_feedback_timestamp',

Modified: trunk/extensions/ArticleFeedback/ArticleFeedback.php
===================================================================
--- trunk/extensions/ArticleFeedback/ArticleFeedback.php        2011-06-01 
17:53:05 UTC (rev 89276)
+++ trunk/extensions/ArticleFeedback/ArticleFeedback.php        2011-06-01 
18:32:17 UTC (rev 89277)
@@ -11,7 +11,7 @@
  */
 
 /* XXX: Survey setup */
-require_once( dirname( dirname( __FILE__ ) ) . 
'/SimpleSurvey/SimpleSurvey.php' );
+require_once( $IP . '/extensions/SimpleSurvey/SimpleSurvey.php' );
 
 /* Configuration */
 

Modified: trunk/extensions/ArticleFeedback/SpecialArticleFeedback.php
===================================================================
--- trunk/extensions/ArticleFeedback/SpecialArticleFeedback.php 2011-06-01 
17:53:05 UTC (rev 89276)
+++ trunk/extensions/ArticleFeedback/SpecialArticleFeedback.php 2011-06-01 
18:32:17 UTC (rev 89277)
@@ -43,8 +43,8 @@
                        
                        /*
                        This functionality does not exist yet.
-                       $this->renderWeeklyMostChanged();
-                       $this->renderRecentLows();*/
+                       $this->renderWeeklyMostChanged();*/
+                       $this->renderProblems();
                } else {
                        $wgOut->addWikiText( 'This page has been disabled.' );
                }
@@ -187,15 +187,15 @@
        }
 
        /**
-        * Renders recent lows
+        * Renders problem articles table
         * 
         * @return String: HTML table of recent lows
         */
-       protected function renderRecentLows() {
+       protected function renderProblems() {
                global $wgOut, $wgUser, $wgArticleFeedbackRatings;
 
                $rows = array();
-               foreach ( $this->getRecentLows() as $page ) {
+               foreach ( $this->getProblems() as $page ) {
                        $row = array();
                        $pageTitle = Title::newFromText( $page['page'] );
                        $row['page'] = $wgUser->getSkin()->link( $pageTitle, 
$pageTitle->getPrefixedText() );
@@ -227,6 +227,54 @@
        }
 
        /**
+        * Gets a list of articles which were rated exceptionally low
+        */
+       protected function getProblems() {
+               global $wgMemc;
+               // check if we've got results in the cache
+               $key = wfMemcKey( 'article_feedback_stats_problems' );
+               $cache = $wgMemc->get( $key );
+               if ( is_array( $cache )) {
+                       $highs_lows = $cache;
+               } else {
+                       $dbr = wfGetDB( DB_SLAVE );
+                       // first find the freshest timestamp
+                       $row = $dbr->selectRow(
+                               'article_feedback_stats',
+                               array( 'afs_ts' ),
+                               "",
+                               __METHOD__,
+                               array( "ORDER BY" => "afs_ts DESC", "LIMIT" => 
1 )
+                       );
+                       
+                       // if we have no results, just return
+                       if ( !$row || !$row->afs_ts ) {
+                               return array();
+                       }
+                       
+                       // select ratings with that ts
+                       $result = $dbr->select(
+                               'article_feedback_stats',
+                               array(
+                                       'afs_page_id',
+                                       'afs_orderable_data',
+                                       'afs_data'
+                               ),
+                               array( 
+                                       'afs_ts' => $row->afs_ts,
+                                       'afs_stats_type_id' => 
self::getStatsTypeId( 'problems' )
+                               ),
+                               __METHOD__,
+                               array( "ORDER BY" => "afs_orderable_data" )
+                       );
+                       $problems = $this->buildProblems( $result );
+                       $wgMemc->set( $key, $problems, 86400 );
+               }
+               
+               return $problems;
+       }
+       
+       /**
         * Gets a list of articles which were rated exceptionally high or low.
         * 
         * - Based on average of all rating categories
@@ -237,7 +285,6 @@
         */
        protected function getDailyHighsAndLows() {
                global $wgMemc;
-               
                // check if we've got results in the cache
                $key = wfMemcKey( 'article_feedback_stats_highs_lows' );
                $cache = $wgMemc->get( $key );
@@ -247,29 +294,32 @@
                        $dbr = wfGetDB( DB_SLAVE );
                        // first find the freshest timestamp
                        $row = $dbr->selectRow(
-                               'article_feedback_stats_highs_lows',
-                               array( 'afshl_ts' ),
+                               'article_feedback_stats',
+                               array( 'afs_ts' ),
                                "",
                                __METHOD__,
-                               array( "ORDER BY" => "afshl_ts DESC", "LIMIT" 
=> 1 )
+                               array( "ORDER BY" => "afs_ts DESC", "LIMIT" => 
1 )
                        );
                        
                        // if we have no results, just return
-                       if ( !$row || !$row->afshl_ts ) {
+                       if ( !$row || !$row->afs_ts ) {
                                return array();
                        }
                        
                        // select ratings with that ts
                        $result = $dbr->select(
-                               'article_feedback_stats_highs_lows',
+                               'article_feedback_stats',
                                array(
-                                       'afshl_page_id',
-                                       'afshl_avg_overall',
-                                       'afshl_avg_ratings'
+                                       'afs_page_id',
+                                       'afs_orderable_data',
+                                       'afs_data'
                                ),
-                               array( 'afshl_ts' => $row->afshl_ts ),
+                               array( 
+                                       'afs_ts' => $row->afs_ts,
+                                       'afs_stats_type_id' => 
self::getStatsTypeId( 'highs_and_lows' )
+                               ),
                                __METHOD__,
-                               array( "ORDER BY" => "afshl_avg_overall" )
+                               array( "ORDER BY" => "afs_orderable_data" )
                        );
                        $highs_lows = $this->buildHighsAndLows( $result );
                        $wgMemc->set( $key, $highs_lows, 86400 );
@@ -331,15 +381,56 @@
                $highs_lows = array();
                foreach ( $result as $row ) {
                        $highs_lows[] = array(
-                               'page' => $row->afshl_page_id,
-                               'ratings' => FormatJson::decode( 
$row->afshl_avg_ratings ),
-                               'average' => $row->afshl_avg_overall            
+                               'page' => $row->afs_page_id,
+                               'ratings' => FormatJson::decode( $row->afs_data 
),
+                               'average' => $row->afs_orderable_data           
                        );
                }
                return $highs_lows;
        }
        
        /**
+        * Build data store of problems for use when rendering table
+        * @param object Database result
+        * @return array
+        */
+       public static function buildProblems( $result ) {
+               $problems = array();
+               foreach( $result as $row ) {
+                       $problems[] = array(
+                               'page' => $row->afs_page_id,
+                               'ratings' => FormatJson::decode( $row->afs_data 
),
+                               'average' => $row->afs_orderable_data           
+                       );
+               }
+               return $problems;
+       }
+       
+       /**
+        * Get the stats type id for a given stat type
+        * @param string $stats_type
+        */
+       public static function getStatsTypeId( $stats_type ) {
+               global $wgMemc;
+               $key = wfMemcKey( 'article_feedback_stats_type_' . $stats_type 
);
+               $cache = $wgMemc->get( $key );
+               if ( $cache ) {
+                       return $cache;
+               }
+               
+               $dbr = wfGetDB( DB_SLAVE );
+               $row = $dbr->selectRow(
+                       'article_feedback_stats_types',
+                       array( 'afst_id' ),
+                       array( 'afst_type' => $stats_type ),
+                       __METHOD__,
+                       array( )
+               );
+               $wgMemc->set( $key, $row->afst_id );
+               return $row->afst_id;
+       }
+       
+       /**
         * Gets a list of articles which have quickly changing ratings.
         * 
         * - Based on any rating category

Modified: trunk/extensions/ArticleFeedback/populateAFStatistics.php
===================================================================
--- trunk/extensions/ArticleFeedback/populateAFStatistics.php   2011-06-01 
17:53:05 UTC (rev 89276)
+++ trunk/extensions/ArticleFeedback/populateAFStatistics.php   2011-06-01 
18:32:17 UTC (rev 89277)
@@ -37,9 +37,38 @@
         */
        protected $dbw;
        
+       /**
+        * Valid operations and their execution methods for this script to 
perform
+        * 
+        * Operations are passed in as options during run-time - only valid 
options,
+        * which are defined here, can be executed. Valid operations are mapped 
here
+        * to a corresponding method ( array( 'operation' => 'method' ))
+        * @var array
+        */
+       protected $operation_map = array( 
+               'highslows' => 'populateHighsLows', 
+               'problems' => 'populateProblems',
+       );
+       
+       /**
+        * Operations to execute
+        * @var array
+        */
+       public $operations = array();
+       
+       /**
+        * The minimum number of rating sets required before taking some action
+        * @var int
+        */
+       public $rating_set_threshold = 10;
+       
        public function __construct() {
                parent::__construct();
                $this->mDescription = "Populates the article feedback stats 
tables";
+               
+               $this->addOption( 'op', 'The ArticleFeedback stats gathering 
operation to run (eg "highslows").  Can specify multiple operations, separated 
by comma.', true, true );
+               $this->addOption( 'rating_sets', 'The minimum number of rating 
sets before taking an action.', false, true );
+               $this->addOption( 'poll_period', 'The polling period for 
fetching data, in seconds.', false, true );
        }
        
        public function syncDBs() {
@@ -54,72 +83,179 @@
                }
        }
        
-       public function execute() {
-               global $wgMemc;
+       /**
+        * Bootstrap this maintenance script
+        * 
+        * Performs operations necessary for this maintenance script to run 
which 
+        * cannot or do not make sense to run in the constructor.
+        */
+       public function bootstrap() {
+               /**
+                * Set user-specified operations to perform
+                */
+               $operations = explode( ',', $this->getOption( 'op' ));
+               // check sanity of specified operations
+               if ( !$this->checkOperations( $operations )) {
+                       $this->error( 'Invalid operation specified.', true );
+               } else {
+                       $this->operations = $operations;
+               }
+
+               /**
+                * Set user-specified rating set threshold
+                */
+               $rating_set_threshold = $this->getOption( 'rating_sets', 
$this->rating_set_threshold );
+               if ( !is_numeric( $rating_set_threshold )) {
+                       $this->error( 'Rating sets must be numeric.', true );
+               } else {
+                       $this->rating_set_threshold = $rating_set_threshold;
+               }
+
+               /**
+                * Set user-specified polling period
+                */
+               $polling_period = $this->getOption( 'poll_period', 
$this->polling_period );
+               if ( !is_numeric( $polling_period )) {
+                       $this->error( 'Poll period must be numeric.', true );
+               } else {
+                       $this->polling_period = $polling_period;
+               }
+               
+               // set db objects
                $this->dbr = wfGetDB( DB_SLAVE );
                $this->dbw = wfGetDB( DB_MASTER );
+       }
+       
+       /**
+        * Check whether or not specified operations are valid.
+        * 
+        * A specified operation is considered valid if it exists
+        * as a key in the operation map.
+        * 
+        * @param array $ops An array of operations to check
+        * @return bool
+        */
+       public function checkOperations( array $ops ) {
+               foreach ( $ops as $operation ) {
+                       if ( !isset( $this->operation_map[ $operation ] )) {
+                               return false;
+                       }
+               }
+               return true;
+       }
+       
+       public function execute() {
+               // finish bootstrapping the script
+               $this->bootstrap();
                
-               // the data structure to store ratings for a given page
-               $ratings = array();  // stores rating-specific info
-               $rating_set_count = array(); // keep track of rating sets
-               $highs_and_lows = array(); // store highest/lowest rated page 
stats
-               $averages = array(); // store overall averages for a given page
+               // execute requested operations
+               foreach ( $this->operations as $operation ) {
+                       $method = $this->operation_map[ $operation ];
+                       $this->$method();
+               }
+       }
+       
+       public function populateProblems() {
+               global $wgMemc;
                
-               // fetch the ratings since the lower bound timestamp
-               $this->output( 'Fetching page ratings between now and ' . 
date('Y-m-d H:i:s', strtotime( $this->getLowerBoundTimestamp())) . "...\n");
-               $res = $this->dbr->select(
-                       'article_feedback', 
+               /**
+                * Check to see if we already have a collection of pages to 
operate on.
+                * If not, generate the collection of pages and their 
associated ratings.
+                */
+               if ( !isset( $this->pages )) {
+                       $ts = $this->getLowerBoundTimestamp();
+                       $this->pages = $this->populatePageRatingsSince( $ts );
+               }
+               $problems = array();
+               // iterate through pages, look for pages that meet criteria for 
problem articles
+               $this->output( "Finding problem articles ...\n" );
+               foreach ( $this->pages as $page ) {
+                       // make sure that we have more rating sets than the 
req'd threshold for this page in order to qualify for calculating
+                       if ( $page->rating_set_count < 
$this->rating_set_threshold ) {
+                               continue;
+                       }
+                       
+                       if ( $page->isProblematic() ) {
+                               array_push( $problems, $page->page_id );
+                       }
+               }
+               
+               // populate stats table with problem articles & associated data
+               // fetch stats type id - add stat type if it's nonexistent
+               $stats_type_id = SpecialArticleFeedback::getStatsTypeId( 
'problems' );
+               if ( !$stats_type_id ) {
+                       $stats_type_id = $this->addStatType( 'problems' );
+               }
+               foreach( $problems as $page_id ) {
+                       $page = $this->pages->getPage( $page_id );
+                       $rows[] = array(
+                               'afs_page_id' => $page_id,
+                               'afs_orderable_data' => $page->overall_average,
+                               'afs_data' => FormatJson::encode( 
$page->rating_averages ),
+                               'afs_ts' => $cur_ts,
+                               'afs_stats_type_id' => $stats_type_id,
+                       );
+               }
+               $this->output( "Done.\n" );
+               
+               // populate cache with current problem articles
+               // loading data into cache
+               $this->output( "Caching latest problems (if cache present).\n" 
);
+               $key = wfMemcKey( 'article_feedback_stats_problems' );
+               $result = $this->dbr->select(
+                       'article_feedback_stats',
+                       array(
+                               'afs_page_id',
+                               'afs_orderable_data',
+                               'afs_data'
+                       ),
                        array( 
-                               'aa_revision',
-                               'aa_user_text',
-                               'aa_rating_id',
-                               'aa_user_anon_token',
-                               'aa_page_id', 
-                               'aa_rating_value',
-                       ), 
-                       array( 'aa_timestamp >= ' . $this->dbr->addQuotes( 
$this->getLowerBoundTimestamp() ) ),
+                               'afs_ts' => $cur_ts,
+                               'afs_stats_type_id' => $stats_type_id 
+                       ),
                        __METHOD__,
-                       array()
+                       array( "ORDER BY" => "afs_orderable_data" )
                );
+               // grab the article feedback special page so we can reuse the 
data structure building code
+               // FIXME this logic should not be in the special page class
+               $problems = SpecialArticleFeedback::buildProblems( $result );
+               // stash the data structure in the cache
+               $wgMemc->set( $key, $problems, 86400 );
+               $this->output( "Done.\n" );
+       }
+       
+       /**
+        * Populate stats about highest/lowest rated articles
+        */
+       public function populateHighsLows() {
+               global $wgMemc;
                
-               // assign the rating data to our data structure
-               foreach ( $res as $row ) {
-                       // determine the unique hash for a given rating set 
(page rev + user identifying info)
-                       $rating_hash = md5( $row->aa_revision . 
$row->aa_user_text . $row->aa_user_anon_token );
-                       
-                       // keep track of how many rating sets a particular page 
has
-                       if ( !isset( $rating_count[ $row->aa_page_id ][ 
$rating_hash ] )) {
-                               // we store the rating hash as a key rather 
than value as checking isset( $arr[$hash] ) is way faster
-                               // than doing something like array_search( 
$hash, $arr ) when dealing with large arrays
-                               $rating_set_count[ $row->aa_page_id ][ 
$rating_hash ] = 1;
-                       }
-                       
-                       $ratings[ $row->aa_page_id ][ $row->aa_rating_id ][] = 
$row->aa_rating_value; 
+               $averages = array(); // store overall averages for a given page
+               
+               /**
+                * Check to see if we already have a collection of pages to 
operate on.
+                * If not, generate the collection of pages and their 
associated ratings.
+                */
+               if ( !isset( $this->pages )) {
+                       $ts = $this->getLowerBoundTimestamp();
+                       $this->pages = $this->populatePageRatingsSince( $ts );
                }
-               $this->output( "Done\n" );
 
                // determine the average ratings for a given page
                $this->output( "Determining average ratings for articles ...\n" 
);
-               foreach ( $ratings as $page_id => $data ) {
-                       // make sure that we have at least 10 rating sets for 
this page in order to qualify for ranking
-                       if ( count( array_keys( $rating_set_count[ $page_id ] 
)) < 10 ) {
+               foreach ( $this->pages as $page ) {
+                       // make sure that we have more rating sets than the 
req'd threshold for this page in order to qualify for ranking
+                       if ( $page->rating_set_count < 
$this->rating_set_threshold ) {
                                continue;
                        }
                        
-                       // calculate the rating averages for a given page
-                       foreach( $data as $rating_id => $rating ) {
-                               $rating_sum = array_sum( $rating );
-                               $rating_avg = $rating_sum / count( $rating );
-                               $highs_and_lows[ $page_id ][ 'avg_ratings' ][ 
$rating_id ] = $rating_avg;
+                       // calculate the rating averages if they haven't 
already been calculated
+                       if ( !count( $page->rating_averages )) {
+                               $page->calculateRatingAverages();
                        }
                        
-                       // calculate the overall average for a page
-                       $overall_rating_sum = array_sum( $highs_and_lows[ 
$page_id ][ 'avg_ratings' ] );
-                       $overall_rating_average = $overall_rating_sum / count( 
$highs_and_lows[ $page_id ][ 'avg_ratings' ] );
-                       $highs_and_lows[ $page_id ][ 'average' ] = 
$overall_rating_average;
-                       
                        // store overall average rating seperately so we can 
easily sort
-                       $averages[ $page_id ] = $overall_rating_average;
+                       $averages[ $page->page_id ] = $page->overall_average;
                }
                $this->output( "Done.\n" );
 
@@ -129,35 +265,42 @@
                // take lowest 50 and highest 50
                $highest_and_lowest_page_ids = array_slice( $averages, 0, 50, 
true );
                if ( count( $averages ) > 50 ) {
+                       // in the event that we have < 100 $averages total, 
this will still
+                       // work nicely - it will select duplicate averages, but 
the +=
+                       // will cause items with the same keys to essentially 
be ignored
                        $highest_and_lowest_page_ids += array_slice( $averages, 
-50, 50, true );
                }
                $this->output( "Done\n" );
                
+               // fetch stats type id - add stat type if it's nonexistent
+               $stats_type_id = SpecialArticleFeedback::getStatsTypeId( 
'highs_and_lows' );
+               if ( !$stats_type_id ) {
+                       $stats_type_id = $this->addStatType( 'highs_and_lows' );
+               }
+               
                // prepare data for insert into db
                $this->output( "Preparing data for db insertion ...\n");
                $cur_ts = $this->dbw->timestamp();
                $rows = array();
-               foreach( $highs_and_lows as $page_id => $data ) {
-                       // make sure this is one of the highest/lowest average 
ratings
-                       if ( !isset( $highest_and_lowest_page_ids[ $page_id ] 
)) {
-                               continue;
-                       }
+               foreach( $highest_and_lowest_page_ids as $page_id => 
$overall_average ) {
+                       $page = $this->pages->getPage( $page_id );
                        $rows[] = array(
-                               'afshl_page_id' => $page_id,
-                               'afshl_avg_overall' => $data[ 'average' ],
-                               'afshl_avg_ratings' => FormatJson::encode( 
$data[ 'avg_ratings' ] ),
-                               'afshl_ts' => $cur_ts,
+                               'afs_page_id' => $page_id,
+                               'afs_orderable_data' => $page->overall_average,
+                               'afs_data' => FormatJson::encode( 
$page->rating_averages ),
+                               'afs_ts' => $cur_ts,
+                               'afs_stats_type_id' => $stats_type_id,
                        );
                }
                $this->output( "Done.\n" );
 
                // insert data to db
-               $this->output( "Writing data to 
article_feedback_stats_highs_lows ...\n" );
+               $this->output( "Writing data to article_feedback_stats ...\n" );
                $rowsInserted = 0;
                while( $rows ) {
                        $batch = array_splice( $rows, 0, 
$this->insert_batch_size );
                        $this->dbw->insert( 
-                               'article_feedback_stats_highs_lows',
+                               'article_feedback_stats',
                                $batch,
                                __METHOD__
                        );
@@ -167,30 +310,96 @@
                }
                $this->output( "Done.\n" );
                
-               // loading data into caching
+               // loading data into cache
                $this->output( "Caching latest highs/lows (if cache 
present).\n" );
                $key = wfMemcKey( 'article_feedback_stats_highs_lows' );
                $result = $this->dbr->select(
-                       'article_feedback_stats_highs_lows',
+                       'article_feedback_stats',
                        array(
-                               'afshl_page_id',
-                               'afshl_avg_overall',
-                               'afshl_avg_ratings'
+                               'afs_page_id',
+                               'afs_orderable_data',
+                               'afs_data'
                        ),
-                       array( 'afshl_ts' => $cur_ts ),
+                       array( 
+                               'afs_ts' => $cur_ts,
+                               'afs_stats_type_id' => $stats_type_id 
+                       ),
                        __METHOD__,
-                       array( "ORDER BY" => "afshl_avg_overall" )
+                       array( "ORDER BY" => "afs_orderable_data" )
                );
                // grab the article feedback special page so we can reuse the 
data structure building code
                // FIXME this logic should not be in the special page class
                $highs_lows = SpecialArticleFeedback::buildHighsAndLows( 
$result );
                // stash the data structure in the cache
                $wgMemc->set( $key, $highs_lows, 86400 );
-               $this->output( "Done\n" );
+               $this->output( "Done\n" );              
        }
        
+       /**
+        * Fetch ratings at or after a given time stamp (aa_timestamp >= $ts).
+        *
+        * If no timestamp is provided, falls back to $this->getLowerBoundTimestamp()
+        * @param numeric $ts Lower bound; InvalidArgumentException is thrown if it is not numeric
+        * @return object Result of the select on the article_feedback table
+        */
+       public function fetchRatingsNewerThanTs( $ts=null ) {
+               if ( !$ts ) {
+                       $ts = $this->getLowerBoundTimestamp();
+               }
+
+               if ( !is_numeric( $ts )) {
+                       throw new InvalidArgumentException( 'Timestamp expected to be numeric.' );
+               }
+
+               $res = $this->dbr->select(
+                       'article_feedback',
+                       array(
+                               'aa_revision',
+                               'aa_user_text',
+                               'aa_rating_id',
+                               'aa_user_anon_token',
+                               'aa_page_id',
+                               'aa_rating_value',
+                       ),
+                       array( 'aa_timestamp >= ' . $this->dbr->addQuotes( $ts )),
+                       __METHOD__,
+                       array()
+               );
+
+               return $res;
+       }
        
        /**
+        * Construct collection of pages and their ratings since a given time stamp
+        * @param $ts Lower-bound timestamp, passed on to fetchRatingsNewerThanTs()
+        * @return object The collection of pages (a Pages instance)
+        */
+       public function populatePageRatingsSince( $ts ) {
+               $pages = new Pages();
+               // fetch the ratings since the lower bound timestamp
+               $this->output( 'Fetching page ratings between now and ' . date( 'Y-m-d H:i:s', strtotime( $ts )) . "...\n" );
+               $res = $this->fetchRatingsNewerThanTs( $ts );
+               $this->output( "Done.\n" );
+
+               // assign the rating data to our data structure
+               $this->output( "Assigning fetched ratings to internal data structure ...\n" );
+               foreach ( $res as $row ) {
+                       // fetch the page from the page store; objects are handles, so a
+                       // plain assignment is enough for changes made through $page to be
+                       // kept in $pages (getPage() does not return by reference, so no =&)
+                       $page = $pages->getPage( $row->aa_page_id );
+
+                       // determine the unique hash for a given rating set (page rev + user identifying info)
+                       $rating_hash = $row->aa_revision . "|" . $row->aa_user_text . "|" . $row->aa_user_anon_token;
+
+                       // add rating data for this page
+                       $page->addRating( $row->aa_rating_id, $row->aa_rating_value, $rating_hash );
+               }
+               $this->output( "Done.\n" );
+               return $pages;
+       }
+       
+       /**
         * Set $this->timestamp
         * @param int $ts
         */
@@ -216,7 +425,177 @@
                }
                return $this->lowerBoundTimestamp;
        }
+       
+       /**
+        * Add stat type record to article_feedback_stats_types and return its insert id
+        * @param string $stat_type The identifying name of the stat type (eg 'highs_and_lows')
+        */
+       public function addStatType( $stat_type ) {
+               $this->dbw->insert(
+                       'article_feedback_stats_types',
+                       array( 'afst_type' => $stat_type ),
+                       __METHOD__
+               );
+               return $this->dbw->insertId();
+       }
 }
 
+/**
+ * A class to represent a page and data about its ratings
+ */
+class Page {
+       public $page_id;
+       public $problematic; // cached result of determineProblematicStatus(); null until computed
+
+       /**
+        * The number of rating sets recorded for this page
+        * @var int
+        */
+       public $rating_set_count = 0;
+
+       /**
+        * An array of ratings for this page, keyed by rating id; each entry is a list of values
+        * @var array
+        */
+       public $ratings = array();
+
+       /**
+        * An array to hold mean ratings by rating type id
+        * @var array
+        */
+       public $rating_averages = array();
+
+       /**
+        * Mean of all ratings for this page (set by calculateRatingAverages())
+        * @var float
+        */
+       public $overall_average;
+
+       /**
+        * An array of rating set hashes, which are used to identify unique sets of
+        * ratings
+        * @var array
+        */
+       protected $rating_set_hashes = array();
+
+       public function __construct( $page_id ) {
+               if ( !is_numeric( $page_id )) {
+                       throw new Exception( 'Page id must be numeric.' );
+               }
+               $this->page_id = $page_id;
+       }
+
+       /**
+        * Add a new rating for this particular page
+        * @param int $rating_id
+        * @param int $rating_value
+        * @param string $rating_set_hash
+        */
+       public function addRating( $rating_id, $rating_value, $rating_set_hash = null ) {
+               $this->ratings[ $rating_id ][] = $rating_value;
+
+               if ( $rating_set_hash ) {
+                       $this->trackRatingSet( $rating_set_hash );
+               }
+       }
+
+       /**
+        * Keep track of rating sets
+        *
+        * Record when we see a new rating set and increment the set count
+        * @param string $rating_set_hash
+        */
+       protected function trackRatingSet( $rating_set_hash ) {
+               if ( isset( $this->rating_set_hashes[ $rating_set_hash ] )) {
+                       return;
+               }
+
+               $this->rating_set_hashes[ $rating_set_hash ] = 1;
+               $this->rating_set_count += 1;
+       }
+
+       public function calculateRatingAverages() {
+               // determine averages for each rating type (mean of the values recorded per rating id)
+               foreach( $this->ratings as $rating_id => $rating ) {
+                       $rating_sum = array_sum( $rating );
+                       $rating_avg = $rating_sum / count( $rating );
+                       $this->rating_averages[ $rating_id ] = $rating_avg;
+               }
+
+               // determine overall rating average for this page: mean of the per-type averages, 0 if none
+               if ( count( $this->rating_averages )) {
+                       $overall_rating_sum = array_sum( $this->rating_averages );
+                       $overall_rating_average = $overall_rating_sum / count( $this->rating_averages );
+               } else {
+                       $overall_rating_average = 0;
+               }
+               $this->overall_average = $overall_rating_average;
+       }
+
+       /**
+        * Returns whether or not this page is considered problematic
+        * @return bool
+        */
+       public function isProblematic() {
+               if ( !isset( $this->problematic )) {
+                       $this->determineProblematicStatus();
+               }
+               return $this->problematic;
+       }
+
+       /**
+        * Determine whether this article is 'problematic' and store it in $this->problematic
+        *
+        * If a page has one or more rating categories where at least 70% (as a rounded
+        * count) of the ratings are <= 2, it is considered problematic.
+        */
+       public function determineProblematicStatus() {
+               foreach( $this->ratings as $rating_id => $ratings ) {
+                       $count = 0;
+                       foreach ( $ratings as $rating ) {
+                               if ( $rating <= 2 ) {
+                                       $count += 1;
+                               }
+                       }
+
+                       $threshold = round( 0.7 * count( $ratings ));
+                       if ( $count >= $threshold ) {
+                               $this->problematic = true;
+                               return;
+                       }
+               }
+
+               $this->problematic = false;
+       }
+}
+
+/**
+ * A storage class to keep track of Page objects, keyed by page id
+ * 
+ * Iterable on array of pages.
+ */
+class Pages implements IteratorAggregate {
+       /**
+        * An array of Page objects, keyed by page id
+        * @var array
+        */
+       public $pages = array();
+       
+       public function getPage( $page_id ) {
+               if ( !isset( $this->pages[ $page_id ] )) {
+                       $this->addPage( $page_id );
+               }
+               return $this->pages[ $page_id ];
+       }
+       
+       public function addPage( $page_id ) {
+               $this->pages[ $page_id ] = new Page( $page_id );
+       }
+       
+       public function getIterator() {
+               return new ArrayIterator( $this->pages );
+       }
+}
+
 $maintClass = "PopulateAFStatistics";
 require_once( DO_MAINTENANCE );

Added: trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTable.sql
===================================================================
--- trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTable.sql       
                        (rev 0)
+++ trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTable.sql       
2011-06-01 18:32:17 UTC (rev 89277)
@@ -0,0 +1,13 @@
+DROP TABLE IF EXISTS /*_*/article_feedback_stats;
+CREATE TABLE IF NOT EXISTS /*_*/article_feedback_stats (
+       afs_page_id integer unsigned NOT NULL,
+       -- data point to be used for ordering this data (e.g. a page's overall average rating)
+       afs_orderable_data double unsigned NOT NULL,
+       -- json object of stat data (e.g. the per-rating averages)
+       afs_data varbinary(255) NOT NULL,
+       afs_stats_type_id integer unsigned NOT NULL,
+       -- timestamp of insertion job
+       afs_ts binary(14) NOT NULL
+) /*$wgDBTableOptions*/;
+CREATE UNIQUE INDEX /*i*/afs_page_ts_type ON /*_*/article_feedback_stats( afs_page_id, afs_ts, afs_stats_type_id );
+CREATE INDEX /*i*/afs_ts_avg_overall ON /*_*/article_feedback_stats (afs_ts, afs_orderable_data);

Added: trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTypeTable.sql
===================================================================
--- trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTypeTable.sql   
                        (rev 0)
+++ trunk/extensions/ArticleFeedback/sql/AddArticleFeedbackStatsTypeTable.sql   
2011-06-01 18:32:17 UTC (rev 89277)
@@ -0,0 +1,9 @@
+CREATE TABLE IF NOT EXISTS /*_*/article_feedback_stats_types (
+       afst_id integer unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
+       afst_type varbinary(255) NOT NULL
+) /*$wgDBTableOptions*/;
+CREATE UNIQUE INDEX /*i*/afst_type ON /*_*/article_feedback_stats_types( afst_type );
+
+-- Pre-populate table with stat types (same table-prefix marker as the CREATE above)
+INSERT INTO /*_*/article_feedback_stats_types ( afst_type ) VALUES ( 'highs_and_lows' );
+INSERT INTO /*_*/article_feedback_stats_types ( afst_type ) VALUES ( 'problems' );
\ No newline at end of file

Added: 
trunk/extensions/ArticleFeedback/sql/MigrateArticleFeedbackStatsHighsLows.sql
===================================================================
--- 
trunk/extensions/ArticleFeedback/sql/MigrateArticleFeedbackStatsHighsLows.sql   
                            (rev 0)
+++ 
trunk/extensions/ArticleFeedback/sql/MigrateArticleFeedbackStatsHighsLows.sql   
    2011-06-01 18:32:17 UTC (rev 89277)
@@ -0,0 +1,22 @@
+-- migrate data from article_feedback_stats_highs_lows into article_feedback_stats
+INSERT INTO /*_*/article_feedback_stats (
+       afs_page_id,
+       afs_orderable_data,
+       afs_data,
+       afs_ts,
+       afs_stats_type_id
+) 
+SELECT 
+       afshl_page_id,
+       afshl_avg_overall,
+       afshl_avg_ratings,
+       afshl_ts,
+       afst_id
+FROM 
+       /*_*/article_feedback_stats_highs_lows,
+       /*_*/article_feedback_stats_types
+WHERE 
+       /*_*/article_feedback_stats_types.afst_type='highs_and_lows';
+
+-- get rid of article_feedback_stats_highs_lows as it is no longer necessary
+DROP TABLE /*_*/article_feedback_stats_highs_lows;


_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to