https://www.mediawiki.org/wiki/Special:Code/MediaWiki/112494

Revision: 112494
Author:   maxsem
Date:     2012-02-27 17:38:12 +0000 (Mon, 27 Feb 2012)
Log Message:
-----------
A quick script to collect extract length stats

Added Paths:
-----------
    trunk/extensions/MobileFrontend/collectExtractStats.php

Added: trunk/extensions/MobileFrontend/collectExtractStats.php
===================================================================
--- trunk/extensions/MobileFrontend/collectExtractStats.php     (rev 0)
+++ trunk/extensions/MobileFrontend/collectExtractStats.php     2012-02-27 17:38:12 UTC (rev 112494)
@@ -0,0 +1,101 @@
+<?php
+
+$IP = getenv( 'MW_INSTALL_PATH' );
+if ( $IP === false ) {
+       $IP = dirname( __FILE__ ) . '/../..';
+}
+require_once( "$IP/maintenance/Maintenance.php" );
+
+/**
+ * Developer maintenance script that samples non-redirect pages of the main
+ * namespace and reports the average length of their HTML and plain-text
+ * excerpts, as returned by the API's prop=excerpt module.
+ */
+class CollectStats extends Maintenance {
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription = 'Developer script that calculates average full extract size';
+               $this->addArg( 'rate', 'Check excerpt length for one page of this number', true );
+       }
+
+       /**
+        * Walk the page table in page_id order, measure one page out of every
+        * "rate" pages and print the resulting averages.
+        */
+       public function execute() {
+               if ( !class_exists( 'ApiQueryExcerpt' ) ) {
+                       $this->error( 'This script requires MobileFrontend to be properly installed', true );
+               }
+               // The rate is used as a modulus below, so reject zero and non-numeric input
+               $rate = intval( $this->getArg( 0 ) );
+               if ( $rate < 1 ) {
+                       $this->error( 'rate must be a positive integer', true );
+               }
+               $ns = array( NS_MAIN );
+               $pageId = 0;
+               $dbr = $this->getDB( DB_SLAVE );
+               $total = 0;
+               $calls = 0;
+               $html = 0;
+               $plain = 0;
+               do {
+                       // Fetch the next batch of up to 500 pages above the last page_id seen
+                       $res = $dbr->select( 'page',
+                               array( 'page_id', 'page_namespace', 'page_title' ),
+                               array( 'page_namespace' => $ns, 'page_is_redirect' => 0, "page_id > $pageId" ),
+                               __METHOD__,
+                               array( 'ORDER BY' => 'page_id', 'LIMIT' => 500 )
+                       );
+                       foreach ( $res as $row ) {
+                               // Cast so that only an integer is interpolated into the next query
+                               $pageId = intval( $row->page_id );
+                               if ( $total++ % $rate == 0 ) {
+                                       $title = Title::newFromRow( $row );
+                                       $html += $this->getLength( $title, false );
+                                       $plain += $this->getLength( $title, true );
+                                       // Progress indicator: print the sample count every ten samples
+                                       if ( ++$calls % 10 == 0 ) {
+                                               $this->output( "$calls\n" );
+                                       }
+                               }
+                       }
+               } while( $res->numRows() > 0 );
+
+               $this->output( "Total pages processed: $calls\n" );
+               if ( $calls > 0 ) {
+                       $html /= $calls;
+                       $plain /= $calls;
+                       $this->output( "   Average HTML length: $html\n   Average plaintext length: $plain\n" );
+               }
+       }
+
+       /**
+        * Fetch the excerpt of a page through an internal API request and measure it.
+        *
+        * @param $title Title: page whose excerpt is requested
+        * @param $plainText Boolean: if true, the explaintext parameter is added to the request
+        * @return Integer: excerpt length in bytes, or 0 when the result holds no excerpt
+        */
+       private function getLength( Title $title, $plainText ) {
+               $params = array(
+                       'action' => 'query',
+                       'prop' => 'excerpt',
+                       'titles' => $title->getPrefixedText(),
+               );
+               if ( $plainText ) {
+                       $params['explaintext'] = 1;
+               }
+               $main = new ApiMain( new FauxRequest( $params ) );
+               $main->execute();
+               $data = $main->getResultData();
+               $id = $title->getArticleID();
+               // A page without an excerpt counts as zero length instead of raising an undefined-index notice
+               if ( !isset( $data['query']['pages'][$id]['excerpt'][0] ) ) {
+                       return 0;
+               }
+               return strlen( $data['query']['pages'][$id]['excerpt'][0] );
+       }
+}
+
+$maintClass = 'CollectStats';
+require_once( DO_MAINTENANCE );
\ No newline at end of file


Property changes on: trunk/extensions/MobileFrontend/collectExtractStats.php
___________________________________________________________________
Added: svn:eol-style
   + native


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to