Aude has submitted this change and it was merged.

Change subject: (Bug 47125) ChunkCache for speeding up dispatchChanges.
......................................................................


(Bug 47125) ChunkCache for speeding up dispatchChanges.

This allows changes to be cached between dispatcher passes, so the same change
does not have to be re-loaded for every target wiki.

Change-Id: I677d5fe46fcd7cf565443aa581f69e73c28fa940
---
M lib/WikibaseLib.classes.php
M lib/WikibaseLib.hooks.php
M lib/config/WikibaseLib.default.php
M lib/includes/ChangesTable.php
A lib/includes/store/ChunkAccess.php
A lib/includes/store/ChunkCache.php
M lib/maintenance/dispatchChanges.php
A lib/tests/phpunit/store/ChunkCacheTest.php
A lib/tests/phpunit/store/MockChunkAccess.php
9 files changed, 736 insertions(+), 38 deletions(-)

Approvals:
  Aude: Verified; Looks good to me, approved
  jenkins-bot: Checked



diff --git a/lib/WikibaseLib.classes.php b/lib/WikibaseLib.classes.php
index 0d303f7..3c888c8 100644
--- a/lib/WikibaseLib.classes.php
+++ b/lib/WikibaseLib.classes.php
@@ -116,6 +116,8 @@
                'Wikibase\Lib\Serializers\Unserializer' => 
'includes/serializers/Unserializer.php',
 
                // includes/store
+               'Wikibase\ChunkCache' => 'includes/store/ChunkCache.php',
+               'Wikibase\ChunkAccess' => 'includes/store/ChunkAccess.php',
                'Wikibase\EntityLookup' => 'includes/store/EntityLookup.php',
                'Wikibase\PropertyLookup' => 
'includes/store/PropertyLookup.php',
                'Wikibase\SiteLinkCache' => 'includes/store/SiteLinkCache.php',
@@ -151,6 +153,7 @@
                'Wikibase\Lib\Test\Serializers\UnserializerBaseTest' => 
'tests/phpunit/serializers/UnserializerBaseTest.php',
                'Wikibase\Test\MockRepository' => 
'tests/phpunit/MockRepository.php',
                'Wikibase\Test\EntityLookupTest' => 
'tests/phpunit/EntityLookupTest.php',
+               'Wikibase\Test\MockChunkAccess' => 
'tests/phpunit/store/MockChunkAccess.php'
        );
 
        return $classes;
diff --git a/lib/WikibaseLib.hooks.php b/lib/WikibaseLib.hooks.php
index 069a109..435ae16 100644
--- a/lib/WikibaseLib.hooks.php
+++ b/lib/WikibaseLib.hooks.php
@@ -61,6 +61,7 @@
                        'serializers/Serializer',
                        'serializers/SnakSerializer',
 
+                       'store/ChunkCache',
                        'store/SiteLinkLookup',
                        'store/SiteLinkTable',
                        'store/WikiPageEntityLookup',
diff --git a/lib/config/WikibaseLib.default.php 
b/lib/config/WikibaseLib.default.php
index 177ef26..7ec0fa5 100644
--- a/lib/config/WikibaseLib.default.php
+++ b/lib/config/WikibaseLib.default.php
@@ -60,6 +60,7 @@
 $wgWBSettings['localClientDatabases'] = array();
 
 $wgWBSettings['dispatchBatchChunkFactor'] = 3;
+$wgWBSettings['dispatchBatchCacheFactor'] = 3;
 
 $wgWBSettings['changeHandlers'] = array(
        'wikibase-item~add' => 'Wikibase\ItemChange',
diff --git a/lib/includes/ChangesTable.php b/lib/includes/ChangesTable.php
index 54291cb..2184ebb 100644
--- a/lib/includes/ChangesTable.php
+++ b/lib/includes/ChangesTable.php
@@ -29,7 +29,7 @@
  * @licence GNU GPL v2+
  * @author Jeroen De Dauw < [email protected] >
  */
-class ChangesTable extends \ORMTable {
+class ChangesTable extends \ORMTable implements ChunkAccess {
 
        /**
         * Constructor.
@@ -149,4 +149,43 @@
 
                return $values;
        }
+
+       /**
+        * Returns a chunk of Change records, starting at the given change ID.
+        *
+        * @param int $start The change ID to start at
+        * @param int $size  The desired number of Change objects
+        *
+        * @return Change[]
+        */
+       public function loadChunk( $start, $size ) {
+               wfProfileIn( __METHOD__ );
+
+               $changes = $this->selectObjects(
+                       null,
+                       array(
+                               'id >= ' . intval( $start )
+                       ),
+                       array(
+                               'LIMIT' => $size,
+                               'ORDER BY ' => $this->getPrefixedField( 'id' ) 
. ' ASC'
+                       ),
+                       __METHOD__
+               );
+
+               wfProfileOut( __METHOD__ );
+               return $changes;
+       }
+
+       /**
+        * Returns the sequential ID of the given Change.
+        *
+        * @param Change $rec
+        *
+        * @return int
+        */
+       public function getRecordId( $rec ) {
+               /* @var Change $rec */
+               return $rec->getId();
+       }
 }
diff --git a/lib/includes/store/ChunkAccess.php 
b/lib/includes/store/ChunkAccess.php
new file mode 100644
index 0000000..703bad2
--- /dev/null
+++ b/lib/includes/store/ChunkAccess.php
@@ -0,0 +1,60 @@
+<?php
+/**
+ * Interface for DAO objects providing chunked access.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @since 0.4
+ *
+ * @file
+ * @ingroup WikibaseLib
+ *
+ * @licence GNU GPL v2+
+ * @author Daniel Kinzler
+ */
+
+namespace Wikibase;
+
+
+/**
+ * Interface for DAO objects providing chunked access based on sequential 
indexes.
+ * "holes" in the index sequence are acceptable but should not be frequent.
+ *
+ * @package Wikibase
+ */
+interface ChunkAccess {
+
+       /**
+        * Returns a chunk as a list of whatever object is used for data 
records by
+        * the implementing class.
+        *
+        * @param int $start The first ID in the chunk
+        * @param int $size  The desired size of the chunk
+        *
+        * @return array the desired chunk of rows/objects
+        */
+       public function loadChunk( $start, $size );
+
+       /**
+        * Returns the sequential ID of the given data record.
+        *
+        * @param mixed $rec
+        *
+        * @return int
+        */
+       public function getRecordId( $rec );
+
+}
\ No newline at end of file
diff --git a/lib/includes/store/ChunkCache.php 
b/lib/includes/store/ChunkCache.php
new file mode 100644
index 0000000..8edcac5
--- /dev/null
+++ b/lib/includes/store/ChunkCache.php
@@ -0,0 +1,366 @@
+<?php
+/**
+ * Interface for DAO objects providing chunked access.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @since 0.4
+ *
+ * @file
+ * @ingroup WikibaseLib
+ *
+ * @licence GNU GPL v2+
+ * @author Daniel Kinzler
+ */
+
+namespace Wikibase;
+
+
+class ChunkCache implements ChunkAccess {
+
+       /**
+        * @var ChunkAccess
+        */
+       protected $source;
+
+       /**
+        * Array containing cache entries; each entry is an associative array 
with the
+        * following fields:
+        *
+        * - start: the chunk's data (an array of data records)
+        * - next:  the next ID after the records in this chunk
+        * - data:  an array of data records
+        *
+        * The entries array is maintained sorted by the 'start' field of its 
entries.
+        *
+        * @var array
+        */
+       protected $entries = array();
+
+       /**
+        * @var int
+        */
+       protected $size = 0;
+
+       /**
+        * @var int
+        */
+       protected $maxSize;
+
+       /**
+        * @var int
+        */
+       protected $chunkSize;
+
+       /**
+        * @var int
+        */
+       protected $hitCount = 0;
+
+       /**
+        * @var int
+        */
+       protected $missCount = 0;
+
+       /**
+        * modification counter (logical clock)
+        *
+        * @var int
+        */
+       protected $modCount = 0;
+
+       /**
+        * @param ChunkAccess $source    The source to load from
+        * @param int         $chunkSize The size of the chunk to load, if we 
have a choice
+        * @param int         $maxSize   The maximum size this cache can grow to
+        *
+        * @throws MWException
+        */
+       public function __construct( ChunkAccess $source, $chunkSize, $maxSize 
) {
+               $this->source = $source;
+               $this->maxSize = $maxSize;
+               $this->chunkSize = $chunkSize;
+
+               if ( $this->maxSize < $this->chunkSize ) {
+                       throw new MWException( "chunk size must be smaller than 
total max size" );
+               }
+       }
+
+       /**
+        * Finds the position for the given key in the internal entry array.
+        * This is implemented using iterative binary search.
+        *
+        * @note This is only public for testing, there is no need to call this 
function directly.
+        *
+        * @param int $key
+        *
+        * @return int the position if found, or the negative insert position 
minus one, if not.
+        */
+       public function findEntryPosition( $key ) {
+               assert( '$key >= 0' );
+
+               if ( empty( $this->entries ) ) {
+                       return -1;
+               }
+
+               $low = 0;
+               $high = count( $this->entries ) -1;
+
+               $bottom = $this->entries[$low];
+               $top = $this->entries[$high];
+
+               if ( $key < $bottom['start'] ) {
+                       return -1;
+               }
+
+               if ( $key >= $top['next'] ) {
+                       return -$high -2;
+               }
+
+               while ( $low <= $high ) {
+                       assert( '$high >= 0' );
+                       assert( '$low >= 0' );
+
+                       $mid = intval( floor( ( $low + $high ) / 2 ) );
+
+                       $entry = $this->entries[$mid];
+
+                       if ( $key < $entry['start'] ) {
+                               $high = $mid -1;
+                       } else if ( $key >= $entry['next'] ) {
+                               $low = $mid +1;
+                       } else {
+                               return $mid;
+                       }
+               }
+
+               // not found
+               return -$low -1;
+       }
+
+       /**
+        * Returns a chunk as a list of whatever object is used for data 
records by
+        * the implementing class.
+        *
+        * @param int $start The first ID in the chunk
+        * @param int $size  The desired size of the chunk
+        *
+        * @return array the desired chunk of rows/objects
+        */
+       public function loadChunk( $start, $size ) {
+               $result = array();
+               $remaining = $size;
+
+               while ( $remaining > 0 ) {
+                       $maxPos = count( $this->entries ) -1;
+                       $pos = $this->findEntryPosition( $start );
+
+                       if ( $pos >= 0 ) {
+                               // the desired start key is cached
+
+                               $entry = $this->entries[ $pos ];
+                               $this->entries[ $pos ]['touched'] = 
++$this->modCount; // bump
+
+                               $hit = true;
+                       } else {
+                               // the desired start key is not cached
+
+                               $ipos = -$pos -1; // insert position
+
+                               if ( $ipos <= $maxPos && $maxPos >= 0 ) {
+                                       // we are inserting before an existing 
entry, so clip the size.
+
+                                       $next = $this->entries[ $ipos ];
+                                       assert( '$start < $next[\'start\']' );
+
+                                       $partSize = min( $this->chunkSize, 
$next['start'] - $start );
+                               } else {
+                                       // we are inserting after the last 
cache entry, load as much as we can.
+
+                                       $partSize = $this->chunkSize;
+                               }
+
+                               $entry = $this->insertChunk( $start, $partSize, 
$ipos );
+
+                               if ( !$entry ) {
+                                       // nothing could be loaded, perhaps old 
records got pruned?
+                                       // If we are < $maxPos, we could 
advance $start by 1 and try again...
+                                       break;
+                               }
+
+                               $hit = false;
+                       }
+
+                       $offset = $start - $entry['start']; // offset inside 
the cached data
+
+                       $part = array_slice( $entry['data'], $offset, 
$remaining );
+                       $partSize = count( $part );
+                       $result = array_merge( $result, $part );
+
+                       // update start and remaining
+                       $start = $entry['next'];
+                       $remaining -= $partSize;
+
+                       if ( $hit ) {
+                               $this->hitCount += $partSize;
+                       } else {
+                               $this->missCount += $partSize;
+                       }
+               }
+
+               return $result;
+       }
+
+       /**
+        * @param int $start the ID to start loading at
+        * @param int $size the maximum size of the chunk to load
+        * @param int $before insert into the internal entry list before this 
position.
+        *
+        * @return array|bool the cache entry created by inserting the new 
chunk, or false if
+        *         there is no more data to load from the source at the given 
position.
+        *         The cache entry is an associative array containing the 
following keys:
+        *         - start: the key the chunk starts at
+        *         - data:  a list of data records
+        *         - next:  the id the following chunk starts at (or after)
+        *         - touched: (logical) timestamp of the entry's creation 
(taken from $this->modCount)
+        */
+       protected function insertChunk( $start, $size, $before ) {
+               assert( '$start >= 0' );
+               assert( '$size >= 0' );
+               assert( '$before >= 0' );
+
+               $data = $this->source->loadChunk( $start, $size );
+
+               if ( empty( $data ) ) {
+                       return false;
+               }
+
+               $last = end( $data );
+
+               $next = $this->source->getRecordId( $last ) +1;
+
+               reset( $data );
+
+               $entry = array(
+                       'start' => $start,
+                       'data' => $data,
+                       'next' => $next,
+                       'touched' => ++$this->modCount,
+               );
+
+               $this->entries = array_merge(
+                       array_slice( $this->entries, 0, $before ),
+                       array( $entry ),
+                       array_slice( $this->entries, $before )
+               );
+
+               $this->size += count( $data );
+
+               $this->prune();
+
+               return $entry;
+       }
+
+       /**
+        * Removes least recently used chunks until the total size is smaller 
than the max size
+        * specified in the constructor.
+        *
+        * Note that this implementation is rather inefficient for large number 
of chunks.
+        */
+       protected function prune() {
+               if ( $this->size <= $this->maxSize ) {
+                       return;
+               }
+
+               $lru = $this->entries; // copy (PHP is crazy like that)
+               usort( $lru,
+                       function ( $a, $b ) {
+                               return $a['touched'] - $b['touched'];
+                       }
+               );
+
+               while ( $this->size > $this->maxSize && !empty( $this->entries 
) ) {
+                       $entry = array_shift( $lru );
+
+                       $this->dropChunk( $entry['start'] );
+               }
+       }
+
+       /**
+        * Remove the chunk with the given start key from the cache.
+        * Used during pruning.
+        *
+        * @param $startKey
+        *
+        * @return bool
+        */
+       protected function dropChunk( $startKey ) {
+               foreach ( $this->entries as $pos => $entry ) {
+                       if ( $entry['start'] === $startKey ) {
+                               unset( $this->entries[$pos] );
+
+                               // re-index
+                               $this->entries = array_values( $this->entries );
+                               $this->size -= count( $entry['data'] );
+
+                               return true;
+                       }
+               }
+
+               return false;
+       }
+
+       /**
+        * Returns the sequential ID of the given data record.
+        *
+        * @param mixed $rec
+        *
+        * @return int
+        */
+       public function getRecordId( $rec ) {
+               return $this->source->getRecordId( $rec );
+       }
+
+       /**
+        * Returns the current size of the cache.
+        *
+        * @return int
+        */
+       public function getSize() {
+               return $this->size();
+       }
+
+       /**
+        * Resets internal hit/miss statistics
+        */
+       public function resetStarts() {
+               $this->hitCount = 0;
+               $this->missCount = 0;
+       }
+
+       /**
+        * Returns this cache's hit ratio
+        */
+       public function getHitRatio() {
+               $total = $this->hitCount + $this->missCount;
+
+               if ( $total === 0 ) {
+                       return 0;
+               }
+
+               return $this->hitCount / $total;
+       }
+}
\ No newline at end of file
diff --git a/lib/maintenance/dispatchChanges.php 
b/lib/maintenance/dispatchChanges.php
index ac92390..e52d726 100644
--- a/lib/maintenance/dispatchChanges.php
+++ b/lib/maintenance/dispatchChanges.php
@@ -46,6 +46,11 @@
        protected $changesTable;
 
        /**
+        * @var ChunkCache: cache for changes
+        */
+       protected $changesCache;
+
+       /**
         * @var string: the logical name of the repository's database
         */
        protected $repoDB;
@@ -66,6 +71,12 @@
         *           based on $this->batchSize.
         */
        protected $batchChunkFactor;
+
+       /**
+        * @var int: factor used to compute the maximum size of the chunk 
cache. The total cache size is
+        *           $this->batchSize * $this->batchChunkFactor * 
$this->batchCacheFactor
+        */
+       protected $batchCacheFactor;
 
        /**
         * @var int: the number of client update passes to perform before 
exiting.
@@ -146,6 +157,7 @@
                $this->repoDB = Settings::get( 'changesDatabase' );
                $this->clientWikis = Settings::get( 'localClientDatabases' );
                $this->batchChunkFactor = Settings::get( 
'dispatchBatchChunkFactor' );
+               $this->batchCacheFactor = Settings::get( 
'dispatchBatchCacheFactor' );
 
                $this->batchSize = intval( $this->getOption( 'batch-size', 1000 
) );
                $this->maxTime = intval( $this->getOption( 'max-time', 
PHP_INT_MAX ) );
@@ -155,6 +167,10 @@
                $this->lockGraceInterval = intval( $this->getOption( 
'lock-grace-interval', 60 ) );
 
                $this->verbose = $this->getOption( 'verbose', false );
+
+               $cacheChunkSize = $this->batchSize * $this->batchChunkFactor;
+               $cacheSize = $cacheChunkSize * $this->batchCacheFactor;
+               $this->changesCache = new ChunkCache( $this->changesTable, 
$cacheChunkSize, $cacheSize );
 
                // make sure we have a mapping from siteId to database name in 
clientWikis:
                foreach ( $this->clientWikis as $siteID => $dbName ) {
@@ -789,7 +805,7 @@
 
                while ( $batchSize < $this->batchSize ) {
                        // get a chunk of changes
-                       $chunk = $this->selectChanges( $after, $chunkSize );
+                       $chunk = $this->changesCache->loadChunk( $after+1, 
$chunkSize );
 
                        if ( empty( $chunk ) ) {
                                break; // no more changes
@@ -813,44 +829,10 @@
 
                wfProfileOut( __METHOD__ );
 
-               $this->trace( "Got pending changes." );
+               $this->trace( "Got " . count( $batch ) . " pending changes. "
+                       . sprintf( "Cache hit rate is %2d%%", 
$this->changesCache->getHitRatio() * 100 ) );
 
                return array( $batch, $lastIdSeen );
-       }
-
-       /**
-        * Returns a list of Change objects loaded from the changes table.
-        *
-        * The list will have at most $limit entries, all IDs will be greater 
than $after,
-        * and it will be sorted with IDs in ascending order.
-        *
-        * @param int $after: The change ID from which to start
-        * @param int $limit: The maximum number of changes to return
-        *
-        * @return Change[] any changes matching the above criteria.
-        */
-       public function selectChanges( $after, $limit ) {
-               $this->trace( "Selecting $limit changes." );
-
-               wfProfileIn( __METHOD__ );
-
-               $changes = $this->changesTable->selectObjects(
-                       null,
-                       array(
-                               'id > ' . intval( $after )
-                       ),
-                       array(
-                               'LIMIT' => $limit,
-                               'ORDER BY ' => 
$this->changesTable->getPrefixedField( 'id' ) . ' ASC'
-                       ),
-                       __METHOD__
-               );
-
-               wfProfileOut( __METHOD__ );
-
-               $this->trace( "Selected $limit changes." );
-
-               return $changes;
        }
 
        /**
diff --git a/lib/tests/phpunit/store/ChunkCacheTest.php 
b/lib/tests/phpunit/store/ChunkCacheTest.php
new file mode 100644
index 0000000..8a34687
--- /dev/null
+++ b/lib/tests/phpunit/store/ChunkCacheTest.php
@@ -0,0 +1,148 @@
+<?php
+namespace Wikibase\Test;
+
+use Wikibase\ChunkCache;
+
+/**
+ * Tests for the Wikibase\ChunkCache class.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @since 0.4
+ *
+ * @file
+ * @ingroup WikibaseLib
+ * @ingroup Test
+ *
+ * @group WikibaseLib
+ *
+ * @licence GNU GPL v2+
+ * @author Daniel Kinzler
+ */
+
+class ChunkCacheTest extends \MediaWikiTestCase {
+
+       protected static function getTestData() {
+               static $data = array();
+
+               if ( empty( $data ) ) {
+                       for ( $i = 0; $i < 100; $i++ ) {
+                               $data[$i] = strval( $i );
+                       }
+               }
+
+               return $data;
+       }
+
+       protected static function makeCacheAction( $start, $length, $info ) {
+               $data = self::getTestData();
+
+               return array(
+                       'start' => $start,
+                       'length' => $length,
+                       'expected' => array_slice( $data, $start, $length ),
+                       'info' => $info
+               );
+       }
+
+       public static function provideLoadChunk() {
+               return array(
+                       array( // #0: basic loading
+                               10,  // chunkSize
+                               50, // maxSize
+                               array(
+                                       self::makeCacheAction(  0, 4, 'start at 
the start' ),
+                                       self::makeCacheAction( 10, 4, 'start at 
ten' ),
+                                       self::makeCacheAction( 98, 5, 'exceed 
end' ),
+                               )
+                       ),
+
+                       array( // #1: matching & loading
+                               10,  // chunkSize
+                               50, // maxSize
+                               array(
+                                       self::makeCacheAction( 20, 4, 'start in 
the middle' ),
+
+                                       self::makeCacheAction( 16, 4, 'fit 
block before' ),
+                                       self::makeCacheAction( 24, 4, 'fit 
block after' ),
+
+                                       self::makeCacheAction( 14, 4, 'overlap 
block before' ),
+                                       self::makeCacheAction( 26, 4, 'overlap 
block after' ),
+
+                                       self::makeCacheAction(  7, 4, 'detached 
block before' ),
+                                       self::makeCacheAction( 33, 4, 'detached 
block after' ),
+
+                                       self::makeCacheAction( 21, 2, 'single 
chunk match' ),
+                                       self::makeCacheAction( 18, 8, 'multi 
chunk match' ),
+                               )
+                       ),
+
+                       array( // #2: pruning
+                               3, // chunkSize
+                               7, // maxSize
+                               array(
+                                       self::makeCacheAction( 3, 3, 'first 
chunk fits' ),
+                                       self::makeCacheAction( 0, 3, 'second 
chunk fits' ),
+                                       self::makeCacheAction( 2, 4, 'third 
chunk is a hit' ),
+                                       self::makeCacheAction( 16, 4, 'fourth 
chunk triggers prune' ),
+                                       self::makeCacheAction( 22, 4, 'fifth 
chunk triggers prune' ),
+                                       self::makeCacheAction( 26, 4, 'sixth 
chunk triggers prune' ),
+                               )
+                       ),
+
+               );
+       }
+
+       /**
+        * @dataProvider provideLoadChunk
+        */
+       public function testLoadChunk( $chunkSize, $maxSize, $sequence ) {
+               $data = self::getTestData();
+
+               $store = new MockChunkAccess( $data );
+               $cache = new ChunkCache( $store, $chunkSize, $maxSize );
+
+               foreach ( $sequence as $action ) {
+                       $start = $action['start'];
+                       $length = $action['length'];
+                       $expected = $action['expected'];
+                       $info = $action['info'];
+
+                       $chunk = $cache->loadChunk( $start, $length );
+                       $this->assertEquals( $expected, $chunk, $info );
+               }
+       }
+
+       /**
+        * Fuzz test for discovering unexpected issues
+        */
+       public function testFuzz() {
+               $data = self::getTestData();
+
+               $store = new MockChunkAccess( $data );
+               $cache = new ChunkCache( $store, 10, 50 );
+
+               for ( $i = 0; $i < 100; $i++ ) {
+                       $start = mt_rand( 0, 110 );
+                       $length = mt_rand( 1, 20 );
+                       $expected =  array_slice( $data, $start, $length );
+                       $info = "fuzz: start $start, len $length";
+
+                       $chunk = $cache->loadChunk( $start, $length );
+                       $this->assertEquals( $expected, $chunk, $info );
+               }
+       }
+}
\ No newline at end of file
diff --git a/lib/tests/phpunit/store/MockChunkAccess.php 
b/lib/tests/phpunit/store/MockChunkAccess.php
new file mode 100644
index 0000000..5bdc0c9
--- /dev/null
+++ b/lib/tests/phpunit/store/MockChunkAccess.php
@@ -0,0 +1,98 @@
+<?php
+/**
+ * Mock implementation of the ChunkAccess interface
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @since 0.4
+ *
+ * @file
+ * @ingroup WikibaseLib
+ *
+ * @licence GNU GPL v2+
+ * @author Daniel Kinzler
+ */
+
+namespace Wikibase\Test;
+
+use Wikibase\ChunkAccess;
+
+class MockChunkAccess implements ChunkAccess {
+
+       protected $data;
+
+       public function __construct( $data ) {
+               $this->data = $data;
+       }
+
+       /**
+        * Returns a chunk as a list of whatever object is used for data 
records by
+        * the implementing class.
+        *
+        * The present implementation is quite inefficient at O(n).
+        *
+        * @param int $start The first ID in the chunk
+        * @param int $size  The desired size of the chunk
+        *
+        * @return array the desired chunk of rows/objects
+        */
+       public function loadChunk( $start, $size ) {
+               reset( $this->data );
+               do {
+                       $rec = current( $this->data );
+
+                       if ( $rec === false ) {
+                               break;
+                       }
+
+                       $id = $this->getRecordId( $rec );
+
+                       if ( $id >= $start ) {
+                               break;
+                       }
+               } while ( next( $this->data ) );
+
+               $c = 0;
+               $chunk = array();
+               do {
+                       if ( $c >= $size ) {
+                               break;
+                       }
+
+                       $rec = current( $this->data );
+
+                       if ( $rec === false ) {
+                               break;
+                       }
+
+                       $chunk[] = $rec;
+                       $c++;
+               } while( next( $this->data ) );
+
+               return $chunk;
+       }
+
+       /**
+        * Returns the sequential ID of the given data record.
+        *
+        * @param mixed $rec
+        *
+        * @return int
+        */
+       public function getRecordId( $rec ) {
+               return intval( $rec );
+       }
+}
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/59388
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I677d5fe46fcd7cf565443aa581f69e73c28fa940
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: Aude <[email protected]>
Gerrit-Reviewer: Jeroen De Dauw <[email protected]>
Gerrit-Reviewer: Tobias Gritschacher <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to