EBernhardson (WMF) has submitted this change and it was merged. Change subject: Separate row compact/expand into class ......................................................................
Separate row compact/expand into class The code for applying 'shallow' indexing was too specific, this refactor should be simpler and easier to follow. Additionally should be able to implement something following this interface that realizes the topic list join so we get the real objects and not just ids. Change-Id: I14e5d392f834efc5caf0762a9fc830585a185476 --- M container.php M includes/Block/Summary.php M includes/Data/MultiDimArray.php M includes/Data/ObjectManager.php M includes/Data/RevisionStorage.php 5 files changed, 151 insertions(+), 117 deletions(-) Approvals: EBernhardson (WMF): Verified; Looks good to me, approved diff --git a/container.php b/container.php index 4bd0e1f..6b4bddb 100644 --- a/container.php +++ b/container.php @@ -111,19 +111,20 @@ $mapper = BasicObjectMapper::model( 'Flow\\Model\\Summary' ); $storage = new SummaryRevisionStorage( $c['db.factory'], $wgFlowExternalStore ); + $pk = new UniqueFeatureIndex( + $cache, $storage, + 'flow_summary:pk', array( 'rev_id' ) + ); $workflowIndexOptions = array( 'sort' => 'rev_id', 'order' => 'DESC', - //'shallow' => array( 'rev_id' ), + 'shallow' => $pk, 'create' => function( array $row ) { return $row['rev_parent_id'] === null; }, ); $indexes = array( - new UniqueFeatureIndex( - $cache, $storage, - 'flow_summary:pk', array( 'rev_id' ) - ), + $pk, new TopKIndex( $cache, $storage, 'flow_summary:workflow', array( 'summary_workflow_id' ), @@ -140,6 +141,10 @@ } ); // List of topic workflows and their owning discussion workflow +// TODO: This could use similar to ShallowCompactor to +// get the objects directly instead of just returning ids. +// Would also need object mapper adjustments to return array +// of two objects. $c['storage.topic_list'] = $c->share( function( $c ) { $cache = $c['memcache.buffered']; $mapper = BasicObjectMapper::model( 'Flow\\Model\\TopicListEntry' ); @@ -170,15 +175,16 @@ $treeRepo = $c['repository.tree']; $mapper = BasicObjectMapper::model( 'Flow\\Model\\PostRevision' ); $storage = new PostRevisionStorage( $c['db.factory'], $wgFlowExternalStore, $treeRepo ); + $pk = new UniqueFeatureIndex( $cache, $storage, 'flow_revision:pk', array( 'rev_id' ) ); $indexes = array( - new UniqueFeatureIndex( $cache, $storage, 'flow_revision:pk', array( 'rev_id' ) ), + $pk, new TopKIndex( $cache, $storage, 'flow_revision:descendant', array( 'tree_rev_descendant_id' ), array( 'limit' => 100, 'sort' => 'rev_id', 'order' => 'DESC', - //'shallow' => array( 'rev_id' ), + 'shallow' => $pk, 'create' => function( array $row ) { // return true to create instead of merge index return $row['rev_parent_id'] === null; @@ -190,7 +196,7 @@ 'limit' => 1, 'sort' => 'rev_id', 'order' => 'DESC', - //'shallow' => array( 'rev_id' ), + 'shallow' => $pk, 'create' => function( array $row ) { return $row['rev_parent_id'] === null; }, diff --git a/includes/Block/Summary.php b/includes/Block/Summary.php index f2c62bf..2f6b785 100644 --- a/includes/Block/Summary.php +++ b/includes/Block/Summary.php @@ -70,9 +70,6 @@ public function commit() { switch( $this->action ) { case 'edit-summary': - if ( $this->summary ) { - $summary = $this- - } $this->storage->put( $this->summary ); break; diff --git a/includes/Data/MultiDimArray.php b/includes/Data/MultiDimArray.php index bde44e7..3a89928 100644 --- a/includes/Data/MultiDimArray.php +++ b/includes/Data/MultiDimArray.php @@ -137,8 +137,10 @@ foreach ( $order as $position => $query ) { if ( $dimensions > 1 ) { $final[$position] = self::sortResult( $query, $result, $dimensions - 1 ); - } elseif ( !isset( $result[$query] ) ) { + } elseif ( isset( $result[$query] ) ) { $final[$position] = $result[$query]; + } else { + $final[$position] = null; } } return $final; diff --git a/includes/Data/ObjectManager.php b/includes/Data/ObjectManager.php index 3446cbe..8a26128 100644 --- a/includes/Data/ObjectManager.php +++ b/includes/Data/ObjectManager.php @@ -81,6 +81,18 @@ function canAnswer( array $keys, array $options ); } +// Compact rows before writing to memcache, expand when receiving back +// Still returns arrays, just removes unneccessary values +interface Compactor { + // Return only the values in $row that will be written to the cache + public function compactRow( array $row ); + // perform self::compactRow against an array of rows + public function compactRows( array $rows ); + // Repopulate BagOStuff::multiGet results with any values removed in self::compactRow + public function expandCacheResult( array $cached, array $keyToQuery ); +} + + // A little glue code so you dont need to use the individual manager for each class // Can be made more specific once the interfaces settle down class ManagerGroup { @@ -575,6 +587,13 @@ */ abstract class FeatureIndex implements Index { + protected $cache; + protected $storage; + protected $prefix; + protected $rowCompactor; + protected $indexed; + protected $indexedOrdered; + abstract public function getLimit(); abstract public function queryOptions(); abstract public function limitIndexSize( array $values ); @@ -591,6 +610,7 @@ $this->cache = $cache; $this->storage = $storage; $this->prefix = $prefix; + $this->rowCompactor = new FeatureCompactor( $indexedColumns ); $this->indexed = $indexedColumns; // sort this and ksort in self::cacheKey to always have cache key // fields in same order @@ -629,7 +649,7 @@ if ( !$indexed ) { throw new \MWException( 'Unindexable row: ' .json_encode( $new ) ); } - $this->addToIndex( $indexed, $this->compactRow( $new ) ); + $this->addToIndex( $indexed, $this->rowCompactor->compactRow( $new ) ); } public function onAfterUpdate( $object, array $old, array $new ) { @@ -644,8 +664,8 @@ if ( !$newIndexed ) { throw new \MWException( 'Unindexable row: ' .json_encode( $newIndexed ) ); } - $this->removeFromIndex( $oldIndexed, $this->compactRow( $old ) ); - $this->addToIndex( $newIndexed, $this->compactRow( $new ) ); + $this->removeFromIndex( $oldIndexed, $this->rowCompactor->compactRow( $old ) ); + $this->addToIndex( $newIndexed, $this->rowCompactor->compactRow( $new ) ); } public function onAfterRemove( $object, array $old ) { @@ -653,7 +673,7 @@ if ( !$indexed ) { throw new \MWException( 'Unindexable row: ' .json_encode( $old ) ); } - $this->removeFromIndex( $indexed, $this->compactRow( $old ) ); + $this->removeFromIndex( $indexed, $this->rowCompactor->compactRow( $old ) ); } public function onAfterLoad( $object, array $old ) { @@ -690,8 +710,8 @@ // Retreive from cache $cached = $this->cache->getMulti( array_keys( $keyToIdx ) ); - // expand partial results - foreach( $this->expandCacheResult( $cached, $keyToQuery ) as $key => $rows ) { + // expand partial results and merge into result set + foreach( $this->rowCompactor->expandCacheResult( $cached, $keyToQuery ) as $key => $rows ) { foreach ( $keyToIdx[$key] as $idx ) { $results[$idx] = $rows; unset( $queries[$idx] ); @@ -716,7 +736,7 @@ } } } - $this->cache->add( $idxToKey[$idx], $this->compactRows( $rows ) ); + $this->cache->add( $idxToKey[$idx], $this->rowCompactor->compactRows( $rows ) ); $results[$idx] = $rows; unset( $queries[$idx] ); } @@ -747,54 +767,6 @@ } return wfForeignMemcKey( $db, '', $this->prefix, implode( ':', $attributes ) ); - } - - /** - * The indexed values are always available when querying, this strips - * the duplicated data. - */ - protected function compactRow( array $row ) { - foreach ( $this->indexed as $key ) { - unset( $row[$key] ); - } - foreach ( $row as $foo ) { - if ( $foo !== null && !is_scalar( $foo ) ) { - throw new \MWException( 'Attempted to compact row containing objects, must be scalar values: ' . print_r( $foo, true ) ); - } - } - return $row; - } - - protected function compactRows( array $rows ) { - $compacted = array(); - foreach ( $rows as $key => $row ) { - $compacted[$key] = $this->compactRow( $row ); - } - return $compacted; - } - - // Each item in $cached is a result from self::compactRows - // All at once to allow bulking any IO implementing classes may want to do - protected function expandCacheResult( array $cached, array $keyToQuery ) { - foreach ( $cached as $key => $rows ) { - $expanded = array(); - $query = $keyToQuery[$key]; - foreach ( $query as $foo ) { - if ( $foo !== null && !is_scalar( $foo ) ) { - throw new \MWException( 'Query values to merge with cache contains objects, should be scalar values: ' . print_r( $foo, true ) ); - } - } - foreach ( $rows as $k => $row ) { - foreach ( $row as $foo ) { - if ( $foo !== null && !is_scalar( $foo ) ) { - throw new \MWException( 'Result from cache contains objects, should be scalar values: ' . print_r( $foo, true ) ); - } - } - $cached[$key][$k] = $row + $query; - } - } - - return $cached; } } @@ -836,9 +808,6 @@ if ( empty( $options['sort'] ) ) { throw new \InvalidArgumentException( 'TopKIndex must be sorted' ); } - if ( isset( $options['shallow'] ) && !$options['shallow'] instanceof UniqueFeatureIndex ) { - throw new \InvalidArgumentException( "The 'shallow' option must be either null or UniqueFeatureIndex." ); - } parent::__construct( $cache, $storage, $prefix, $indexed ); @@ -850,6 +819,10 @@ ); if ( !is_array( $this->options['sort'] ) ) { $this->options['sort'] = array( $this->options['sort'] ); + } + if ( $this->options['shallow'] ) { + // TODO: perhaps we shouldn't even get a shallow option, just receive a proper compactor in FeatureIndex::__construct + $this->rowCompactor = new ShallowCompactor( $this->rowCompactor, $this->options['shallow'], $this->options['sort'] ); } } @@ -929,52 +902,6 @@ return $options; } - protected function compactRow( array $row ) { - // An OO solution wouldn't check this, it would have a compact/expand - // implementation - if ( isset( $this->options['shallow'] ) ) { - $keys = array_merge( - $this->options['shallow']->getPrimaryKeyColumns(), - $this->options['sort'] - ); - $extra = array_diff( array_keys( $row ), $keys ); - foreach ( $extra as $key ) { - unset( $row[$key] ); - } - } - return parent::compactRow( $row ); - } - - - protected function expandCacheResult( array $cached, array $keyToQuery ) { - $cached = parent::expandCacheResult( $cached, $keyToQuery ); - if ( $cached && $this->options['shallow'] ) { - return $this->expandShallowResult( $cached ); - } - return $cached; - } - - protected function expandShallowResult( array $results ) { - // Allows us to flatten $results into a single $query array, then - // rebuild final return value in same structure and order as $results. - $duplicator = new ResultDuplicator( $this->options['shallow']->getPrimaryKeyColumns(), 2 ); - foreach ( $results as $i => $rows ) { - foreach ( $rows as $j => $row ) { - $duplicator->add( $row, array( $i, $j ) ); - } - } - - $innerResult = $this->options['shallow']->findMulti( $duplicator->getUniqueQueries() ); - foreach ( $innerResult as $rows ) { - // __construct guaranteed the shallow backing index is a unique, so $first is only result - $first = reset( $rows ); - $duplicator->merge( $first, $first ); - } - - // TODO: fix whatever bug doesnt allow this to be strict - return $duplicator->getResult(); - } - public function canAnswer( array $keys, array $options ) { if ( !parent::canAnswer( $keys, $options ) ) { return false; @@ -988,6 +915,108 @@ } +class FeatureCompactor implements Compactor { + public function __construct( array $indexedColumns ) { + $this->indexed = $indexedColumns; + } + + /** + * The indexed values are always available when querying, this strips + * the duplicated data. + */ + public function compactRow( array $row ) { + foreach ( $this->indexed as $key ) { + unset( $row[$key] ); + } + foreach ( $row as $foo ) { + if ( $foo !== null && !is_scalar( $foo ) ) { + throw new \MWException( 'Attempted to compact row containing objects, must be scalar values: ' . print_r( $foo, true ) ); + } + } + return $row; + } + + public function compactRows( array $rows ) { + return array_map( array( $this, 'compactRow' ), $rows ); + } + + /** + * The $cached array is three dimensional. Each top level key is a cache key + * and contains an array of rows. Each row is an array representing a single data model. + * + * $cached = array( $cacheKey => array( array( 'rev_id' => 123, ... ), ... ), ... ) + * + * The $keyToQuery array maps from cache key to the values that were used to build the cache key. + * These values are re-added to the results found in memcache. + * + * @param array $cached Array of results from BagOStuff::multiGet each containg a list of rows + * @param array $keyToQuery Map from key in $cached to the values used to generate that key + * @return array The $cached array with the queried values merged in + */ + public function expandCacheResult( array $cached, array $keyToQuery ) { + foreach ( $cached as $key => $rows ) { + $query = $keyToQuery[$key]; + foreach ( $query as $foo ) { + if ( $foo !== null && !is_scalar( $foo ) ) { + throw new \MWException( 'Query values to merge with cache contains objects, should be scalar values: ' . print_r( $foo, true ) ); + } + } + foreach ( $rows as $k => $row ) { + foreach ( $row as $foo ) { + if ( $foo !== null && !is_scalar( $foo ) ) { + throw new \MWException( 'Result from cache contains objects, should be scalar values: ' . print_r( $foo, true ) ); + } + } + $cached[$key][$k] += $query; + } + } + + return $cached; + } +} + +class ShallowCompactor implements Compactor { + public function __construct( Compactor $inner, UniqueFeatureIndex $shallow, array $sortedColumns ) { + $this->inner = $inner; + $this->shallow = $shallow; + $this->sort = $sortedColumns; + } + + public function compactRow( array $row ) { + $keys = array_merge( $this->shallow->getPrimaryKeyColumns(), $this->sort ); + $extra = array_diff( array_keys( $row ), $keys ); + foreach ( $extra as $key ) { + unset( $row[$key] ); + } + return $this->inner->compactRow( $row ); + } + + public function compactRows( array $rows ) { + return array_map( array( $this, 'compactRow' ), $rows ); + } + + public function expandCacheResult( array $cached, array $keyToQuery ) { + $results = $this->inner->expandCacheResult( $cached, $keyToQuery ); + // Allows us to flatten $results into a single $query array, then + // rebuild final return value in same structure and order as $results. + $duplicator = new ResultDuplicator( $this->shallow->getPrimaryKeyColumns(), 2 ); + foreach ( $results as $i => $rows ) { + foreach ( $rows as $j => $row ) { + $duplicator->add( $row, array( $i, $j ) ); + } + } + + $innerResult = $this->shallow->findMulti( $duplicator->getUniqueQueries() ); + foreach ( $innerResult as $rows ) { + // __construct guaranteed the shallow backing index is a unique, so $first is only result + $first = reset( $rows ); + $duplicator->merge( $first, $first ); + } + + return $duplicator->getResult( /* strict = */ true ); + } +} + /** * Performs the equivilent of an SQL ORDER BY c1 ASC, c2 ASC... * Always sorts in ascending order. array_reverse to get all descending. diff --git a/includes/Data/RevisionStorage.php b/includes/Data/RevisionStorage.php index 4fc53e1..150984d 100644 --- a/includes/Data/RevisionStorage.php +++ b/includes/Data/RevisionStorage.php @@ -111,7 +111,7 @@ !isset( $options['OFFSET'] ) && count( $queriedKeys ) === 1 && in_array( reset( $queriedKeys ), array( 'rev_id', $this->joinField() ) ) && - count( $options['ORDER BY'] ) === 1 && + isset( $options['ORDER BY'] ) && count( $options['ORDER BY'] ) === 1 && in_array( reset( $options['ORDER BY'] ), array( 'rev_id DESC', "{$this->joinField()} DESC" ) ) ) { return $this->findMostRecent( $queries ); -- To view, visit https://gerrit.wikimedia.org/r/78203 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I14e5d392f834efc5caf0762a9fc830585a185476 Gerrit-PatchSet: 2 Gerrit-Project: mediawiki/extensions/Flow Gerrit-Branch: master Gerrit-Owner: EBernhardson (WMF) <ebernhard...@wikimedia.org> Gerrit-Reviewer: EBernhardson (WMF) <ebernhard...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits