EBernhardson (WMF) has uploaded a new change for review. https://gerrit.wikimedia.org/r/78203
Change subject: Separate row compact/expand into class ...................................................................... Separate row compact/expand into class The code for applying 'shallow' indexing was too specific, this refactor should be simpler and easier to follow, along with making compaction directly testable independantly. Change-Id: I14e5d392f834efc5caf0762a9fc830585a185476 --- M container.php M includes/Block/Summary.php M includes/Data/MultiDimArray.php M includes/Data/ObjectManager.php M includes/Data/RevisionStorage.php 5 files changed, 156 insertions(+), 117 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Flow refs/changes/03/78203/1 diff --git a/container.php b/container.php index 4bd0e1f..6b4bddb 100644 --- a/container.php +++ b/container.php @@ -111,19 +111,20 @@ $mapper = BasicObjectMapper::model( 'Flow\\Model\\Summary' ); $storage = new SummaryRevisionStorage( $c['db.factory'], $wgFlowExternalStore ); + $pk = new UniqueFeatureIndex( + $cache, $storage, + 'flow_summary:pk', array( 'rev_id' ) + ); $workflowIndexOptions = array( 'sort' => 'rev_id', 'order' => 'DESC', - //'shallow' => array( 'rev_id' ), + 'shallow' => $pk, 'create' => function( array $row ) { return $row['rev_parent_id'] === null; }, ); $indexes = array( - new UniqueFeatureIndex( - $cache, $storage, - 'flow_summary:pk', array( 'rev_id' ) - ), + $pk, new TopKIndex( $cache, $storage, 'flow_summary:workflow', array( 'summary_workflow_id' ), @@ -140,6 +141,10 @@ } ); // List of topic workflows and their owning discussion workflow +// TODO: This could use similar to ShallowCompactor to +// get the objects directly instead of just returning ids. +// Would also need object mapper adjustments to return array +// of two objects. $c['storage.topic_list'] = $c->share( function( $c ) { $cache = $c['memcache.buffered']; $mapper = BasicObjectMapper::model( 'Flow\\Model\\TopicListEntry' ); @@ -170,15 +175,16 @@ $treeRepo = $c['repository.tree']; $mapper = BasicObjectMapper::model( 'Flow\\Model\\PostRevision' ); $storage = new PostRevisionStorage( $c['db.factory'], $wgFlowExternalStore, $treeRepo ); + $pk = new UniqueFeatureIndex( $cache, $storage, 'flow_revision:pk', array( 'rev_id' ) ); $indexes = array( - new UniqueFeatureIndex( $cache, $storage, 'flow_revision:pk', array( 'rev_id' ) ), + $pk, new TopKIndex( $cache, $storage, 'flow_revision:descendant', array( 'tree_rev_descendant_id' ), array( 'limit' => 100, 'sort' => 'rev_id', 'order' => 'DESC', - //'shallow' => array( 'rev_id' ), + 'shallow' => $pk, 'create' => function( array $row ) { // return true to create instead of merge index return $row['rev_parent_id'] === null; @@ -190,7 +196,7 @@ 'limit' => 1, 'sort' => 'rev_id', 'order' => 'DESC', - //'shallow' => array( 'rev_id' ), + 'shallow' => $pk, 'create' => function( array $row ) { return $row['rev_parent_id'] === null; }, diff --git a/includes/Block/Summary.php b/includes/Block/Summary.php index f2c62bf..2f6b785 100644 --- a/includes/Block/Summary.php +++ b/includes/Block/Summary.php @@ -70,9 +70,6 @@ public function commit() { switch( $this->action ) { case 'edit-summary': - if ( $this->summary ) { - $summary = $this- - } $this->storage->put( $this->summary ); break; diff --git a/includes/Data/MultiDimArray.php b/includes/Data/MultiDimArray.php index bde44e7..3a89928 100644 --- a/includes/Data/MultiDimArray.php +++ b/includes/Data/MultiDimArray.php @@ -137,8 +137,10 @@ foreach ( $order as $position => $query ) { if ( $dimensions > 1 ) { $final[$position] = self::sortResult( $query, $result, $dimensions - 1 ); - } elseif ( !isset( $result[$query] ) ) { + } elseif ( isset( $result[$query] ) ) { $final[$position] = $result[$query]; + } else { + $final[$position] = null; } } return $final; diff --git a/includes/Data/ObjectManager.php b/includes/Data/ObjectManager.php index 3446cbe..7623a41 100644 --- a/includes/Data/ObjectManager.php +++ b/includes/Data/ObjectManager.php @@ -81,6 +81,18 @@ function canAnswer( array $keys, array $options ); } +// Compact rows before writing to memcache, expand when receiving back +// Still returns arrays, just removes unneccessary values +interface Compactor { + // Return only the values in $row that will be written to the cache + public function compactRow( array $row ); + // perform self::compactRow against an array of rows + public function compactRows( array $rows ); + // Repopulate BagOStuff::multiGet results with any values removed in self::compactRow + public function expandCacheResult( array $cached, array $keyToQuery ); +} + + // A little glue code so you dont need to use the individual manager for each class // Can be made more specific once the interfaces settle down class ManagerGroup { @@ -575,6 +587,13 @@ */ abstract class FeatureIndex implements Index { + protected $cache; + protected $storage; + protected $prefix; + protected $rowCompactor; + protected $indexed; + protected $indexedOrdered; + abstract public function getLimit(); abstract public function queryOptions(); abstract public function limitIndexSize( array $values ); @@ -591,6 +610,7 @@ $this->cache = $cache; $this->storage = $storage; $this->prefix = $prefix; + $this->rowCompactor = new FeatureCompactor( $indexedColumns ); $this->indexed = $indexedColumns; // sort this and ksort in self::cacheKey to always have cache key // fields in same order @@ -629,7 +649,7 @@ if ( !$indexed ) { throw new \MWException( 'Unindexable row: ' .json_encode( $new ) ); } - $this->addToIndex( $indexed, $this->compactRow( $new ) ); + $this->addToIndex( $indexed, $this->rowCompactor->compactRow( $new ) ); } public function onAfterUpdate( $object, array $old, array $new ) { @@ -644,8 +664,8 @@ if ( !$newIndexed ) { throw new \MWException( 'Unindexable row: ' .json_encode( $newIndexed ) ); } - $this->removeFromIndex( $oldIndexed, $this->compactRow( $old ) ); - $this->addToIndex( $newIndexed, $this->compactRow( $new ) ); + $this->removeFromIndex( $oldIndexed, $this->rowCompactor->compactRow( $old ) ); + $this->addToIndex( $newIndexed, $this->rowCompactor->compactRow( $new ) ); } public function onAfterRemove( $object, array $old ) { @@ -653,7 +673,7 @@ if ( !$indexed ) { throw new \MWException( 'Unindexable row: ' .json_encode( $old ) ); } - $this->removeFromIndex( $indexed, $this->compactRow( $old ) ); + $this->removeFromIndex( $indexed, $this->rowCompactor->compactRow( $old ) ); } public function onAfterLoad( $object, array $old ) { @@ -690,8 +710,8 @@ // Retreive from cache $cached = $this->cache->getMulti( array_keys( $keyToIdx ) ); - // expand partial results - foreach( $this->expandCacheResult( $cached, $keyToQuery ) as $key => $rows ) { + // expand partial results and merge into result set + foreach( $this->rowCompactor->expandCacheResult( $cached, $keyToQuery ) as $key => $rows ) { foreach ( $keyToIdx[$key] as $idx ) { $results[$idx] = $rows; unset( $queries[$idx] ); @@ -716,7 +736,7 @@ } } } - $this->cache->add( $idxToKey[$idx], $this->compactRows( $rows ) ); + $this->cache->add( $idxToKey[$idx], $this->rowCompactor->compactRows( $rows ) ); $results[$idx] = $rows; unset( $queries[$idx] ); } @@ -747,54 +767,6 @@ } return wfForeignMemcKey( $db, '', $this->prefix, implode( ':', $attributes ) ); - } - - /** - * The indexed values are always available when querying, this strips - * the duplicated data. - */ - protected function compactRow( array $row ) { - foreach ( $this->indexed as $key ) { - unset( $row[$key] ); - } - foreach ( $row as $foo ) { - if ( $foo !== null && !is_scalar( $foo ) ) { - throw new \MWException( 'Attempted to compact row containing objects, must be scalar values: ' . print_r( $foo, true ) ); - } - } - return $row; - } - - protected function compactRows( array $rows ) { - $compacted = array(); - foreach ( $rows as $key => $row ) { - $compacted[$key] = $this->compactRow( $row ); - } - return $compacted; - } - - // Each item in $cached is a result from self::compactRows - // All at once to allow bulking any IO implementing classes may want to do - protected function expandCacheResult( array $cached, array $keyToQuery ) { - foreach ( $cached as $key => $rows ) { - $expanded = array(); - $query = $keyToQuery[$key]; - foreach ( $query as $foo ) { - if ( $foo !== null && !is_scalar( $foo ) ) { - throw new \MWException( 'Query values to merge with cache contains objects, should be scalar values: ' . print_r( $foo, true ) ); - } - } - foreach ( $rows as $k => $row ) { - foreach ( $row as $foo ) { - if ( $foo !== null && !is_scalar( $foo ) ) { - throw new \MWException( 'Result from cache contains objects, should be scalar values: ' . print_r( $foo, true ) ); - } - } - $cached[$key][$k] = $row + $query; - } - } - - return $cached; } } @@ -836,9 +808,6 @@ if ( empty( $options['sort'] ) ) { throw new \InvalidArgumentException( 'TopKIndex must be sorted' ); } - if ( isset( $options['shallow'] ) && !$options['shallow'] instanceof UniqueFeatureIndex ) { - throw new \InvalidArgumentException( "The 'shallow' option must be either null or UniqueFeatureIndex." ); - } parent::__construct( $cache, $storage, $prefix, $indexed ); @@ -850,6 +819,10 @@ ); if ( !is_array( $this->options['sort'] ) ) { $this->options['sort'] = array( $this->options['sort'] ); + } + if ( $this->options['shallow'] ) { + // TODO: perhaps we shouldn't even get a shallow option, just receive a proper compactor in FeatureIndex::__construct + $this->rowCompactor = new ShallowCompactor( $this->rowCompactor, $this->options['shallow'], $this->options['sort'] ); } } @@ -929,52 +902,6 @@ return $options; } - protected function compactRow( array $row ) { - // An OO solution wouldn't check this, it would have a compact/expand - // implementation - if ( isset( $this->options['shallow'] ) ) { - $keys = array_merge( - $this->options['shallow']->getPrimaryKeyColumns(), - $this->options['sort'] - ); - $extra = array_diff( array_keys( $row ), $keys ); - foreach ( $extra as $key ) { - unset( $row[$key] ); - } - } - return parent::compactRow( $row ); - } - - - protected function expandCacheResult( array $cached, array $keyToQuery ) { - $cached = parent::expandCacheResult( $cached, $keyToQuery ); - if ( $cached && $this->options['shallow'] ) { - return $this->expandShallowResult( $cached ); - } - return $cached; - } - - protected function expandShallowResult( array $results ) { - // Allows us to flatten $results into a single $query array, then - // rebuild final return value in same structure and order as $results. - $duplicator = new ResultDuplicator( $this->options['shallow']->getPrimaryKeyColumns(), 2 ); - foreach ( $results as $i => $rows ) { - foreach ( $rows as $j => $row ) { - $duplicator->add( $row, array( $i, $j ) ); - } - } - - $innerResult = $this->options['shallow']->findMulti( $duplicator->getUniqueQueries() ); - foreach ( $innerResult as $rows ) { - // __construct guaranteed the shallow backing index is a unique, so $first is only result - $first = reset( $rows ); - $duplicator->merge( $first, $first ); - } - - // TODO: fix whatever bug doesnt allow this to be strict - return $duplicator->getResult(); - } - public function canAnswer( array $keys, array $options ) { if ( !parent::canAnswer( $keys, $options ) ) { return false; @@ -988,6 +915,113 @@ } +class FeatureCompactor implements Compactor { + public function __construct( array $indexedColumns ) { + $this->indexed = $indexedColumns; + } + + /** + * The indexed values are always available when querying, this strips + * the duplicated data. + */ + public function compactRow( array $row ) { + foreach ( $this->indexed as $key ) { + unset( $row[$key] ); + } + foreach ( $row as $foo ) { + if ( $foo !== null && !is_scalar( $foo ) ) { + throw new \MWException( 'Attempted to compact row containing objects, must be scalar values: ' . print_r( $foo, true ) ); + } + } + return $row; + } + + public function compactRows( array $rows ) { + return array_map( array( $this, 'compactRow' ), $rows ); + } + + /** + * The $cached array is three dimensional. Each top level key is a cache key + * and contains an array of rows. Each row is an array representing a single data model. + * + * $cached = array( $cacheKey => array( array( 'rev_id' => 123, ... ), ... ), ... ) + * + * The $keyToQuery array maps from cache key to the values that were used to build the cache key. + * These values are re-added to the results found in memcache. + * + * @param array $cached Array of results from BagOStuff::multiGet each containg a list of rows + * @param array $keyToQuery Map from key in $cached to the values used to generate that key + * @return array The $cached array with the queried values merged in + */ + public function expandCacheResult( array $cached, array $keyToQuery ) { + foreach ( $cached as $key => $rows ) { + $query = $keyToQuery[$key]; + foreach ( $query as $foo ) { + if ( $foo !== null && !is_scalar( $foo ) ) { + throw new \MWException( 'Query values to merge with cache contains objects, should be scalar values: ' . print_r( $foo, true ) ); + } + } + foreach ( $rows as $k => $row ) { + foreach ( $row as $foo ) { + if ( $foo !== null && !is_scalar( $foo ) ) { + throw new \MWException( 'Result from cache contains objects, should be scalar values: ' . print_r( $foo, true ) ); + } + } + $cached[$key][$k] += $query; + } + } + + return $cached; + } +} + +class ShallowCompactor implements Compactor { + public function __construct( Compactor $inner, UniqueFeatureIndex $shallow, array $sortedColumns ) { + $this->inner = $inner; + $this->shallow = $shallow; + $this->sort = $sortedColumns; + } + + public function compactRow( array $row ) { + $keys = array_merge( $this->shallow->getPrimaryKeyColumns(), $this->sort ); + $extra = array_diff( array_keys( $row ), $keys ); + foreach ( $extra as $key ) { + unset( $row[$key] ); + } + return $this->inner->compactRow( $row ); + } + + public function compactRows( array $rows ) { + return array_map( array( $this, 'compactRow' ), $rows ); + } + + public function expandCacheResult( array $cached, array $keyToQuery ) { + return $this->expandShallowResult( + $this->inner->expandCacheResult( $cached, $keyToQuery ) + ); + } + + protected function expandShallowResult( array $results ) { + // Allows us to flatten $results into a single $query array, then + // rebuild final return value in same structure and order as $results. + $duplicator = new ResultDuplicator( $this->shallow->getPrimaryKeyColumns(), 2 ); + foreach ( $results as $i => $rows ) { + foreach ( $rows as $j => $row ) { + $duplicator->add( $row, array( $i, $j ) ); + } + } + + $innerResult = $this->shallow->findMulti( $duplicator->getUniqueQueries() ); + foreach ( $innerResult as $rows ) { + // __construct guaranteed the shallow backing index is a unique, so $first is only result + $first = reset( $rows ); + $duplicator->merge( $first, $first ); + } + + return $duplicator->getResult( /* strict = */ true ); + } +} + /** * Performs the equivilent of an SQL ORDER BY c1 ASC, c2 ASC... * Always sorts in ascending order. array_reverse to get all descending. diff --git a/includes/Data/RevisionStorage.php b/includes/Data/RevisionStorage.php index 4fc53e1..150984d 100644 --- a/includes/Data/RevisionStorage.php +++ b/includes/Data/RevisionStorage.php @@ -111,7 +111,7 @@ !isset( $options['OFFSET'] ) && count( $queriedKeys ) === 1 && in_array( reset( $queriedKeys ), array( 'rev_id', $this->joinField() ) ) && - count( $options['ORDER BY'] ) === 1 && + isset( $options['ORDER BY'] ) && count( $options['ORDER BY'] ) === 1 && in_array( reset( $options['ORDER BY'] ), array( 'rev_id DESC', "{$this->joinField()} DESC" ) ) ) { return $this->findMostRecent( $queries ); -- To view, visit https://gerrit.wikimedia.org/r/78203 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I14e5d392f834efc5caf0762a9fc830585a185476 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Flow Gerrit-Branch: master Gerrit-Owner: EBernhardson (WMF) <ebernhard...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits