EBernhardson (WMF) has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/77468


Change subject: Batch retrieval of ES data with minimal round trips
......................................................................

Batch retrieval of ES data with minimal round trips

Batches ES urls by scheme and host to reduce round trips required to retrieve
many pieces of data.  Takes array of ES urls to retrieve and returns a map
from ES urls to their data.  Errored urls are represented with a boolean false
in the result set.  Initially implemented for ExternalStoreDB, other stores
fall back to serial requests.

Change-Id: If1bef25f57bfe7de32fc6787f553a90bd76e87ea
---
M includes/externalstore/ExternalStore.php
M includes/externalstore/ExternalStoreDB.php
M includes/externalstore/ExternalStoreMedium.php
3 files changed, 149 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/68/77468/1

diff --git a/includes/externalstore/ExternalStore.php 
b/includes/externalstore/ExternalStore.php
index f5119d5..b5139d6 100644
--- a/includes/externalstore/ExternalStore.php
+++ b/includes/externalstore/ExternalStore.php
@@ -91,6 +91,39 @@
        }
 
        /**
+        * Fetch data from multiple URLs with a minimum of round trips
+        *
+        * @param array $urls The URLs of the text to get
+        * @return array Map from url to its data.  Data is either string when 
found
+        *     or false on failure.
+        */
+       public static function batchFetchFromURLs( array $urls ) {
+               // Group the URLs by scheme so that each store is asked only once.
+               $urlsByScheme = array();
+               foreach ( $urls as $url ) {
+                       $scheme = parse_url( $url, PHP_URL_SCHEME );
+                       if ( !$scheme ) {
+                               // No usable scheme: the URL is reported as false below.
+                               continue;
+                       }
+                       $urlsByScheme[$scheme][] = $url;
+               }
+
+               // Hand each group to its store; groups without a store are skipped.
+               $found = array();
+               foreach ( $urlsByScheme as $scheme => $schemeUrls ) {
+                       $store = self::getStoreObject( $scheme );
+                       if ( $store !== false ) {
+                               $found += $store->batchFetchFromURLs( $schemeUrls );
+                       }
+               }
+
+               // Every requested URL that no store returned (invalid, not found,
+               // db dead, etc.) is represented by a boolean false.
+               foreach ( array_diff( $urls, array_keys( $found ) ) as $url ) {
+                       $found[$url] = false;
+               }
+               return $found;
+       }
+
+       /**
         * Store a data item to an external store, identified by a partial URL
         * The protocol part is used to identify the class, the rest is passed 
to the
         * class itself as a parameter.
diff --git a/includes/externalstore/ExternalStoreDB.php 
b/includes/externalstore/ExternalStoreDB.php
index be9c066..33c3448 100644
--- a/includes/externalstore/ExternalStoreDB.php
+++ b/includes/externalstore/ExternalStoreDB.php
@@ -30,7 +30,7 @@
  */
 class ExternalStoreDB extends ExternalStoreMedium {
        /**
-        * The URL returned is of the form of the form DB://cluster/id
+        * The provided URL is in the form of DB://cluster/id
         * or DB://cluster/id/itemid for concatened storage.
         *
         * @see ExternalStoreMedium::fetchFromURL()
@@ -49,6 +49,49 @@
 
                if ( $itemID !== false && $ret !== false ) {
                        return $ret->getItem( $itemID );
+               }
+               return $ret;
+       }
+
+       /**
+        * Fetch data from given external store URLs.
+        * The provided URLs are in the form of DB://cluster/id
+        * or DB://cluster/id/itemid for concatenated storage.
+        *
+        * @param array $urls An array of external store URLs
+        * @return array A map from url to stored content. Failed results
+        *     are not represented.
+        */
+       public function batchFetchFromURLs( array $urls ) {
+               // cluster => blob id => ( url => item id, or false for a plain blob ).
+               // Keying the innermost level by URL collapses duplicate URLs, so a
+               // plain blob requested twice is still seen as a single plain request,
+               // and it removes the need for a separate item id => url lookup whose
+               // keys could collide (false and "0" are the same array key).
+               $batched = array();
+               foreach ( $urls as $url ) {
+                       $path = explode( '/', $url );
+                       if ( !isset( $path[3] ) ) {
+                               // Malformed URL without cluster/id segments. Leave it
+                               // out of the result; failed URLs are not represented.
+                               continue;
+                       }
+                       $cluster = $path[2];
+                       $id = $path[3];
+                       $itemID = isset( $path[4] ) ? $path[4] : false;
+                       $batched[$cluster][$id][$url] = $itemID;
+               }
+
+               $ret = array();
+               foreach ( $batched as $cluster => $batchByCluster ) {
+                       // One query per cluster for all of its blob ids
+                       $res = $this->batchFetchBlobs( $cluster, $batchByCluster );
+                       foreach ( $res as $id => $blob ) {
+                               foreach ( $batchByCluster[$id] as $url => $itemID ) {
+                                       if ( $itemID === false ) {
+                                               // Plain blob: the stored text is the content
+                                               $ret[$url] = $blob;
+                                       } elseif ( is_object( $blob ) ) {
+                                               // Concatenated storage: pull the item out of the blob
+                                               $ret[$url] = $blob->getItem( $itemID );
+                                       }
+                                       // Otherwise the blob did not unserialize into an object,
+                                       // so there is nothing to call getItem() on; omit the URL.
+                               }
+                       }
                }
                return $ret;
        }
@@ -178,4 +221,57 @@
                $externalBlobCache = array( $cacheID => &$ret );
                return $ret;
        }
+
+       /**
+        * Fetch multiple blob items out of the database
+        *
+        * @param string $cluster A cluster name valid for use with LBFactory
+        * @param array $ids A map from the blob_id's to look for to the 
requested itemIDs in the blobs
+        * @return array A map from the blob_id's requested to their content.  
Unlocated ids are not represented
+        */
+       function batchFetchBlobs( $cluster, array $ids ) {
+               $fields = array( 'blob_id', 'blob_text' );
+               $ret = array();
+
+               // First attempt on the slave. mergeMultiResult() removes every blob id
+               // it finds from $ids, so whatever is left in $ids is still missing.
+               $dbr = $this->getSlave( $cluster );
+               $res = $dbr->select(
+                       $this->getTable( $dbr ),
+                       $fields,
+                       array( 'blob_id' => array_keys( $ids ) ),
+                       __METHOD__
+               );
+               if ( $res !== false ) {
+                       $this->mergeMultiResult( $ret, $ids, $res );
+               }
+
+               if ( $ids ) {
+                       wfDebugLog( __CLASS__, __METHOD__ . " master fallback on '$cluster' for: " .
+                               implode( ',', array_keys( $ids ) ) . "\n" );
+                       // Try the master for the ids the slave did not return
+                       $dbw = $this->getMaster( $cluster );
+                       // Resolve the table name against the handle actually being queried
+                       $res = $dbw->select(
+                               $this->getTable( $dbw ),
+                               $fields,
+                               array( 'blob_id' => array_keys( $ids ) ),
+                               __METHOD__
+                       );
+                       if ( $res === false ) {
+                               wfDebugLog( __CLASS__, __METHOD__ . " master failed on '$cluster'\n" );
+                       } else {
+                               $this->mergeMultiResult( $ret, $ids, $res );
+                       }
+               }
+
+               if ( $ids ) {
+                       // Still unlocated after the master attempt; these ids are simply
+                       // absent from the returned map.
+                       wfDebugLog( __CLASS__, __METHOD__ . " master on '$cluster' failed locating items: " .
+                               implode( ',', array_keys( $ids ) ) . "\n" );
+               }
+               return $ret;
+       }
+
+       /**
+        * Helper function for self::batchFetchBlobs for merging master/slave 
results
+        * @param array &$ret Current self::batchFetchBlobs return value
+        * @param array &$ids Map from blob_id to requested itemIDs
+        * @param mixed $res DB result from DatabaseBase::select
+        */
+       private function mergeMultiResult( array &$ret, array &$ids, $res ) {
+               foreach ( $res as $row ) {
+                       $blobId = $row->blob_id;
+                       $requested = $ids[$blobId];
+                       // Whatever remains in $ids afterwards was not found by this query
+                       unset( $ids[$blobId] );
+
+                       // A lone request with no item id means one result stored per blob;
+                       // anything else means multiple results stored per blob.
+                       $isPlain = count( $requested ) === 1 && reset( $requested ) === false;
+                       $ret[$blobId] = $isPlain
+                               ? $row->blob_text
+                               : unserialize( $row->blob_text );
+               }
+       }
+
 }
diff --git a/includes/externalstore/ExternalStoreMedium.php 
b/includes/externalstore/ExternalStoreMedium.php
index 41af7d8..02bdcb5 100644
--- a/includes/externalstore/ExternalStoreMedium.php
+++ b/includes/externalstore/ExternalStoreMedium.php
@@ -49,6 +49,25 @@
        abstract public function fetchFromURL( $url );
 
        /**
+        * Fetch data from given external store URLs.
+        *
+        * @param array $urls A list of external store URLs
+        * @return array Map from the url to the text stored. Unfound data is 
not represented
+        */
+       public function batchFetchFromURLs( array $urls ) {
+               $retval = array();
+               foreach ( $urls as $url ) {
+                       $data = $this->fetchFromURL( $url );
+                       // Don't record failures (false): leaving them out allows for
+                       // simpler implementations. Errored urls are handled in
+                       // ExternalStore::batchFetchFromURLs
+                       if ( $data !== false ) {
+                               // Key by the single URL just fetched. The input list
+                               // itself is an array and cannot be used as an array key.
+                               $retval[$url] = $data;
+                       }
+               }
+               return $retval;
+       }
+
+       /**
         * Insert a data item into a given location
         *
         * @param string $location the location name

-- 
To view, visit https://gerrit.wikimedia.org/r/77468
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If1bef25f57bfe7de32fc6787f553a90bd76e87ea
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: EBernhardson (WMF) <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to