EBernhardson (WMF) has uploaded a new change for review.
https://gerrit.wikimedia.org/r/77468
Change subject: Batch retrieval of ES data with minimal round trips
......................................................................
Batch retrieval of ES data with minimal round trips
Batches ES urls by scheme and host to reduce round trips required to retrieve
many pieces of data. Takes an array of ES urls to retrieve and returns a map
from ES urls to their data. Errored urls are represented with a boolean false
in the result set. Initially implemented for ExternalStoreDB, other stores
fall back to serial requests.
Change-Id: If1bef25f57bfe7de32fc6787f553a90bd76e87ea
---
M includes/externalstore/ExternalStore.php
M includes/externalstore/ExternalStoreDB.php
M includes/externalstore/ExternalStoreMedium.php
3 files changed, 149 insertions(+), 1 deletion(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/68/77468/1
diff --git a/includes/externalstore/ExternalStore.php
b/includes/externalstore/ExternalStore.php
index f5119d5..b5139d6 100644
--- a/includes/externalstore/ExternalStore.php
+++ b/includes/externalstore/ExternalStore.php
@@ -91,6 +91,39 @@
}
/**
+ * Fetch the data behind multiple URLs with a minimum of round trips.
+ * URLs are grouped by scheme and every group is handed, in a single
+ * call, to the store object obtained for that scheme.
+ *
+ * @param array $urls The URLs of the text to get
+ * @return array Map from url to its data: the string that was found,
+ *  or false when the url could not be fetched.
+ */
+ public static function batchFetchFromURLs( array $urls ) {
+     // Bucket the URLs by scheme; a URL without a usable scheme joins no bucket
+     $urlsByScheme = array();
+     foreach ( $urls as $url ) {
+         $scheme = parse_url( $url, PHP_URL_SCHEME );
+         if ( !$scheme ) {
+             continue;
+         }
+         $urlsByScheme[$scheme][] = $url;
+     }
+     $found = array();
+     foreach ( $urlsByScheme as $scheme => $group ) {
+         $store = self::getStoreObject( $scheme );
+         if ( $store !== false ) {
+             $found += $store->batchFetchFromURLs( $group );
+         }
+     }
+     // Whatever did not come back (invalid, not found, db dead, etc.) maps to false
+     foreach ( array_diff( $urls, array_keys( $found ) ) as $url ) {
+         $found[$url] = false;
+     }
+     return $found;
+ }
+
+ /**
* Store a data item to an external store, identified by a partial URL
* The protocol part is used to identify the class, the rest is passed
to the
* class itself as a parameter.
diff --git a/includes/externalstore/ExternalStoreDB.php
b/includes/externalstore/ExternalStoreDB.php
index be9c066..33c3448 100644
--- a/includes/externalstore/ExternalStoreDB.php
+++ b/includes/externalstore/ExternalStoreDB.php
@@ -30,7 +30,7 @@
*/
class ExternalStoreDB extends ExternalStoreMedium {
/**
- * The URL returned is of the form of the form DB://cluster/id
+ * The provided URL is in the form of DB://cluster/id
* or DB://cluster/id/itemid for concatened storage.
*
* @see ExternalStoreMedium::fetchFromURL()
@@ -49,6 +49,49 @@
if ( $itemID !== false && $ret !== false ) {
return $ret->getItem( $itemID );
+ }
+ return $ret;
+ }
+
+ /**
+ * Fetch data from given external store URLs.
+ * The provided URLs are in the form of DB://cluster/id
+ * or DB://cluster/id/itemid for concatenated storage.
+ *
+ * URLs are grouped per cluster and each cluster is queried through a single
+ * batchFetchBlobs() call. URLs that do not have a cluster and an id are
+ * skipped, and repeated URLs are only looked up once.
+ *
+ * @param array $urls An array of external store URLs
+ * @return array A map from url to stored content. Failed results
+ *  are not represented.
+ */
+ public function batchFetchFromURLs( array $urls ) {
+     // cluster => blob id => url => item id (false for a plain blob). Keying by url
+     // folds repeated urls and keeps 'id' and 'id/0' requests apart.
+     $batched = array();
+     foreach ( $urls as $url ) {
+         $path = explode( '/', $url );
+         if ( !isset( $path[3] ) ) {
+             // not DB://cluster/id; leave it out so it counts as a failed result
+             continue;
+         }
+         $itemID = isset( $path[4] ) ? $path[4] : false;
+         $batched[$path[2]][$path[3]][$url] = $itemID;
+     }
+     $ret = array();
+     foreach ( $batched as $cluster => $batchByCluster ) {
+         $res = $this->batchFetchBlobs( $cluster, $batchByCluster );
+         foreach ( $res as $id => $blob ) {
+             foreach ( $batchByCluster[$id] as $url => $itemID ) {
+                 if ( $itemID === false ) {
+                     $ret[$url] = $blob;
+                 } elseif ( is_object( $blob ) ) {
+                     // a blob that did not unserialize into an object has no items
+                     $ret[$url] = $blob->getItem( $itemID );
+                 }
+             }
+         }
      }
      return $ret;
  }
@@ -178,4 +221,57 @@
$externalBlobCache = array( $cacheID => &$ret );
return $ret;
}
+
+ /**
+ * Fetch multiple blob items out of the database. The slave of the cluster is
+ * queried first; ids it does not return are looked up again on the master.
+ * @param string $cluster A cluster name valid for use with LBFactory
+ * @param array $ids A map from the blob_id's to look for to the requested itemIDs in the blobs
+ * @return array A map from the blob_id's requested to their content. Unlocated ids are not represented
+ */
+ function batchFetchBlobs( $cluster, array $ids ) {
+     $dbr = $this->getSlave( $cluster );
+     $res = $dbr->select( $this->getTable( $dbr ), array( 'blob_id', 'blob_text' ), array( 'blob_id' => array_keys( $ids ) ), __METHOD__ );
+     $ret = array();
+     if ( $res !== false ) {
+         $this->mergeMultiResult( $ret, $ids, $res );
+     }
+     if ( $ids ) {
+         wfDebugLog( __CLASS__, __METHOD__ . " master fallback on '$cluster' for: " . implode( ',', array_keys( $ids ) ) . "\n" );
+         // Try the master for what is left, resolving the table against the master handle
+         $dbw = $this->getMaster( $cluster );
+         $res = $dbw->select( $this->getTable( $dbw ), array( 'blob_id', 'blob_text' ), array( 'blob_id' => array_keys( $ids ) ), __METHOD__ );
+         if ( $res === false ) {
+             wfDebugLog( __CLASS__, __METHOD__ . " master failed on '$cluster'\n" );
+         } else {
+             $this->mergeMultiResult( $ret, $ids, $res );
+         }
+     }
+     if ( $ids ) {
+         wfDebugLog( __CLASS__, __METHOD__ . " master on '$cluster' failed locating items: " . implode( ',', array_keys( $ids ) ) . "\n" );
+     }
+     return $ret;
+ }
+
+ /**
+ * Fold the rows of one query into the result of self::batchFetchBlobs.
+ * Every blob_id that shows up in a row is dropped from $ids.
+ *
+ * @param array &$ret Map from blob_id to content, extended in place
+ * @param array &$ids Map from blob_id to requested itemIDs, reduced in place
+ * @param mixed $res DB result from DatabaseBase::select
+ */
+ private function mergeMultiResult( array &$ret, array &$ids, $res ) {
+     foreach ( $res as $row ) {
+         $blobId = $row->blob_id;
+         $wanted = $ids[$blobId];
+         // whatever is still left in $ids afterwards has not been found
+         unset( $ids[$blobId] );
+         $text = $row->blob_text;
+         // only a lone request without an item id is treated as a plain blob
+         $isPlain = count( $wanted ) === 1 && reset( $wanted ) === false;
+         $ret[$blobId] = $isPlain ? $text : unserialize( $text );
+     }
+ }
+
}
diff --git a/includes/externalstore/ExternalStoreMedium.php
b/includes/externalstore/ExternalStoreMedium.php
index 41af7d8..02bdcb5 100644
--- a/includes/externalstore/ExternalStoreMedium.php
+++ b/includes/externalstore/ExternalStoreMedium.php
@@ -49,6 +49,25 @@
abstract public function fetchFromURL( $url );
/**
+ * Fetch data from given external store URLs, one fetchFromURL() call per URL.
+ *
+ * @param array $urls A list of external store URLs
+ * @return array Map from the url to the text stored. Unfound data is not represented
+ */
+ public function batchFetchFromURLs( array $urls ) {
+     $retval = array();
+     foreach ( $urls as $url ) {
+         $data = $this->fetchFromURL( $url );
+         // Leave failures (false) out of the map to allow for simpler implementations;
+         // errored urls are handled in ExternalStore::batchFetchFromURLs
+         if ( $data !== false ) {
+             $retval[$url] = $data;
+         }
+     }
+     return $retval;
+ }
+
+ /**
* Insert a data item into a given location
*
* @param string $location the location name
--
To view, visit https://gerrit.wikimedia.org/r/77468
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: If1bef25f57bfe7de32fc6787f553a90bd76e87ea
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: EBernhardson (WMF) <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits