MaxSem has submitted this change and it was merged. Change subject: Count pages with geo tags ......................................................................
Count pages with geo tags Bug: T149722 Bug: T148812 Change-Id: I1fa2d69549af5527105efa4403329182399ab238 --- M bin/hourly.sh M config.json A geo-tag-counts.php A src/Graphite.php A src/Mysql.php 5 files changed, 116 insertions(+), 0 deletions(-) Approvals: MaxSem: Verified Yurik: Looks good to me, approved diff --git a/bin/hourly.sh b/bin/hourly.sh index 8ba6b00..d3f53c2 100755 --- a/bin/hourly.sh +++ b/bin/hourly.sh @@ -3,3 +3,4 @@ BASEDIR=`dirname "$0"`/.. /usr/bin/php $BASEDIR/tracking-category-count.php +/usr/bin/php $BASEDIR/geo-tag-counts.php diff --git a/config.json b/config.json index 8c3c83c..07ad537 100644 --- a/config.json +++ b/config.json @@ -4,5 +4,11 @@ "categories": { "kartographer-tracking-category": "kartographer.pages.%WIKI%.hourly", "graph-tracking-category": "graph.pages.%WIKI%.hourly" + }, + "geoCoordinates": { + "contentNamespaces": { + "default": [ 0 ], + "commonswiki": [ 6 ] + } } } diff --git a/geo-tag-counts.php b/geo-tag-counts.php new file mode 100644 index 0000000..37495d3 --- /dev/null +++ b/geo-tag-counts.php @@ -0,0 +1,59 @@ +<?php + +namespace DiscoveryStats; + +require_once( __DIR__ . '/vendor/autoload.php' ); + +$config = json_decode( file_get_contents( __DIR__ . '/config.json' ) ); +$wikiBlacklist = [ + 'labswiki', + 'labtestwiki', +]; + +$matrix = new SiteMatrix(); +$db = Mysql::connect( '/etc/mysql/conf.d/analytics-research-client.cnf', + 'analytics-store.eqiad.wmnet' +); +$graphite = new Graphite( $config ); + +foreach ( $matrix->getSites() as $site ) { + $dbName = $site->getDbName(); + // Can't quote it, have to validate + if ( !preg_match( '/^[a-z0-9_]+$/', $dbName ) ) { + throw new \Exception( "Invalid database '$dbName'" ); + } + if ( $site->isPrivate() || in_array( $dbName, $wikiBlacklist ) ) { + continue; + } + + query( "USE $dbName" ); + $siteKey = $site->getFamily() . '.' . $site->getCode(); + + $res = query( 'SELECT count(*) AS num FROM geo_tags WHERE gt_primary=1' ); + if ( $res && ( $row = $res->fetch() ) ) { + $graphite->record( "geodata.pages.$siteKey.hourly", $row['num'] ); + } + + $ns = isset( $config->geoCoordinates->contentNamespaces->$dbName ) + ? $config->geoCoordinates->contentNamespaces->$dbName + : $config->geoCoordinates->contentNamespaces->default; + $ns = implode( ', ', $ns ); + $res = query( 'SELECT count(*) AS num FROM geo_tags, page WHERE page_id=gt_page_id ' + . "AND page_namespace IN ($ns) AND gt_primary=1" + ); + if ( $res && ( $row = $res->fetch() ) ) { + $graphite->record( "geodata.content.$siteKey.hourly", $row['num'] ); + } +} + +function query( $sql ) { + global $db; + + $res = $db->query( $sql ); + if ( !$res ) { + $err = $db->errorInfo(); + throw new \Exception( "{$err[0]}: {$err[2]}" ); + } + + return $res; +} diff --git a/src/Graphite.php b/src/Graphite.php new file mode 100644 index 0000000..7a65164 --- /dev/null +++ b/src/Graphite.php @@ -0,0 +1,26 @@ +<?php + +namespace DiscoveryStats; + +class Graphite { + /** @var string */ + private $host; + /** @var int */ + private $port; + /* @var int */ + private $timestamp; + + public function __construct( $config ) { + $this->host = $config->graphiteHost; + $this->port = $config->graphitePort; + $this->timestamp = time(); + } + + public function record( $metric, $value ) { + $packet = "{$metric} {$value} {$this->timestamp}"; + $nc = "nc -q0 {$this->host} {$this->port}"; + $command = "echo \"$packet\" | $nc"; + + exec( $command ); + } +} diff --git a/src/Mysql.php b/src/Mysql.php new file mode 100644 index 0000000..271c398 --- /dev/null +++ b/src/Mysql.php @@ -0,0 +1,24 @@ +<?php + +namespace DiscoveryStats; + +use Exception; +use PDO; + +class Mysql { + /** + * + */ + public static function connect( $config, $host ) { + $ini = parse_ini_file( $config ); + + if ( !$ini ) { + throw new Exception( "Error opening mysql config $config" ); + } + + return new PDO( "mysql:host=$host", + $ini['user'], + $ini['password'] + ); + } +} -- To view, visit https://gerrit.wikimedia.org/r/319260 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I1fa2d69549af5527105efa4403329182399ab238 Gerrit-PatchSet: 6 Gerrit-Project: analytics/discovery-stats Gerrit-Branch: master Gerrit-Owner: MaxSem <maxsem.w...@gmail.com> Gerrit-Reviewer: MaxSem <maxsem.w...@gmail.com> Gerrit-Reviewer: Nuria <nu...@wikimedia.org> Gerrit-Reviewer: Yurik <yu...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits