MaxSem has submitted this change and it was merged.

Change subject: Count pages with geo tags
......................................................................


Count pages with geo tags

Bug: T149722
Bug: T148812
Change-Id: I1fa2d69549af5527105efa4403329182399ab238
---
M bin/hourly.sh
M config.json
A geo-tag-counts.php
A src/Graphite.php
A src/Mysql.php
5 files changed, 116 insertions(+), 0 deletions(-)

Approvals:
  MaxSem: Verified
  Yurik: Looks good to me, approved



diff --git a/bin/hourly.sh b/bin/hourly.sh
index 8ba6b00..d3f53c2 100755
--- a/bin/hourly.sh
+++ b/bin/hourly.sh
@@ -3,3 +3,4 @@
 BASEDIR=`dirname "$0"`/..
 
 /usr/bin/php $BASEDIR/tracking-category-count.php
+/usr/bin/php $BASEDIR/geo-tag-counts.php
diff --git a/config.json b/config.json
index 8c3c83c..07ad537 100644
--- a/config.json
+++ b/config.json
@@ -4,5 +4,11 @@
     "categories": {
         "kartographer-tracking-category": "kartographer.pages.%WIKI%.hourly",
         "graph-tracking-category": "graph.pages.%WIKI%.hourly"
+    },
+    "geoCoordinates": {
+        "contentNamespaces": {
+            "default": [ 0 ],
+            "commonswiki": [ 6 ]
+        }
     }
 }
diff --git a/geo-tag-counts.php b/geo-tag-counts.php
new file mode 100644
index 0000000..37495d3
--- /dev/null
+++ b/geo-tag-counts.php
@@ -0,0 +1,76 @@
+<?php
+
+namespace DiscoveryStats;
+
+require_once( __DIR__ . '/vendor/autoload.php' );
+
+// Fail fast on an unreadable or malformed config instead of on a later null access
+$config = json_decode( file_get_contents( __DIR__ . '/config.json' ) );
+if ( !is_object( $config ) ) {
+    throw new \Exception( 'Unable to read or parse config.json' );
+}
+
+// Wikis that are skipped even though they appear in the site matrix
+$wikiBlacklist = [
+    'labswiki',
+    'labtestwiki',
+];
+
+$matrix = new SiteMatrix();
+$db = Mysql::connect( '/etc/mysql/conf.d/analytics-research-client.cnf',
+    'analytics-store.eqiad.wmnet'
+);
+$graphite = new Graphite( $config );
+
+foreach ( $matrix->getSites() as $site ) {
+    $dbName = $site->getDbName();
+    // Can't quote it, have to validate; \A...\z also rejects a trailing newline
+    if ( !preg_match( '/\A[a-z0-9_]+\z/', $dbName ) ) {
+        throw new \Exception( "Invalid database '$dbName'" );
+    }
+    if ( $site->isPrivate() || in_array( $dbName, $wikiBlacklist, true ) ) {
+        continue;
+    }
+
+    query( "USE $dbName" );
+    $siteKey = $site->getFamily() . '.' . $site->getCode();
+
+    // Count geo_tags rows with gt_primary=1
+    $res = query( 'SELECT count(*) AS num FROM geo_tags WHERE gt_primary=1' );
+    if ( $res && ( $row = $res->fetch() ) ) {
+        $graphite->record( "geodata.pages.$siteKey.hourly", $row['num'] );
+    }
+
+    // Same count restricted to the configured namespaces (per-wiki override or default)
+    $ns = isset( $config->geoCoordinates->contentNamespaces->$dbName )
+        ? $config->geoCoordinates->contentNamespaces->$dbName
+        : $config->geoCoordinates->contentNamespaces->default;
+    // Namespace ids are interpolated into SQL, so force them to integers
+    $ns = implode( ', ', array_map( 'intval', $ns ) );
+    $res = query( 'SELECT count(*) AS num FROM geo_tags, page '
+        . 'WHERE page_id=gt_page_id '
+        . "AND page_namespace IN ($ns) AND gt_primary=1"
+    );
+    if ( $res && ( $row = $res->fetch() ) ) {
+        $graphite->record( "geodata.content.$siteKey.hourly", $row['num'] );
+    }
+}
+
+/**
+ * Run a statement on the global $db connection.
+ *
+ * @param string $sql Statement text
+ * @return \PDOStatement
+ * @throws \Exception with SQLSTATE and driver message when the statement fails
+ */
+function query( $sql ) {
+    global $db;
+
+    $res = $db->query( $sql );
+    if ( !$res ) {
+        $err = $db->errorInfo();
+        throw new \Exception( "{$err[0]}: {$err[2]}" );
+    }
+
+    return $res;
+}
diff --git a/src/Graphite.php b/src/Graphite.php
new file mode 100644
index 0000000..7a65164
--- /dev/null
+++ b/src/Graphite.php
@@ -0,0 +1,41 @@
+<?php
+
+namespace DiscoveryStats;
+
+/**
+ * Sends datapoints to a Graphite server by piping them through nc.
+ */
+class Graphite {
+    /** @var string */
+    private $host;
+    /** @var int */
+    private $port;
+    /** @var int */
+    private $timestamp;
+
+    /**
+     * @param object $config Provides the graphiteHost and graphitePort properties
+     */
+    public function __construct( $config ) {
+        $this->host = $config->graphiteHost;
+        $this->port = $config->graphitePort;
+        // Taken once, so every datapoint sent by this instance shares one timestamp
+        $this->timestamp = time();
+    }
+
+    /**
+     * Send one "<metric> <value> <timestamp>" line to the configured host and port.
+     *
+     * @param string $metric Metric path
+     * @param int|string $value Value to record
+     */
+    public function record( $metric, $value ) {
+        // Escape everything that reaches the shell so metric names, values or
+        // config entries can never be expanded or executed by it
+        $packet = escapeshellarg( "{$metric} {$value} {$this->timestamp}" );
+        $host = escapeshellarg( (string)$this->host );
+        $port = escapeshellarg( (string)$this->port );
+
+        exec( "echo $packet | nc -q0 $host $port" );
+    }
+}
diff --git a/src/Mysql.php b/src/Mysql.php
new file mode 100644
index 0000000..271c398
--- /dev/null
+++ b/src/Mysql.php
@@ -0,0 +1,30 @@
+<?php
+
+namespace DiscoveryStats;
+
+use Exception;
+use PDO;
+
+class Mysql {
+    /**
+     * Open a PDO connection to a MySQL host with credentials read from an ini-style file.
+     *
+     * @param string $config Path of the file holding the 'user' and 'password' settings
+     * @param string $host Host name placed in the PDO DSN
+     * @return PDO
+     * @throws Exception when the file cannot be parsed or yields no settings
+     */
+    public static function connect( $config, $host ) {
+        $ini = parse_ini_file( $config );
+
+        if ( !$ini ) {
+            throw new Exception( "Error opening mysql config $config" );
+        }
+
+        // A missing key becomes null rather than an undefined-index notice
+        $user = isset( $ini['user'] ) ? $ini['user'] : null;
+        $password = isset( $ini['password'] ) ? $ini['password'] : null;
+
+        return new PDO( "mysql:host=$host", $user, $password );
+    }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/319260
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I1fa2d69549af5527105efa4403329182399ab238
Gerrit-PatchSet: 6
Gerrit-Project: analytics/discovery-stats
Gerrit-Branch: master
Gerrit-Owner: MaxSem <maxsem.w...@gmail.com>
Gerrit-Reviewer: MaxSem <maxsem.w...@gmail.com>
Gerrit-Reviewer: Nuria <nu...@wikimedia.org>
Gerrit-Reviewer: Yurik <yu...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to