Milimetric has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/231574

Change subject: [WIP] Add an Analytics specific instance of RESTBase
......................................................................

[WIP] Add an Analytics specific instance of RESTBase

Very much a work in progress.  I am not at all sure how hiera works and
if I've copy/pasted properly here.  I'll work on this hopefully with
some help.

Bug: T107056
Change-Id: I29c872ed6a811cf1fd1ca9e5242bf513cba401ba
---
A hieradata/role/common/cassandra-analytics.yaml
A hieradata/role/common/restbase-analytics.yaml
A manifests/role/cassandra-analytics.pp
A manifests/role/restbase-analytics.pp
M manifests/site.pp
A modules/restbase/templates/config.analytics.yaml.erb
6 files changed, 357 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet 
refs/changes/74/231574/1

diff --git a/hieradata/role/common/cassandra-analytics.yaml 
b/hieradata/role/common/cassandra-analytics.yaml
new file mode 100644
index 0000000..3780567
--- /dev/null
+++ b/hieradata/role/common/cassandra-analytics.yaml
@@ -0,0 +1,20 @@
+cassandra::metrics::graphite_host: 'graphite-in.eqiad.wmnet'
+cassandra::start_rpc: 'false'
+
+# TODO: set up a cluster variable similar to MySQL clusters to share
+# cassandra cluster configs between cassandra & clients
+
+cassandra::seeds:
+    - restbase-analytics1001.eqiad.wmnet
+    - restbase-analytics1002.eqiad.wmnet
+cassandra::max_heap_size: 16g
+# 1/4 heap size, no more than 100m/thread
+cassandra::heap_newsize: 2048m
+cassandra::compaction_throughput_mb_per_sec: 60
+cassandra::concurrent_compactors: 10
+cassandra::concurrent_writes: 18
+cassandra::concurrent_reads: 18
+
+cassandra::dc: "%{::site}"
+cassandra::cluster_name: "%{::site}" # NOTE(review): bare site name - confirm it cannot clash with another Cassandra cluster in the same site
+
diff --git a/hieradata/role/common/restbase-analytics.yaml 
b/hieradata/role/common/restbase-analytics.yaml
new file mode 100644
index 0000000..1839610
--- /dev/null
+++ b/hieradata/role/common/restbase-analytics.yaml
@@ -0,0 +1,21 @@
+#
+# RESTBase (analytics instance)
+#
+cluster: restbase-analytics
+restbase::seeds:
+    - restbase-analytics1001.eqiad.wmnet
+    - restbase-analytics1002.eqiad.wmnet
+restbase::config_template: restbase/config.analytics.yaml.erb
+restbase::logstash_host: logstash1001.eqiad.wmnet
+restbase::cassandra_defaultConsistency: localQuorum
+restbase::cassandra_localDc: "%{::site}"
+restbase::statsd_host: statsd.eqiad.wmnet
+restbase::parsoid_uri: undef # NOTE(review): YAML reads a bare undef as the string "undef", not a null - confirm intent
+restbase::graphoid_host_port: undef # NOTE(review): YAML reads a bare undef as the string "undef", not a null - confirm intent
+
+# TODO: not yet understood - decide whether this instance needs its own realserver IP
+#lvs::realserver::realserver_ips:
+  #- '10.2.2.17' # restbase.svc.eqiad.wmnet
+
+admin::groups:
+  - restbase-roots
diff --git a/manifests/role/cassandra-analytics.pp 
b/manifests/role/cassandra-analytics.pp
new file mode 100644
index 0000000..7f583ba
--- /dev/null
+++ b/manifests/role/cassandra-analytics.pp
@@ -0,0 +1,54 @@
+# == Class role::cassandra-analytics
+#
+class role::cassandra-analytics {
+    # Parameters to be set by Hiera
+    class { '::cassandra': }
+    class { '::cassandra::metrics': }
+    class { '::cassandra::logging': }
+
+    # temporary collector, T78514
+    diamond::collector { 'CassandraCollector':
+        ensure => absent,
+    }
+
+    system::role { 'role::cassandra-analytics':
+        description => 'Analytics Cassandra server',
+    }
+
+    # Emit an Icinga alert unless there is exactly one Java process belonging
+    # to user 'cassandra' and with 'CassandraDaemon' in its argument list.
+    nrpe::monitor_service { 'cassandra-analytics':
+        description  => 'Analytics Cassandra database',
+        nrpe_command => '/usr/lib/nagios/plugins/check_procs -c 1:1 -u 
cassandra -C java -a CassandraDaemon',
+    }
+
+    # CQL query interface monitoring (T93886): plain TCP check on port 9042
+    monitoring::service { 'cassandra-analytics-cql':
+        description   => 'Analytics Cassandra CQL query interface',
+        check_command => 'check_tcp!9042',
+        contact_group => 'admins,team-analytics',
+    }
+
+    $cassandra_hosts = hiera('cassandra::seeds')
+    $cassandra_hosts_ferm = join($cassandra_hosts, ' ')
+
+    # Cassandra intra-node messaging
+    ferm::service { 'cassandra-analytics-intra-node':
+        proto  => 'tcp',
+        port   => '7000',
+        srange => "@resolve(($cassandra_hosts_ferm))",
+    }
+    # Cassandra JMX/RMI
+    ferm::service { 'cassandra-analytics-jmx-rmi':
+        proto  => 'tcp',
+        port   => '7199',
+        srange => "@resolve(($cassandra_hosts_ferm))",
+    }
+    # Cassandra CQL query interface
+    ferm::service { 'cassandra-analytics-cql':
+        proto  => 'tcp',
+        port   => '9042',
+        srange => "@resolve(($cassandra_hosts_ferm))",
+    }
+
+}
diff --git a/manifests/role/restbase-analytics.pp 
b/manifests/role/restbase-analytics.pp
new file mode 100644
index 0000000..4a26f6b
--- /dev/null
+++ b/manifests/role/restbase-analytics.pp
@@ -0,0 +1,124 @@
+# == Class role::restbase-analytics
+#
+
+@monitoring::group { 'restbase_analytics_eqiad': description => 'Analytics 
Restbase eqiad' }
+@monitoring::group { 'restbase_analytics_codfw': description => 'Analytics 
Restbase codfw' }
+
+# Config should be pulled from hiera
+class role::restbase-analytics {
+    system::role { 'restbase-analytics': description => "Analytics Restbase 
${::realm}" }
+
+    include ::restbase
+    include ::restbase::monitoring
+
+    include lvs::realserver
+
+
+    ferm::service {'restbase_web':
+        proto => 'tcp',
+        port  => '7231',
+    }
+
+}
+
+class role::restbase::alerts { # NOTE(review): name looks copied from role::restbase - confirm it does not duplicate an existing class
+    monitoring::graphite_threshold { 'restbase_analytics_request_5xx_rate':
+        description   => 'Analytics RESTBase req/s returning 5xx 
http://grafana.wikimedia.org/#/dashboard/db/restbase',
+        metric        => 
'transformNull(restbase.v1_page_html_-title-_-revision--_tid-.GET.5xx.sample_rate,
 0)',
+        from          => '10min',
+        warning       => '1', # 1 5xx/s
+        critical      => '3', # 3 5xx/s
+        percentage    => '20',
+        contact_group => 'team-analytics',
+    }
+
+    monitoring::graphite_threshold { 
'restbase_analytics_html_storage_hit_latency':
+        description   => 'Analytics RESTBase HTML storage load mean latency ms 
http://grafana.wikimedia.org/#/dashboard/db/restbase',
+        metric        => 
'movingMedian(restbase.sys_key-rev-value_-bucket-_-key--_revision--_tid-.GET.2xx.mean,
 15)',
+        from          => '10min',
+        warning       => '25', # 25ms
+        critical      => '50', # 50ms
+        percentage    => '50',
+        contact_group => 'team-analytics',
+    }
+
+    monitoring::graphite_threshold { 
'restbase_analytics_html_storage_hit_latency_99p':
+        description   => 'Analytics RESTBase HTML storage load 99p latency ms 
http://grafana.wikimedia.org/#/dashboard/db/restbase',
+        metric        => 
'movingMedian(restbase.sys_key-rev-value_-bucket-_-key--_revision--_tid-.GET.2xx.p99,
 15)',
+        from          => '10min',
+        warning       => '1500', # 1.5s
+        critical      => '3000', # 3s
+        percentage    => '50',
+        contact_group => 'team-analytics',
+    }
+
+    monitoring::graphite_threshold { 
'restbase_analytics_cassandra_highest_storage_exceptions':
+        description   => 'Analytics RESTBase Cassandra highest storage 
exceptions 
http://grafana.wikimedia.org/#/dashboard/db/restbase-cassandra-storage',
+        metric        => 
'highestMax(nonNegativeDerivative(cassandra.restbase-analytics10*.org.apache.cassandra.metrics.Storage.Exceptions.count),
 1)',
+        from          => '10min',
+        warning       => '5',
+        critical      => '10',
+        percentage    => '50',
+        contact_group => 'team-analytics',
+    }
+
+    monitoring::graphite_threshold { 
'restbase_analytics_cassandra_highest_total_hints':
+        description   => 'Analytics RESTBase Cassandra highest total hints 
http://grafana.wikimedia.org/#/dashboard/db/restbase-cassandra-storage',
+        metric        => 
'highestMax(nonNegativeDerivative(cassandra.restbase-analytics10*.org.apache.cassandra.metrics.Storage.TotalHints.count),
 1)',
+        from          => '10min',
+        warning       => '600',
+        critical      => '1000',
+        percentage    => '50',
+        contact_group => 'team-analytics',
+    }
+
+    monitoring::graphite_threshold { 
'restbase_analytics_cassandra_highest_pending_compactions':
+        description   => 'Analytics RESTBase Cassandra highest pending 
compactions 
http://grafana.wikimedia.org/#/dashboard/db/restbase-cassandra-compaction',
+        metric        => 
'highestMax(cassandra.restbase-analytics10*.org.apache.cassandra.metrics.Compaction.PendingTasks.value,
 1)',
+        from          => '60min',
+        warning       => '100',
+        critical      => '400',
+        percentage    => '50',
+        contact_group => 'team-analytics',
+    }
+
+    monitoring::graphite_threshold { 
'restbase_analytics_cassandra_highest_sstables_per_read':
+        description   => 'Analytics RESTBase Cassandra highest SSTables 
per-read 
http://grafana.wikimedia.org/#/dashboard/db/restbase-cassandra-cf-sstables-per-read',
+        metric        => 
'highestMax(cassandra.restbase-analytics10*.org.apache.cassandra.metrics.ColumnFamily.all.SSTablesPerReadHistogram.99percentile,
 1)',
+        from          => '10min',
+        warning       => '6',
+        critical      => '10',
+        percentage    => '50',
+        contact_group => 'team-analytics',
+    }
+
+    monitoring::graphite_threshold { 
'restbase_analytics_cassandra_highest_tombstones_scanned':
+        description   => 'Analytics RESTBase Cassandra highest tombstones 
scanned 
http://grafana.wikimedia.org/#/dashboard/db/restbase-cassandra-cf-tombstones-scanned',
+        metric        => 
'highestMax(cassandra.restbase-analytics10*.org.apache.cassandra.metrics.ColumnFamily.all.TombstoneScannedHistogram.99percentile,
 1)',
+        from          => '10min',
+        warning       => '1000',
+        critical      => '1500',
+        percentage    => '50',
+        contact_group => 'team-analytics',
+    }
+
+    monitoring::graphite_threshold { 
'restbase_analytics_cassandra_highest_pending_internal':
+        description   => 'Analytics RESTBase Cassandra highest pending 
internal thread pool tasks 
http://grafana.wikimedia.org/#/dashboard/db/restbase-cassandra-thread-pools',
+        metric        => 
'highestMax(exclude(cassandra.restbase-analytics10*.org.apache.cassandra.metrics.ThreadPools.internal.*.PendingTasks.value,
 "CompactionExecutor"), 1)',
+        from          => '10min',
+        warning       => '500',
+        critical      => '1000',
+        percentage    => '50',
+        contact_group => 'team-analytics',
+    }
+
+    monitoring::graphite_threshold { 
'restbase_analytics_cassandra_highest_dropped_messages':
+        description   => 'Analytics RESTBase Cassandra highest dropped message 
rate 
http://grafana.wikimedia.org/#/dashboard/db/restbase-cassandra-dropped-messages',
+        metric        => 
'highestMax(cassandra.restbase-analytics10*.org.apache.cassandra.metrics.DroppedMessage.*.Dropped.1MinuteRate,
 1)',
+        from          => '10min',
+        warning       => '50',
+        critical      => '100',
+        percentage    => '50',
+        contact_group => 'team-analytics',
+    }
+}
diff --git a/manifests/site.pp b/manifests/site.pp
index db124ec..644a4f4 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -2157,6 +2157,13 @@
     include standard
 }
 
+# analytics restbase eqiad cluster
+node /^restbase-analytics100[1-2]\.eqiad\.wmnet$/ {
+    role restbase-analytics, cassandra-analytics
+    include base::firewall
+    include standard
+}
+
 # network insights (netflow/pmacct, etc.)
 node 'rhenium.wikimedia.org' {
     role pmacct
diff --git a/modules/restbase/templates/config.analytics.yaml.erb 
b/modules/restbase/templates/config.analytics.yaml.erb
new file mode 100644
index 0000000..768fd9e
--- /dev/null
+++ b/modules/restbase/templates/config.analytics.yaml.erb
@@ -0,0 +1,131 @@
+# Analytics RESTBase config
+
+info:
+  name: restbase-analytics
+
+
+
+# Swagger spec templates, referenced using yaml references in the spec section
+# below.
+templates:
+
+  wmf-content-1.0.0: &wp/content/1.0.0
+    swagger: '2.0'
+    # swagger options, overriding the shared ones from the merged specs (?)
+    info:
+      version: 1.0.0-beta
+      title: Wikimedia REST API
+      description: >
+          This API aims to provide straightforward and low-latency access to
+          Wikimedia Analytics data. It is currently in beta testing, so
+          things aren't completely locked down yet. Each entry point has
+          explicit stability markers to inform you about development status
+          and change policy, according to [our API version
+          policy](https://www.mediawiki.org/wiki/API_versioning).
+
+          ### High-volume access
+            - As a general rule, don't perform more than 200 requests/s to
+              this API.
+            - Set a unique `User-Agent` header that allows us to contact you
+              quickly. Email addresses or URLs of contact pages work well.
+            - Consider using our [HTML
+              dumps](https://phabricator.wikimedia.org/T17017) once they
+              become available.
+
+      termsOfService: https://wikimediafoundation.org/wiki/Terms_of_Use
+      contact:
+        name: the Wikimedia Services team
+        url: http://mediawiki.org/wiki/RESTBase
+      license:
+        name: Apache2
+        url: http://www.apache.org/licenses/LICENSE-2.0
+
+    # Override the base path for host-based (proxied) requests. In our case,
+    # we proxy https://{domain}/api/rest_v1/ to the API.
+    x-host-basePath: /api/rest_v1
+
+    security:
+      # ACLs for public *.wikipedia.org wikis
+      - mediaWikiAuth:
+        - user:read
+    x-subspecs:
+      - analytics/pageviews
+
+  wmf-sys-1.0.0: &wp/sys/1.0.0
+    info:
+      title: Default MediaWiki sys API module
+      version: 1.0.0
+    paths:
+      /{module:table}:
+        x-modules:
+          # There can be multiple modules too per stanza, as long as the
+          # exported symbols don't conflict. The operationIds from the spec
+          # will be resolved against all of the modules.
+          - name: restbase-mod-table-cassandra
+            version: 1.0.0
+            type: npm
+            options: # Passed to the module constructor
+              conf:
+                hosts: [<%= Array(@seeds).join(',') %>]
+                keyspace: system
+                localDc: <%= @cassandra_localDc %>
+                username: <%= @cassandra_user %>
+                password: <%= @cassandra_password %>
+                defaultConsistency: <%= @cassandra_defaultConsistency %>
+                storage_groups:
+                  - name: phase0.group.local
+                    domains:
+                      - /^(?:test.*\.wiki.*\.org|www.mediawiki.org)$/
+                  - name: wikipedia.group.local
+                    domains: /\.wikipedia.org$/
+                  # Catch-all group
+                  - name: default.group.local
+                    domains: /./
+
+  wp-default-1.0.0: &wp/default/1.0.0
+    x-subspecs:
+      - paths:
+          /{api:v1}:
+            x-subspec: *wp/content/1.0.0
+      - paths:
+          /{api:sys}:
+            x-subspec: *wp/sys/1.0.0
+
+
+# Swagger spec root.
+spec: &spec
+  title: "The Analytics RESTBase root"
+  paths:
+    # The single domain served by this instance
+    /{domain:analytics.wikimedia.org}: *wp/default/1.0.0
+
+
+# The main service setup. Each worker can offer one or more services.
+services:
+  - name: restbase-analytics
+    module: ./restbase/lib/server
+    conf:
+      port: <%= @port %>
+      spec: *spec
+      salt: <%= @salt_key %>
+      default_page_size: <%= @page_size %>
+
+# Log error messages and gracefully restart a worker if v8 reports using more
+# heap (note: not RSS).
+worker_heap_limit_mb: 300
+
+logging:
+  name: restbase-analytics
+  level: <%= @logging_level %>
+  streams:
+  # XXX: Use gelf-stream -> logstash
+  - type: gelf
+    host: <%= @logstash_host %>
+    port: <%= @logstash_port %>
+
+# StatsD metrics collection
+metrics:
+  name: restbase-analytics
+  type: statsd # default, but lets be explicit
+  host: <%= @statsd_host %>
+  port: <%= @statsd_port %>

-- 
To view, visit https://gerrit.wikimedia.org/r/231574
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I29c872ed6a811cf1fd1ca9e5242bf513cba401ba
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Milimetric <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to