Gehel has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/378245 )

Change subject: [WIP] maps: move to vector tiles and cleartables
......................................................................

[WIP] maps: move to vector tiles and cleartables

* new OSM update script
* new database name

Bug: T157613
Change-Id: Ida723dee967cffb4ed44b46b14f7257f0a146ffd
---
A hieradata/role/common/maps/test/vectortiles_master.yaml
A modules/osm/files/process-osm-data.sh
A modules/osm/manifests/cleartables_sync.pp
A modules/osm/manifests/meddo.pp
M modules/profile/manifests/maps/osm_master.pp
M modules/profile/templates/maps/grants-gis.sql.erb
A modules/role/manifests/maps/test/vectortiles_master.pp
7 files changed, 456 insertions(+), 19 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet 
refs/changes/45/378245/1

diff --git a/hieradata/role/common/maps/test/vectortiles_master.yaml 
b/hieradata/role/common/maps/test/vectortiles_master.yaml
new file mode 100644
index 0000000..6833913
--- /dev/null
+++ b/hieradata/role/common/maps/test/vectortiles_master.yaml
@@ -0,0 +1,32 @@
+# the following passwords are defined in private repo:
+#profile::maps::osm_master::kartotherian_pass: some_password
+#profile::maps::osm_master::tilerator_pass: some_password
+#profile::maps::osm_master::tileratorui_pass: some_password
+#profile::maps::osm_master::osmimporter_pass: some_password
+#profile::maps::osm_master::osmupdater_pass: some_password
+#profile::maps::osm_master::replication_pass: some_password
+#profile::maps::cassandra::kartotherian_pass: some_password
+#profile::maps::cassandra::tilerator_pass: some_password
+#profile::maps::cassandra::tileratorui_pass: some_password
+#profile::cassandra::single_instance::super_pass: some_password
+
+admin::groups:
+  - maps-admins
+  - kartotherian-admin
+  - tilerator-admin
+
+cluster: 'maps-test-vector'
+
+profile::cassandra::single_instance::graphite_host: 'graphite-in.eqiad.wmnet'
+service::configuration::statsd_host: 'statsd.eqiad.wmnet'
+
+cassandra::metrics::blacklist:
+  - .*\.metrics\.Table\..*$
+
+profile::redis::master::instances: ['6379']
+profile::redis::master::settings:
+  bind: "0.0.0.0"
+
+profile::maps::postgresql_common::shared_buffers: '4GB'
+profile::maps::postgresql_common::maintenance_work_mem: '3GB'
+profile::maps::osm_master::cleartables: true
diff --git a/modules/osm/files/process-osm-data.sh 
b/modules/osm/files/process-osm-data.sh
new file mode 100644
index 0000000..db79fea
--- /dev/null
+++ b/modules/osm/files/process-osm-data.sh
@@ -0,0 +1,283 @@
+#!/usr/bin/env bash
+
+# This script will
+# 1. Download OSM data and load it into a DB
+# 2. Update that DB
+# 3. Keep a local copy of the planet up to date
+
+# Requirements
+# - osmium-tool
+# - osmosis
+# - osm2pgsql
+# - ClearTables
+# - meddo
+
+set -e
+
+CREATE_DB=false
+
+BASE_DIR="/srv/osm_replication"
+CLEARTABLES="/srv/deployment/tilerator/deploy/ClearTables"
+MEDDO="/srv/deployment/tilerator/deploy/meddo"
+
+DATABASE="ct"
+
+PLANET_DIR="$BASE_DIR/planet"
+PLANET_REPLICATION_BASE="$PLANET_DIR/planet-replication"
+DATABASE_REPLICATION_BASE="$PLANET_DIR/database-replication"
+
+# -E 3857 is not required on newer versions of osm2pgsql
+osm2pgsql_common_opts="-E 3857  --flat-nodes $PLANET_DIR/nodes.bin"
+osm2pgsql_import_opts="--cache 10000 --number-processes 2"
+osm2pgsql_update_opts="--cache 1000 --number-processes 1"
+
+function show_setup_help() {
+  cat << EOF
+Usage: ${0##*/} setup data_url state_url replication_url
+
+Examples:
+  ${0##*/} setup 
http://download.geofabrik.de/north-america/canada/british-columbia-170101.osm.pbf
 \\
+    
http://download.geofabrik.de/north-america/canada/british-columbia-updates/000/001/384.state.txt
 \\
+    http://download.geofabrik.de/north-america/canada/british-columbia-updates
+
+EOF
+exit 1
+}
+
+function setup_data() {
+  if [ -z "$1" ]; then
+    echo "data_url not set"
+    show_setup_help
+    exit 0
+  fi
+  if [ -z "$2" ]; then
+    echo "state_url not set"
+    show_setup_help
+    exit 0
+  fi
+  if [ -z "$3" ]; then
+    echo "replication_url not set"
+    show_setup_help
+    exit 0
+  fi
+  PLANET_URL="$1"
+  STATE_URL="$2"
+  REPLICATION_BASE="$3"
+
+  mkdir -p "$PLANET_DIR"
+  mkdir -p "$PLANET_REPLICATION_BASE"
+
+  cat <<EOF > "$PLANET_REPLICATION_BASE/configuration.txt"
+# The URL of the directory containing change files.
+baseUrl=$REPLICATION_BASE
+
+# Allow 3 days of downloads
+maxInterval = 259200
+EOF
+
+  echo "Downloading files"
+  curl --retry 5 -o "$PLANET_DIR/osm-data.osm.pbf" "$PLANET_URL"
+  curl --retry 5 -o "$PLANET_REPLICATION_BASE/state.txt" "$STATE_URL"
+
+  # Call a function here to update the planet later
+}
+
+function onplanetupdateexit {
+    [ -f "$PLANET_REPLICATION_BASE/state-prev.txt" ] && mv 
"$PLANET_REPLICATION_BASE/state-prev.txt" "$PLANET_REPLICATION_BASE/state.txt"
+}
+
+function load_borders() {
+  echo "Loading borders"
+  psql -d ct -v ON_ERROR_STOP=1 -Xq <<EOF
+CREATE SCHEMA IF NOT EXISTS loading;
+DROP TABLE IF EXISTS loading.osmborder_lines;
+CREATE TABLE loading.osmborder_lines (
+  osm_id bigint,
+  admin_level int,
+  dividing_line bool,
+  disputed bool,
+  maritime bool,
+  way Geometry(LineString, 3857));
+\copy loading.osmborder_lines FROM $PLANET_DIR/osmborder_lines.csv
+CREATE INDEX osmborder_lines_way_idx ON loading.osmborder_lines USING gist 
(way) WITH (fillfactor=100);
+CLUSTER loading.osmborder_lines USING osmborder_lines_way_idx;
+CREATE INDEX osmborder_lines_way_low_idx ON loading.osmborder_lines USING gist 
(way) WITH (fillfactor=100) WHERE admin_level <= 4;
+ANALYZE loading.osmborder_lines;
+BEGIN;
+DROP TABLE IF EXISTS public.osmborder_lines;
+ALTER TABLE loading.osmborder_lines SET SCHEMA public;
+COMMIT;
+EOF
+}
+
+function planet_update() {
+  pushd "$PLANET_REPLICATION_BASE"
+  trap onplanetupdateexit EXIT
+  set -e
+  cp state.txt state-prev.txt
+  # Clean up from any previous runs
+  rm -f "$PLANET_DIR/changes.osc"
+  osmosis --read-replication-interval --write-xml-change 
file="$PLANET_DIR/changes.osc"
+  osmium apply-changes -v --fsync "$PLANET_DIR/osm-data.osm.pbf" 
"$PLANET_DIR/changes.osc" -o "$PLANET_DIR/osm-data-new.osm.pbf"
+  mv "$PLANET_DIR/osm-data-new.osm.pbf" "$PLANET_DIR/osm-data.osm.pbf"
+  # File is updated, clean up derived files
+  rm -f "$PLANET_DIR/changes.osc" "$PLANET_DIR/osm-filtered.osm.pbf" 
"$PLANET_DIR/osmborder_lines.csv"
+  osmborder_filter -o "$PLANET_DIR/osm-filtered.osm.pbf" 
"$PLANET_DIR/osm-data.osm.pbf"
+  osmborder -o "$PLANET_DIR/osmborder_lines.csv" 
"$PLANET_DIR/osm-filtered.osm.pbf"
+  load_borders
+  rm state-prev.txt
+}
+
+function create_database() {
+  if [ $CREATE_DB = true ] ; then
+      createdb $DATABASE
+      psql -1Xq -d $DATABASE -c 'CREATE EXTENSION postgis; CREATE EXTENSION 
hstore;'
+      # Meddo needs these extensions above and beyond what ClearTables needs
+      psql -1Xq -d $DATABASE -c 'CREATE EXTENSION unaccent; CREATE EXTENSION 
fuzzystrmatch;'
+  fi
+  psql -d $DATABASE -f "$MEDDO/functions.sql"
+}
+
+function import_data() {
+  create_database
+
+  # Snapshot the current state
+  cp -r "$PLANET_REPLICATION_BASE" "$DATABASE_REPLICATION_BASE"
+
+  # https://github.com/openstreetmap/osm2pgsql/issues/321 requires switching 
directories
+  pushd "$CLEARTABLES"
+
+  # Build the ClearTables files
+  cat cleartables.yaml wikidata.yaml | ./yaml2json.py > 
"${BASE_DIR}/cleartables.json"
+  cat cleartables.yaml wikidata.yaml | ./createcomments.py > 
"${BASE_DIR}/comments.sql"
+
+  cat sql/types/*.sql | psql -1Xq -d $DATABASE
+
+  osm2pgsql $osm2pgsql_common_opts $osm2pgsql_import_opts --create --slim \
+    -d $DATABASE --output multi --style ${BASE_DIR}/cleartables.json \
+    -G "$PLANET_DIR/osm-data.osm.pbf"
+  cat sql/post/*.sql ${BASE_DIR}/comments.sql | psql -1Xq -d $DATABASE
+  popd
+}
+
+function static_update() {
+  # Quite a simple function thanks to Meddo's scripts
+  pushd "$MEDDO"
+  "$MEDDO/get-external-data.py"
+  popd
+}
+function onupdateexit {
+    [ -f "$DATABASE_REPLICATION_BASE/state-prev.txt" ] && mv 
"$DATABASE_REPLICATION_BASE/state-prev.txt" 
"$DATABASE_REPLICATION_BASE/state.txt"
+}
+
+function database_update() {
+# see 
https://github.com/openstreetmap/chef/blob/master/cookbooks/tile/templates/default/replicate.erb
 for another example
+# The OSMF example is a daemon with a while true loop, this is a one-shot 
script, but they both do the same task
+  pushd "$DATABASE_REPLICATION_BASE"
+
+  trap onupdateexit EXIT
+  . state.txt
+  cp state.txt state-prev.txt
+  file="$PWD/changes-${sequenceNumber}.osm.gz"
+  osmosis --read-replication-interval --write-xml-change file="${file}" 
compressionMethod="gzip"
+
+  prevSequenceNumber=$sequenceNumber
+  . state.txt
+  if [ "${sequenceNumber}" == "${prevSequenceNumber}" ]
+  then
+    echo "No new data available. Sleeping..."
+    #  Remove file, it will just be an empty changeset
+    rm ${file}
+    # No need to rollback now
+    rm state-prev.txt
+    exit 0
+  else
+    echo "Fetched new data from ${prevSequenceNumber} to ${sequenceNumber} 
into ${file}"
+
+    # https://github.com/openstreetmap/osm2pgsql/issues/321 requires switching 
directories
+    pushd "$CLEARTABLES"
+    make
+    osm2pgsql $osm2pgsql_common_opts $osm2pgsql_update_opts --append --slim \
+      -d $DATABASE --output multi --style cleartables.json \
+      -G ${file}
+
+    # Something should be done to create expire lists and process them
+    popd
+
+    rm state-prev.txt
+
+    # expire tiles
+
+    find . -name 'changes-*.gz' -mmin +300 -exec rm -f {} \;
+  fi
+  popd
+}
+
+
+function clean () {
+  if [ "$really" != "yes" ]; then
+    echo "This will delete downloaded files and drop the database. If you 
really want to do this, set the enviornment variable \"really\" to yes"
+    exit 1
+  fi
+
+  rm -rf "$PLANET_DIR"
+  dropdb $DATABASE
+}
+
+function show_help() {
+  cat << EOF
+Usage: ${0##*/} mode
+
+Modes:
+  setup: Downloads initial data, updates it, and sets up replication (see 
setup --help for more info)
+  import: Import the data with osm2pgsql
+  static-update: Update the static data tables
+  planet-update: Update the planet file and regenerate borders
+  database-update: Update the database
+  clean: Clean everything up
+
+EOF
+}
+
+if [ "$#" == "0" ]; then
+  show_help
+  exit 1
+fi
+
+command="$1"
+
+case "$command" in
+    setup)
+    shift
+    setup_data $@
+    ;;
+
+    import)
+    shift
+    import_data
+    ;;
+
+    static-update)
+    shift
+    static_update
+    ;;
+
+    planet-update)
+    shift
+    planet_update
+    ;;
+
+    database-update)
+    shift
+    database_update
+    ;;
+
+    clean)
+    shift
+    clean
+    ;;
+
+    *)
+    show_help
+    ;;
+esac
diff --git a/modules/osm/manifests/cleartables_sync.pp 
b/modules/osm/manifests/cleartables_sync.pp
new file mode 100644
index 0000000..384c29c
--- /dev/null
+++ b/modules/osm/manifests/cleartables_sync.pp
@@ -0,0 +1,59 @@
+define osm::cleartables_sync (
+    $pg_password,
+    $ensure = 'present',
+    $hour   = '*',
+    $minute = '*/30',
+    $postreplicate_command = undef,
+    $proxy='webproxy.eqiad.wmnet:8080',
+) {
+
+    $log_dir = '/var/log/osm_replication/'
+
+    include ::osm::meddo
+    include ::osm::users
+
+    file { '/usr/local/bin/process-osm-data':
+        ensure => present,
+        owner  => 'root',
+        group  => 'root',
+        mode   => '0555',
+        source => 'puppet:///modules/osm/process-osm-data.sh',
+    }
+
+    file { [ $log_dir, '/srv/osm_replication' ]:
+        ensure => directory,
+        owner  => 'osmupdater',
+        group  => 'osmupdater',
+        mode   => '0755',
+    }
+
+    logrotate::rule {'cleartables-sync':
+        ensure  => present,
+        frequency => 'daily',
+        max_age => 30,
+        rotate => 5,
+        date_ext => true,
+        compress => true,
+        missing_ok => true,
+        no_create => true,
+    }
+
+    $base_cron_command = "/usr/local/bin/process-osm-data planet-update >> 
${log_dir}/planet-update.log 2>&1"
+    $cron_command = $postreplicate_command ? {
+        undef   => $base_cron_command,
+        default => "${base_cron_command} ; ${postreplicate_command} >> 
${log_dir}/planet-update.log 2>&1"
+    }
+
+    cron { "planet_sync-${name}":
+        ensure      => $ensure,
+        command     => $cron_command,
+        user        => 'osmupdater',
+        hour        => $hour,
+        minute      => $minute,
+        environment => [
+            "PGPASSWORD=${pg_password}",
+            "https_proxy=https://${proxy}";,
+        ],
+    }
+
+}
diff --git a/modules/osm/manifests/meddo.pp b/modules/osm/manifests/meddo.pp
new file mode 100644
index 0000000..3252050
--- /dev/null
+++ b/modules/osm/manifests/meddo.pp
@@ -0,0 +1,19 @@
+# = Class: osm::meddo
+#
+# Meddo is a tool to manage OSM datasources.
+# See https://github.com/kartotherian/meddo for details.
+#
+# At this point, meddo is very much related to all our OSM data import.
+# Depending on how it evolves, it might make sense to move this to its own
+# module at some point.
+#
+# == Parameters:
+class osm::meddo {
+    # make sure all dependencies for meddo are present
+    require_package([
+        'python3-requests',
+        'python3-psycopg2',
+        'python3-yaml',
+        'gdal-bin',
+    ])
+}
diff --git a/modules/profile/manifests/maps/osm_master.pp 
b/modules/profile/manifests/maps/osm_master.pp
index c51b5ef..41291f4 100644
--- a/modules/profile/manifests/maps/osm_master.pp
+++ b/modules/profile/manifests/maps/osm_master.pp
@@ -10,11 +10,17 @@
     $osmupdater_pass    = hiera('profile::maps::osm_master::osmupdater_pass'),
     $replication_pass   = hiera('profile::maps::osm_master::replication_pass'),
     $postgres_slaves    = hiera('profile::maps::osm_master::slaves', undef),
+    $cleartables        = hiera('profile::maps::osm_master::cleartables', 
false),
 ) {
+
+    require ::profile::maps::postgresql_common
 
     $maps_hosts_ferm = join($maps_hosts, ' ')
 
-    require ::profile::maps::postgresql_common
+    $db_name = $cleartables ? {
+        true    => 'ct',
+        default => 'gis',
+    }
 
     class { '::postgresql::master':
         pgversion           => '9.4',
@@ -30,22 +36,22 @@
     postgresql::user { 'kartotherian':
         user     => 'kartotherian',
         password => $kartotherian_pass,
-        database => 'gis',
+        database => $db_name,
     }
     postgresql::user { 'tileratorui':
         user     => 'tileratorui',
         password => $tileratorui_pass,
-        database => 'gis',
+        database => $db_name,
     }
     postgresql::user { 'osmimporter':
         user     => 'osmimporter',
         password => $osmimporter_pass,
-        database => 'gis',
+        database => $db_name,
     }
     postgresql::user { 'osmupdater':
         user     => 'osmupdater',
         password => $osmupdater_pass,
-        database => 'gis',
+        database => $db_name,
     }
 
     profile::maps::tilerator_user { 'localhost':
@@ -62,7 +68,7 @@
     }
 
     # Grants
-    file { '/usr/local/bin/maps-grants-gis.sql':
+    file { "/usr/local/bin/maps-grants-${db_name}.sql":
         owner   => 'root',
         group   => 'root',
         mode    => '0400',
@@ -76,8 +82,19 @@
     }
 
     # DB setup
-    postgresql::spatialdb { 'gis': }
-
+    postgresql::spatialdb { $db_name: }
+    if $cleartables {
+        postgresql::db::extension { "${title}-fuzzystrmatch":
+            database => $db_name,
+            extname  => 'fuzzystrmatch',
+            require  => Postgresql::Db[$db_name],
+        }
+        postgresql::db::extension { "${title}-unaccent":
+            database => $db_name,
+            extname  => 'unaccent',
+            require  => Postgresql::Db[$db_name],
+        }
+    }
 
     # some additional logging for the postgres master to help diagnose import
     # performance issues
@@ -111,16 +128,26 @@
         ],
     }
 
-    osm::planet_sync { 'gis':
-        ensure                => present,
-        flat_nodes            => true,
-        expire_levels         => '15',
-        num_threads           => 4,
-        pg_password           => $osmupdater_pass,
-        period                => $planet_sync_period,
-        hour                  => $planet_sync_hour,
-        minute                => $planet_sync_minute,
-        postreplicate_command => 'sudo -u tileratorui 
/usr/local/bin/notify-tilerator',
+    if $cleartables {
+        osm::cleartables_sync { $db_name:
+            ensure                => present,
+            pg_password           => $osmupdater_pass,
+            hour                  => $planet_sync_hour,
+            minute                => $planet_sync_minute,
+            postreplicate_command => 'sudo -u tileratorui 
/usr/local/bin/notify-tilerator',
+        }
+    } else {
+        osm::planet_sync { $db_name:
+            ensure                => present,
+            flat_nodes            => true,
+            expire_levels         => '15',
+            num_threads           => 4,
+            pg_password           => $osmupdater_pass,
+            period                => $planet_sync_period,
+            hour                  => $planet_sync_hour,
+            minute                => $planet_sync_minute,
+            postreplicate_command => 'sudo -u tileratorui 
/usr/local/bin/notify-tilerator',
+        }
     }
 
     class { 'osm::prometheus':
diff --git a/modules/profile/templates/maps/grants-gis.sql.erb 
b/modules/profile/templates/maps/grants-gis.sql.erb
index b776b4c..8e1352b 100644
--- a/modules/profile/templates/maps/grants-gis.sql.erb
+++ b/modules/profile/templates/maps/grants-gis.sql.erb
@@ -9,7 +9,7 @@
 
 GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO 
osmimporter;
 ALTER DEFAULT PRIVILEGES GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO 
osmimporter;
-GRANT CREATE, TEMPORARY ON DATABASE gis TO osmimporter;
+GRANT CREATE, TEMPORARY ON DATABASE <%= @db_name %> TO osmimporter;
 
 GRANT USAGE, SELECT, UPDATE ON ALL SEQUENCES IN SCHEMA public to osmimporter;
 GRANT USAGE, SELECT, UPDATE ON ALL SEQUENCES IN SCHEMA public to osmupdater;
diff --git a/modules/role/manifests/maps/test/vectortiles_master.pp 
b/modules/role/manifests/maps/test/vectortiles_master.pp
new file mode 100644
index 0000000..f84f269
--- /dev/null
+++ b/modules/role/manifests/maps/test/vectortiles_master.pp
@@ -0,0 +1,17 @@
+# Sets up a maps server master
+class role::maps::test::vectortiles_master {
+    include ::standard
+    include ::base::firewall
+
+    include ::profile::maps::apps
+    include ::profile::maps::cassandra
+    include ::profile::maps::osm_master
+    include ::profile::redis::master
+
+    system::role { 'role::maps::test::master':
+      ensure      => 'present',
+      description => 'Maps master with vector tiles (postgresql, cassandra, 
redis, tilerator, kartotherian)',
+    }
+
+}
+

-- 
To view, visit https://gerrit.wikimedia.org/r/378245
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ida723dee967cffb4ed44b46b14f7257f0a146ffd
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Gehel <guillaume.leder...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to