Faidon has submitted this change and it was merged. Change subject: Initial Ceph module, role class & site.pp ......................................................................
Initial Ceph module, role class & site.pp This is inspired from the puppet-ceph module and the Chef recipes, although it's very different from either of them. The puppet-ceph module is really weird and wouldn't work well for us at all. Change-Id: Ia3dc6838fc776628da1cf9d9cc1e9fff6899f5b1 --- A manifests/role/ceph.pp M manifests/site.pp A modules/ceph/files/ceph-add-disk A modules/ceph/files/logrotate-radosgw A modules/ceph/manifests/bootstrap_key.pp A modules/ceph/manifests/init.pp A modules/ceph/manifests/mds.pp A modules/ceph/manifests/mon.pp A modules/ceph/manifests/osd.pp A modules/ceph/manifests/radosgw.pp A modules/ceph/templates/ceph.conf.erb A modules/ceph/templates/radosgw/vhost.erb 12 files changed, 540 insertions(+), 7 deletions(-) Approvals: Faidon: Looks good to me, approved jenkins-bot: Verified diff --git a/manifests/role/ceph.pp b/manifests/role/ceph.pp new file mode 100644 index 0000000..a0eeffa --- /dev/null +++ b/manifests/role/ceph.pp @@ -0,0 +1,108 @@ +@monitor_group { 'Ceph': description => 'Ceph servers' } + +class role::ceph::base { + $cluster = 'ceph' + $nagios_group = 'ceph' + + include standard +} + +class role::ceph::eqiad inherits role::ceph::base { + system_role { 'role::ceph::eqiad': description => 'Ceph eqiad cluster' } + + include passwords::ceph::eqiad + class { 'ceph': + admin_key => $passwords::ceph::eqiad::admin_key, + config => { + fsid => 'c9da36e1-694a-4166-b346-9d8d4d1d1ac1', + mon_initial_members => [ + 'ms-fe1001', + 'ms-fe1002', + 'ms-be1003', + ], + mon_addresses => [ + '10.64.0.167:6789', # ms-fe1001 + '10.64.0.168:6789', # ms-fe1002 + '10.64.0.175:6789', # ms-be1003 + ], + 'global' => { + # eqiad hardware has H710s which have a BBU + 'osd fs mount options xfs' => 'noatime,nobarrier', + }, + 'mon' => { + 'mon osd down out interval' => '600', + }, + 'osd' => { + 'osd journal' => '/var/lib/ceph/journal/$cluster-$id', + 'osd journal size' => '10240', + 'osd op thread timeout' => '7200', + 'osd recovery thread timeout' => '7200', + }, + 'radosgw' => { + 'rgw enable ops log' => 'false', + 'rgw enable usage log' => 'false', + 'rgw extended http attrs' => 'x_content_duration', + 'debug rgw' => '1', + }, + }, + } + + class mon inherits role::ceph::eqiad { + system_role { 'role::ceph::eqiad::mon': + description => 'Ceph eqiad monitor', + } + + class { 'ceph::mon': + monitor_secret => $passwords::ceph::eqiad::monitor_secret, + } + + # FIXME: need a Ceph nagios check + } + + class osd inherits role::ceph::eqiad { + system_role { 'role::ceph::eqiad::osd': + description => 'Ceph eqiad OSD', + } + + include ceph::osd + + # I/O busy systems, tune a few knobs to avoid page alloc failures + sysctl { 'sys.vm.min_free_kbytes': + value => '512000', + } + sysctl { 'sys.vm.vfs_cache_pressure': + value => '120', + } + } + + class radosgw inherits role::ceph::eqiad { + system_role { 'role::ceph::eqiad::radosgw': + description => 'Ceph eqiad radosgw', + } + + class { "lvs::realserver": realserver_ips => [ "10.2.2.27" ] } + + include generic::sysctl::high-http-performance + + class { 'ceph::radosgw': + servername => 'ms-fe.eqiad.wmnet', + serveradmin => 'webmas...@wikimedia.org', + } + + monitor_service { 'http': + description => 'HTTP', + check_command => 'check_http', + } + # FIXME: more high-level Swift/S3 check, actual file check + + file { '/usr/lib/ganglia/python_modules/apache_status.py': + source => 'puppet:///files/ganglia/plugins/apache_status.py', + notify => Service['gmond'], + } + + file { '/etc/ganglia/conf.d/apache_status.pyconf': + source => 'puppet:///files/ganglia/plugins/apache_status.pyconf', + notify => Service['gmond'], + } + } +} diff --git a/manifests/site.pp b/manifests/site.pp index d03bd84..4b20501 100644 --- a/manifests/site.pp +++ b/manifests/site.pp @@ -1837,20 +1837,20 @@ } node /^ms-fe100[1-4]\.eqiad\.wmnet$/ { - $cluster = "ceph" + include role::ceph::eqiad::radosgw - if $hostname =~ /^ms-fe100[12]$/ { + if $::hostname =~ /^ms-fe100[12]$/ { $ganglia_aggregator = true + include role::ceph::eqiad::mon } - class { "lvs::realserver": realserver_ips => [ "10.2.2.27" ] } - - include standard } node /^ms-be10[01][0-9]\.eqiad\.wmnet$/ { - $cluster = "ceph" + include role::ceph::eqiad::osd - include standard + if $::hostname == 'ms-be1003' { + include role::ceph::eqiad::mon + } } node /^ms-be300[1-4]\.esams\.wikimedia\.org$/ { diff --git a/modules/ceph/files/ceph-add-disk b/modules/ceph/files/ceph-add-disk new file mode 100644 index 0000000..694aaff --- /dev/null +++ b/modules/ceph/files/ceph-add-disk @@ -0,0 +1,117 @@ +#!/bin/bash + +# Wrapper around ceph-disk-prepare & ceph-disk-activate Workarounds a few bugs +# & limitations of theirs (see bugs inline) +# +# Works with certain Wikimedia-specific assumptions (hostnames, RAID +# controllers, number of disks) and would probably make little sense for +# everyone else. + +#set -e + +## remove: +# MegaCli -DiscardPreservedCache -L8 -a0 +# MegaCli -CfgLDDel -L8 -aALL + +usage() { + echo "I: Usage: $0 <target-id>" + echo "I: target-id is the SCSI target ID, as can be seen with e.g." + echo "I: # MegaCli -LDInfo -Lall -aALL | grep ^Virtual" + echo "I: WARNING: It is NOT the Virtual Disk number." +} +if [ $# -eq 0 ]; then usage; exit 1; fi + +options=':hl:' +while getopts $options option +do + case $option in + h) usage; exit 1;; + l) LD=$OPTARG;; + \?) echo "Unknown option: -$OPTARG" >&2; exit 1;; + :) echo "Missing option argument for -$OPTARG" >&2; exit 1;; + esac +done + +if ! [ "$LD" -eq "$LD" ] 2>/dev/null; then + echo "E: $LD is not a valid target ID" + exit 1 +fi + +if ! MegaCli -LDInfo -Lall -aALL |grep -q "(Target Id: $LD)"; then + echo "E: Unknown target ID $LD on MegaCli" + echo "I: You might want to run:" + echo "I: MegaCli -PDList -aALL | less # identify the unconfigured disk" + echo "I: MegaCli -CfgLdAdd -r0[32:\$pd] -a0" + exit 2 +fi + +if [ ! -e /dev/disk/by-path/pci-*-scsi-0:2:$LD:0 ]; then + echo "I: Rescanning the SCSI bus" + echo "- $LD -" > /sys/class/scsi_host/host0/scan + sleep 2 +fi + +# H710 disks always appear as channel 2 +disk=$(readlink -e /dev/disk/by-path/pci-*-scsi-0\:2\:$LD:0) +if [ "x$disk" = "x" ]; then + echo "E: Couldn't identify the device node for target ID $LD" + exit 2 +fi + +# this is per our convention and assumes 12 disks per box +boxid=$(hostname |sed 's/^ms-be//') +osdid=$(((${boxid} % 1000 - 1) * 12 + $LD - 1)) + +echo -n "I: Setting up OSD $osdid using disk $LD, device node $disk. Confirm (y/n)? " +read answer +if [ "x${answer}" != "xy" ]; then + echo "Aborting" + exit 2 +fi + +# sanity checks +if grep -q $disk /proc/mounts; then + echo "E: $disk or a partition of it seems to be mounted:" + mount | grep $disk + exit 2 +fi + +if grep -q "/var/lib/ceph/osd/ceph-${osdid}" /proc/mounts; then + echo "E: OSD ${osdid} seems to be mounted already:" + mount | grep ceph-${osdid} + exit 2 +fi + +if pgrep -f -- "/usr/bin/ceph-osd --cluster=ceph -i ${osdid}" > /dev/null; then + echo "E: Ceph OSD daemon for ${osdid} seems to be running" + exit 2 +fi + +echo "I: Partioning and formatting the disk" +# disable ceph-disk-activate, or it is spawned prematurely by prepare +# http://tracker.ceph.com/issues/3255 +mv /usr/sbin/ceph-disk-activate /usr/sbin/ceph-disk-activate.off +ceph-disk-prepare ${disk} +sleep 2 +mv /usr/sbin/ceph-disk-activate.off /usr/sbin/ceph-disk-activate + +# explicitly set whoami +# WARNING: this can be dangerous if not previously cleaned up properly +# http://tracker.ceph.com/issues/4032 +mkdir -p /var/lib/ceph/tmp +mountpoint=$(mktemp -d --tmpdir=/var/lib/ceph/tmp/) +mount ${disk}1 ${mountpoint}/ +echo ${osdid} > ${mountpoint}/whoami +umount ${mountpoint} +rmdir ${mountpoint} + +# cleanup possibly old journal +rm /var/lib/ceph/journal/ceph-${osdid} + +# http://tracker.ceph.com/issues/4031 +mount ${disk}1 /var/lib/ceph/osd/ceph-${osdid} +ceph-osd --cluster=ceph -i ${osdid} --mkjournal -f +umount /var/lib/ceph/osd/ceph-${osdid} + +echo "I: activating the new OSD" +ceph-disk-activate --mount ${disk}1 diff --git a/modules/ceph/files/logrotate-radosgw b/modules/ceph/files/logrotate-radosgw new file mode 100644 index 0000000..7fb3391 --- /dev/null +++ b/modules/ceph/files/logrotate-radosgw @@ -0,0 +1,24 @@ +/var/log/radosgw/*.log { + rotate 7 + daily + compress + sharedscripts + postrotate + if which invoke-rc.d > /dev/null 2>&1 && [ -x `which invoke-rc.d` ]; then + invoke-rc.d radosgw reload >/dev/null + elif which service > /dev/null 2>&1 && [ -x `which service` ]; then + service radosgw reload >/dev/null + fi + # Possibly reload twice, but depending on ceph.conf the reload above may be a no-op + if which initctl > /dev/null 2>&1 && [ -x `which initctl` ]; then + # upstart reload isn't very helpful here: + # https://bugs.launchpad.net/upstart/+bug/1012938 + initctl list \ + | sed -n 's/^\(radosgw\+\)[ \t]\+(\([^ \/]\+\)\/\([^ \/]\+\))[ \t]\+start\/.*$/\1 cluster=\2 id=\3/p' \ + | while read l; do + initctl reload -- $l 2>/dev/null || : + done + fi + endscript + missingok +} diff --git a/modules/ceph/manifests/bootstrap_key.pp b/modules/ceph/manifests/bootstrap_key.pp new file mode 100644 index 0000000..1fdf3ce --- /dev/null +++ b/modules/ceph/manifests/bootstrap_key.pp @@ -0,0 +1,34 @@ +define ceph::bootstrap_key($type, $cluster='ceph') { + $keyring = "/var/lib/ceph/bootstrap-${type}/${cluster}.keyring" + + file { "/var/lib/ceph/bootstrap-${type}": + ensure => directory, + mode => '0644', + owner => 'root', + group => 'root', + } + + # ping-pong trickery to securely do permissions, puppet has no umask on exec + file { $keyring: + ensure => present, + mode => '0600', + owner => 'root', + group => 'root', + require => File["/var/lib/ceph/bootstrap-${type}"], + } + + $caps = $type ? { + 'osd' => 'mon "allow command osd create ...; allow command osd crush set ...; allow command auth add * osd allow\ * mon allow\ rwx; allow command mon getmap"', + 'mds' => 'mon "allow command auth get-or-create * osd allow\ * mds allow mon allow\ rwx; allow command mon getmap"', + } + + exec { "ceph bootstrap ${keyring}": + command => "/usr/bin/ceph \ + --cluster=${cluster} \ + auth get-or-create client.bootstrap-${type} \ + ${caps} \ + > ${keyring}", + unless => "/usr/bin/test -s ${keyring}", + require => File[$keyring], + } +} diff --git a/modules/ceph/manifests/init.pp b/modules/ceph/manifests/init.pp new file mode 100644 index 0000000..7d6d389 --- /dev/null +++ b/modules/ceph/manifests/init.pp @@ -0,0 +1,25 @@ +class ceph( + $admin_key, + $config={}, +) { + package { [ 'ceph', 'ceph-dbg' ]: + ensure => present, + } + + file { '/etc/ceph/ceph.conf': + ensure => present, + mode => '0444', + owner => 'root', + group => 'root', + content => template('ceph/ceph.conf.erb'), + require => Package['ceph'], + } + + exec { 'ceph client.admin': + command => "/usr/bin/ceph-authtool /etc/ceph/ceph.client.admin.keyring \ + --create-keyring --name=client.admin \ + --add-key=${admin_key}", + creates => '/etc/ceph/ceph.client.admin.keyring', + require => Package['ceph'], + } +} diff --git a/modules/ceph/manifests/mds.pp b/modules/ceph/manifests/mds.pp new file mode 100644 index 0000000..4aeee73 --- /dev/null +++ b/modules/ceph/manifests/mds.pp @@ -0,0 +1,7 @@ +class ceph::mds { + Class['ceph::mds'] -> Class['ceph'] + + ceph::bootstrap_key { 'ceph-mds': + type => 'mds', + } +} diff --git a/modules/ceph/manifests/mon.pp b/modules/ceph/manifests/mon.pp new file mode 100644 index 0000000..46a54df --- /dev/null +++ b/modules/ceph/manifests/mon.pp @@ -0,0 +1,47 @@ +class ceph::mon( + $monitor_secret, +) { + Class['ceph::mon'] -> Class['ceph'] + + $cluster = 'ceph' + $mon_data = "/var/lib/ceph/mon/ceph-${::hostname}" + $keyring = "/var/lib/ceph/tmp/${cluster}-${::hostname}.mon.keyring" + + file { $mon_data: + ensure => directory, + mode => '0600', + owner => 'root', + group => 'root', + } + + exec { 'ceph-mon-keyring': + command => "/usr/bin/ceph-authtool \ + '${keyring}' \ + --create-keyring \ + --name=mon. \ + --add-key='${monitor_secret}' \ + --cap mon 'allow *'", + creates => $keyring, + unless => "/usr/bin/test -e ${mon_data}/keyring", + before => Exec['ceph-mon-mkfs'], + } + + exec { 'ceph-mon-mkfs': + command => "/usr/bin/ceph-mon --mkfs \ + -i ${::hostname} \ + --keyring ${keyring}", + creates => "${mon_data}/keyring", + notify => Exec['ceph-create-keys'], + } + + exec { 'ceph-create-keys': + command => "/usr/bin/ceph --name=mon. --keyring=${mon_data}/keyring \ + auth add client.admin \ + --in-file=/etc/ceph/ceph.client.admin.keyring \ + mon 'allow *' osd 'allow *' mds allow", + onlyif => "/usr/bin/ceph \ + --admin-daemon /var/run/ceph/ceph-mon.${::hostname}.asok \ + mon_status | egrep -v '\"state\": \"(leader|peon)\"'", + refreshonly => true, + } +} diff --git a/modules/ceph/manifests/osd.pp b/modules/ceph/manifests/osd.pp new file mode 100644 index 0000000..da334dd --- /dev/null +++ b/modules/ceph/manifests/osd.pp @@ -0,0 +1,23 @@ +class ceph::osd { + Class['ceph::osd'] -> Class['ceph'] + + package { ['parted', 'gdisk' ]: + ensure => present, + } + + ceph::bootstrap_key { 'ceph-osd': + type => 'osd', + } + + file { '/usr/local/sbin/ceph-add-disk': + ensure => present, + owner => root, + group => root, + mode => '0555', + source => 'puppet:///modules/ceph/ceph-add-disk', + } + + file { '/var/lib/ceph/journal': + ensure => directory, + } +} diff --git a/modules/ceph/manifests/radosgw.pp b/modules/ceph/manifests/radosgw.pp new file mode 100644 index 0000000..bf3f896 --- /dev/null +++ b/modules/ceph/manifests/radosgw.pp @@ -0,0 +1,60 @@ +class ceph::radosgw( + $servername='localhost', + $serveradmin='webmaster@localhost', +) { + Class['ceph::radosgw'] -> Class['ceph'] + + package { [ 'radosgw', 'radosgw-dbg' ]: + ensure => present, + } + + service { 'radosgw id=radosgw': + ensure => 'running', + hasrestart => true, + hasstatus => true, + provider => 'upstart', + } + + $id = 'client.radosgw' + $keyfname = "/etc/ceph/ceph.${id}.keyring" + exec { "ceph auth ${id}": + command => "/usr/bin/ceph \ + auth get-or-create \ + ${id} \ + mon 'allow r' osd 'allow rwx' > ${keyfname}", + creates => $keyfname, + } + + # for <= bobtail, http://tracker.newdream.net/issues/3813 + file { '/etc/logrotate.d/radosgw': + ensure => present, + source => 'puppet:///modules/ceph/logrotate-radosgw', + } + + class { 'apache': + default_mods => false, + serveradmin => $serveradmin, + } + apache::mod { 'fastcgi': + package => 'libapache2-mod-fastcgi', + } + apache::mod { 'rewrite': } + + file { '/etc/apache2/sites-available/radosgw': + ensure => present, + content => template('ceph/radosgw/vhost.erb'), + require => Package['apache2'], + notify => Service['apache2'], + } + file { '/etc/apache2/sites-enabled/radosgw': + ensure => link, + target => '../sites-available/radosgw', + require => File['/etc/apache2/sites-available/radosgw'], + notify => Service['apache2'], + } + file { '/etc/apache2/sites-enabled/000default': + ensure => absent, + require => Package['apache2'], + notify => Service['apache2'], + } +} diff --git a/modules/ceph/templates/ceph.conf.erb b/modules/ceph/templates/ceph.conf.erb new file mode 100644 index 0000000..9fe7e95 --- /dev/null +++ b/modules/ceph/templates/ceph.conf.erb @@ -0,0 +1,54 @@ +[global] + auth cluster required = cephx + auth service required = cephx + auth client required = cephx + cephx require signatures = true + + keyring = /etc/ceph/$cluster.$name.keyring + + fsid = <%= @config['fsid'] %> + mon host = <%= @config['mon_addresses'].join(' ') %> + mon initial members = <%= @config['mon_initial_members'].join(' ') %> +<% if (! @config['global'].nil?) -%> + + <%- @config['global'].sort.each do |k, v| -%> + <%= k %> = <%= v %> + <%- end -%> +<% end -%> + +[mon] + keyring = /var/lib/ceph/mon/$cluster-$id/keyring +<% if (! @config['mon'].nil?) -%> + + <%- @config['mon'].sort.each do |k, v| -%> + <%= k %> = <%= v %> + <%- end -%> +<% end -%> + +[osd] + keyring = /var/lib/ceph/osd/$cluster-$id/keyring +<% if (! @config['osd'].nil?) -%> + + <%- @config['osd'].sort.each do |k, v| -%> + <%= k %> = <%= v %> + <%- end -%> +<% end -%> + +[mds] + keyring = /var/lib/ceph/mds/$cluster-$id/keyring +<% if (! @config['mds'].nil?) -%> + + <%- @config['mds'].sort.each do |key, value| -%> + <%= key %> = <%= value %> + <%- end -%> +<% end -%> + +[client.radosgw] + log file = /var/log/radosgw/radosgw.log + rgw socket path = /run/radosgw.sock +<% if (! @config['radosgw'].nil?) -%> + + <%- @config['radosgw'].sort.each do |key, value| -%> + <%= key %> = <%= value %> + <%- end -%> +<% end -%> diff --git a/modules/ceph/templates/radosgw/vhost.erb b/modules/ceph/templates/radosgw/vhost.erb new file mode 100644 index 0000000..0944fbe --- /dev/null +++ b/modules/ceph/templates/radosgw/vhost.erb @@ -0,0 +1,34 @@ +StartServers 5 +ServerLimit 50 +MaxClients 800 +FastCgiExternalServer /var/www/dummyradosgw.fcgi -socket /run/radosgw.sock + +LogFormat "%{X-Forwarded-For}i %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"" proxy_combined +LogFormat "%{X-Forwarded-For}i %h %l %u %t \"%r\" %>s %O \"%{Referer}i\" \"%{User-Agent}i\"" proxy_debug + +<VirtualHost *:80> + ServerName <%= @servername %> + ServerAdmin <%= @serveradmin %> + ServerSignature Off + DocumentRoot /var/www/ + + RewriteEngine On + RewriteCond %{REQUEST_URI} !^/monitoring/ + RewriteCond %{REQUEST_URI} !^/($|index.html|favicon.ico|robots.txt) + RewriteRule ^/(.*) /dummyradosgw.fcgi?params=$1&%{QUERY_STRING} [E=HTTP_AUTHORIZATION:%{HTTP:Authorization},L] + + <IfModule mod_fastcgi.c> + <Directory /var/www/> + Options +ExecCGI + AllowOverride All + Order allow,deny + Allow from all + AuthBasicAuthoritative Off + </Directory> + </IfModule> + + AllowEncodedSlashes On + + ErrorLog /var/log/apache2/error.log + CustomLog /var/log/apache2/access.log proxy_debug +</VirtualHost> -- To view, visit https://gerrit.wikimedia.org/r/60997 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ia3dc6838fc776628da1cf9d9cc1e9fff6899f5b1 Gerrit-PatchSet: 4 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Faidon <fai...@wikimedia.org> Gerrit-Reviewer: Faidon <fai...@wikimedia.org> Gerrit-Reviewer: Mark Bergsma <m...@wikimedia.org> Gerrit-Reviewer: jenkins-bot _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits