Re: [pve-devel] [PATCH manager] ceph: extend pveceph purge

2020-05-04 Thread Thomas Lamprecht
On 5/3/20 5:53 PM, Alwin Antreich wrote:
> to clean service directories as well as disable and stop Ceph services.
> Addtionally provide the option to remove crash and log information.
> 
> This patch is in addtion to #2607, as the current cleanup doesn't allow
> to re-configure Ceph, without manual steps during purge.
> 
> Signed-off-by: Alwin Antreich 
> ---
>  PVE/CLI/pveceph.pm | 47 +-
>  PVE/Ceph/Tools.pm  | 71 ++
>  2 files changed, 99 insertions(+), 19 deletions(-)
> 
> diff --git a/PVE/CLI/pveceph.pm b/PVE/CLI/pveceph.pm
> index 064ae545..e77cca2b 100755
> --- a/PVE/CLI/pveceph.pm
> +++ b/PVE/CLI/pveceph.pm
> @@ -18,6 +18,7 @@ use PVE::Storage;
>  use PVE::Tools qw(run_command);
>  use PVE::JSONSchema qw(get_standard_option);
>  use PVE::Ceph::Tools;
> +use PVE::Ceph::Services;
>  use PVE::API2::Ceph;
>  use PVE::API2::Ceph::FS;
>  use PVE::API2::Ceph::MDS;
> @@ -49,25 +50,57 @@ __PACKAGE__->register_method ({
>  parameters => {
>   additionalProperties => 0,
>   properties => {
> + logs => {
> + description => 'Additionally purge Ceph logs, /var/log/ceph.',
> + type => 'boolean',
> + optional => 1,
> + },
> + crash => {
> + description => 'Additionally purge Ceph crash logs, 
> /var/lib/ceph/crash.',
> + type => 'boolean',
> + optional => 1,
> + },
>   },
>  },
>  returns => { type => 'null' },
>  code => sub {
>   my ($param) = @_;
>  
> - my $monstat;
> + my $message;
> + my $pools = [];
> + my $monstat = {};
> + my $mdsstat = {};
> + my $osdstat = [];
>  
>   eval {
>   my $rados = PVE::RADOS->new();
> - my $monstat = $rados->mon_command({ prefix => 'mon_status' });
> + $pools = PVE::Ceph::Tools::ls_pools(undef, $rados);
> + $monstat = PVE::Ceph::Services::get_services_info('mon', undef, 
> $rados);
> + $mdsstat = PVE::Ceph::Services::get_services_info('mds', undef, 
> $rados);
> + $osdstat = $rados->mon_command({ prefix => 'osd metadata' });
>   };
> - my $err = $@;

maybe still a `warn $@ if $@` 

Nonetheless of above I get "unable to get monitor info from DNS SRV with 
service name: ceph-mon"
but with the warn I see that the rados commands failed too, giving some context.

>  
> - die "detected running ceph services- unable to purge data\n"
> - if !$err;
> + my $osd = map { $_->{hostname} eq $nodename ? 1 : () } @$osdstat;

hmm, the map here is not really intuitive, IMO, why not grep {} ? as you check
if some condition is there not really mapping anything to something else.

my $osd = grep { $_->{hostname} eq $nodename } @$osdstat;

is even shorter :)

> + my $mds = map { $mdsstat->{$_}->{host} eq $nodename ? 1 : () } keys 
> %$mdsstat;
> + my $mon = map { $monstat->{$_}->{host} eq $nodename ? 1 : () } keys 
> %$monstat;

same as above for above two.

> +
> + # no pools = no data
> + $message .= "- remove pools, this will !!DESTROY DATA!!\n" if @$pools;
> + $message .= "- remove active OSD on $nodename\n" if $osd;
> + $message .= "- remove active MDS on $nodename\n" if $mds;
> + $message .= "- remove other MONs, $nodename is not the last MON\n"
> + if scalar(keys %$monstat) > 1 && $mon;
> +
> + # display all steps at once
> + die "Unable to purge Ceph!\n\nTo continue:\n$message" if $message;
> +
> + my $services = PVE::Ceph::Services::get_local_services();
> + $services->{mon} = $monstat if $mon;
> + $services->{crash}->{$nodename} = { direxists => 1 } if $param->{crash};
> + $services->{logs}->{$nodename} = { direxists => 1 } if $param->{logs};
>  
> - # fixme: this is dangerous - should we really support this function?
> - PVE::Ceph::Tools::purge_all_ceph_files();
> + PVE::Ceph::Tools::purge_all_ceph_services($services);
> + PVE::Ceph::Tools::purge_all_ceph_files($services);
>  
>   return undef;
>  }});
> diff --git a/PVE/Ceph/Tools.pm b/PVE/Ceph/Tools.pm
> index e6225b78..09d22d36 100644
> --- a/PVE/Ceph/Tools.pm
> +++ b/PVE/Ceph/Tools.pm
> @@ -11,6 +11,8 @@ use JSON;
>  use PVE::Tools qw(run_command dir_glob_foreach);
>  use PVE::Cluster qw(cfs_read_file);
>  use PVE::RADOS;
> +use PVE::Ceph::Services;
> +use PVE::CephConfig;
>  
>  my $ccname = 'ceph'; # ceph cluster name
>  my $ceph_cfgdir = "/etc/ceph";
> @@ -42,6 +44,7 @@ my $config_hash = {
>  ceph_bootstrap_mds_keyring => $ceph_bootstrap_mds_keyring,
>  ceph_mds_data_dir => $ceph_mds_data_dir,
>  long_rados_timeout => 60,
> +ceph_cfgpath => $ceph_cfgpath,
>  };
>  
>  sub get_local_version {
> @@ -89,20 +92,64 @@ sub get_config {
>  }
>  
>  sub purge_all_ceph_files {
> -# fixme: this is very dangerous - should we really support this function?
> -
> -unlink $ceph_cfgpath;
> -
> -unlink $pve_ceph_cfgpath;
> -unlink 

[pve-devel] [PATCH manager] ceph: extend pveceph purge

2020-05-03 Thread Alwin Antreich
to clean service directories as well as disable and stop Ceph services.
Addtionally provide the option to remove crash and log information.

This patch is in addtion to #2607, as the current cleanup doesn't allow
to re-configure Ceph, without manual steps during purge.

Signed-off-by: Alwin Antreich 
---
 PVE/CLI/pveceph.pm | 47 +-
 PVE/Ceph/Tools.pm  | 71 ++
 2 files changed, 99 insertions(+), 19 deletions(-)

diff --git a/PVE/CLI/pveceph.pm b/PVE/CLI/pveceph.pm
index 064ae545..e77cca2b 100755
--- a/PVE/CLI/pveceph.pm
+++ b/PVE/CLI/pveceph.pm
@@ -18,6 +18,7 @@ use PVE::Storage;
 use PVE::Tools qw(run_command);
 use PVE::JSONSchema qw(get_standard_option);
 use PVE::Ceph::Tools;
+use PVE::Ceph::Services;
 use PVE::API2::Ceph;
 use PVE::API2::Ceph::FS;
 use PVE::API2::Ceph::MDS;
@@ -49,25 +50,57 @@ __PACKAGE__->register_method ({
 parameters => {
additionalProperties => 0,
properties => {
+   logs => {
+   description => 'Additionally purge Ceph logs, /var/log/ceph.',
+   type => 'boolean',
+   optional => 1,
+   },
+   crash => {
+   description => 'Additionally purge Ceph crash logs, 
/var/lib/ceph/crash.',
+   type => 'boolean',
+   optional => 1,
+   },
},
 },
 returns => { type => 'null' },
 code => sub {
my ($param) = @_;
 
-   my $monstat;
+   my $message;
+   my $pools = [];
+   my $monstat = {};
+   my $mdsstat = {};
+   my $osdstat = [];
 
eval {
my $rados = PVE::RADOS->new();
-   my $monstat = $rados->mon_command({ prefix => 'mon_status' });
+   $pools = PVE::Ceph::Tools::ls_pools(undef, $rados);
+   $monstat = PVE::Ceph::Services::get_services_info('mon', undef, 
$rados);
+   $mdsstat = PVE::Ceph::Services::get_services_info('mds', undef, 
$rados);
+   $osdstat = $rados->mon_command({ prefix => 'osd metadata' });
};
-   my $err = $@;
 
-   die "detected running ceph services- unable to purge data\n"
-   if !$err;
+   my $osd = map { $_->{hostname} eq $nodename ? 1 : () } @$osdstat;
+   my $mds = map { $mdsstat->{$_}->{host} eq $nodename ? 1 : () } keys 
%$mdsstat;
+   my $mon = map { $monstat->{$_}->{host} eq $nodename ? 1 : () } keys 
%$monstat;
+
+   # no pools = no data
+   $message .= "- remove pools, this will !!DESTROY DATA!!\n" if @$pools;
+   $message .= "- remove active OSD on $nodename\n" if $osd;
+   $message .= "- remove active MDS on $nodename\n" if $mds;
+   $message .= "- remove other MONs, $nodename is not the last MON\n"
+   if scalar(keys %$monstat) > 1 && $mon;
+
+   # display all steps at once
+   die "Unable to purge Ceph!\n\nTo continue:\n$message" if $message;
+
+   my $services = PVE::Ceph::Services::get_local_services();
+   $services->{mon} = $monstat if $mon;
+   $services->{crash}->{$nodename} = { direxists => 1 } if $param->{crash};
+   $services->{logs}->{$nodename} = { direxists => 1 } if $param->{logs};
 
-   # fixme: this is dangerous - should we really support this function?
-   PVE::Ceph::Tools::purge_all_ceph_files();
+   PVE::Ceph::Tools::purge_all_ceph_services($services);
+   PVE::Ceph::Tools::purge_all_ceph_files($services);
 
return undef;
 }});
diff --git a/PVE/Ceph/Tools.pm b/PVE/Ceph/Tools.pm
index e6225b78..09d22d36 100644
--- a/PVE/Ceph/Tools.pm
+++ b/PVE/Ceph/Tools.pm
@@ -11,6 +11,8 @@ use JSON;
 use PVE::Tools qw(run_command dir_glob_foreach);
 use PVE::Cluster qw(cfs_read_file);
 use PVE::RADOS;
+use PVE::Ceph::Services;
+use PVE::CephConfig;
 
 my $ccname = 'ceph'; # ceph cluster name
 my $ceph_cfgdir = "/etc/ceph";
@@ -42,6 +44,7 @@ my $config_hash = {
 ceph_bootstrap_mds_keyring => $ceph_bootstrap_mds_keyring,
 ceph_mds_data_dir => $ceph_mds_data_dir,
 long_rados_timeout => 60,
+ceph_cfgpath => $ceph_cfgpath,
 };
 
 sub get_local_version {
@@ -89,20 +92,64 @@ sub get_config {
 }
 
 sub purge_all_ceph_files {
-# fixme: this is very dangerous - should we really support this function?
-
-unlink $ceph_cfgpath;
-
-unlink $pve_ceph_cfgpath;
-unlink $pve_ckeyring_path;
-unlink $pve_mon_key_path;
-
-unlink $ceph_bootstrap_osd_keyring;
-unlink $ceph_bootstrap_mds_keyring;
+my ($services) = @_;
+my $is_local_mon;
+my $monlist = [ split(',', 
PVE::CephConfig::get_monaddr_list($pve_ceph_cfgpath)) ];
+
+foreach my $ceph (keys %$services) {
+   my $type = $services->{$ceph};
+   next if (!%$type);
+
+   foreach my $name (keys %$type) {
+   my $dir_exists = $type->{$name}->{direxists};
+
+   $is_local_mon = grep($type->{$name}->{addr}, @$monlist)
+   if $ceph eq 'mon';
+
+   my $path = "/var/lib/ceph/$ceph";
+   $path =