On 12/06/2017 03:29 PM, Fabian Grünbichler wrote: > some nits inline, most not introduced in the patch itself ;) > > On Mon, Dec 04, 2017 at 12:11:09PM +0100, Thomas Lamprecht wrote: >> Factor out common code, which will be used by the new API endpoint to >> join a cluster and the old legacy SSH method which we will keep for a >> bit. >> >> Signed-off-by: Thomas Lamprecht <t.lampre...@proxmox.com> >> --- >> data/PVE/CLI/pvecm.pm | 152 >> ++------------------------------------------------ >> data/PVE/Cluster.pm | 146 ++++++++++++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 151 insertions(+), 147 deletions(-) >> >> diff --git a/data/PVE/CLI/pvecm.pm b/data/PVE/CLI/pvecm.pm >> index a92f831..7aedd3d 100755 >> --- a/data/PVE/CLI/pvecm.pm >> +++ b/data/PVE/CLI/pvecm.pm >> @@ -25,40 +25,6 @@ my $backupdir = "/var/lib/pve-cluster/backup"; >> my $dbfile = "$libdir/config.db"; >> my $authfile = "/etc/corosync/authkey"; >> >> -sub backup_database { >> - >> - print "backup old database\n"; >> - >> - mkdir $backupdir; >> - >> - my $ctime = time(); >> - my $cmd = [ >> - ['echo', '.dump'], >> - ['sqlite3', $dbfile], >> - ['gzip', '-', \ ">${backupdir}/config-${ctime}.sql.gz"], >> - ]; >> - >> - run_command($cmd, 'errmsg' => "cannot backup old database\n"); >> - >> - # purge older backup >> - my $maxfiles = 10; >> - >> - my @bklist = (); >> - foreach my $fn (<$backupdir/config-*.sql.gz>) { >> - if ($fn =~ m!/config-(\d+)\.sql.gz$!) 
{ >> - push @bklist, [$fn, $1]; >> - } >> - } >> - >> - @bklist = sort { $b->[1] <=> $a->[1] } @bklist; >> - >> - while (scalar (@bklist) >= $maxfiles) { >> - my $d = pop @bklist; >> - print "delete old backup '$d->[0]'\n"; >> - unlink $d->[0]; >> - } >> -} >> - >> >> __PACKAGE__->register_method ({ >> name => 'keygen', >> @@ -300,67 +266,10 @@ __PACKAGE__->register_method ({ >> PVE::Cluster::setup_rootsshconfig(); >> PVE::Cluster::setup_ssh_keys(); >> >> + PVE::Cluster::assert_joinable($param->{ring0_addr}, >> $param->{ring1_addr}, $param->{force}); >> + >> my $host = $param->{hostname}; >> >> - my ($errors, $warnings) = ('', ''); >> - >> - my $error = sub { >> - my ($msg, $suppress) = @_; >> - >> - if ($suppress) { >> - $warnings .= "* $msg\n"; >> - } else { >> - $errors .= "* $msg\n"; >> - } >> - }; >> - >> - if (!$param->{force}) { >> - >> - if (-f $authfile) { >> - &$error("authentication key '$authfile' already exists", >> $param->{force}); >> - } >> - >> - if (-f $clusterconf) { >> - &$error("cluster config '$clusterconf' already exists", >> $param->{force}); >> - } >> - >> - my $vmlist = PVE::Cluster::get_vmlist(); >> - if ($vmlist && $vmlist->{ids} && scalar(keys %{$vmlist->{ids}})) { >> - &$error("this host already contains virtual guests", >> $param->{force}); >> - } >> - >> - if (system("corosync-quorumtool -l >/dev/null 2>&1") == 0) { >> - &$error("corosync is already running, is this node already in a >> cluster?!", $param->{force}); >> - } >> - } >> - >> - # check if corosync ring IPs are configured on the current nodes >> interfaces >> - my $check_ip = sub { >> - my $ip = shift; >> - if (defined($ip)) { >> - if (!PVE::JSONSchema::pve_verify_ip($ip, 1)) { >> - my $host = $ip; >> - eval { $ip = PVE::Network::get_ip_from_hostname($host); }; >> - if ($@) { >> - &$error("cannot use '$host': $@\n") ; >> - return; >> - } >> - } >> - >> - my $cidr = (Net::IP::ip_is_ipv6($ip)) ? 
"$ip/128" : "$ip/32"; >> - my $configured_ips = >> PVE::Network::get_local_ip_from_cidr($cidr); >> - >> - &$error("cannot use IP '$ip', it must be configured exactly >> once on local node!\n") >> - if (scalar(@$configured_ips) != 1); >> - } >> - }; >> - >> - &$check_ip($param->{ring0_addr}); >> - &$check_ip($param->{ring1_addr}); >> - >> - warn "warning, ignore the following errors:\n$warnings" if $warnings; >> - die "detected the following error(s):\n$errors" if $errors; >> - >> # make sure known_hosts is on local filesystem >> PVE::Cluster::ssh_unmerge_known_hosts(); >> >> @@ -372,11 +281,8 @@ __PACKAGE__->register_method ({ >> 'pvecm', 'addnode', $nodename, '--force', 1]; >> >> push @$cmd, '--nodeid', $param->{nodeid} if $param->{nodeid}; >> - >> push @$cmd, '--votes', $param->{votes} if defined($param->{votes}); >> - >> push @$cmd, '--ring0_addr', $param->{ring0_addr} if >> defined($param->{ring0_addr}); >> - >> push @$cmd, '--ring1_addr', $param->{ring1_addr} if >> defined($param->{ring1_addr}); >> >> if (system (@$cmd) != 0) { >> @@ -394,58 +300,10 @@ __PACKAGE__->register_method ({ >> >> system(@$cmd) == 0 || die "can't rsync data from host '$host'\n"; >> >> - mkdir "/etc/corosync"; >> - my $confbase = basename($clusterconf); >> + my $corosync_conf = >> PVE::Tools::file_get_contents("$tmpdir/corosync.conf"); >> + my $corosync_authkey = >> PVE::Tools::file_get_contents("$tmpdir/authkey"); >> >> - $cmd = "cp '$tmpdir/$confbase' '/etc/corosync/$confbase'"; >> - system($cmd) == 0 || die "can't copy cluster configuration\n"; >> - >> - my $keybase = basename($authfile); >> - system ("cp '$tmpdir/$keybase' '$authfile'") == 0 || >> - die "can't copy '$tmpdir/$keybase' to '$authfile'\n"; >> - >> - print "stopping pve-cluster service\n"; >> - >> - system("umount $basedir -f >/dev/null 2>&1"); >> - system("systemctl stop pve-cluster") == 0 || >> - die "can't stop pve-cluster service\n"; >> - >> - backup_database(); >> - >> - unlink $dbfile; >> - >> - system("systemctl 
start pve-cluster") == 0 || >> - die "starting pve-cluster failed\n"; >> - >> - system("systemctl start corosync"); >> - >> - # wait for quorum >> - my $printqmsg = 1; >> - while (!PVE::Cluster::check_cfs_quorum(1)) { >> - if ($printqmsg) { >> - print "waiting for quorum..."; >> - STDOUT->flush(); >> - $printqmsg = 0; >> - } >> - sleep(1); >> - } >> - print "OK\n" if !$printqmsg; >> - >> - my $local_ip_address = PVE::Cluster::remote_node_ip($nodename); >> - >> - print "generating node certificates\n"; >> - PVE::Cluster::gen_pve_node_files($nodename, $local_ip_address); >> - >> - print "merge known_hosts file\n"; >> - PVE::Cluster::ssh_merge_known_hosts($nodename, $local_ip_address, >> 1); >> - >> - print "restart services\n"; >> - # restart pvedaemon (changed certs) >> - system("systemctl restart pvedaemon"); >> - # restart pveproxy (changed certs) >> - system("systemctl restart pveproxy"); >> - >> - print "successfully added node '$nodename' to cluster.\n"; >> + PVE::Cluster::finish_join($host, $corosync_conf, $corosync_authkey); >> }; >> my $err = $@; >> >> diff --git a/data/PVE/Cluster.pm b/data/PVE/Cluster.pm >> index 9a248ed..531a19d 100644 >> --- a/data/PVE/Cluster.pm >> +++ b/data/PVE/Cluster.pm >> @@ -1669,4 +1669,150 @@ sub ssh_info_to_command { >> return $cmd; >> } >> >> +sub assert_joinable { >> + my ($ring0_addr, $ring1_addr, $force) = @_; >> + >> + my $clusterconf = "/etc/pve/corosync.conf"; >> + my $authfile = "/etc/corosync/authkey"; > > not sure if we want to re-use those somewhere else at some point and > move them to the other paths in the beginning of the file? >
They were on top of the CLIHandler, so yes, probably. As this was the single place where we used it, I put it initially here and as it is only cosmetic I let it be for the RFC. AFAIK, the CLIHandler could still need a cleanup after this series for such things, some may have become unused. >> + >> + my ($errors, $warnings) = ('', ''); >> + my $error = sub { >> + my ($msg, $suppress) = @_; >> + >> + if ($suppress) { >> + $warnings .= "* $msg\n"; >> + } else { >> + $errors .= "* $msg\n"; >> + } >> + }; >> + >> + if (-f $authfile) { >> + $error->("authentication key '$authfile' already exists", $force); >> + } >> + >> + if (-f $clusterconf) { >> + $error->("cluster config '$clusterconf' already exists", $force); >> + } > > you changed the behaviour (probably to what was originally intended, but > should be mentioned in the commit message ;)) > Oh, sorry, will mention it in the commit message in v3. >> + >> + my $vmlist = PVE::Cluster::get_vmlist(); >> + if ($vmlist && $vmlist->{ids} && scalar(keys %{$vmlist->{ids}})) { >> + $error->("this host already contains virtual guests", $force); >> + } >> + >> + if (system("corosync-quorumtool -l >/dev/null 2>&1") == 0) { >> + $error->("corosync is already running, is this node already in a >> cluster?!", $force); >> + } >> + >> + # check if corosync ring IPs are configured on the current nodes >> interfaces >> + my $check_ip = sub { >> + my $ip = shift // return; >> + if (!PVE::JSONSchema::pve_verify_ip($ip, 1)) { >> + my $host = $ip; >> + eval { $ip = PVE::Network::get_ip_from_hostname($host); }; >> + if ($@) { >> + $error->("cannot use '$host': $@\n", 1) ; >> + return; >> + } >> + } >> + >> + my $cidr = (Net::IP::ip_is_ipv6($ip)) ? 
"$ip/128" : "$ip/32"; >> + my $configured_ips = PVE::Network::get_local_ip_from_cidr($cidr); >> + >> + $error->("cannot use IP '$ip', it must be configured exactly once on >> local node!\n") >> + if (scalar(@$configured_ips) != 1); >> + }; >> + >> + $check_ip->($ring0_addr); >> + $check_ip->($ring1_addr); >> + >> + warn "warning, ignore the following errors:\n$warnings" if $warnings; >> + die "detected the following error(s):\n$errors" if $errors; >> +} >> + >> +my $backup_cfs_database = sub { >> + my ($dbfile) = @_; >> + >> + print "backup old database\n"; >> + >> + my $backupdir = "/var/lib/pve-cluster/backup"; >> + mkdir $backupdir; >> + >> + my $ctime = time(); >> + my $cmd = [ >> + ['echo', '.dump'], >> + ['sqlite3', $dbfile], >> + ['gzip', '-', \ ">${backupdir}/config-${ctime}.sql.gz"], >> + ]; >> + >> + PVE::Tools::run_command($cmd, 'errmsg' => "cannot backup old >> database\n"); >> + >> + # purge older backup >> + my $maxfiles = 10; >> + >> + my @bklist = (); >> + foreach my $fn (<$backupdir/config-*.sql.gz>) { >> + if ($fn =~ m!/config-(\d+)\.sql.gz$!) { >> + push @bklist, [$fn, $1]; >> + } >> + } >> + >> + @bklist = sort { $b->[1] <=> $a->[1] } @bklist; >> + >> + while (scalar (@bklist) >= $maxfiles) { >> + my $d = pop @bklist; >> + print "delete old backup '$d->[0]'\n"; >> + unlink $d->[0]; >> + } >> +}; >> + >> +sub finish_join { >> + my ($nodename, $corosync_conf, $corosync_authkey) = @_; >> + >> + my $dbfile = "/var/lib/pve-cluster/config.db"; >> + my $localclusterconf = "/etc/corosync/corosync.conf"; >> + my $authfile = "/etc/corosync/authkey"; > > see above? > >> + >> + mkdir "/etc/corosync"; >> + PVE::Tools::file_set_contents($authfile, $corosync_authkey); >> + PVE::Tools::file_set_contents($localclusterconf, $corosync_conf); >> + >> + print "stopping pve-cluster service\n"; >> + >> + system("umount $basedir -f >/dev/null 2>&1"); > > this seems a bit harsh, maybe re-order it after stopping the service or > drop altogether? 
> Yes, this did not really catch my attention while copying the code. I'd rather drop it; if the service could be stopped it really *shouldn't* be running anymore >> + die "can't stop pve-cluster service\n" if system("systemctl stop >> pve-cluster") != 0; >> + >> + $backup_cfs_database->($dbfile); >> + unlink $dbfile; >> + >> + system("systemctl start pve-cluster") == 0 || die "starting pve-cluster >> failed\n"; >> + system("systemctl start corosync"); >> + >> + # wait for quorum >> + my $printqmsg = 1; >> + while (!PVE::Cluster::check_cfs_quorum(1)) { >> + if ($printqmsg) { >> + print "waiting for quorum..."; >> + STDOUT->flush(); >> + $printqmsg = 0; >> + } >> + sleep(1); >> + } >> + print "OK\n" if !$printqmsg; >> + >> + my $local_ip_address = PVE::Cluster::remote_node_ip($nodename); >> + >> + print "generating node certificates\n"; >> + PVE::Cluster::gen_pve_node_files($nodename, $local_ip_address); >> + >> + print "merge known_hosts file\n"; >> + PVE::Cluster::ssh_merge_known_hosts($nodename, $local_ip_address, 1); >> + >> + print "restart services\n"; >> + # restart pvedaemon and pveproxy (changed certs) >> + system("systemctl restart pvedaemon pveproxy"); >> + >> + print "successfully added node '$nodename' to cluster.\n"; >> +} >> + >> + >> 1; >> -- >> 2.11.0 _______________________________________________ pve-devel mailing list pve-devel@pve.proxmox.com https://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel