The initial implementation of fetching a ha resource's rules in the scheduler and other users is rather inefficient as it needs to go through the raw rules hash map and gathers quite a lot of information which doesn't change unless the HA rules need to be re-translated anyway.
Therefore add a compile stage when re-translating the HA rules, which compiles each rule plugin's rules into a more compact data structure. This reduces the unnecessary recomputation of invariant data done in the hot path of the scheduler's select_service_node(...). Signed-off-by: Daniel Kral <d.k...@proxmox.com> --- I didn't add this to the commit message as I'm already referencing the future in quite a lot of these, so I'll put it here: The $node_affinity contains a `nodes` property for each ha resource $sid, because in a future patch series, we'll need more information about the $node_affinity to properly implement the migration blockers for node affinity rules as well. I didn't want to add a patch in that future series just to make a big unnecessary diff there, but if that's better I'll happily send a v2 for this. src/PVE/HA/Config.pm | 9 +-- src/PVE/HA/Manager.pm | 29 +++++---- src/PVE/HA/Rules.pm | 59 +++++++++++++++++ src/PVE/HA/Rules/NodeAffinity.pm | 82 ++++++++++++------------ src/PVE/HA/Rules/ResourceAffinity.pm | 95 ++++++++++++++-------------- src/test/test_failover1.pl | 10 +-- 6 files changed, 175 insertions(+), 109 deletions(-) diff --git a/src/PVE/HA/Config.pm b/src/PVE/HA/Config.pm index bd87a0ab..af2a1cde 100644 --- a/src/PVE/HA/Config.pm +++ b/src/PVE/HA/Config.pm @@ -224,7 +224,7 @@ sub read_and_check_rules_config { return $rules; } -sub read_and_check_effective_rules_config { +sub read_and_compile_rules_config { my $rules = read_and_check_rules_config(); @@ -239,7 +239,7 @@ sub read_and_check_effective_rules_config { PVE::HA::Rules->transform($rules, $nodes); - return $rules; + return PVE::HA::Rules->compile($rules, $nodes); } sub write_rules_config { @@ -391,8 +391,9 @@ sub get_resource_motion_info { my $ss = $manager_status->{service_status}; my $ns = $manager_status->{node_status}; - my $rules = read_and_check_effective_rules_config(); - my ($together, $separate) = get_affinitive_resources($rules, $sid); + my $affinity = read_and_compile_rules_config(); + my ($together, $separate) = + get_affinitive_resources($affinity->{'resource-affinity'}, $sid); for my $csid (sort keys %$together) { next if !defined($ss->{$csid}); diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm index 3013d369..748667e8 100644 --- a/src/PVE/HA/Manager.pm +++ b/src/PVE/HA/Manager.pm @@ -121,11 +121,11 @@ sub flush_master_status { =head3 select_service_node(...) -=head3 select_service_node($rules, $online_node_usage, $sid, $service_conf, $sd, $node_preference) +=head3 select_service_node($affinity, $online_node_usage, $sid, $service_conf, $sd, $node_preference) Used to select the best fitting node for the service C<$sid>, with the -configuration C<$service_conf> and state C<$sd>, according to the rules defined -in C<$rules>, available node utilization in C<$online_node_usage>, and the +configuration C<$service_conf> and state C<$sd>, according to the affinity +in C<$affinity>, available node utilization in C<$online_node_usage>, and the given C<$node_preference>. The C<$node_preference> can be set to: @@ -143,20 +143,21 @@ The C<$node_preference> can be set to: =cut sub select_service_node { - my ($rules, $online_node_usage, $sid, $service_conf, $sd, $node_preference) = @_; + my ($affinity, $online_node_usage, $sid, $service_conf, $sd, $node_preference) = @_; die "'$node_preference' is not a valid node_preference for select_service_node\n" if $node_preference !~ m/(none|best-score|try-next)/; my ($current_node, $tried_nodes, $maintenance_fallback) = $sd->@{qw(node failed_nodes maintenance_node)}; + my ($node_affinity, $resource_affinity) = $affinity->@{qw(node-affinity resource-affinity)}; my $online_nodes = { map { $_ => 1 } $online_node_usage->list_nodes() }; - my ($allowed_nodes, $pri_nodes) = get_node_affinity($rules, $sid, $online_nodes); + my ($allowed_nodes, $pri_nodes) = get_node_affinity($node_affinity, $sid, $online_nodes); return undef if !%$pri_nodes; - my ($together, $separate) = get_resource_affinity($rules, $sid, $online_node_usage); + my ($together, $separate) = get_resource_affinity($resource_affinity, $sid, $online_node_usage); # stay on current node if possible (avoids random migrations) if ( @@ -418,7 +419,8 @@ sub execute_migration { my ($haenv, $ss) = $self->@{qw(haenv ss)}; - my ($together, $separate) = get_affinitive_resources($self->{rules}, $sid); + my $resource_affinity = $self->{affinity}->{'resource-affinity'}; + my ($together, $separate) = get_affinitive_resources($resource_affinity, $sid); for my $csid (sort keys %$separate) { next if !defined($ss->{$csid}); @@ -746,7 +748,7 @@ sub manage { PVE::HA::Groups::migrate_groups_to_resources($self->{groups}, $sc); if ( - !$self->{rules} + !$self->{affinity} || $has_changed_nodelist || $new_rules->{digest} ne $self->{last_rules_digest} || $self->{groups}->{digest} ne $self->{last_groups_digest} @@ -756,11 +758,12 @@ sub manage { my $nodes = $self->{ns}->list_nodes(); my $messages = PVE::HA::Rules->transform($new_rules, $nodes); + my $new_affinity = PVE::HA::Rules->compile($new_rules, $nodes); $haenv->log('info', $_) for @$messages; - $self->{rules} = $new_rules; + $self->{affinity} = $new_affinity; - $self->{last_rules_digest} = $self->{rules}->{digest}; + $self->{last_rules_digest} = $new_rules->{digest}; $self->{last_groups_digest} = $self->{groups}->{digest}; $self->{last_services_digest} = $services_digest; } @@ -1020,7 +1023,7 @@ sub next_state_request_start { if ($self->{crs}->{rebalance_on_request_start}) { my $selected_node = select_service_node( - $self->{rules}, + $self->{affinity}, $self->{online_node_usage}, $sid, $cd, @@ -1187,7 +1190,7 @@ sub next_state_started { } my $node = select_service_node( - $self->{rules}, + $self->{affinity}, $self->{online_node_usage}, $sid, $cd, @@ -1306,7 +1309,7 @@ sub next_state_recovery { $self->recompute_online_node_usage(); # we want the most current node state my $recovery_node = select_service_node( - $self->{rules}, + $self->{affinity}, $self->{online_node_usage}, $sid, $cd, diff --git a/src/PVE/HA/Rules.pm b/src/PVE/HA/Rules.pm index a075feac..d7593532 100644 --- a/src/PVE/HA/Rules.pm +++ b/src/PVE/HA/Rules.pm @@ -47,6 +47,12 @@ Each I<rule plugin> is required to implement the methods C<L<type()>>, C<L<properties()>>, and C<L<options>> from the C<L<PVE::SectionConfig>> to extend the properties of this I<base plugin> with plugin-specific properties. +Each I<rule plugin> is required to implement the method +C<L<< plugin_compile()|/$class->plugin_compile(...) >>> to distill a compiled +representation of the more verbose C<$rules> from the config file, which is +returned by C<L<< compile()|/$class->compile(...) >>> to be more appropriate +and efficient for the scheduler and other users. + =head2 REGISTERING CHECKS In order to C<L<< register checks|/$class->register_check(...) >>> for a rule @@ -453,6 +459,59 @@ sub transform : prototype($$$) { return $messages; } +=head3 $class->plugin_compile(...) + +=head3 $class->plugin_compile($rules, $nodes) + +B<MANDATORY:> Must be implemented in a I<rule plugin>. + +Called in C<$class->compile($rules, $nodes)> in order to get a more compact +representation of the rule plugin's rules in C<$rules>, which holds only the +relevant information for the scheduler and other users. + +C<$nodes> is a list of the configured cluster nodes. + +=cut + +sub plugin_compile { + my ($class, $rules, $nodes) = @_; + + die "implement in subclass"; +} + +=head3 $class->compile(...) + +=head3 $class->compile($rules, $nodes) + +Compiles and returns a hash, where each key-value pair represents a rule +plugin's more compact representation compiled from the more verbose rules +defined in C<$rules>. + +C<$nodes> is a list of the configured cluster nodes. + +The transformation to the compact representation for each rule plugin is +implemented in C<L<< plugin_compile()|/$class->plugin_compile(...) >>>. + +=cut + +sub compile { + my ($class, $rules, $nodes) = @_; + + my $affinity = {}; + + for my $type (@$types) { + my $plugin = $class->lookup($type); + my $plugin_affinity = $plugin->plugin_compile($rules, $nodes); + + die "plugin_compile(...) of type '$type' must return hash reference\n" + if ref($plugin_affinity) ne 'HASH'; + + $affinity->{$type} = $plugin_affinity; + } + + return $affinity; +} + =head1 FUNCTIONS =cut diff --git a/src/PVE/HA/Rules/NodeAffinity.pm b/src/PVE/HA/Rules/NodeAffinity.pm index b7abf9a4..1af442ec 100644 --- a/src/PVE/HA/Rules/NodeAffinity.pm +++ b/src/PVE/HA/Rules/NodeAffinity.pm @@ -155,6 +155,40 @@ sub get_plugin_check_arguments { return $result; } +sub plugin_compile { + my ($class, $rules, $nodes) = @_; + + my $node_affinity = {}; + + PVE::HA::Rules::foreach_rule( + $rules, + sub { + my ($rule) = @_; + + my $effective_nodes = dclone($rule->{nodes}); + + # add remaining nodes with low priority for non-strict node affinity + if (!$rule->{strict}) { + for my $node (@$nodes) { + next if defined($effective_nodes->{$node}); + + $effective_nodes->{$node} = { priority => -1 }; + } + } + + for my $sid (keys $rule->{resources}->%*) { + $node_affinity->{$sid} = { + nodes => $effective_nodes, + }; + } + }, + type => 'node-affinity', + exclude_disabled_rules => 1, + ); + + return $node_affinity; +} + =head1 NODE AFFINITY RULE CHECKERS =cut @@ -217,30 +251,10 @@ __PACKAGE__->register_check( =cut -my $get_resource_node_affinity_rule = sub { - my ($rules, $sid) = @_; - - # with the current restriction a resource can only be in one node affinity rule - my $node_affinity_rule; - PVE::HA::Rules::foreach_rule( - $rules, - sub { - my ($rule) = @_; - - $node_affinity_rule = dclone($rule) if !$node_affinity_rule; - }, - sid => $sid, - type => 'node-affinity', - exclude_disabled_rules => 1, - ); - - return $node_affinity_rule; -}; - -=head3 get_node_affinity($rules, $sid, $online_node_usage) +=head3 get_node_affinity($node_affinity, $sid, $online_node_usage) Returns a list of two hashes representing the node affinity of C<$sid> -according to the node affinity rules in C<$rules> and the available nodes in +according to the node affinity C<$node_affinity> and the available nodes in the C<$online_nodes> hash. The first hash is a hash set of available nodes, i.e. nodes where the @@ -251,31 +265,15 @@ If there are no available nodes at all, returns C<undef>. =cut -sub get_node_affinity : prototype($$$) { - my ($rules, $sid, $online_nodes) = @_; +sub get_node_affinity { + my ($node_affinity, $sid, $online_nodes) = @_; - my $node_affinity_rule = $get_resource_node_affinity_rule->($rules, $sid); - - # default to a node affinity rule with all available nodes - if (!$node_affinity_rule) { - for my $node (keys %$online_nodes) { - $node_affinity_rule->{nodes}->{$node} = { priority => 0 }; - } - } - - # add remaining nodes with low priority for non-strict node affinity rules - if (!$node_affinity_rule->{strict}) { - for my $node (keys %$online_nodes) { - next if defined($node_affinity_rule->{nodes}->{$node}); - - $node_affinity_rule->{nodes}->{$node} = { priority => -1 }; - } - } + return ($online_nodes, $online_nodes) if !defined($node_affinity->{$sid}->{nodes}); my $allowed_nodes = {}; my $prioritized_nodes = {}; - while (my ($node, $props) = each %{ $node_affinity_rule->{nodes} }) { + while (my ($node, $props) = each $node_affinity->{$sid}->{nodes}->%*) { next if !defined($online_nodes->{$node}); # node is offline $allowed_nodes->{$node} = 1; diff --git a/src/PVE/HA/Rules/ResourceAffinity.pm b/src/PVE/HA/Rules/ResourceAffinity.pm index f2d57ce6..c3cde6e8 100644 --- a/src/PVE/HA/Rules/ResourceAffinity.pm +++ b/src/PVE/HA/Rules/ResourceAffinity.pm @@ -100,6 +100,35 @@ sub get_plugin_check_arguments { return $result; } +sub plugin_compile { + my ($class, $rules, $nodes) = @_; + + my $positive = {}; + my $negative = {}; + + PVE::HA::Rules::foreach_rule( + $rules, + sub { + my ($rule, $ruleid) = @_; + + my $affinity_set = $rule->{affinity} eq 'positive' ? $positive : $negative; + + for my $sid (keys $rule->{resources}->%*) { + for my $csid (keys $rule->{resources}->%*) { + $affinity_set->{$sid}->{$csid} = 1 if $csid ne $sid; + } + } + }, + type => 'resource-affinity', + exclude_disabled_rules => 1, + ); + + return { + positive => $positive, + negative => $negative, + }; +} + =head1 RESOURCE AFFINITY RULE CHECKERS =cut @@ -403,12 +432,12 @@ __PACKAGE__->register_transform(sub { =cut -=head3 get_affinitive_resources($rules, $sid) +=head3 get_affinitive_resources($resource_affinity, $sid) Returns a list of two hash sets, where the first hash set contains the resources, which C<$sid> is positively affinitive to, and the second hash contains the resources, which C<$sid> is negatively affinitive to, acording to -the resource affinity rules in C<$rules>. +the resource's resource affinity in C<$resource_affinity>. Note that a resource C<$sid> becomes part of any negative affinity relation of its positively affinitive resources. @@ -429,36 +458,20 @@ affinitive to C<'ct:200'> and C<'ct:201'>, the returned value will be: =cut sub get_affinitive_resources : prototype($$) { - my ($rules, $sid) = @_; + my ($resource_affinity, $sid) = @_; - my $together = {}; - my $separate = {}; - - PVE::HA::Rules::foreach_rule( - $rules, - sub { - my ($rule, $ruleid) = @_; - - my $affinity_set = $rule->{affinity} eq 'positive' ? $together : $separate; - - for my $csid (sort keys %{ $rule->{resources} }) { - $affinity_set->{$csid} = 1 if $csid ne $sid; - } - }, - sid => $sid, - type => 'resource-affinity', - exclude_disabled_rules => 1, - ); + my $together = $resource_affinity->{positive}->{$sid} // {}; + my $separate = $resource_affinity->{negative}->{$sid} // {}; return ($together, $separate); } -=head3 get_resource_affinity($rules, $sid, $online_node_usage) +=head3 get_resource_affinity($resource_affinity, $sid, $online_node_usage) Returns a list of two hashes, where the first describes the positive resource affinity and the second hash describes the negative resource affinity for -resource C<$sid> according to the resource affinity rules in C<$rules> and the -resource locations in C<$online_node_usage>. +resource C<$sid> according to the resource's resource affinity in +C<$resource_affinity> and the resource locations in C<$online_node_usage>. For the positive resource affinity of a resource C<$sid>, each element in the hash represents an online node, where other resources, which C<$sid> is in @@ -487,36 +500,26 @@ resource C<$sid> is in a negative affinity with, the returned value will be: =cut sub get_resource_affinity : prototype($$$) { - my ($rules, $sid, $online_node_usage) = @_; + my ($resource_affinity, $sid, $online_node_usage) = @_; my $together = {}; my $separate = {}; - PVE::HA::Rules::foreach_rule( - $rules, - sub { - my ($rule) = @_; + my ($positive, $negative) = get_affinitive_resources($resource_affinity, $sid); - for my $csid (keys %{ $rule->{resources} }) { - next if $csid eq $sid; + for my $csid (keys $positive->%*) { + my $nodes = $online_node_usage->get_service_nodes($csid); + next if !$nodes || !@$nodes; # skip unassigned ha resources - my $nodes = $online_node_usage->get_service_nodes($csid); + $together->{$_}++ for @$nodes; + } - next if !$nodes || !@$nodes; # skip unassigned nodes + for my $csid (keys $negative->%*) { + my $nodes = $online_node_usage->get_service_nodes($csid); + next if !$nodes || !@$nodes; # skip unassigned ha resources - if ($rule->{affinity} eq 'positive') { - $together->{$_}++ for @$nodes; - } elsif ($rule->{affinity} eq 'negative') { - $separate->{$_} = 1 for @$nodes; - } else { - die "unimplemented resource affinity type $rule->{affinity}\n"; - } - } - }, - sid => $sid, - type => 'resource-affinity', - exclude_disabled_rules => 1, - ); + $separate->{$_} = 1 for @$nodes; + } return ($together, $separate); } diff --git a/src/test/test_failover1.pl b/src/test/test_failover1.pl index 78a001eb..62476a93 100755 --- a/src/test/test_failover1.pl +++ b/src/test/test_failover1.pl @@ -20,11 +20,13 @@ node-affinity: prefer_node1 nodes node1 EOD +my $nodes = ['node1', 'node2', 'node3']; + +my $affinity = PVE::HA::Rules->compile($rules, $nodes); + # Relies on the fact that the basic plugin doesn't use the haenv. my $online_node_usage = PVE::HA::Usage::Basic->new(); -$online_node_usage->add_node("node1"); -$online_node_usage->add_node("node2"); -$online_node_usage->add_node("node3"); +$online_node_usage->add_node($_) for @$nodes; my $service_conf = { node => 'node1', @@ -43,7 +45,7 @@ sub test { my $select_node_preference = $try_next ? 'try-next' : 'none'; my $node = PVE::HA::Manager::select_service_node( - $rules, $online_node_usage, "vm:111", $service_conf, $sd, $select_node_preference, + $affinity, $online_node_usage, "vm:111", $service_conf, $sd, $select_node_preference, ); my (undef, undef, $line) = caller(); -- 2.47.2 _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel