The initial implementation of fetching a ha resource's rules in the scheduler and other users is rather inefficient as it needs to go through the raw rules hash map and gathers quite a lot of information which doesn't change unless the HA rules need to be re-translated anyway.
Therefore add a compile stage when re-translating the HA rules, which compiles each rule plugin's rules into a more compact data structure. This reduces the unnecessary recomputation of invariant data done in the hot path of the scheduler's select_service_node(...). A benchmark done with NYTProf [0] shows the following runtime improvement, where each duration is the average time needed per call. The benchmark was done by starting up a 3-node cluster with 1200 configured HA resources, where each HA resource is in a node affinity rule and a negative resource affinity to two other HA resources. +----------------------------------+--------+-------+ | Function | Before | After | +----------------------------------+--------+-------+ | select_service_node | 10.5ms | 44µs | | get_node_affinity | 5.26ms | 10µs | | get_resource_affinity | 5.17ms | 12µs | +----------------------------------+--------+-------+ [0] https://metacpan.org/pod/Devel::NYTProf Signed-off-by: Daniel Kral <[email protected]> --- changes since v3: - do not add R-b and T-b from Michael because... - fix auto-vivification in get_resource_affinity(...), or more like propagate that fix from the granular accounting v4 series src/PVE/HA/Config.pm | 9 ++- src/PVE/HA/Manager.pm | 31 ++++---- src/PVE/HA/Rules.pm | 59 +++++++++++++++ src/PVE/HA/Rules/NodeAffinity.pm | 80 ++++++++++---------- src/PVE/HA/Rules/ResourceAffinity.pm | 107 ++++++++++++++------------- src/test/test_failover1.pl | 15 +++- 6 files changed, 188 insertions(+), 113 deletions(-) diff --git a/src/PVE/HA/Config.pm b/src/PVE/HA/Config.pm index bd87a0ab..548a89e6 100644 --- a/src/PVE/HA/Config.pm +++ b/src/PVE/HA/Config.pm @@ -224,7 +224,7 @@ sub read_and_check_rules_config { return $rules; } -sub read_and_check_effective_rules_config { +sub read_and_compile_rules_config { my $rules = read_and_check_rules_config(); @@ -239,7 +239,7 @@ sub read_and_check_effective_rules_config { PVE::HA::Rules->transform($rules, $nodes); - return $rules; + return PVE::HA::Rules->compile($rules, $nodes); } sub write_rules_config { @@ -391,8 +391,9 @@ sub get_resource_motion_info { my $ss = $manager_status->{service_status}; my $ns = $manager_status->{node_status}; - my $rules = read_and_check_effective_rules_config(); - my ($together, $separate) = get_affinitive_resources($rules, $sid); + my $compiled_rules = read_and_compile_rules_config(); + my $resource_affinity = $compiled_rules->{'resource-affinity'}; + my ($together, $separate) = get_affinitive_resources($resource_affinity, $sid); for my $csid (sort keys %$together) { next if !defined($ss->{$csid}); diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm index a26bde76..e17e3209 100644 --- a/src/PVE/HA/Manager.pm +++ b/src/PVE/HA/Manager.pm @@ -126,12 +126,12 @@ sub flush_master_status { =head3 select_service_node(...) -=head3 select_service_node($rules, $online_node_usage, $sid, $service_conf, $ss, $node_preference) +=head3 select_service_node($compiled_rules, $online_node_usage, $sid, $service_conf, $ss, $node_preference) Used to select the best fitting node for the service C<$sid>, with the -configuration C<$service_conf>, according to the rules defined in C<$rules>, -available node utilization in C<$online_node_usage>, the service states in -C<$ss> and the given C<$node_preference>. +configuration C<$service_conf>, according to the rules defined in +C<$compiled_rules>, available node utilization in C<$online_node_usage>, the +service states in C<$ss> and the given C<$node_preference>. The C<$node_preference> can be set to: @@ -148,7 +148,7 @@ The C<$node_preference> can be set to: =cut sub select_service_node { - my ($rules, $online_node_usage, $sid, $service_conf, $ss, $node_preference) = @_; + my ($compiled_rules, $online_node_usage, $sid, $service_conf, $ss, $node_preference) = @_; die "'$node_preference' is not a valid node_preference for select_service_node\n" if $node_preference !~ m/(none|best-score|try-next)/; @@ -156,13 +156,15 @@ sub select_service_node { my $sd = $ss->{$sid}; my ($current_node, $tried_nodes, $maintenance_fallback) = $sd->@{qw(node failed_nodes maintenance_node)}; + my ($node_affinity, $resource_affinity) = + $compiled_rules->@{qw(node-affinity resource-affinity)}; my $online_nodes = { map { $_ => 1 } $online_node_usage->list_nodes() }; - my ($allowed_nodes, $pri_nodes) = get_node_affinity($rules, $sid, $online_nodes); + my ($allowed_nodes, $pri_nodes) = get_node_affinity($node_affinity, $sid, $online_nodes); return undef if !%$pri_nodes; - my ($together, $separate) = get_resource_affinity($rules, $sid, $ss, $online_nodes); + my ($together, $separate) = get_resource_affinity($resource_affinity, $sid, $ss, $online_nodes); # stay on current node if possible (avoids random migrations) if ( @@ -389,7 +391,8 @@ sub execute_migration { my ($haenv, $ss) = $self->@{qw(haenv ss)}; - my ($together, $separate) = get_affinitive_resources($self->{rules}, $sid); + my $resource_affinity = $self->{compiled_rules}->{'resource-affinity'}; + my ($together, $separate) = get_affinitive_resources($resource_affinity, $sid); for my $csid (sort keys %$separate) { next if !defined($ss->{$csid}); @@ -719,7 +722,7 @@ sub manage { PVE::HA::Groups::migrate_groups_to_resources($self->{groups}, $sc); if ( - !$self->{rules} + !$self->{compiled_rules} || $has_changed_nodelist || $new_rules->{digest} ne $self->{last_rules_digest} || $self->{groups}->{digest} ne $self->{last_groups_digest} @@ -731,9 +734,9 @@ sub manage { my $messages = PVE::HA::Rules->transform($new_rules, $nodes); $haenv->log('info', $_) for @$messages; - $self->{rules} = $new_rules; + $self->{compiled_rules} = PVE::HA::Rules->compile($new_rules, $nodes); - $self->{last_rules_digest} = $self->{rules}->{digest}; + $self->{last_rules_digest} = $new_rules->{digest}; $self->{last_groups_digest} = $self->{groups}->{digest}; $self->{last_services_digest} = $services_digest; } @@ -991,7 +994,7 @@ sub next_state_request_start { if ($self->{crs}->{rebalance_on_request_start}) { my $selected_node = select_service_node( - $self->{rules}, + $self->{compiled_rules}, $self->{online_node_usage}, $sid, $cd, @@ -1158,7 +1161,7 @@ sub next_state_started { } my $node = select_service_node( - $self->{rules}, + $self->{compiled_rules}, $self->{online_node_usage}, $sid, $cd, @@ -1272,7 +1275,7 @@ sub next_state_recovery { my $fenced_node = $sd->{node}; # for logging purpose my $recovery_node = select_service_node( - $self->{rules}, + $self->{compiled_rules}, $self->{online_node_usage}, $sid, $cd, diff --git a/src/PVE/HA/Rules.pm b/src/PVE/HA/Rules.pm index a075feac..69c53356 100644 --- a/src/PVE/HA/Rules.pm +++ b/src/PVE/HA/Rules.pm @@ -47,6 +47,12 @@ Each I<rule plugin> is required to implement the methods C<L<type()>>, C<L<properties()>>, and C<L<options>> from the C<L<PVE::SectionConfig>> to extend the properties of this I<base plugin> with plugin-specific properties. +Each I<rule plugin> is required to implement the method +C<L<< plugin_compile()|/$class->plugin_compile(...) >>> to distill a compiled +representation of the more verbose C<$rules> from the config file, which is +returned by C<L<< compile()|/$class->compile(...) >>> to be more appropriate +and efficient for the scheduler and other users. + =head2 REGISTERING CHECKS In order to C<L<< register checks|/$class->register_check(...) >>> for a rule @@ -453,6 +459,59 @@ sub transform : prototype($$$) { return $messages; } +=head3 $class->plugin_compile(...) + +=head3 $class->plugin_compile($rules, $nodes) + +B<MANDATORY:> Must be implemented in a I<rule plugin>. + +Called in C<$class->compile($rules, $nodes)> in order to get a more compact +representation of the rule plugin's rules in C<$rules>, which holds only the +relevant information for the scheduler and other users. + +C<$nodes> is a list of the configured cluster nodes. + +=cut + +sub plugin_compile { + my ($class, $rules, $nodes) = @_; + + die "implement in subclass"; +} + +=head3 $class->compile(...) + +=head3 $class->compile($rules, $nodes) + +Compiles and returns a hash, where each key-value pair represents a rule +plugin's more compact representation compiled from the more verbose rules +defined in C<$rules>. + +C<$nodes> is a list of the configured cluster nodes. + +The transformation to the compact representation for each rule plugin is +implemented in C<L<< plugin_compile()|/$class->plugin_compile(...) >>>. + +=cut + +sub compile { + my ($class, $rules, $nodes) = @_; + + my $compiled_rules = {}; + + for my $type (@$types) { + my $plugin = $class->lookup($type); + my $compiled_plugin_rules = $plugin->plugin_compile($rules, $nodes); + + die "plugin_compile(...) of type '$type' must return hash reference\n" + if ref($compiled_plugin_rules) ne 'HASH'; + + $compiled_rules->{$type} = $compiled_plugin_rules; + } + + return $compiled_rules; +} + =head1 FUNCTIONS =cut diff --git a/src/PVE/HA/Rules/NodeAffinity.pm b/src/PVE/HA/Rules/NodeAffinity.pm index 111ff6fe..adef97d0 100644 --- a/src/PVE/HA/Rules/NodeAffinity.pm +++ b/src/PVE/HA/Rules/NodeAffinity.pm @@ -155,6 +155,40 @@ sub get_plugin_check_arguments { return $result; } +sub plugin_compile { + my ($class, $rules, $nodes) = @_; + + my $node_affinity = {}; + + PVE::HA::Rules::foreach_rule( + $rules, + sub { + my ($rule) = @_; + + my $effective_nodes = dclone($rule->{nodes}); + + # add remaining nodes with low priority for non-strict node affinity + if (!$rule->{strict}) { + for my $node (@$nodes) { + next if defined($effective_nodes->{$node}); + + $effective_nodes->{$node} = { priority => -1 }; + } + } + + for my $sid (keys $rule->{resources}->%*) { + $node_affinity->{$sid} = { + nodes => $effective_nodes, + }; + } + }, + type => 'node-affinity', + exclude_disabled_rules => 1, + ); + + return $node_affinity; +} + =head1 NODE AFFINITY RULE CHECKERS =cut @@ -217,30 +251,10 @@ __PACKAGE__->register_check( =cut -my $get_resource_node_affinity_rule = sub { - my ($rules, $sid) = @_; - - # with the current restriction a resource can only be in one node affinity rule - my $node_affinity_rule; - PVE::HA::Rules::foreach_rule( - $rules, - sub { - my ($rule) = @_; - - $node_affinity_rule = dclone($rule) if !$node_affinity_rule; - }, - sid => $sid, - type => 'node-affinity', - exclude_disabled_rules => 1, - ); - - return $node_affinity_rule; -}; - -=head3 get_node_affinity($rules, $sid, $online_nodes) +=head3 get_node_affinity($node_affinity, $sid, $online_nodes) Returns a list of two hashes representing the node affinity of C<$sid> -according to the node affinity rules in C<$rules> and the available nodes in +according to the node affinity C<$node_affinity> and the available nodes in the C<$online_nodes> hash. The first hash is a hash set of available nodes, i.e. nodes where the @@ -252,30 +266,14 @@ If there are no available nodes at all, returns C<undef>. =cut sub get_node_affinity : prototype($$$) { - my ($rules, $sid, $online_nodes) = @_; + my ($node_affinity, $sid, $online_nodes) = @_; - my $node_affinity_rule = $get_resource_node_affinity_rule->($rules, $sid); - - # default to a node affinity rule with all available nodes - if (!$node_affinity_rule) { - for my $node (keys %$online_nodes) { - $node_affinity_rule->{nodes}->{$node} = { priority => 0 }; - } - } - - # add remaining nodes with low priority for non-strict node affinity rules - if (!$node_affinity_rule->{strict}) { - for my $node (keys %$online_nodes) { - next if defined($node_affinity_rule->{nodes}->{$node}); - - $node_affinity_rule->{nodes}->{$node} = { priority => -1 }; - } - } + return ($online_nodes, $online_nodes) if !defined($node_affinity->{$sid}); my $allowed_nodes = {}; my $prioritized_nodes = {}; - while (my ($node, $props) = each %{ $node_affinity_rule->{nodes} }) { + while (my ($node, $props) = each $node_affinity->{$sid}->{nodes}->%*) { next if !defined($online_nodes->{$node}); # node is offline $allowed_nodes->{$node} = 1; diff --git a/src/PVE/HA/Rules/ResourceAffinity.pm b/src/PVE/HA/Rules/ResourceAffinity.pm index 4ef49f2c..91be21cd 100644 --- a/src/PVE/HA/Rules/ResourceAffinity.pm +++ b/src/PVE/HA/Rules/ResourceAffinity.pm @@ -101,6 +101,35 @@ sub get_plugin_check_arguments { return $result; } +sub plugin_compile { + my ($class, $rules, $nodes) = @_; + + my $positive = {}; + my $negative = {}; + + PVE::HA::Rules::foreach_rule( + $rules, + sub { + my ($rule, $ruleid) = @_; + + my $affinity_set = $rule->{affinity} eq 'positive' ? $positive : $negative; + + for my $sid (keys $rule->{resources}->%*) { + for my $csid (keys $rule->{resources}->%*) { + $affinity_set->{$sid}->{$csid} = 1 if $csid ne $sid; + } + } + }, + type => 'resource-affinity', + exclude_disabled_rules => 1, + ); + + return { + positive => $positive, + negative => $negative, + }; +} + =head1 RESOURCE AFFINITY RULE CHECKERS =cut @@ -404,12 +433,12 @@ __PACKAGE__->register_transform(sub { =cut -=head3 get_affinitive_resources($rules, $sid) +=head3 get_affinitive_resources($resource_affinity, $sid) Returns a list of two hash sets, where the first hash set contains the resources, which C<$sid> is positively affinitive to, and the second hash contains the resources, which C<$sid> is negatively affinitive to, acording to -the resource affinity rules in C<$rules>. +the resource's resource affinity in C<$resource_affinity>. Note that a resource C<$sid> becomes part of any negative affinity relation of its positively affinitive resources. @@ -430,36 +459,20 @@ affinitive to C<'ct:200'> and C<'ct:201'>, the returned value will be: =cut sub get_affinitive_resources : prototype($$) { - my ($rules, $sid) = @_; + my ($resource_affinity, $sid) = @_; - my $together = {}; - my $separate = {}; - - PVE::HA::Rules::foreach_rule( - $rules, - sub { - my ($rule, $ruleid) = @_; - - my $affinity_set = $rule->{affinity} eq 'positive' ? $together : $separate; - - for my $csid (sort keys %{ $rule->{resources} }) { - $affinity_set->{$csid} = 1 if $csid ne $sid; - } - }, - sid => $sid, - type => 'resource-affinity', - exclude_disabled_rules => 1, - ); + my $together = $resource_affinity->{positive}->{$sid} // {}; + my $separate = $resource_affinity->{negative}->{$sid} // {}; return ($together, $separate); } -=head3 get_resource_affinity($rules, $sid, $ss, $online_nodes) +=head3 get_resource_affinity($resource_affinity, $sid, $ss, $online_nodes) Returns a list of two hashes, where the first describes the positive resource affinity and the second hash describes the negative resource affinity for -resource C<$sid> according to the resource affinity rules in C<$rules>, the -service status C<$ss> and the C<$online_nodes> hash. +resource C<$sid> according to the resource's resource affinity rules in +C<$resource_affinity>, the service status C<$ss> and the C<$online_nodes> hash. For the positive resource affinity of a resource C<$sid>, each element in the hash represents an online node, where other resources, which C<$sid> is in @@ -488,39 +501,33 @@ resource C<$sid> is in a negative affinity with, the returned value will be: =cut sub get_resource_affinity : prototype($$$$) { - my ($rules, $sid, $ss, $online_nodes) = @_; + my ($resource_affinity, $sid, $ss, $online_nodes) = @_; my $together = {}; my $separate = {}; - PVE::HA::Rules::foreach_rule( - $rules, - sub { - my ($rule) = @_; + my ($positive, $negative) = get_affinitive_resources($resource_affinity, $sid); - for my $csid (keys %{ $rule->{resources} }) { - next if $csid eq $sid; - next if !defined($ss->{$csid}); + my $get_used_service_nodes = sub { + my ($sid) = @_; + return (undef, undef) if !defined($ss->{$sid}); + my ($state, $node, $target) = $ss->{$sid}->@{qw(state node target)}; + return PVE::HA::Usage::get_used_service_nodes($online_nodes, $state, $node, $target); + }; - my ($state, $node, $target) = $ss->{$csid}->@{qw(state node target)}; - my ($current_node, $target_node) = - PVE::HA::Usage::get_used_service_nodes($online_nodes, $state, $node, $target); + for my $csid (keys $positive->%*) { + my ($current_node, $target_node) = $get_used_service_nodes->($csid); - if ($rule->{affinity} eq 'positive') { - $together->{$current_node}++ if defined($current_node); - $together->{$target_node}++ if defined($target_node); - } elsif ($rule->{affinity} eq 'negative') { - $separate->{$current_node} = 1 if defined($current_node); - $separate->{$target_node} = 1 if defined($target_node); - } else { - die "unimplemented resource affinity type $rule->{affinity}\n"; - } - } - }, - sid => $sid, - type => 'resource-affinity', - exclude_disabled_rules => 1, - ); + $together->{$current_node}++ if defined($current_node); + $together->{$target_node}++ if defined($target_node); + } + + for my $csid (keys $negative->%*) { + my ($current_node, $target_node) = $get_used_service_nodes->($csid); + + $separate->{$current_node} = 1 if defined($current_node); + $separate->{$target_node} = 1 if defined($target_node); + } return ($together, $separate); } diff --git a/src/test/test_failover1.pl b/src/test/test_failover1.pl index 495d4b4b..1d336be0 100755 --- a/src/test/test_failover1.pl +++ b/src/test/test_failover1.pl @@ -21,11 +21,13 @@ node-affinity: prefer_node1 nodes node1 EOD +my $nodes = ['node1', 'node2', 'node3']; + +my $compiled_rules = PVE::HA::Rules->compile($rules, $nodes); + # Relies on the fact that the basic plugin doesn't use the haenv. my $online_node_usage = PVE::HA::Usage::Basic->new(); -$online_node_usage->add_node("node1"); -$online_node_usage->add_node("node2"); -$online_node_usage->add_node("node3"); +$online_node_usage->add_node($_) for @$nodes; my $service_conf = { node => 'node1', @@ -46,7 +48,12 @@ sub test { my $select_node_preference = $try_next ? 'try-next' : 'none'; my $node = PVE::HA::Manager::select_service_node( - $rules, $online_node_usage, "$sid", $service_conf, $ss, $select_node_preference, + $compiled_rules, + $online_node_usage, + $sid, + $service_conf, + $ss, + $select_node_preference, ); my (undef, undef, $line) = caller(); -- 2.47.3 _______________________________________________ pve-devel mailing list [email protected] https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
