On Mon, Oct 28, 2019 at 1:46 PM Mark Michelson <mmich...@redhat.com> wrote: > > On 10/28/19 7:46 AM, Dumitru Ceara wrote: > > On Fri, Oct 25, 2019 at 11:07 PM Mark Michelson <mmich...@redhat.com> wrote: > >> > >> There is a maximum number of resubmits that can occur during packet > >> processing. Deliberately creating a flow table that might cross this > >> threshold is irresponsible. > >> > >> This commit causes the ofctrl code to track the maximum width and depth > >> of conjunctions in the desired flow table. By multiplying them, we have > >> a possible worst case for number of resubmits required. If this number > >> exceeds a quarter of the maximum resubmits allowed, then we fall back to > >> forcing crossproducts. > >> > >> The resulting flow table can end up being a mixture of conjunction and > >> non-conjunction flows if the limit is exceeded. > >> > >> Signed-off-by: Mark Michelson <mmich...@redhat.com> > > > > Hi Mark, > > > > I've been testing the series on a setup with: > > - 100 logical routers connected to a logical gateway router > > - 100 logical switches (each connected to a logical router) > > - 200 VIFs (2 per logical switch) > > - 2 NAT rules on each router > > > > I bound all VIFs to OVS internal interfaces on the machine when > > ovn-northd is running. > > > > If I start ovn-controller on the same node, without your series I see > > ovn-controller taking more than 100s for a single flow computation > > run. > > > > If I apply your series, the maximum duration of a flow computation run > > in the same scenario drops to ~4s. > > > > This is really nice, however, I see some issues with flow removal (see > > below). > > Thanks for the test, Dumitru. > > > > >> --- > >> controller/lflow.c | 11 +++++++++ > >> controller/ofctrl.c | 69 > >> +++++++++++++++++++++++++++++++++++++++++++++++++++++ > >> controller/ofctrl.h | 6 +++++ > >> include/ovn/expr.h | 2 ++ > >> lib/expr.c | 6 +++++ > >> 5 files changed, 94 insertions(+) > >> > >> diff --git a/controller/lflow.c b/controller/lflow.c > >> index 34b7c36a6..318066465 100644 > >> --- a/controller/lflow.c > >> +++ b/controller/lflow.c > >> @@ -37,6 +37,13 @@ > >> VLOG_DEFINE_THIS_MODULE(lflow); > >> > >> COVERAGE_DEFINE(lflow_run); > >> + > >> +/* Limit maximum conjunctions to a quarter of the max > >> + * resubmits. Unfortunatley, max resubmits is not publicly > >> + * defined in a header file, so if max resubmits is changed > >> + * from 4096, we have to make sure to update this as well > >> + */ > >> +#define MAX_CONJUNCTIONS (4096 / 4) > >> > >> /* Symbol table. */ > >> > >> @@ -602,6 +609,10 @@ consider_logical_flow( > >> struct expr *prereqs; > >> char *error; > >> > >> + uint32_t conj_worst_case; > >> + conj_worst_case = flow_table->max_conj_width * > >> flow_table->max_conj_depth; > >> + expr_set_force_crossproduct(conj_worst_case >= MAX_CONJUNCTIONS); > >> + > >> error = ovnacts_parse_string(lflow->actions, &pp, &ovnacts, > >> &prereqs); > >> if (error) { > >> static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); > >> diff --git a/controller/ofctrl.c b/controller/ofctrl.c > >> index afb0036f1..2b2fde3c9 100644 > >> --- a/controller/ofctrl.c > >> +++ b/controller/ofctrl.c > >> @@ -686,6 +686,64 @@ ofctrl_check_and_add_flow(struct > >> ovn_desired_flow_table *flow_table, > >> f->uuid_hindex_node.hash); > >> } > >> > >> +static void > >> +get_conjunction_dimensions(const struct ovn_flow *f, uint32_t > >> *conj_width_p, > >> + uint32_t *conj_depth_p) > >> +{ > >> + struct ofpact *ofpact; > >> + uint32_t conj_width = 0; > >> + uint32_t conj_depth = 0; > >> + OFPACT_FOR_EACH (ofpact, f->ofpacts, f->ofpacts_len) { > >> + if (ofpact->type == OFPACT_CONJUNCTION) { > >> + struct ofpact_conjunction *conj = > >> ofpact_get_CONJUNCTION(ofpact); > >> + if (conj->n_clauses > conj_depth) { > >> + conj_depth = conj->n_clauses; > >> + } > >> + conj_width++; > >> + } > >> + } > >> + > >> + *conj_width_p = conj_width; > >> + *conj_depth_p = conj_depth; > >> +} > >> + > >> +static void > >> +adjust_conjunction_count(struct ovn_desired_flow_table *desired_flows, > >> + const struct ovn_flow *f, bool add) > >> +{ > >> + uint32_t conj_width; > >> + uint32_t conj_depth; > >> + > >> + get_conjunction_dimensions(f, &conj_width, &conj_depth); > >> + > >> + if (add) { > >> + if (conj_width > desired_flows->max_conj_width) { > >> + desired_flows->max_conj_width = conj_width; > >> + } > >> + if (conj_depth > desired_flows->max_conj_depth) { > >> + desired_flows->max_conj_depth = conj_depth; > >> + } > >> + } else if (desired_flows->max_conj_width <= conj_width || > >> + desired_flows->max_conj_depth <= conj_depth) { > >> + /* Removing either the widest or deepest conjunction. > >> + * Walk the table and recalculate maximums > >> + */ > >> + struct ovn_flow *iter; > >> + desired_flows->max_conj_width = 0; > >> + desired_flows->max_conj_depth = 0; > >> + HMAP_FOR_EACH (iter, match_hmap_node, > >> + &desired_flows->match_flow_table) { > >> + get_conjunction_dimensions(iter, &conj_width, &conj_depth); > >> + if (conj_width > desired_flows->max_conj_width) { > >> + desired_flows->max_conj_width = conj_width; > >> + } > >> + if (conj_depth > desired_flows->max_conj_depth) { > >> + desired_flows->max_conj_depth = conj_depth; > >> + } > >> + } > > > > I think this is quite expensive because we change single flow removal > > complexity from amortized O(1) to O(n). > > > > On the same setup as above if I continue testing and remove all > > logical switches from the NB database it will trigger all flows to be > > removed. > > > > In this scenario, with your series I see that two flow computation > > runs take up to 17s. > > On the other hand, if I do the deletion test without applying your > > third patch, flow computation runs never take longer than 5s. > > > > I agree that it's good to limit the number of conjunctive flows but I > > think we need to find a better way to track the max_conj_width/depth. > > I guess we could track max flow conj_width/depth by storing ovn_flows > > in a max-heap but that would change both flow add and remove > > operations' complexities to O(log(n)). > > > > Or do you think we can have a different heuristic for disabling > > conjunctive flows? > > Short version: we probably should just remove the limitation. > > Long version: > > My calculation method could stand to be improved. > > 1) The max_width * max_depth calculation assumes that the flow with the > max_width correlates with the conjunction with max depth. This may not > be the case. > 2) The max_width * max_depth calculation only accounts for one possible > conjunction's contribution to the resubmit count. If there are multiple > conjunctive match flows, we don't account for that. > 3) As you found, removing a flow requires an expensive traversal of the > desired flows table. There could be some ways to mitigate this, but no > matter what, the O(1) algorithm is toast. > 4) I more-or-less guessed what the upper bound should be before setting > force_crossproduct. > > So given all that, the proper things to do would be to either remove the > limitation checks or come up with a different calculation. > > When it comes to calculations, we can either perform a running > calculation (like I've done), or we can do a once-per-loop estimation > based on configuration.
An alternative would be to do the current check periodically instead of at every flow addition/deletion. On the long run that would keep complexity to O(1) as we just walk the whole hashtable one more time on a timer. The disadvantage is that we might need to reinstall the existing conjunctive flows once we determine that the max_depth/width was reached. > > The running calculation has the advantage of potentially being more > accurate. It also allows for us to enable force_crossproduct partway > through an ovn-controller loop. We can have some conjunctions, and some > crossproducts in the resulting flow table. The problem is that this > requires extra CPU time in order to keep track of the current > conjunction count. The amount can be better than what I've put forth, > but it's still going to be a downgrade from what it currently is. > > The one-time calculation might work by scanning the configured ACLs, > Address_Sets, and Port_Groups to make an educated guess at the worst > case for flow traversal. Unless we're willing to do some early parsing > of ACLs, this likely would be only a rough estimate for the worst case. > If we calculate that the number of potential conjunctions would exceed > our upper bound, then we likely would need to completely disable > conjunctions when calculating flows. > > The problem of the resubmit count is hypothetical. It's hard to know how > likely it is for real systems to exceed the max resubmit count. It's > also hard to estimate what the proper upper bound for conjunctions > should be. I think that it would take an extreme network hitting the > absolute worst cases to hit the resubmit limit. I think it's safe to > continue without the limitations. If we ever get reports about max > resubmit issues due to conjunctions, we can examine the individual > scenarios and try to formulate some good heuristics. This is also an option. And it's basically what OVN had before conjunctions were disabled. Thanks, Dumitru > > > > > Thanks, > > Dumitru > > > > > > >> + } > >> +} > >> + > >> /* Removes a bundles of flows from the flow table. */ > >> void > >> ofctrl_remove_flows(struct ovn_desired_flow_table *flow_table, > >> @@ -697,6 +755,7 @@ ofctrl_remove_flows(struct ovn_desired_flow_table > >> *flow_table, > >> &flow_table->uuid_flow_table) { > >> if (uuid_equals(&f->sb_uuid, sb_uuid)) { > >> ovn_flow_log(f, "ofctrl_remove_flow"); > >> + adjust_conjunction_count(flow_table, f, false); > >> hmap_remove(&flow_table->match_flow_table, > >> &f->match_hmap_node); > >> hindex_remove(&flow_table->uuid_flow_table, > >> &f->uuid_hindex_node); > >> @@ -747,6 +806,12 @@ ofctrl_add_or_append_flow(struct > >> ovn_desired_flow_table *desired_flows, > >> existing->ofpacts = xmemdup(compound.data, compound.size); > >> existing->ofpacts_len = compound.size; > >> > >> + /* It's a bit of a cheat that we only call > >> adjust_conjunction_count in > >> + * this function and not in ofctrl_add(). However, we know that > >> + * conjunctions are only ever added with this function > >> + */ > >> + adjust_conjunction_count(desired_flows, existing, true); > >> + > >> ovn_flow_destroy(f); > >> } else { > >> hmap_insert(&desired_flows->match_flow_table, > >> &f->match_hmap_node, > >> @@ -862,6 +927,8 @@ ovn_desired_flow_table_init(struct > >> ovn_desired_flow_table *flow_table) > >> { > >> hmap_init(&flow_table->match_flow_table); > >> hindex_init(&flow_table->uuid_flow_table); > >> + flow_table->max_conj_width = 0; > >> + flow_table->max_conj_depth = 0; > >> } > >> > >> void > >> @@ -874,6 +941,8 @@ ovn_desired_flow_table_clear(struct > >> ovn_desired_flow_table *flow_table) > >> hindex_remove(&flow_table->uuid_flow_table, > >> &f->uuid_hindex_node); > >> ovn_flow_destroy(f); > >> } > >> + flow_table->max_conj_width = 0; > >> + flow_table->max_conj_depth = 0; > >> } > >> > >> void > >> diff --git a/controller/ofctrl.h b/controller/ofctrl.h > >> index 21d2ce648..c295c04ed 100644 > >> --- a/controller/ofctrl.h > >> +++ b/controller/ofctrl.h > >> @@ -37,6 +37,12 @@ struct ovn_desired_flow_table { > >> > >> /* SB uuid index for the nodes in match_flow_table.*/ > >> struct hindex uuid_flow_table; > >> + > >> + /* Highest known number of conjunction() actions in a single flow */ > >> + uint32_t max_conj_width; > >> + > >> + /* Largest number of clauses for a conjunction in the table */ > >> + uint32_t max_conj_depth; > >> }; > >> > >> /* Interface for OVN main loop. */ > >> diff --git a/include/ovn/expr.h b/include/ovn/expr.h > >> index 22f633e57..714e21add 100644 > >> --- a/include/ovn/expr.h > >> +++ b/include/ovn/expr.h > >> @@ -425,6 +425,8 @@ bool expr_evaluate(const struct expr *, const struct > >> flow *uflow, > >> bool (*lookup_port)(const void *aux, const char > >> *port_name, > >> unsigned int *portp), > >> const void *aux); > >> + > >> +void expr_set_force_crossproduct(bool should_force); > >> > >> /* Converting expressions to OpenFlow flows. */ > >> > >> diff --git a/lib/expr.c b/lib/expr.c > >> index 71de61543..4c164caed 100644 > >> --- a/lib/expr.c > >> +++ b/lib/expr.c > >> @@ -3278,6 +3278,12 @@ expr_evaluate(const struct expr *e, const struct > >> flow *uflow, > >> OVS_NOT_REACHED(); > >> } > >> } > >> + > >> +void > >> +expr_set_force_crossproduct(bool should_force) > >> +{ > >> + force_crossproduct = should_force; > >> +} > >> > >> /* Action parsing helper. */ > >> > >> -- > >> 2.14.5 > >> > >> _______________________________________________ > >> dev mailing list > >> d...@openvswitch.org > >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev