Is this the same as what you've been working on, Ethan? If not, is it complementary, or...? On Mar 10, 2014 6:39 PM, "Jarno Rajahalme" <jrajaha...@nicira.com> wrote:
> Intended to send this as an RFC, > > Jarno > > On Mar 10, 2014, at 6:43 PM, Jarno Rajahalme <jrajaha...@nicira.com> > wrote: > > > This should optimize port masks for megaflows for typical port usage > > in matches. Each subtable has it's own trie if the subtable matches > > any of the ports bits. This trie is consulted only after failing > > lookup to determine the number of bits that needs to be unwildcarded > > to guarantee that any packet that should match on any of the other > > rules will NOT match this megaflow. > > > > This version is not configurable and is always on. > > > > Signed-off-by: Jarno Rajahalme <jrajaha...@nicira.com> > > --- > > lib/classifier.c | 90 > +++++++++++++++++++++++++++++++++++++++++++++++---- > > lib/classifier.h | 2 ++ > > tests/classifier.at | 6 ++-- > > 3 files changed, 89 insertions(+), 9 deletions(-) > > > > diff --git a/lib/classifier.c b/lib/classifier.c > > index 30a91b7..c36ccf6 100644 > > --- a/lib/classifier.c > > +++ b/lib/classifier.c > > @@ -30,6 +30,10 @@ > > > > VLOG_DEFINE_THIS_MODULE(classifier); > > > > +/* Ports trie depends on both ports sharing the same ovs_be32. */ > > +#define TP_PORTS_OFS32 (offsetof(struct flow, tp_src) / 4) > > +BUILD_ASSERT_DECL((offsetof(struct flow, tp_dst) / 4) == > TP_PORTS_OFS32); > > + > > struct trie_ctx; > > static struct cls_subtable *find_subtable(const struct classifier *, > > const struct minimask *); > > @@ -71,10 +75,16 @@ static void trie_init(struct classifier *, int > trie_idx, > > const struct mf_field *); > > static unsigned int trie_lookup(const struct cls_trie *, const struct > flow *, > > unsigned int *checkbits); > > - > > +static unsigned int trie_lookup_value(const struct trie_node *, > > + const ovs_be32 value[], > > + unsigned int *checkbits); > > static void trie_destroy(struct trie_node *); > > static void trie_insert(struct cls_trie *, const struct cls_rule *, int > mlen); > > +static void trie_insert_prefix(struct trie_node **, const ovs_be32 > *prefix, > > + int mlen); > > static void trie_remove(struct cls_trie *, const struct cls_rule *, int > mlen); > > +static void trie_remove_prefix(struct trie_node **, const ovs_be32 > *prefix, > > + int mlen); > > static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t > be32ofs, > > unsigned int nbits); > > static bool mask_prefix_bits_set(const struct flow_wildcards *, > > @@ -389,6 +399,23 @@ classifier_replace(struct classifier *cls, struct > cls_rule *rule) > > trie_insert(&cls->tries[i], rule, > subtable->trie_plen[i]); > > } > > } > > + > > + /* Ports trie. */ > > + if (subtable->ports_mask_len) { > > + ovs_be32 ports, mask, masked_ports; > > + > > + /* We mask the value to be inserted to always have the > wildcarded > > + * bits in known (zero) state, so we can include them in > comparison > > + * and they will always match (== their original value does > not > > + * matter). */ > > + ports = (OVS_FORCE ovs_be32)miniflow_get(&rule->match.flow, > > + TP_PORTS_OFS32); > > + mask = minimask_get(&subtable->mask, TP_PORTS_OFS32); > > + masked_ports = ports & mask; > > + > > + trie_insert_prefix(&subtable->ports_trie, &masked_ports, > > + subtable->ports_mask_len); > > + } > > } else { > > rule->partition = old_rule->partition; > > } > > @@ -421,6 +448,17 @@ classifier_remove(struct classifier *cls, struct > cls_rule *rule) > > > > subtable = find_subtable(cls, &rule->match.mask); > > > > + if (subtable->ports_mask_len) { > > + ovs_be32 ports, mask, masked_ports; > > + > > + ports = (OVS_FORCE ovs_be32)miniflow_get(&rule->match.flow, > > + TP_PORTS_OFS32); > > + mask = minimask_get(&subtable->mask, TP_PORTS_OFS32); > > + masked_ports = ports & mask; > > + > > + trie_remove_prefix(&subtable->ports_trie, > > + &masked_ports, subtable->ports_mask_len); > > + } > > for (i = 0; i < cls->n_tries; i++) { > > if (subtable->trie_plen[i]) { > > trie_remove(&cls->tries[i], rule, subtable->trie_plen[i]); > > @@ -859,6 +897,11 @@ insert_subtable(struct classifier *cls, const > struct minimask *mask) > > > cls->tries[i].field); > > } > > > > + /* Ports trie. */ > > + subtable->ports_trie = NULL; > > + subtable->ports_mask_len = 32 > > + - ctz32(ntohl((OVS_FORCE ovs_be32)minimask_get(mask, > TP_PORTS_OFS32))); > > + > > return subtable; > > } > > > > @@ -867,6 +910,9 @@ destroy_subtable(struct classifier *cls, struct > cls_subtable *subtable) > > { > > int i; > > > > + /* Port tries. */ > > + trie_destroy(subtable->ports_trie); > > + > > for (i = 0; i < subtable->n_indices; i++) { > > hindex_destroy(&subtable->indices[i]); > > } > > @@ -1121,6 +1167,23 @@ find_match_wc(const struct cls_subtable > *subtable, const struct flow *flow, > > * but it didn't match. */ > > rule = NULL; > > } > > + if (!rule && subtable->ports_mask_len) { > > + /* Ports are always part of the final range, if any. > > + * No match was found for the ports. Use the ports trie to > figure out > > + * which ports bits to unwildcard. */ > > + unsigned int mbits; > > + ovs_be32 value, mask; > > + > > + mask = minimask_get(&subtable->mask, TP_PORTS_OFS32); > > + value = ((ovs_be32 *)flow)[TP_PORTS_OFS32] & mask; > > + trie_lookup_value(subtable->ports_trie, &value, &mbits); > > + > > + ((ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |= > > + mask & htonl(~0 << (32 - mbits)); > > + > > + ofs.start = TP_PORTS_OFS32; > > + goto range_out; > > + } > > out: > > /* Must unwildcard all the fields, as they were looked at. */ > > flow_wildcards_fold_minimask(wc, &subtable->mask); > > @@ -1513,14 +1576,18 @@ minimatch_get_prefix(const struct minimatch > *match, const struct mf_field *mf) > > static void > > trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen) > > { > > - const ovs_be32 *prefix = minimatch_get_prefix(&rule->match, > trie->field); > > + trie_insert_prefix(&trie->root, > > + minimatch_get_prefix(&rule->match, trie->field), > mlen); > > +} > > + > > +static void > > +trie_insert_prefix(struct trie_node **edge, const ovs_be32 *prefix, int > mlen) > > +{ > > struct trie_node *node; > > - struct trie_node **edge; > > int ofs = 0; > > > > /* Walk the tree. */ > > - for (edge = &trie->root; > > - (node = *edge) != NULL; > > + for (; (node = *edge) != NULL; > > edge = trie_next_edge(node, prefix, ofs)) { > > unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, > mlen); > > ofs += eqbits; > > @@ -1561,16 +1628,25 @@ trie_insert(struct cls_trie *trie, const struct > cls_rule *rule, int mlen) > > static void > > trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen) > > { > > - const ovs_be32 *prefix = minimatch_get_prefix(&rule->match, > trie->field); > > + trie_remove_prefix(&trie->root, > > + minimatch_get_prefix(&rule->match, trie->field), > mlen); > > +} > > + > > +/* 'mlen' must be the (non-zero) CIDR prefix length of the > 'trie->field' mask > > + * in 'rule'. */ > > +static void > > +trie_remove_prefix(struct trie_node **root, const ovs_be32 *prefix, int > mlen) > > +{ > > struct trie_node *node; > > struct trie_node **edges[sizeof(union mf_value) * 8]; > > int depth = 0, ofs = 0; > > > > /* Walk the tree. */ > > - for (edges[depth] = &trie->root; > > + for (edges[0] = root; > > (node = *edges[depth]) != NULL; > > edges[++depth] = trie_next_edge(node, prefix, ofs)) { > > unsigned int eqbits = trie_prefix_equal_bits(node, prefix, ofs, > mlen); > > + > > if (eqbits < node->nbits) { > > /* Mismatch, nothing to be removed. This should never > happen, as > > * only rules in the classifier are ever removed. */ > > diff --git a/lib/classifier.h b/lib/classifier.h > > index c3c1c3b..a1f4e4a 100644 > > --- a/lib/classifier.h > > +++ b/lib/classifier.h > > @@ -276,6 +276,8 @@ struct cls_subtable { > > uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. > */ > > struct hindex indices[CLS_MAX_INDICES]; /* Staged lookup indices. */ > > unsigned int trie_plen[CLS_MAX_TRIES]; /* Trie prefix length in > 'mask'. */ > > + struct trie_node *ports_trie; /* NULL if none. */ > > + int ports_mask_len; > > }; > > > > /* Returns true if 'table' is a "catch-all" subtable that will match > every > > diff --git a/tests/classifier.at b/tests/classifier.at > > index a46c526..96fb29b 100644 > > --- a/tests/classifier.at > > +++ b/tests/classifier.at > > @@ -55,7 +55,7 @@ Datapath actions: drop > > ]) > > AT_CHECK([ovs-appctl ofproto/trace br0 > 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], > [0], [stdout]) > > AT_CHECK([tail -2 stdout], [0], > > - [Megaflow: > skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=79 > > + [Megaflow: > skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_dst=0x40/0xfff0 > > Datapath actions: 2 > > ]) > > OVS_VSWITCHD_STOP > > @@ -78,6 +78,8 @@ AT_DATA([flows.txt], [dnl > > table=0 in_port=1 priority=16,tcp,nw_dst= > 10.1.0.0/255.255.0.0,action=output(3) > > table=0 in_port=1 priority=32,tcp,nw_dst= > 10.1.2.0/255.255.255.0,tp_src=79,action=output(2) > > table=0 in_port=1 priority=33,tcp,nw_dst=10.1.2.15,tp_dst=80,action=drop > > +table=0 in_port=1 > priority=33,tcp,nw_dst=10.1.2.15,tp_dst=8080,action=output(2) > > +table=0 in_port=1 > priority=33,tcp,nw_dst=10.1.2.15,tp_dst=192,action=output(2) > > table=0 in_port=1 priority=0,ip,action=drop > > table=0 in_port=2 priority=16,tcp,nw_dst= > 192.168.0.0/255.255.0.0,action=output(1) > > table=0 in_port=2 priority=0,ip,action=drop > > @@ -102,7 +104,7 @@ Datapath actions: drop > > ]) > > AT_CHECK([ovs-appctl ofproto/trace br0 > 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=10.1.2.15,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], > [0], [stdout]) > > AT_CHECK([tail -2 stdout], [0], > > - [Megaflow: > skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=8,tp_dst=79 > > + [Megaflow: > skb_priority=0,tcp,in_port=1,nw_dst=10.1.2.15,nw_frag=no,tp_src=0x0/0xffc0,tp_dst=0x40/0xfff0 > > Datapath actions: 3 > > ]) > > OVS_VSWITCHD_STOP(["/'prefixes' with incompatible field: ipv6_label/d"]) > > -- > > 1.7.10.4 > > > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev >
_______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev