In order to allow handlers to read upcalls directly from the datapath, we need to support per-handler netlink sockets for each vport in the datapath. This commit adds that support. Also, it is guaranteed that the newer datapath is compatible with the branch-2.1 userspace implementation.
Signed-off-by: Alex Wang <al...@nicira.com> --- major changes since RFC: - guarantee the compatibility with branch-2.1 userspace. - use rcu to protect upcall port_id array multi-access. - use skb_get_rxhash() to select the port id to send upcall. --- datapath/datapath.c | 22 +++++---- datapath/vport.c | 107 ++++++++++++++++++++++++++++++++++++++++++- datapath/vport.h | 25 ++++++++-- include/linux/openvswitch.h | 10 ++-- lib/dpif-linux.c | 8 ++-- 5 files changed, 150 insertions(+), 22 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index f7c3391..de4b97a 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -242,7 +242,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) upcall.cmd = OVS_PACKET_CMD_MISS; upcall.key = &key; upcall.userdata = NULL; - upcall.portid = p->upcall_portid; + upcall.portid = ovs_vport_find_portid(p, skb); ovs_dp_upcall(dp, skb, &upcall); consume_skb(skb); stats_counter = &stats->n_missed; @@ -1241,7 +1241,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.options = NULL; parms.dp = dp; parms.port_no = OVSP_LOCAL; - parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); + parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; ovs_dp_change(dp, a); @@ -1459,7 +1459,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_UPCALL_PIDS] = { .type = NLA_UNSPEC }, [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, }; @@ -1494,8 +1494,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || - nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) || - 
nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid)) + nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport))) goto nla_put_failure; ovs_vport_get_stats(vport, &vport_stats); @@ -1503,6 +1502,9 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, &vport_stats)) goto nla_put_failure; + if (ovs_vport_get_upcall_portids(vport, skb)) + goto nla_put_failure; + err = ovs_vport_get_options(vport, skb); if (err == -EMSGSIZE) goto error; @@ -1580,7 +1582,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || - !a[OVS_VPORT_ATTR_UPCALL_PID]) + !a[OVS_VPORT_ATTR_UPCALL_PIDS]) goto exit; ovs_lock(); @@ -1617,7 +1619,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.options = a[OVS_VPORT_ATTR_OPTIONS]; parms.dp = dp; parms.port_no = port_no; - parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PIDS]; vport = new_vport(&parms); err = PTR_ERR(vport); @@ -1678,8 +1680,10 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) if (a[OVS_VPORT_ATTR_STATS]) ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS])); - if (a[OVS_VPORT_ATTR_UPCALL_PID]) - vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + err = ovs_vport_set_upcall_portids(vport, + a[OVS_VPORT_ATTR_UPCALL_PIDS]); + if (err) + goto exit_free; err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, info->snd_seq, 0, OVS_VPORT_CMD_NEW); diff --git a/datapath/vport.c b/datapath/vport.c index 7f12acc..391e6f9 100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@ -135,10 +135,12 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->dp = parms->dp; vport->port_no = parms->port_no; - vport->upcall_portid = parms->upcall_portid; vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); + if 
(ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) + return ERR_PTR(-EINVAL); + vport->percpu_stats = alloc_percpu(struct pcpu_tstats); if (!vport->percpu_stats) { kfree(vport); @@ -162,8 +164,13 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, */ void ovs_vport_free(struct vport *vport) { + struct nlattr *a = kzalloc(sizeof *a + sizeof(u32), GFP_KERNEL); + + a->nla_len = sizeof(u32); + ovs_vport_set_upcall_portids(vport, a); free_percpu(vport->percpu_stats); kfree(vport); + kfree(a); } /** @@ -348,6 +355,104 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) return 0; } +static void vport_portids_destroy_rcu_cb(struct rcu_head *rcu) +{ + struct vport_portids *ids = container_of(rcu, struct vport_portids, + rcu); + + kfree(ids); +} + +/** + * ovs_vport_set_upcall_portids - set upcall portids of @vport. + * + * @vport: vport to modify. + * @ids: new configuration, an array of port ids. + * + * Sets the vport's upcall_portids to @ids. + * + * Returns 0 if successful, -EINVAL if @ids is NULL or cannot be parsed as + * an array of U32. + * + * Must be called with rcu_read_lock. + */ +int ovs_vport_set_upcall_portids(struct vport *vport, struct nlattr *ids) +{ + struct vport_portids *old, *vport_portids; + + if (nla_len(ids) % sizeof(u32)) + return -EINVAL; + + old = ovsl_dereference(vport->upcall_portids); + + vport_portids = kmalloc(sizeof *vport_portids + nla_len(ids), + GFP_KERNEL); + vport_portids->ids = (void *) vport_portids + sizeof *vport_portids; + vport_portids->n_ids = nla_len(ids) / sizeof(u32); + memcpy(vport_portids->ids, nla_data(ids), nla_len(ids)); + + rcu_assign_pointer(vport->upcall_portids, vport_portids); + + if (old) + call_rcu(&old->rcu, vport_portids_destroy_rcu_cb); + + return 0; +} + +/** + * ovs_vport_get_upcall_portids - get the upcall_portids of @vport. + * + * @vport: vport from which to retrieve the portids. + * @skb: sk_buff where portids should be appended. 
+ * + * Retrieves the configuration of the given vport, appending the + * %OVS_VPORT_ATTR_UPCALL_PIDS attribute which is the array of upcall + * portids to @skb. + * + * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room. + * If an error occurs, @skb is left unmodified. Must be called with + * rcu_read_lock. + */ +int ovs_vport_get_upcall_portids(const struct vport *vport, + struct sk_buff *skb) +{ + struct vport_portids *ids; + int err = 0; + + ids = rcu_dereference_ovsl(vport->upcall_portids); + + if (nla_put(skb, OVS_VPORT_ATTR_UPCALL_PIDS, + ids->n_ids * sizeof *ids->ids, + (void *) ids->ids)) { + err = -EMSGSIZE; + } + + return err; +} + +/** + * ovs_vport_find_portid - find the upcall portid to send upcall. + * + * @vport: vport from which the missed packet is received. + * @skb: skb that the missed packet was received. + * + * Uses the skb_get_rxhash() to select the upcall portid to send the + * upcall. + * + * Returns the portid of the target socket. Must be called with rcu_read_lock. 
+ */ +u32 ovs_vport_find_portid(const struct vport *p, struct sk_buff *skb) +{ + struct vport_portids *ids; + + ids = rcu_dereference_ovsl(p->upcall_portids); + + if (ids->n_ids == 1 && *ids->ids == 0) + return 0; + + return ids->ids[skb_get_rxhash(skb) % ids->n_ids]; +} + /** * ovs_vport_receive - pass up received packet to the datapath for processing * diff --git a/datapath/vport.h b/datapath/vport.h index 18b723e..799e7c3 100644 --- a/datapath/vport.h +++ b/datapath/vport.h @@ -50,6 +50,11 @@ void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *); int ovs_vport_set_options(struct vport *, struct nlattr *options); int ovs_vport_get_options(const struct vport *, struct sk_buff *); +int ovs_vport_set_upcall_portids(struct vport *, struct nlattr *pids); +int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *); + +u32 ovs_vport_find_portid(const struct vport *, struct sk_buff *); + int ovs_vport_send(struct vport *, struct sk_buff *); /* The following definitions are for implementers of vport devices: */ @@ -60,13 +65,25 @@ struct vport_err_stats { u64 tx_dropped; u64 tx_errors; }; +/** + * struct vport_portids - array of netlink portids of a vport. + * must be protected by rcu. + * @rcu: RCU callback head for deferred destruction. + * @n_ids: Size of @ids array. + * @ids: Array storing the Netlink socket pids to use for packets received + * on this port that miss the flow table. + */ +struct vport_portids { + struct rcu_head rcu; + u32 n_ids; + u32 *ids; +}; /** * struct vport - one port within a datapath * @rcu: RCU callback head for deferred destruction. * @dp: Datapath to which this port belongs. - * @upcall_portid: The Netlink port to use for packets received on this port that - * miss the flow table. + * @upcall_portids: RCU protected 'struct vport_portids'. * @port_no: Index into @dp's @ports array. * @hash_node: Element in @dev_table hash table in vport.c. * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. 
@@ -80,7 +97,7 @@ struct vport_err_stats { struct vport { struct rcu_head rcu; struct datapath *dp; - u32 upcall_portid; + struct vport_portids __rcu *upcall_portids; u16 port_no; struct hlist_node hash_node; @@ -112,7 +129,7 @@ struct vport_parms { /* For ovs_vport_alloc(). */ struct datapath *dp; u16 port_no; - u32 upcall_portid; + struct nlattr *upcall_portids; }; /** diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index d1ff5ec..25adf5f 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -225,9 +225,10 @@ enum ovs_vport_type { * this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes * plus a null terminator. * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information. - * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that - * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on - * this port. A value of zero indicates that upcalls should not be sent. + * @OVS_VPORT_ATTR_UPCALL_PIDS: The array of Netlink socket pids in userspace + * among which OVS_PACKET_CMD_MISS upcalls will be distributed for packets + * received on this port. If this is a single-element array of value 0, + * upcalls should not be sent. * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for * packets sent or received through the vport. * @@ -251,7 +252,8 @@ enum ovs_vport_attr { OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. 
*/ OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */ OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */ - OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */ + OVS_VPORT_ATTR_UPCALL_PIDS, /* array of u32 Netlink socket PIDs for */ + /* receiving upcalls */ OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */ __OVS_VPORT_ATTR_MAX }; diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index 1c9869e..b6229d0 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -1714,7 +1714,7 @@ dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *vport, [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 }, [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32 }, [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ }, - [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_U32 }, + [OVS_VPORT_ATTR_UPCALL_PIDS] = { .type = NL_A_U32 }, [OVS_VPORT_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_vport_stats), .optional = true }, [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true }, @@ -1744,8 +1744,8 @@ dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *vport, vport->port_no = nl_attr_get_odp_port(a[OVS_VPORT_ATTR_PORT_NO]); vport->type = nl_attr_get_u32(a[OVS_VPORT_ATTR_TYPE]); vport->name = nl_attr_get_string(a[OVS_VPORT_ATTR_NAME]); - if (a[OVS_VPORT_ATTR_UPCALL_PID]) { - vport->upcall_pid = nl_attr_get(a[OVS_VPORT_ATTR_UPCALL_PID]); + if (a[OVS_VPORT_ATTR_UPCALL_PIDS]) { + vport->upcall_pid = nl_attr_get(a[OVS_VPORT_ATTR_UPCALL_PIDS]); } if (a[OVS_VPORT_ATTR_STATS]) { vport->stats = nl_attr_get(a[OVS_VPORT_ATTR_STATS]); @@ -1784,7 +1784,7 @@ dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *vport, } if (vport->upcall_pid) { - nl_msg_put_u32(buf, OVS_VPORT_ATTR_UPCALL_PID, *vport->upcall_pid); + nl_msg_put_u32(buf, OVS_VPORT_ATTR_UPCALL_PIDS, *vport->upcall_pid); } if (vport->stats) { -- 1.7.9.5 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev