On Fri, Sep 23, 2011 at 2:38 PM, Ben Pfaff <[email protected]> wrote:
> On Fri, Sep 23, 2011 at 02:20:13PM -0700, Jesse Gross wrote:
>> Currently we publish several multicast groups for upcalls and let
>> userspace sockets subscribe to them. The benefit of this is mostly
>> that userspace is the one doing the subscription - the actual
>> multicast capability is not currently used and probably wouldn't be
>> even if we moved to a multiprocess model. Despite the convenience,
>> multicast sockets have a number of disadvantages, primarily that
>> we only have a limited number of them so there could be collisions.
>> In addition, unicast sockets give additional flexibility to userspace
>> by allowing every object to potentially have a different socket
>> chosen by userspace for upcalls. Finally, any future optimizations
>> for upcalls to reduce copying will likely not be compatible with
>> multicast anyways so disallowing it potentially simplifies things.
>>
>> We also never unregistered the multicast groups registered for upcalls
>> and leaked them on module unload. As a side effect, this solves that
>> problem.
>>
>> Signed-off-by: Jesse Gross <[email protected]>
>
> This seems reasonable, thank you.
>
> The comments in datapath-protocol.h don't mention pid == 0 but maybe
> they should.
>
> We could optionally skip segmenting GSO skbs, saving some CPU, in the
> pid == 0 case.
>
> In dpif_linux_recv_set_mask(), I would rate-limit the new log messages.
These are all good points. I made these changes plus avoiding
querying the actions and also a bug fix where we didn't actually set
the flow key. Here's the incremental:
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 31a1c18..4d40ac3 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -84,7 +84,7 @@ EXPORT_SYMBOL(dp_ioctl_hook);
static LIST_HEAD(dps);
static struct vport *new_vport(const struct vport_parms *);
-static int queue_userspace_packets(struct datapath *, struct sk_buff *,
+static int queue_userspace_packets(struct datapath *, u32 pid, struct
sk_buff *,
const struct dp_upcall_info *);
/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
@@ -364,8 +364,20 @@ static struct genl_family dp_packet_genl_family = {
int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct
dp_upcall_info *upcall_info)
{
struct dp_stats_percpu *stats;
+ u32 pid;
int err;
+ if (OVS_CB(skb)->flow)
+ pid = OVS_CB(skb)->flow->upcall_pid;
+ else
+ pid = OVS_CB(skb)->vport->upcall_pid;
+
+ if (pid == 0) {
+ err = -ENOTCONN;
+ kfree_skb(skb);
+ goto err;
+ }
+
forward_ip_summed(skb, true);
/* Break apart GSO packets into their component pieces. Otherwise
@@ -382,7 +394,7 @@ int dp_upcall(struct datapath *dp, struct sk_buff
*skb, const struct dp_upcall_i
skb = nskb;
}
- err = queue_userspace_packets(dp, skb, upcall_info);
+ err = queue_userspace_packets(dp, pid, skb, upcall_info);
if (err)
goto err;
@@ -405,12 +417,12 @@ err:
* 'upcall_info'. There will be only one packet unless we broke up a GSO
* packet.
*/
-static int queue_userspace_packets(struct datapath *dp, struct sk_buff *skb,
- const struct dp_upcall_info *upcall_info)
+static int queue_userspace_packets(struct datapath *dp, u32 pid,
+ struct sk_buff *skb,
+ const struct dp_upcall_info *upcall_info)
{
int dp_ifindex;
struct sk_buff *nskb;
- u32 pid;
int err;
dp_ifindex = get_dpifindex(dp);
@@ -420,17 +432,6 @@ static int queue_userspace_packets(struct
datapath *dp, struct sk_buff *skb,
goto err_kfree_skbs;
}
- if (OVS_CB(skb)->flow)
- pid = OVS_CB(skb)->flow->upcall_pid;
- else
- pid = OVS_CB(skb)->vport->upcall_pid;
-
- if (pid == 0) {
- err = -ENOTCONN;
- nskb = skb->next;
- goto err_kfree_skbs;
- }
-
do {
struct ovs_header *upcall;
struct sk_buff *user_skb; /* to be queued to userspace */
diff --git a/include/openvswitch/datapath-protocol.h
b/include/openvswitch/datapath-protocol.h
index 98316ae..c48426f 100644
--- a/include/openvswitch/datapath-protocol.h
+++ b/include/openvswitch/datapath-protocol.h
@@ -87,7 +87,8 @@ struct ovs_header {
* dp_ifindex in other requests (with a dp_ifindex of 0).
* @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially
* set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on
- * %OVS_DP_CMD_NEW requests.
+ * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
+ * not be sent.
* @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
* datapath. Always present in notifications.
* @OVS_DP_ATTR_IPV4_FRAGS: One of %OVS_DP_FRAG_*. Always present in
@@ -176,7 +177,7 @@ enum ovs_packet_cmd {
* @OVS_PACKET_ATTR_UPCALL_PID: Optionally present for OVS_PACKET_CMD_EXECUTE.
* The Netlink socket in userspace that OVS_PACKET_CMD_USERSPACE and
* OVS_PACKET_CMD_SAMPLE upcalls will be directed to for actions triggered by
- * this packet.
+ * this packet. A value of zero indicates that upcalls should not be sent.
* @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION
* notification if the %OVS_ACTION_ATTR_USERSPACE, action's argument was
* nonzero.
@@ -235,7 +236,7 @@ enum ovs_vport_cmd {
* plus a null terminator.
* @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that
* OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on
- * this port.
+ * this port. A value of zero indicates that upcalls should not be sent.
* @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for
* packets sent or received through the vport.
* @OVS_VPORT_ATTR_ADDRESS: A 6-byte Ethernet address for the vport.
@@ -389,7 +390,8 @@ struct ovs_key_nd {
* notifications. Required for %OVS_FLOW_CMD_NEW requests, optional
* @OVS_FLOW_ATTR_UPCALL_PID: The Netlink socket in userspace that
* OVS_PACKET_CMD_USERSPACE and OVS_PACKET_CMD_SAMPLE upcalls will be
- * directed to for packets received on this port.
+ * directed to for packets received on this port. A value of zero indicates
+ * that upcalls should not be sent.
* on %OVS_FLOW_CMD_SET request to change the existing actions, ignored for
* other requests.
* @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index ef8502f..b142f2b 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -862,9 +862,6 @@ dpif_linux_recv_set_mask(struct dpif *dpif_, int
listen_mask)
struct dpif_flow_dump flow_dump;
const struct nlattr *key;
size_t key_len;
- const struct nlattr *actions;
- size_t actions_len;
- const struct dpif_flow_stats *flow_stats;
error = nl_sock_create(NETLINK_GENERIC, &dpif->upcall_sock);
if (error) {
@@ -881,24 +878,26 @@ dpif_linux_recv_set_mask(struct dpif *dpif_, int
listen_mask)
vport_request.upcall_pid = nl_sock_pid(dpif->upcall_sock);
error = dpif_linux_vport_transact(&vport_request, NULL, NULL);
if (error) {
- VLOG_WARN("%s: failed to set upcall pid on port: %s",
- dpif_name(dpif_), strerror(error));
+ VLOG_WARN_RL(&error_rl, "%s: failed to set upcall pid on "
+ "port: %s", dpif_name(dpif_), strerror(error));
}
}
dpif_flow_dump_start(&flow_dump, dpif_);
while (dpif_flow_dump_next(&flow_dump, &key, &key_len,
- &actions, &actions_len, &flow_stats)) {
+ NULL, NULL, NULL)) {
struct dpif_linux_flow flow_request;
dpif_linux_flow_init(&flow_request);
flow_request.cmd = OVS_FLOW_CMD_SET;
flow_request.dp_ifindex = dpif->dp_ifindex;
+ flow_request.key = key;
+ flow_request.key_len = key_len;
flow_request.upcall_pid = nl_sock_pid(dpif->upcall_sock);
error = dpif_linux_flow_transact(&flow_request, NULL, NULL);
if (error) {
- VLOG_WARN("%s: failed to set upcall pid on flow: %s",
- dpif_name(dpif_), strerror(error));
+ VLOG_WARN_RL(&error_rl, "%s: failed to set upcall pid on "
+ "flow: %s", dpif_name(dpif_), strerror(error));
}
}
dpif_flow_dump_done(&flow_dump);
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev