Firstly, this patch introduces the notion of a reserved priority, because
the filter implementing ingress policing requires the highest priority.
Secondly, it allows rate limiters to be set while tc-offloads is enabled.
Lastly, when an ingress rate limiter is configured, it installs a matchall
filter that matches all traffic and applies a police action to it.
An example of what to expect:
OvS CLI:
ovs-vsctl set interface <netdev_name> ingress_policing_rate=5000
ovs-vsctl set interface <netdev_name> ingress_policing_burst=100
Resulting TC filter:
filter protocol ip pref 1 matchall chain 0
filter protocol ip pref 1 matchall chain 0 handle 0x1
not_in_hw
action order 1: police 0x1 rate 5Mbit burst 125Kb mtu 64Kb
action drop/continue overhead 0b
ref 1 bind 1 installed 3 sec used 3 sec
Action statistics:
Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
backlog 0b 0p requeues 0
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
10.0.0.200 () port 0 AF_INET : demo
Recv Send Send
Socket Socket Message Elapsed
Size Size Size Time Throughput
bytes bytes bytes secs. 10^6bits/sec
131072 16384 16384 60.13 4.49
ovs-vsctl list interface <netdev_name>
_uuid : 2ca774e8-8b95-430f-a2c2-f8f742613ab1
admin_state : up
...
ingress_policing_burst: 100
ingress_policing_rate: 5000
...
type : ""
Signed-off-by: Pieter Jansen van Vuuren <[email protected]>
Reviewed-by: Simon Horman <[email protected]>
---
include/linux/pkt_cls.h | 12 ++++
lib/netdev-linux.c | 131 +++++++++++++++++++++++++++++++++++----
lib/netdev-tc-offloads.c | 2 +-
lib/tc.c | 4 ++
lib/tc.h | 7 +++
5 files changed, 144 insertions(+), 12 deletions(-)
diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index 1384d71f9..4adea59e7 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -238,6 +238,18 @@ enum {
TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
};
+/* Match-all classifier */
+
+enum {
+ TCA_MATCHALL_UNSPEC,
+ TCA_MATCHALL_CLASSID,
+ TCA_MATCHALL_ACT, /* Nested attribute wrapping the filter's actions. */
+ TCA_MATCHALL_FLAGS,
+ __TCA_MATCHALL_MAX, /* Sentinel: one past the last attribute. */
+};
+
+#define TCA_MATCHALL_MAX (__TCA_MATCHALL_MAX - 1) /* Last valid attribute. */
+
#endif /* __KERNEL__ || !HAVE_TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST */
#endif /* __LINUX_PKT_CLS_WRAPPER_H */
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 25d037cb6..92cfb229d 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -113,6 +113,10 @@ COVERAGE_DEFINE(netdev_set_ethtool);
#define TC_RTAB_SIZE 1024
#endif
+#ifndef TCM_IFINDEX_MAGIC_BLOCK /* Fallback if the headers lack it. */
+#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU) /* ifindex used with a block. */
+#endif
+
/* Linux 2.6.21 introduced struct tpacket_auxdata.
* Linux 2.6.27 added the tp_vlan_tci member.
* Linux 3.0 defined TP_STATUS_VLAN_VALID.
@@ -473,10 +477,10 @@ static int tc_delete_class(const struct netdev *,
unsigned int handle);
static int tc_del_qdisc(struct netdev *netdev);
static int tc_query_qdisc(const struct netdev *netdev);
+void
+tc_put_rtab(struct ofpbuf *msg, uint16_t type, const struct tc_ratespec *rate);
static int tc_calc_cell_log(unsigned int mtu);
static void tc_fill_rate(struct tc_ratespec *rate, uint64_t bps, int mtu);
-static void tc_put_rtab(struct ofpbuf *, uint16_t type,
- const struct tc_ratespec *rate);
static int tc_calc_buffer(unsigned int Bps, int mtu, uint64_t burst_bytes);
struct netdev_linux {
@@ -2324,6 +2328,109 @@ exit:
return error;
}
+/* Builds the 'struct tc_police' used by the matchall ingress policer.
+ *
+ * 'kbits_rate' is the policing rate in kilobits (1000 bits) per second.
+ * 'kbits_burst' is the burst size in kilobits, scaled by 1024 bits each.
+ * The returned policer uses TC_POLICE_SHOT as its action and an MTU of
+ * 65535 bytes. */
+static struct tc_police
+tc_matchall_fill_police(uint32_t kbits_rate, uint32_t kbits_burst)
+{
+    /* Burst in bytes: kilobits * 1024 bits / 8 bits per byte.  The MIN()
+     * keeps the multiplication by 1024 from overflowing 32 bits. */
+    unsigned int bsize = MIN(UINT32_MAX / 1024, kbits_burst) * 1024 / 8;
+    /* Rate in bytes per second.  The 64-bit intermediate can exceed what
+     * the 32-bit 'rate' field holds, so saturate instead of wrapping to an
+     * unrelated, much lower rate. */
+    unsigned int bps = MIN(((uint64_t) kbits_rate * 1000) / 8, UINT32_MAX);
+    struct tc_police police;
+    struct tc_ratespec rate;
+    int mtu = 65535;
+
+    memset(&rate, 0, sizeof rate);
+    rate.rate = bps;
+    rate.cell_log = tc_calc_cell_log(mtu);
+    rate.mpu = ETH_TOTAL_MIN;
+
+    memset(&police, 0, sizeof police);
+    police.burst = tc_bytes_to_ticks(bps, bsize);
+    police.action = TC_POLICE_SHOT;
+    police.rate = rate;
+    police.mtu = mtu;
+
+    return police;
+}
+
+/* Appends a "police" action description to 'request': the action kind,
+ * followed by a nested TCA_ACT_OPTIONS attribute that carries 'police'
+ * (TCA_POLICE_TBF), the rate table for 'police.rate' (TCA_POLICE_RATE) and
+ * a TCA_POLICE_RESULT of TC_ACT_UNSPEC, in that order. */
+static void
+nl_msg_put_act_police(struct ofpbuf *request, struct tc_police police)
+{
+    size_t opts_start;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "police");
+
+    opts_start = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    nl_msg_put_unspec(request, TCA_POLICE_TBF, &police, sizeof police);
+    tc_put_rtab(request, TCA_POLICE_RATE, &police.rate);
+    nl_msg_put_u32(request, TCA_POLICE_RESULT, TC_ACT_UNSPEC);
+    nl_msg_end_nested(request, opts_start);
+}
+
+/* Installs a matchall filter carrying a single police action on 'netdev',
+ * at the reserved priority TC_RESERVED_PRIORITY_POLICE and with handle 1.
+ * 'kbits_rate' and 'kbits_burst' are handed to tc_matchall_fill_police() to
+ * build the policer.
+ *
+ * Returns 0 if successful, otherwise the error reported by get_ifindex() or
+ * tc_transact(). */
+static int
+tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate,
+                        uint32_t kbits_burst)
+{
+    /* Always 0 in this function, so the request below targets 'ifindex'
+     * with parent TC_INGRESS_PARENT. */
+    const uint32_t block_id = 0;
+    const uint16_t protocol = (OVS_FORCE uint16_t) htons(ETH_P_ALL);
+    size_t opts_start, acts_start, act_start;
+    struct tc_police police;
+    struct ofpbuf request;
+    struct ofpbuf *reply;
+    struct tcmsg *filter;
+    int target, ifindex, error;
+
+    error = get_ifindex(netdev, &ifindex);
+    if (error) {
+        return error;
+    }
+
+    if (block_id) {
+        target = TCM_IFINDEX_MAGIC_BLOCK;
+    } else {
+        target = ifindex;
+    }
+
+    filter = tc_make_request(target, RTM_NEWTFILTER,
+                             NLM_F_CREATE | NLM_F_ECHO, &request);
+    filter->tcm_parent = block_id ? block_id : TC_INGRESS_PARENT;
+    filter->tcm_info = tc_make_handle(TC_RESERVED_PRIORITY_POLICE, protocol);
+    filter->tcm_handle = 1;
+
+    police = tc_matchall_fill_police(kbits_rate, kbits_burst);
+
+    /* Message layout:
+     * "matchall" { TCA_OPTIONS { TCA_MATCHALL_ACT { 1 { police } } } } */
+    nl_msg_put_string(&request, TCA_KIND, "matchall");
+    opts_start = nl_msg_start_nested(&request, TCA_OPTIONS);
+    acts_start = nl_msg_start_nested(&request, TCA_MATCHALL_ACT);
+    act_start = nl_msg_start_nested(&request, 1);
+    nl_msg_put_act_police(&request, police);
+    nl_msg_end_nested(&request, act_start);
+    nl_msg_end_nested(&request, acts_start);
+    nl_msg_end_nested(&request, opts_start);
+
+    error = tc_transact(&request, &reply);
+    if (error) {
+        return error;
+    }
+
+    /* The echoed reply is only passed through ofpbuf_at_assert() for a
+     * tcmsg-sized header; its contents are not used. */
+    (void) ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof(struct tcmsg));
+    ofpbuf_delete(reply);
+
+    return 0;
+}
+
+/* Deletes the filter at priority TC_RESERVED_PRIORITY_POLICE with handle 1
+ * from 'netdev', i.e. the one tc_add_matchall_policer() installs.
+ *
+ * Returns 0 if successful, otherwise the error reported by get_ifindex() or
+ * tc_del_filter(). */
+static int
+tc_del_matchall_policer(struct netdev *netdev)
+{
+    const uint32_t block_id = 0;
+    int ifindex;
+    int error;
+
+    error = get_ifindex(netdev, &ifindex);
+    if (error) {
+        return error;
+    }
+
+    return tc_del_filter(ifindex, TC_RESERVED_PRIORITY_POLICE, 1, block_id);
+}
+
/* Attempts to set input rate limiting (policing) policy. Returns 0 if
* successful, otherwise a positive errno value. */
static int
@@ -2335,14 +2442,6 @@ netdev_linux_set_policing(struct netdev *netdev_,
int ifindex;
int error;
- if (netdev_is_flow_api_enabled()) {
- if (kbits_rate) {
- VLOG_WARN_RL(&rl, "%s: policing with offload isn't supported",
- netdev_name);
- }
- return EOPNOTSUPP;
- }
-
kbits_burst = (!kbits_rate ? 0 /* Force to 0 if no rate specified. */
: !kbits_burst ? 8000 /* Default to 8000 kbits if 0. */
: kbits_burst); /* Stick with user-specified value. */
@@ -2368,6 +2467,16 @@ netdev_linux_set_policing(struct netdev *netdev_,
goto out;
}
+ /* Use matchall for policing when offloading OVS with tc-flower. */
+ if (netdev_is_flow_api_enabled()) {
+ error = tc_del_matchall_policer(netdev_);
+ if (kbits_rate) {
+ error = tc_add_matchall_policer(netdev_, kbits_rate, kbits_burst);
+ }
+ ovs_mutex_unlock(&netdev->mutex);
+ return error;
+ }
+
COVERAGE_INC(netdev_set_policing);
/* Remove any existing ingress qdisc. */
error = tc_add_del_ingress_qdisc(ifindex, false, 0);
@@ -5481,7 +5590,7 @@ tc_fill_rate(struct tc_ratespec *rate, uint64_t Bps, int
mtu)
* attribute of the specified "type".
*
* See tc_calc_cell_log() above for a description of "rtab"s. */
-static void
+void
tc_put_rtab(struct ofpbuf *msg, uint16_t type, const struct tc_ratespec *rate)
{
uint32_t *rtab;
diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
index 73ce7b952..cef47d1f6 100644
--- a/lib/netdev-tc-offloads.c
+++ b/lib/netdev-tc-offloads.c
@@ -278,7 +278,7 @@ get_prio_for_tc_flower(struct tc_flower *flower)
{
static struct hmap prios = HMAP_INITIALIZER(&prios);
static struct ovs_mutex prios_lock = OVS_MUTEX_INITIALIZER;
- static uint16_t last_prio = 0;
+ static uint16_t last_prio = TC_RESERVED_PRIORITY_MAX;
size_t key_len = sizeof(struct tc_flower_key);
size_t hash = hash_int((OVS_FORCE uint32_t) flower->key.eth_type, 0);
struct prio_map_data *data;
diff --git a/lib/tc.c b/lib/tc.c
index b19f075f2..d31b9d3e4 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -1389,6 +1389,10 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct
tc_flower *flower)
flower->mask.eth_type = OVS_BE16_MAX;
flower->prio = tc_get_major(tc->tcm_info);
+ if (flower->prio == TC_RESERVED_PRIORITY_POLICE) {
+ return 0;
+ }
+
if (!flower->handle) {
return EAGAIN;
}
diff --git a/lib/tc.h b/lib/tc.h
index 7196a32d7..dfb482f02 100644
--- a/lib/tc.h
+++ b/lib/tc.h
@@ -41,6 +41,13 @@
#define TC_POLICY_DEFAULT "none"
+enum tc_flower_reserved_prio {
+ TC_RESERVED_PRIORITY_NONE,
+ TC_RESERVED_PRIORITY_POLICE, /* Used by the matchall policer filter. */
+ __TC_RESERVED_PRIORITY_MAX /* Sentinel: one past the last reserved value. */
+};
+#define TC_RESERVED_PRIORITY_MAX (__TC_RESERVED_PRIORITY_MAX -1) /* Last reserved value; initial last_prio for flower. */
+
/* Returns tc handle 'major':'minor'. */
static inline unsigned int
tc_make_handle(unsigned int major, unsigned int minor)
--
2.17.0
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev