Toke Høiland-Jørgensen <[email protected]> writes: > This commit adds configurable overhead compensation support to the rate > shaper. With this feature, userspace can configure the actual bottleneck > link overhead and encapsulation mode used, which will be used by the shaper > to calculate the precise duration of each packet on the wire. > > This feature is needed because CAKE is often deployed one or two hops > upstream of the actual bottleneck (which can be, e.g., inside a DSL or > cable modem). In this case, the link layer characteristics and overhead > reported by the kernel do not match the actual bottleneck. Being able to > set the actual values in use makes it possible to configure the shaper rate > much closer to the actual bottleneck rate (our experience shows it is > possible to get within 0.1% of the actual physical bottleneck rate), thus > keeping latency low without sacrificing bandwidth. > > The overhead compensation has three tunables: A fixed per-packet overhead > size (which, if set, will be accounted from the IP packet header), a > minimum packet size (MPU) and a framing mode supporting either ATM or PTM > framing. We include a set of common keywords in TC to help users configure > the right parameters. If no overhead value is set, the value reported by > the kernel is used. 
> > Signed-off-by: Toke Høiland-Jørgensen <[email protected]> > --- > net/sched/sch_cake.c | 123 > ++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 122 insertions(+), 1 deletion(-) > > diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c > index ccc6f26b306c..6314a089a204 100644 > --- a/net/sched/sch_cake.c > +++ b/net/sched/sch_cake.c > @@ -275,6 +275,7 @@ enum { > > struct cobalt_skb_cb { > cobalt_time_t enqueue_time; > + u32 adjusted_len; > }; > > static cobalt_time_t cobalt_get_time(void) > @@ -1130,6 +1131,87 @@ static cobalt_time_t cake_ewma(cobalt_time_t avg, > cobalt_time_t sample, > return avg; > } > > +static u32 cake_overhead(struct cake_sched_data *q, struct sk_buff *skb) > +{ > + const struct skb_shared_info *shinfo = skb_shinfo(skb); > + u32 off = skb_network_offset(skb); > + u32 len = qdisc_pkt_len(skb); > + u16 segs = 1; > + > + if (unlikely(shinfo->gso_size)) { > + /* borrowed from qdisc_pkt_len_init() */ > + unsigned int hdr_len; > + > + hdr_len = skb_transport_header(skb) - skb_mac_header(skb); > + > + /* + transport layer */ > + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | > + SKB_GSO_TCPV6))) { > + const struct tcphdr *th; > + struct tcphdr _tcphdr; > + > + th = skb_header_pointer(skb, skb_transport_offset(skb), > + sizeof(_tcphdr), &_tcphdr); > + if (likely(th)) > + hdr_len += __tcp_hdrlen(th); > + } else { > + struct udphdr _udphdr; > + > + if (skb_header_pointer(skb, skb_transport_offset(skb), > + sizeof(_udphdr), &_udphdr)) > + hdr_len += sizeof(struct udphdr); > + } > + > + if (unlikely(shinfo->gso_type & SKB_GSO_DODGY)) > + segs = DIV_ROUND_UP(skb->len - hdr_len, > + shinfo->gso_size); > + else > + segs = shinfo->gso_segs; > + > + /* The last segment may be shorter; we ignore this, which means > + * that we will over-estimate the size of the whole GSO segment > + * by the difference in size. This is conservative, so we live > + * with that to avoid the complexity of dealing with it. 
> + */ > + len = shinfo->gso_size + hdr_len; > + } > + > + q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8); > + > + if (q->rate_flags & CAKE_FLAG_OVERHEAD) > + len -= off; > + > + if (q->max_netlen < len) > + q->max_netlen = len; > + if (q->min_netlen > len) > + q->min_netlen = len; > + > + len += q->rate_overhead; > + > + if (len < q->rate_mpu) > + len = q->rate_mpu; > + > + if (q->atm_mode == CAKE_ATM_ATM) { > + len += 47; > + len /= 48; > + len *= 53; > + } else if (q->atm_mode == CAKE_ATM_PTM) { > + /* Add one byte per 64 bytes or part thereof. > + * This is conservative and easier to calculate than the > + * precise value. > + */ > + len += (len + 63) / 64; > + } > + > + if (q->max_adjlen < len) > + q->max_adjlen = len; > + if (q->min_adjlen > len) > + q->min_adjlen = len; > + > + get_cobalt_cb(skb)->adjusted_len = len * segs; > + return len;
Well, this is embarrassing; seems that I broke this somewhere along the way. Will resend with a fix... -Toke
