Toke Høiland-Jørgensen <[email protected]> writes:

> This commit adds configurable overhead compensation support to the rate
> shaper. With this feature, userspace can configure the actual bottleneck
> link overhead and encapsulation mode used, which will be used by the shaper
> to calculate the precise duration of each packet on the wire.
>
> This feature is needed because CAKE is often deployed one or two hops
> upstream of the actual bottleneck (which can be, e.g., inside a DSL or
> cable modem). In this case, the link layer characteristics and overhead
> reported by the kernel do not match the actual bottleneck. Being able to
> set the actual values in use makes it possible to configure the shaper rate
> much closer to the actual bottleneck rate (our experience shows it is
> possible to get within 0.1% of the actual physical bottleneck rate), thus
> keeping latency low without sacrificing bandwidth.
>
> The overhead compensation has three tunables: A fixed per-packet overhead
> size (which, if set, will be accounted from the IP packet header), a
> minimum packet size (MPU) and a framing mode supporting either ATM or PTM
> framing. We include a set of common keywords in TC to help users configure
> the right parameters. If no overhead value is set, the value reported by
> the kernel is used.
>
> Signed-off-by: Toke Høiland-Jørgensen <[email protected]>
> ---
>  net/sched/sch_cake.c |  123 
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 122 insertions(+), 1 deletion(-)
>
> diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
> index ccc6f26b306c..6314a089a204 100644
> --- a/net/sched/sch_cake.c
> +++ b/net/sched/sch_cake.c
> @@ -275,6 +275,7 @@ enum {
>  
>  struct cobalt_skb_cb {
>       cobalt_time_t enqueue_time;
> +     u32           adjusted_len;
>  };
>  
>  static cobalt_time_t cobalt_get_time(void)
> @@ -1130,6 +1131,87 @@ static cobalt_time_t cake_ewma(cobalt_time_t avg, 
> cobalt_time_t sample,
>       return avg;
>  }
>  
> +static u32 cake_overhead(struct cake_sched_data *q, struct sk_buff *skb)
> +{
> +     const struct skb_shared_info *shinfo = skb_shinfo(skb);
> +     u32 off = skb_network_offset(skb);
> +     u32 len = qdisc_pkt_len(skb);
> +     u16 segs = 1;
> +
> +     if (unlikely(shinfo->gso_size)) {
> +             /* borrowed from qdisc_pkt_len_init() */
> +             unsigned int hdr_len;
> +
> +             hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
> +
> +             /* + transport layer */
> +             if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
> +                                            SKB_GSO_TCPV6))) {
> +                     const struct tcphdr *th;
> +                     struct tcphdr _tcphdr;
> +
> +                     th = skb_header_pointer(skb, skb_transport_offset(skb),
> +                                             sizeof(_tcphdr), &_tcphdr);
> +                     if (likely(th))
> +                             hdr_len += __tcp_hdrlen(th);
> +             } else {
> +                     struct udphdr _udphdr;
> +
> +                     if (skb_header_pointer(skb, skb_transport_offset(skb),
> +                                            sizeof(_udphdr), &_udphdr))
> +                             hdr_len += sizeof(struct udphdr);
> +             }
> +
> +             if (unlikely(shinfo->gso_type & SKB_GSO_DODGY))
> +                     segs = DIV_ROUND_UP(skb->len - hdr_len,
> +                                         shinfo->gso_size);
> +             else
> +                     segs = shinfo->gso_segs;
> +
> +             /* The last segment may be shorter; we ignore this, which means
> +              * that we will over-estimate the size of the whole GSO segment
> +              * by the difference in size. This is conservative, so we live
> +              * with that to avoid the complexity of dealing with it.
> +              */
> +             len = shinfo->gso_size + hdr_len;
> +     }
> +
> +     q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
> +
> +     if (q->rate_flags & CAKE_FLAG_OVERHEAD)
> +             len -= off;
> +
> +     if (q->max_netlen < len)
> +             q->max_netlen = len;
> +     if (q->min_netlen > len)
> +             q->min_netlen = len;
> +
> +     len += q->rate_overhead;
> +
> +     if (len < q->rate_mpu)
> +             len = q->rate_mpu;
> +
> +     if (q->atm_mode == CAKE_ATM_ATM) {
> +             len += 47;
> +             len /= 48;
> +             len *= 53;
> +     } else if (q->atm_mode == CAKE_ATM_PTM) {
> +             /* Add one byte per 64 bytes or part thereof.
> +              * This is conservative and easier to calculate than the
> +              * precise value.
> +              */
> +             len += (len + 63) / 64;
> +     }
> +
> +     if (q->max_adjlen < len)
> +             q->max_adjlen = len;
> +     if (q->min_adjlen > len)
> +             q->min_adjlen = len;
> +
> +     get_cobalt_cb(skb)->adjusted_len = len * segs;
> +     return len;

Well, this is embarrassing; seems that I broke this somewhere along the
way. Will resend with a fix...

-Toke

Reply via email to