On 18/09/15(Fri) 15:55, David Gwynne wrote:
> hashing bits of packet headers to tie connections to particular
> physical interfaces within a trunk turns out to be fairly expensive.
> in my very unscientific testing it is about 20% of the cost of udp
> traffic generated with tcpbench -u.
>
> we could tune or change the hash. eg, going from siphash 2 4 to
> siphash 1 1 halves the overhead of hashing. however, it occurred
> to me that sometimes we already know about connections. why not
> reuse that info if it is available?
Why not, but I'd argue that's orthogonal to the fact that siphash
2 4 has a high cost.
> this lets pf embed the state id into the mbuf as a "flow id" so
> other subsystems can use it. eg, trunk can pull it out and use it.
>
> this diff steals the pad field in mbuf packet headers and uses it
> to embed a flow id. it makes pf fill it in, and trunk use it. this
> avoids the cost of hashing in trunk altogether.
>
> it could be used in other places too, eg, picking an upstream when
> we're going multipath routing.
I've been through RFC 2992 again and indeed I believe we could use that.
What about carp(4) and bridge(4)?
>
> thoughts?
>
> Index: sys/mbuf.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/mbuf.h,v
> retrieving revision 1.196
> diff -u -p -r1.196 mbuf.h
> --- sys/mbuf.h 14 Aug 2015 05:25:29 -0000 1.196
> +++ sys/mbuf.h 18 Sep 2015 05:47:30 -0000
> @@ -125,7 +125,7 @@ struct pkthdr {
> SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
> int len; /* total packet length */
> u_int16_t tagsset; /* mtags attached */
> - u_int16_t pad;
> + u_int16_t flowid; /* pseudo unique flow id */
> u_int16_t csum_flags; /* checksum flags */
> u_int16_t ether_vtag; /* Ethernet 802.1p+Q vlan tag */
> u_int ph_rtableid; /* routing table id */
> @@ -236,6 +236,10 @@ struct mbuf {
> #define MT_FTABLE 5 /* fragment reassembly header */
> #define MT_CONTROL 6 /* extra-data protocol message */
> #define MT_OOBDATA 7 /* expedited data */
> +
> +/* flowid field */
> +#define M_FLOWID_VALID 0x8000 /* is the flowid set */
> +#define M_FLOWID_MASK 0x7fff /* flow id to map to path */
>
> /* flags to m_get/MGET */
> #define M_DONTWAIT M_NOWAIT
> Index: net/pf.c
> ===================================================================
> RCS file: /cvs/src/sys/net/pf.c,v
> retrieving revision 1.944
> diff -u -p -r1.944 pf.c
> --- net/pf.c 13 Sep 2015 17:53:44 -0000 1.944
> +++ net/pf.c 18 Sep 2015 05:47:30 -0000
> @@ -6531,11 +6531,15 @@ done:
> pd.m->m_pkthdr.pf.qid = qid;
> if (pd.dir == PF_IN && s && s->key[PF_SK_STACK])
> pd.m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK];
> - if (pd.dir == PF_OUT &&
> - pd.m->m_pkthdr.pf.inp && !pd.m->m_pkthdr.pf.inp->inp_pf_sk &&
> - s && s->key[PF_SK_STACK] && !s->key[PF_SK_STACK]->inp) {
> - pd.m->m_pkthdr.pf.inp->inp_pf_sk = s->key[PF_SK_STACK];
> - s->key[PF_SK_STACK]->inp = pd.m->m_pkthdr.pf.inp;
> + if (pd.dir == PF_OUT && s) {
> + if (pd.m->m_pkthdr.pf.inp &&
> + !pd.m->m_pkthdr.pf.inp->inp_pf_sk &&
> + s->key[PF_SK_STACK] && !s->key[PF_SK_STACK]->inp) {
> + pd.m->m_pkthdr.pf.inp->inp_pf_sk = s->key[PF_SK_STACK];
> + s->key[PF_SK_STACK]->inp = pd.m->m_pkthdr.pf.inp;
> + }
> + pd.m->m_pkthdr.flowid = M_FLOWID_VALID |
> + (M_FLOWID_MASK & bemtoh64(&s->id));
> }
>
> /*
> Index: net/if_trunk.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_trunk.c,v
> retrieving revision 1.111
> diff -u -p -r1.111 if_trunk.c
> --- net/if_trunk.c 10 Sep 2015 16:41:30 -0000 1.111
> +++ net/if_trunk.c 18 Sep 2015 05:47:30 -0000
> @@ -985,6 +990,9 @@ trunk_hashmbuf(struct mbuf *m, SIPHASH_K
> #endif
> SIPHASH_CTX ctx;
>
> + if (m->m_pkthdr.flowid & M_FLOWID_VALID)
> + return (m->m_pkthdr.flowid & M_FLOWID_MASK);
> +
> SipHash24_Init(&ctx, key);
> off = sizeof(*eh);
> if (m->m_len < off)
>