> From: Vignesh PS <vignesh.purushotham.srini...@ericsson.com> > > Add support to ip_frag library to perform IPv6 reassembly > when extension headers are present before the fragment > extension in the packet. > > Signed-off-by: Vignesh PS <vignesh.purushotham.srini...@ericsson.com> > --- > .mailmap | 1 + > lib/ip_frag/ip_frag_common.h | 2 + > lib/ip_frag/ip_reassembly.h | 2 + > lib/ip_frag/rte_ipv6_reassembly.c | 68 +++++++++++++++++++++++++++---- > 4 files changed, 64 insertions(+), 9 deletions(-) > > diff --git a/.mailmap b/.mailmap > index 4a508bafad..69b229a5b7 100644 > --- a/.mailmap > +++ b/.mailmap > @@ -1548,6 +1548,7 @@ Viacheslav Ovsiienko <viachesl...@nvidia.com> > <viachesl...@mellanox.com> > Victor Kaplansky <vict...@redhat.com> > Victor Raj <victor....@intel.com> > Vidya Sagar Velumuri <vvelum...@marvell.com> > +Vignesh PS <vignesh.purushotham.srini...@ericsson.com> > <vig.vigneshps1...@gmail.com> > Vignesh Sridhar <vignesh.srid...@intel.com> > Vijayakumar Muthuvel Manickam <mmvi...@gmail.com> > Vijaya Mohan Guvva <vijay1...@gmail.com> > diff --git a/lib/ip_frag/ip_frag_common.h b/lib/ip_frag/ip_frag_common.h > index 51fc9d47fb..db2665e846 100644 > --- a/lib/ip_frag/ip_frag_common.h > +++ b/lib/ip_frag/ip_frag_common.h > @@ -169,6 +169,8 @@ ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms) > fp->total_size = UINT32_MAX; > fp->frag_size = 0; > fp->last_idx = IP_MIN_FRAG_NUM; > + fp->exts_len = 0; > + fp->next_proto = NULL; > fp->frags[IP_LAST_FRAG_IDX] = zero_frag; > fp->frags[IP_FIRST_FRAG_IDX] = zero_frag; > } > diff --git a/lib/ip_frag/ip_reassembly.h b/lib/ip_frag/ip_reassembly.h > index 54afed5417..429e74f1b3 100644 > --- a/lib/ip_frag/ip_reassembly.h > +++ b/lib/ip_frag/ip_reassembly.h > @@ -54,6 +54,8 @@ struct __rte_cache_aligned ip_frag_pkt { > uint32_t total_size; /* expected reassembled size */ > uint32_t frag_size; /* size of fragments received */ > uint32_t last_idx; /* index of next entry to fill */ > + uint32_t exts_len; /* length of 
extension hdrs for > first fragment */ > + uint8_t *next_proto; /* pointer of the next_proto > field */ > struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */ > }; > > diff --git a/lib/ip_frag/rte_ipv6_reassembly.c > b/lib/ip_frag/rte_ipv6_reassembly.c > index 88863a98d1..8decf592a6 100644 > --- a/lib/ip_frag/rte_ipv6_reassembly.c > +++ b/lib/ip_frag/rte_ipv6_reassembly.c > @@ -91,19 +91,19 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp) > /* update ipv6 header for the reassembled datagram */ > ip_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *, m->l2_len); > > + payload_len += fp->exts_len; > ip_hdr->payload_len = rte_cpu_to_be_16(payload_len); > > /* > * remove fragmentation header. note that per RFC2460, we need to update > * the last non-fragmentable header with the "next header" field to > contain > - * type of the first fragmentable header, but we currently don't support > - * other headers, so we assume there are no other headers and thus > update > - * the main IPv6 header instead. > + * type of the first fragmentable header. > */ > - move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr); > - frag_hdr = (struct rte_ipv6_fragment_ext *) (ip_hdr + 1); > - ip_hdr->proto = frag_hdr->next_header; > + frag_hdr = (struct rte_ipv6_fragment_ext *) > + ((uint8_t *) (ip_hdr + 1) + fp->exts_len); > + *fp->next_proto = frag_hdr->next_header; > > + move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr); > ip_frag_memmove(rte_pktmbuf_mtod_offset(m, char *, sizeof(*frag_hdr)), > rte_pktmbuf_mtod(m, char*), move_len); > > @@ -112,6 +112,39 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp) > return m; > } > > +/* > + * Function to crawl through the extension header stack. 
> + * This function breaks as soon a the fragment header is > + * found and returns the total length the traversed exts > + * and the last extension before the fragment header > + */ > +static inline uint32_t > +ip_frag_get_last_exthdr(struct rte_ipv6_hdr *ip_hdr, uint8_t **last_ext) > +{ > + uint32_t total_len = 0; > + uint8_t num_exts = 0; > + size_t ext_len = 0; > + *last_ext = (uint8_t *)(ip_hdr + 1); > + int next_proto = ip_hdr->proto; > +#define MAX_NUM_IPV6_EXTS 8
As a nit - let's keep the coding style consistent: please move the #define outside the function definition. > + > + while (next_proto != IPPROTO_FRAGMENT && > + num_exts < MAX_NUM_IPV6_EXTS && > + (next_proto = rte_ipv6_get_next_ext( > + *last_ext, next_proto, &ext_len)) >= 0) { > + > + total_len += ext_len; > + > + if (next_proto == IPPROTO_FRAGMENT) > + return total_len; > + > + *last_ext += ext_len; > + num_exts++; > + } So if IPPROTO_FRAGMENT was not found, we just use extension #8 instead? Shouldn't we return an error in that case, and probably drop the fragment? > + return total_len; > +} > + > /* > * Process new mbuf with fragment of IPV6 datagram. > * Incoming mbuf should have its l2_len/l3_len fields setup correctly. > @@ -139,6 +172,8 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl > *tbl, > { > struct ip_frag_pkt *fp; > struct ip_frag_key key; > + uint8_t *last_ipv6_ext; > + uint32_t exts_len; > uint16_t ip_ofs; > int32_t ip_len; > int32_t trim; > @@ -154,10 +189,10 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl > *tbl, > /* > * as per RFC2460, payload length contains all extension headers > * as well. > - * since we don't support anything but frag headers, > - * this is what we remove from the payload len. > + * so we remove the extension len from the payload len. > */ > - ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr); > + exts_len = ip_frag_get_last_exthdr(ip_hdr, &last_ipv6_ext); > + ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - exts_len - > sizeof(*frag_hdr); Hmm..., as far as I remember, ip_len is what we want to preserve in the packet... Why do we want to remove all previous ext headers here? > trim = mb->pkt_len - (ip_len + mb->l3_len + mb->l2_len); > > IP_FRAG_LOG(DEBUG, "%s:%d:\n" > @@ -201,6 +236,21 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl > *tbl, > /* process the fragmented packet.
*/ > mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, > MORE_FRAGS(frag_hdr->frag_data)); Can you explain why we are setting these new fp fields after 'ip_frag_process()'? ip_frag_process() itself can call reassembly() - if all fragments are already in place. > + > + /* store extension stack info, only for first fragment */ > + if (ip_ofs == 0) { If we want it for the first fragment only, why not invoke ip_frag_get_last_exthdr() only when ip_ofs == 0? > + /* > + * fp->next_proto points to either the IP's next header > + * or th next header of the extension before the fragment > + * extension > + */ > + fp->next_proto = (uint8_t *)&ip_hdr->proto; > + if (exts_len > 0) { > + fp->exts_len = exts_len; > + fp->next_proto = last_ipv6_ext; > + } > + } > + > ip_frag_inuse(tbl, fp); > > IP_FRAG_LOG(DEBUG, "%s:%d:\n" > -- > 2.34.1