From: wenxu <we...@ucloud.cn>

Add ip_defrag_ignore_cb() for conntrack defrag. Unlike ip_defrag(), it
leaves the skb control block (IPCB) untouched when packets are
defragmented by connection tracking, reporting frag_max_size through an
out-parameter instead.

Signed-off-by: wenxu <we...@ucloud.cn>
---
 include/net/ip.h       |  2 ++
 net/ipv4/ip_fragment.c | 55 ++++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 862c954..31779a5 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -671,6 +671,8 @@ static inline bool ip_defrag_user_in_between(u32 user,
 }
 
 int ip_defrag(struct net *net, struct sk_buff *skb, u32 user);
+int ip_defrag_ignore_cb(struct net *net, struct sk_buff *skb,
+                       u32 user, u16 *frag_max_size);
 #ifdef CONFIG_INET
 struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user);
 #else
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cfeb889..afc2b3d 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -76,7 +76,8 @@ static u8 ip4_frag_ecn(u8 tos)
 static struct inet_frags ip4_frags;
 
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
-                        struct sk_buff *prev_tail, struct net_device *dev);
+                        struct sk_buff *prev_tail, struct net_device *dev,
+                        bool ignore_skb_cb, u16 *frag_max_size);
 
 
 static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
@@ -269,7 +270,8 @@ static int ip_frag_reinit(struct ipq *qp)
 }
 
 /* Add new segment to existing queue. */
-static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb,
+                        bool ignore_skb_cb, u16 *frag_max_size)
 {
        struct net *net = qp->q.fqdir->net;
        int ihl, end, flags, offset;
@@ -282,7 +284,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
        if (qp->q.flags & INET_FRAG_COMPLETE)
                goto err;
 
-       if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
+       if ((ignore_skb_cb || !(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE)) &&
            unlikely(ip_frag_too_far(qp)) &&
            unlikely(err = ip_frag_reinit(qp))) {
                ipq_kill(qp);
@@ -368,7 +370,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
                unsigned long orefdst = skb->_skb_refdst;
 
                skb->_skb_refdst = 0UL;
-               err = ip_frag_reasm(qp, skb, prev_tail, dev);
+               err = ip_frag_reasm(qp, skb, prev_tail, dev, ignore_skb_cb,
+                                   frag_max_size);
                skb->_skb_refdst = orefdst;
                if (err)
                        inet_frag_kill(&qp->q);
@@ -400,7 +403,8 @@ static bool ip_frag_coalesce_ok(const struct ipq *qp)
 
 /* Build a new IP datagram from all its fragments. */
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
-                        struct sk_buff *prev_tail, struct net_device *dev)
+                        struct sk_buff *prev_tail, struct net_device *dev,
+                        bool ignore_skb_cb, u16 *frag_max_size)
 {
        struct net *net = qp->q.fqdir->net;
        struct iphdr *iph;
@@ -430,7 +434,10 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
                               ip_frag_coalesce_ok(qp));
 
        skb->dev = dev;
-       IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
+       if (!ignore_skb_cb)
+               IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
+       else if (frag_max_size)
+               *frag_max_size = max(qp->max_df_size, qp->q.max_size);
 
        iph = ip_hdr(skb);
        iph->tot_len = htons(len);
@@ -445,7 +452,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
         * from one very small df-fragment and one large non-df frag.
         */
        if (qp->max_df_size == qp->q.max_size) {
-               IPCB(skb)->flags |= IPSKB_FRAG_PMTU;
+               if (!ignore_skb_cb)
+                       IPCB(skb)->flags |= IPSKB_FRAG_PMTU;
                iph->frag_off = htons(IP_DF);
        } else {
                iph->frag_off = 0;
@@ -487,7 +495,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
 
                spin_lock(&qp->q.lock);
 
-               ret = ip_frag_queue(qp, skb);
+               ret = ip_frag_queue(qp, skb, false, NULL);
 
                spin_unlock(&qp->q.lock);
                ipq_put(qp);
@@ -500,6 +508,37 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
 }
 EXPORT_SYMBOL(ip_defrag);
 
+/* Process an incoming IP datagram fragment. */
+int ip_defrag_ignore_cb(struct net *net, struct sk_buff *skb,
+                       u32 user, u16 *frag_max_size)
+{
+       struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
+       int vif = l3mdev_master_ifindex_rcu(dev);
+       struct ipq *qp;
+
+       __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
+       skb_orphan(skb);
+
+       /* Lookup (or create) queue header */
+       qp = ip_find(net, ip_hdr(skb), user, vif);
+       if (qp) {
+               int ret;
+
+               spin_lock_bh(&qp->q.lock);
+
+               ret = ip_frag_queue(qp, skb, true, frag_max_size);
+
+               spin_unlock_bh(&qp->q.lock);
+               ipq_put(qp);
+               return ret;
+       }
+
+       __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
+       kfree_skb(skb);
+       return -ENOMEM;
+}
+EXPORT_SYMBOL(ip_defrag_ignore_cb);
+
 struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
 {
        struct iphdr iph;
-- 
1.8.3.1

Reply via email to