On Sun, 31 Jul 2005, Herbert Xu wrote:

>
> Anyway, here is a patch to use inetpeer instead of that icky ipc
> structure.  It sure cuts down the size of the patch :)
> ....

Herbert;

I've been using the updated patch, and I like it. struct inet_peer
is the right place to do this accounting.

I made a few bug fixes and have tested it, tried to break it, etc.
Seems to do the trick. The latest iteration is attached. Please have
a look.

Regards.

--
Arthur
diff -pur linux.orig/include/linux/sysctl.h linux.new/include/linux/sysctl.h
--- linux.orig/include/linux/sysctl.h   2005-08-03 11:43:40.923892254 -0700
+++ linux.new/include/linux/sysctl.h    2005-08-04 16:58:17.901171101 -0700
@@ -352,6 +352,7 @@ enum
        NET_TCP_BIC_BETA=108,
        NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
        NET_TCP_CONG_CONTROL=110,
+       NET_IPV4_IPFRAG_MAX_DIST=111,
 };
 
 enum {
diff -pur linux.orig/include/net/inetpeer.h linux.new/include/net/inetpeer.h
--- linux.orig/include/net/inetpeer.h   2005-08-03 11:44:01.778605212 -0700
+++ linux.new/include/net/inetpeer.h    2005-08-04 16:58:38.694360121 -0700
@@ -25,6 +25,7 @@ struct inet_peer
        __u32                   v4daddr;        /* peer's address */
        __u16                   avl_height;
        __u16                   ip_id_count;    /* IP ID for the next packet */
+       atomic_t                rid;            /* Frag reception counter */
        __u32                   tcp_ts;
        unsigned long           tcp_ts_stamp;
 };
diff -pur linux.orig/include/net/ip.h linux.new/include/net/ip.h
--- linux.orig/include/net/ip.h 2005-08-03 11:44:08.654654565 -0700
+++ linux.new/include/net/ip.h  2005-08-04 16:58:50.460109760 -0700
@@ -45,6 +45,7 @@ struct inet_skb_parm
 #define IPSKB_TRANSLATED       2
 #define IPSKB_FORWARDED                4
 #define IPSKB_XFRM_TUNNEL_SIZE 8
+#define IPSKB_FRAG_COMPLETE    16
 };
 
 struct ipcm_cookie
diff -pur linux.orig/net/ipv4/inetpeer.c linux.new/net/ipv4/inetpeer.c
--- linux.orig/net/ipv4/inetpeer.c      2005-08-03 11:44:40.086627938 -0700
+++ linux.new/net/ipv4/inetpeer.c       2005-08-04 16:59:20.251440976 -0700
@@ -401,6 +401,7 @@ struct inet_peer *inet_getpeer(__u32 dad
                return NULL;
        n->v4daddr = daddr;
        atomic_set(&n->refcnt, 1);
+       atomic_set(&n->rid, 0);
        n->ip_id_count = secure_ip_id(daddr);
        n->tcp_ts_stamp = 0;
 
diff -pur linux.orig/net/ipv4/ip_fragment.c linux.new/net/ipv4/ip_fragment.c
--- linux.orig/net/ipv4/ip_fragment.c   2005-08-03 11:44:48.086712630 -0700
+++ linux.new/net/ipv4/ip_fragment.c    2005-08-04 17:03:02.162971536 -0700
@@ -22,6 +22,7 @@
  *             Patrick McHardy :       LRU queue of frag heads for evictor.
  */
 
+#include <linux/compiler.h>
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -38,6 +39,7 @@
 #include <net/ip.h>
 #include <net/icmp.h>
 #include <net/checksum.h>
+#include <net/inetpeer.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/inet.h>
@@ -56,6 +58,8 @@
 int sysctl_ipfrag_high_thresh = 256*1024;
 int sysctl_ipfrag_low_thresh = 192*1024;
 
+int sysctl_ipfrag_max_dist = 64;
+
 /* Important NOTE! Fragment queue must be destroyed before MSL expires.
  * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
  */
@@ -90,8 +94,11 @@ struct ipq {
        atomic_t        refcnt;
        struct timer_list timer;        /* when will this queue expire?         */
        struct ipq      **pprev;
-       int             iif;
        struct timeval  stamp;
+       int             iif;
+
+       unsigned int    rid;
+       struct inet_peer *peer;
 };
 
 /* Hash table. */
@@ -207,6 +214,9 @@ static void ip_frag_destroy(struct ipq *
        BUG_TRAP(qp->last_in&COMPLETE);
        BUG_TRAP(del_timer(&qp->timer) == 0);
 
+       if (qp->peer)
+               inet_putpeer(qp->peer);
+
        /* Release all fragment data. */
        fp = qp->fragments;
        while (fp) {
@@ -366,6 +376,9 @@ static struct ipq *ip_frag_create(unsign
        qp->meat = 0;
        qp->fragments = NULL;
        qp->iif = 0;
+       qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
+       if (qp->peer) 
+               qp->rid = atomic_read(&qp->peer->rid);
 
        /* Initialize a timer for this entry. */
        init_timer(&qp->timer);
@@ -410,6 +423,63 @@ static inline struct ipq *ip_find(struct
        return ip_frag_create(hash, iph, user);
 }
 
+/* Is the fragment too far ahead to be part of ipq?  Nonzero means yes. */
+static inline int ip_frag_too_far(struct ipq *qp)
+{
+       struct inet_peer *peer = qp->peer;
+       unsigned int max = sysctl_ipfrag_max_dist;      /* 0 turns the check off */
+       unsigned int start, end;
+
+       int rc;
+
+       if (!peer || !max)      /* no peer entry, or check disabled */
+               return 0;
+
+       start = ++qp->rid;      /* count this fragment on the queue ... */
+       end   = atomic_inc_return(&peer->rid);  /* ... and on the peer */
+
+       rc = qp->fragments && (end - start) >= max;     /* unsigned gap; a queue with no fragments is never flagged */
+
+       if (rc) {
+               IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);        /* bump the REASMFAILS statistic */
+       }
+
+       return rc;
+}
+
+static int ip_frag_reinit(struct ipq *qp)      /* empty qp for reuse; returns 0, or -ETIMEDOUT if mod_timer() returned 0 */
+{
+       struct sk_buff *fp;
+
+       if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {     /* restart the queue's timer */
+               atomic_inc(&qp->refcnt);        /* NOTE(review): presumably the reference held by the re-armed timer - confirm */
+               return -ETIMEDOUT;
+       }
+
+       fp = qp->fragments;
+       do {    /* dereferences fp before testing it: caller must ensure qp->fragments != NULL */
+               struct sk_buff *xp = fp->next;
+               frag_kfree_skb(fp, NULL);       /* hand each queued skb to frag_kfree_skb() */
+               fp = xp;
+       } while (fp);
+
+       qp->last_in = 0;        /* clear the reassembly bookkeeping */
+       qp->len = 0;
+       qp->meat = 0;
+       qp->fragments = NULL;
+       qp->iif = 0;
+       if (sysctl_ipfrag_max_dist) {
+               if (qp->peer == NULL) { /* no peer yet: try to obtain one now */
+                       qp->peer = inet_getpeer(qp->saddr, 1);
+               }
+               if (qp->peer) {
+                       qp->rid = atomic_read(&qp->peer->rid);  /* re-sample the peer counter as the new baseline */
+               }
+       }
+
+       return 0;
+}
+
 /* Add new segment to existing queue. */
 static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
@@ -420,6 +490,12 @@ static void ip_frag_queue(struct ipq *qp
        if (qp->last_in & COMPLETE)
                goto err;
 
+       if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
+           unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) {
+               ipq_kill(qp);
+               goto err;
+       }
+
        offset = ntohs(skb->nh.iph->frag_off);
        flags = offset & ~IP_OFFSET;
        offset &= IP_OFFSET;
diff -pur linux.orig/net/ipv4/ip_output.c linux.new/net/ipv4/ip_output.c
--- linux.orig/net/ipv4/ip_output.c     2005-08-03 11:44:53.139500496 -0700
+++ linux.new/net/ipv4/ip_output.c      2005-08-04 16:59:52.609205635 -0700
@@ -447,6 +447,7 @@ int ip_fragment(struct sk_buff *skb, int
 
        hlen = iph->ihl * 4;
        mtu = dst_mtu(&rt->u.dst) - hlen;       /* Size of data space */
+       IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
 
        /* When frag_list is given, use it. First, check its validity:
         * some transformers could create wrong frag_list or break existing
diff -pur linux.orig/net/ipv4/sysctl_net_ipv4.c linux.new/net/ipv4/sysctl_net_ipv4.c
--- linux.orig/net/ipv4/sysctl_net_ipv4.c       2005-08-03 11:45:01.530214323 -0700
+++ linux.new/net/ipv4/sysctl_net_ipv4.c        2005-08-04 17:00:04.807577047 -0700
@@ -30,6 +30,7 @@ extern int sysctl_ipfrag_low_thresh;
 extern int sysctl_ipfrag_high_thresh; 
 extern int sysctl_ipfrag_time;
 extern int sysctl_ipfrag_secret_interval;
+extern int sysctl_ipfrag_max_dist;
 
 /* From ip_output.c */
 extern int sysctl_ip_dynaddr;
@@ -50,6 +51,7 @@ extern int inet_peer_gc_mintime;
 extern int inet_peer_gc_maxtime;
 
 #ifdef CONFIG_SYSCTL
+static int zero;
 static int tcp_retr1_max = 255; 
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -643,6 +645,15 @@ ctl_table ipv4_table[] = {
                .strategy       = &sysctl_jiffies
        },
        {
+               .ctl_name       = NET_IPV4_IPFRAG_MAX_DIST,
+               .procname       = "ipfrag_max_dist",
+               .data           = &sysctl_ipfrag_max_dist,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .extra1         = &zero
+       },
+       {
                .ctl_name       = NET_TCP_NO_METRICS_SAVE,
                .procname       = "tcp_no_metrics_save",
                .data           = &sysctl_tcp_nometrics_save,

Reply via email to