The following reply was made to PR kern/121955; it has been noted by GNATS.

From: Oleg Bulyzhin <[EMAIL PROTECTED]>
To: [EMAIL PROTECTED]
Cc:  
Subject: Re: kern/121955: dummynet panics after 6.2
Date: Wed, 2 Apr 2008 20:47:47 +0400

 --rwEMma7ioTxnRzrJ
 Content-Type: text/plain; charset=us-ascii
 Content-Disposition: inline
 
 
 Please test the attached patch and let me know if it changes anything for you.
 
 -- 
 Oleg.
 
 ================================================================
 === Oleg Bulyzhin -- OBUL-RIPN -- OBUL-RIPE -- [EMAIL PROTECTED] ===
 ================================================================
 
 
 --rwEMma7ioTxnRzrJ
 Content-Type: text/x-diff; charset=us-ascii
 Content-Disposition: attachment; filename="dummynet_iofast.diff"
 
 Index: sys/netinet/ip_dummynet.h
 ===================================================================
 RCS file: /home/ncvs/src/sys/netinet/ip_dummynet.h,v
 retrieving revision 1.40
 diff -u -r1.40 ip_dummynet.h
 --- sys/netinet/ip_dummynet.h  17 Jun 2007 00:33:34 -0000      1.40
 +++ sys/netinet/ip_dummynet.h  27 Mar 2008 17:19:00 -0000
 @@ -343,7 +343,7 @@
  #ifdef _KERNEL
  typedef       int ip_dn_ctl_t(struct sockopt *); /* raw_ip.c */
  typedef       void ip_dn_ruledel_t(void *); /* ip_fw.c */
 -typedef       int ip_dn_io_t(struct mbuf *m, int dir, struct ip_fw_args *fwa);
 +typedef       int ip_dn_io_t(struct mbuf **m, int dir, struct ip_fw_args *fwa);
  extern        ip_dn_ctl_t *ip_dn_ctl_ptr;
  extern        ip_dn_ruledel_t *ip_dn_ruledel_ptr;
  extern        ip_dn_io_t *ip_dn_io_ptr;
 Index: sys/netinet/ip_dummynet.c
 ===================================================================
 RCS file: /home/ncvs/src/sys/netinet/ip_dummynet.c,v
 retrieving revision 1.110
 diff -u -r1.110 ip_dummynet.c
 --- sys/netinet/ip_dummynet.c  7 Oct 2007 20:44:22 -0000       1.110
 +++ sys/netinet/ip_dummynet.c  27 Mar 2008 17:19:03 -0000
 @@ -56,6 +56,7 @@
   * include files marked with XXX are probably not needed
   */
  
 +#include <sys/limits.h>
  #include <sys/param.h>
  #include <sys/systm.h>
  #include <sys/malloc.h>
 @@ -110,6 +111,11 @@
  /* Adjusted vs non-adjusted curr_time difference (ticks). */
  static long tick_diff;
  
 +static int            io_fast;
 +static unsigned long  io_pkt;
 +static unsigned long  io_pkt_fast;
 +static unsigned long  io_pkt_drop;
 +
  /*
   * Three heaps contain queues and pipes that the scheduler handles:
   *
 @@ -181,6 +187,17 @@
  SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost,
      CTLFLAG_RD, &tick_lost, 0,
      "Number of ticks coalesced by dummynet taskqueue.");
 +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast,
 +    CTLFLAG_RW, &io_fast, 0, "Enable fast dummynet io.");
 +SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt,
 +    CTLFLAG_RD, &io_pkt, 0,
 +    "Number of packets passed to dummynet.");
 +SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast,
 +    CTLFLAG_RD, &io_pkt_fast, 0,
 +    "Number of packets bypassed dummynet scheduler.");
 +SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop,
 +    CTLFLAG_RD, &io_pkt_drop, 0,
 +    "Number of packets dropped by dummynet.");
  #endif
  
  #ifdef DUMMYNET_DEBUG
 @@ -206,15 +223,15 @@
  #define       DUMMYNET_UNLOCK()       mtx_unlock(&dummynet_mtx)
  #define       DUMMYNET_LOCK_ASSERT()  mtx_assert(&dummynet_mtx, MA_OWNED)
  
 -static int config_pipe(struct dn_pipe *p);
 -static int ip_dn_ctl(struct sockopt *sopt);
 +static int    config_pipe(struct dn_pipe *p);
 +static int    ip_dn_ctl(struct sockopt *sopt);
  
 -static void dummynet(void *);
 -static void dummynet_flush(void);
 -static void dummynet_send(struct mbuf *);
 -void dummynet_drain(void);
 +static void   dummynet(void *);
 +static void   dummynet_flush(void);
 +static void   dummynet_send(struct mbuf *);
 +void          dummynet_drain(void);
  static ip_dn_io_t dummynet_io;
 -static void dn_rule_delete(void *);
 +static void   dn_rule_delete(void *);
  
  /*
   * Heap management functions.
 @@ -483,7 +500,7 @@
        if ((m = pipe->head) != NULL) {
                pkt = dn_tag_get(m);
                /*
 -               * XXX: Should check errors on heap_insert, by draining the
 +               * XXX Should check errors on heap_insert, by draining the
                 * whole pipe p and hoping in the future we are more successful.
                 */
                heap_insert(&extract_heap, pkt->output_time, pipe);
 @@ -496,8 +513,8 @@
   * either a pipe (WF2Q) or a flow_queue (per-flow queueing)
   */
  #define SET_TICKS(_m, q, p)   \
 -    ((_m)->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \
 -          p->bandwidth ;
 +    ((_m)->m_pkthdr.len * 8 * hz - (q)->numbytes + p->bandwidth - 1) / \
 +    p->bandwidth;
  
  /*
   * extract pkt from queue, compute output time (could be now)
 @@ -533,59 +550,61 @@
  static void
  ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail)
  {
 -    struct mbuf *pkt;
 -    struct dn_pipe *p = q->fs->pipe ;
 -    int p_was_empty ;
 +      struct mbuf *pkt;
 +      struct dn_pipe *p = q->fs->pipe;
 +      int p_was_empty;
  
 -    DUMMYNET_LOCK_ASSERT();
 +      DUMMYNET_LOCK_ASSERT();
  
 -    if (p == NULL) {
 -      printf("dummynet: ready_event- pipe is gone\n");
 -      return ;
 -    }
 -    p_was_empty = (p->head == NULL) ;
 +      if (p == NULL) {
 +              printf("dummynet: ready_event- pipe is gone\n");
 +              return;
 +      }
 +      p_was_empty = (p->head == NULL);
  
 -    /*
 -     * schedule fixed-rate queues linked to this pipe:
 -     * Account for the bw accumulated since last scheduling, then
 -     * drain as many pkts as allowed by q->numbytes and move to
 -     * the delay line (in p) computing output time.
 -     * bandwidth==0 (no limit) means we can drain the whole queue,
 -     * setting len_scaled = 0 does the job.
 -     */
 -    q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth;
 -    while ( (pkt = q->head) != NULL ) {
 -      int len = pkt->m_pkthdr.len;
 -      int len_scaled = p->bandwidth ? len*8*hz : 0 ;
 -      if (len_scaled > q->numbytes )
 -          break ;
 -      q->numbytes -= len_scaled ;
 -      move_pkt(pkt, q, p, len);
 -    }
 -    /*
 -     * If we have more packets queued, schedule next ready event
 -     * (can only occur when bandwidth != 0, otherwise we would have
 -     * flushed the whole queue in the previous loop).
 -     * To this purpose we record the current time and compute how many
 -     * ticks to go for the finish time of the packet.
 -     */
 -    if ( (pkt = q->head) != NULL ) { /* this implies bandwidth != 0 */
 -      dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */
 -      q->sched_time = curr_time ;
 -      heap_insert(&ready_heap, curr_time + t, (void *)q );
 -      /* XXX should check errors on heap_insert, and drain the whole
 -       * queue on error hoping next time we are luckier.
 +      /*
 +       * Schedule fixed-rate queues linked to this pipe:
 +       * account for the bw accumulated since last scheduling, then
 +       * drain as many pkts as allowed by q->numbytes and move to
 +       * the delay line (in p) computing output time.
 +       * bandwidth==0 (no limit) means we can drain the whole queue,
 +       * setting len_scaled = 0 does the job.
         */
 -    } else {  /* RED needs to know when the queue becomes empty */
 -      q->q_time = curr_time;
 -      q->numbytes = 0;
 -    }
 -    /*
 -     * If the delay line was empty call transmit_event() now.
 -     * Otherwise, the scheduler will take care of it.
 -     */
 -    if (p_was_empty)
 -      transmit_event(p, head, tail);
 +      q->numbytes += (curr_time - q->sched_time) * p->bandwidth;
 +      while ((pkt = q->head) != NULL) {
 +              int len = pkt->m_pkthdr.len;
 +              int len_scaled = p->bandwidth ? len * 8 * hz : 0;
 +
 +              if (len_scaled > q->numbytes)
 +                      break;
 +              q->numbytes -= len_scaled;
 +              move_pkt(pkt, q, p, len);
 +      }
 +      /*
 +       * If we have more packets queued, schedule next ready event
 +       * (can only occur when bandwidth != 0, otherwise we would have
 +       * flushed the whole queue in the previous loop).
 +       * To this purpose we record the current time and compute how many
 +       * ticks to go for the finish time of the packet.
 +       */
 +      if ((pkt = q->head) != NULL) {  /* this implies bandwidth != 0 */
 +              dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */
 +
 +              q->sched_time = curr_time;
 +              heap_insert(&ready_heap, curr_time + t, (void *)q);
 +              /*
 +               * XXX Should check errors on heap_insert, and drain the whole
 +               * queue on error hoping next time we are luckier.
 +               */
 +      } else          /* RED needs to know when the queue becomes empty. */
 +              q->q_time = curr_time;
 +
 +      /*
 +       * If the delay line was empty call transmit_event() now.
 +       * Otherwise, the scheduler will take care of it.
 +       */
 +      if (p_was_empty)
 +              transmit_event(p, head, tail);
  }
  
  /*
 @@ -593,123 +612,147 @@
   * the queues at their start time, and enqueue into the delay line.
   * Packets are drained until p->numbytes < 0. As long as
   * len_scaled >= p->numbytes, the packet goes into the delay line
 - * with a deadline p->delay. For the last packet, if p->numbytes<0,
 + * with a deadline p->delay. For the last packet, if p->numbytes < 0,
   * there is an additional delay.
   */
  static void
  ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail)
  {
 -    int p_was_empty = (p->head == NULL) ;
 -    struct dn_heap *sch = &(p->scheduler_heap);
 -    struct dn_heap *neh = &(p->not_eligible_heap) ;
 +      int p_was_empty = (p->head == NULL);
 +      struct dn_heap *sch = &(p->scheduler_heap);
 +      struct dn_heap *neh = &(p->not_eligible_heap);
 +      int64_t p_numbytes = p->numbytes;
  
 -    DUMMYNET_LOCK_ASSERT();
 -
 -    if (p->if_name[0] == 0) /* tx clock is simulated */
 -      p->numbytes += ( curr_time - p->sched_time ) * p->bandwidth;
 -    else { /* tx clock is for real, the ifq must be empty or this is a NOP */
 -      if (p->ifp && p->ifp->if_snd.ifq_head != NULL)
 -          return ;
 -      else {
 -          DPRINTF(("dummynet: pipe %d ready from %s --\n",
 -              p->pipe_nr, p->if_name));
 -      }
 -    }
 +      DUMMYNET_LOCK_ASSERT();
  
 -    /*
 -     * While we have backlogged traffic AND credit, we need to do
 -     * something on the queue.
 -     */
 -    while ( p->numbytes >=0 && (sch->elements>0 || neh->elements >0) ) {
 -      if (sch->elements > 0) { /* have some eligible pkts to send out */
 -          struct dn_flow_queue *q = sch->p[0].object ;
 -          struct mbuf *pkt = q->head;
 -          struct dn_flow_set *fs = q->fs;
 -          u_int64_t len = pkt->m_pkthdr.len;
 -          int len_scaled = p->bandwidth ? len*8*hz : 0 ;
 -
 -          heap_extract(sch, NULL); /* remove queue from heap */
 -          p->numbytes -= len_scaled ;
 -          move_pkt(pkt, q, p, len);
 -
 -          p->V += (len<<MY_M) / p->sum ; /* update V */
 -          q->S = q->F ; /* update start time */
 -          if (q->len == 0) { /* Flow not backlogged any more */
 -              fs->backlogged-- ;
 -              heap_insert(&(p->idle_heap), q->F, q);
 -          } else { /* still backlogged */
 +      if (p->if_name[0] == 0)         /* tx clock is simulated */
                /*
 -               * update F and position in backlogged queue, then
 -               * put flow in not_eligible_heap (we will fix this later).
 +               * Since result may not fit into p->numbytes (32bit) we
 +               * are using 64bit var here.
                 */
 -              len = (q->head)->m_pkthdr.len;
 -              q->F += (len<<MY_M)/(u_int64_t) fs->weight ;
 -              if (DN_KEY_LEQ(q->S, p->V))
 -                  heap_insert(neh, q->S, q);
 -              else
 -                  heap_insert(sch, q->F, q);
 -          }
 +              p_numbytes += (curr_time - p->sched_time) * p->bandwidth;
 +      else {  /*
 +               * tx clock is for real,
 +               * the ifq must be empty or this is a NOP.
 +               */
 +              if (p->ifp && p->ifp->if_snd.ifq_head != NULL)
 +                      return;
 +              else {
 +                      DPRINTF(("dummynet: pipe %d ready from %s --\n",
 +                          p->pipe_nr, p->if_name));
 +              }
        }
 +
        /*
 -       * now compute V = max(V, min(S_i)). Remember that all elements in sch
 -       * have by definition S_i <= V so if sch is not empty, V is surely
 -       * the max and we must not update it. Conversely, if sch is empty
 -       * we only need to look at neh.
 +       * While we have backlogged traffic AND credit, we need to do
 +       * something on the queue.
         */
 -      if (sch->elements == 0 && neh->elements > 0)
 -          p->V = MAX64 ( p->V, neh->p[0].key );
 -      /* move from neh to sch any packets that have become eligible */
 -      while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V) ) {
 -          struct dn_flow_queue *q = neh->p[0].object ;
 -          heap_extract(neh, NULL);
 -          heap_insert(sch, q->F, q);
 +      while (p_numbytes >= 0 && (sch->elements > 0 || neh->elements > 0)) {
 +              if (sch->elements > 0) {
 +                      /* Have some eligible pkts to send out. */
 +                      struct dn_flow_queue *q = sch->p[0].object;
 +                      struct mbuf *pkt = q->head;
 +                      struct dn_flow_set *fs = q->fs;
 +                      uint64_t len = pkt->m_pkthdr.len;
 +                      int len_scaled = p->bandwidth ? len * 8 * hz : 0;
 +
 +                      heap_extract(sch, NULL); /* Remove queue from heap. */
 +                      p_numbytes -= len_scaled;
 +                      move_pkt(pkt, q, p, len);
 +
 +                      p->V += (len << MY_M) / p->sum; /* Update V. */
 +                      q->S = q->F;                    /* Update start time. */
 +                      if (q->len == 0) {
 +                              /* Flow not backlogged any more. */
 +                              fs->backlogged--;
 +                              heap_insert(&(p->idle_heap), q->F, q);
 +                      } else {
 +                              /* Still backlogged. */
 +
 +                              /*
 +                               * Update F and position in backlogged queue,
 +                               * then put flow in not_eligible_heap
 +                               * (we will fix this later).
 +                               */
 +                              len = (q->head)->m_pkthdr.len;
 +                              q->F += (len << MY_M) / (uint64_t)fs->weight;
 +                              if (DN_KEY_LEQ(q->S, p->V))
 +                                      heap_insert(neh, q->S, q);
 +                              else
 +                                      heap_insert(sch, q->F, q);
 +                      }
 +              }
 +              /*
 +               * Now compute V = max(V, min(S_i)). Remember that all elements
 +               * in sch have by definition S_i <= V so if sch is not empty,
 +               * V is surely the max and we must not update it. Conversely,
 +               * if sch is empty we only need to look at neh.
 +               */
 +              if (sch->elements == 0 && neh->elements > 0)
 +                      p->V = MAX64(p->V, neh->p[0].key);
 +              /* Move from neh to sch any packets that have become eligible */
 +              while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V)) {
 +                      struct dn_flow_queue *q = neh->p[0].object;
 +                      heap_extract(neh, NULL);
 +                      heap_insert(sch, q->F, q);
 +              }
 +
 +              if (p->if_name[0] != '\0') { /* Tx clock is from a real thing */
 +                      p_numbytes = -1;        /* Mark not ready for I/O. */
 +                      break;
 +              }
        }
 +      if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0 &&
 +          p->idle_heap.elements > 0) {
 +              /*
 +               * No traffic and no events scheduled.
 +               * We can get rid of idle-heap.
 +               */
 +              int i;
  
 -      if (p->if_name[0] != '\0') {/* tx clock is from a real thing */
 -          p->numbytes = -1 ; /* mark not ready for I/O */
 -          break ;
 +              for (i = 0; i < p->idle_heap.elements; i++) {
 +                      struct dn_flow_queue *q = p->idle_heap.p[i].object;
 +
 +                      q->F = 0;
 +                      q->S = q->F + 1;
 +              }
 +              p->sum = 0;
 +              p->V = 0;
 +              p->idle_heap.elements = 0;
        }
 -    }
 -    if (sch->elements == 0 && neh->elements == 0 && p->numbytes >= 0
 -          && p->idle_heap.elements > 0) {
        /*
 -       * no traffic and no events scheduled. We can get rid of idle-heap.
 +       * If we are getting clocks from dummynet (not a real interface) and
 +       * If we are under credit, schedule the next ready event.
 +       * Also fix the delivery time of the last packet.
         */
 -      int i ;
 +      if (p->if_name[0]==0 && p_numbytes < 0) { /* This implies bw > 0. */
 +              dn_key t = 0;           /* Number of ticks i have to wait. */
  
 -      for (i = 0 ; i < p->idle_heap.elements ; i++) {
 -          struct dn_flow_queue *q = p->idle_heap.p[i].object ;
 -
 -          q->F = 0 ;
 -          q->S = q->F + 1 ;
 +              if (p->bandwidth > 0)
 +                      t = (p->bandwidth - 1 - p_numbytes) / p->bandwidth;
 +              dn_tag_get(p->tail)->output_time += t;
 +              p->sched_time = curr_time;
 +              heap_insert(&wfq_ready_heap, curr_time + t, (void *)p);
 +              /*
 +               * XXX Should check errors on heap_insert, and drain the whole
 +               * queue on error hoping next time we are luckier.
 +               */
        }
 -      p->sum = 0 ;
 -      p->V = 0 ;
 -      p->idle_heap.elements = 0 ;
 -    }
 -    /*
 -     * If we are getting clocks from dummynet (not a real interface) and
 -     * If we are under credit, schedule the next ready event.
 -     * Also fix the delivery time of the last packet.
 -     */
 -    if (p->if_name[0]==0 && p->numbytes < 0) { /* this implies bandwidth >0 */
 -      dn_key t=0 ; /* number of ticks i have to wait */
  
 -      if (p->bandwidth > 0)
 -          t = ( p->bandwidth -1 - p->numbytes) / p->bandwidth ;
 -      dn_tag_get(p->tail)->output_time += t ;
 -      p->sched_time = curr_time ;
 -      heap_insert(&wfq_ready_heap, curr_time + t, (void *)p);
 -      /* XXX should check errors on heap_insert, and drain the whole
 -       * queue on error hoping next time we are luckier.
 +      /* Fit (adjust if necessary) 64bit result into 32bit variable. */
 +      if (p_numbytes > INT_MAX)
 +              p->numbytes = INT_MAX;
 +      else if (p_numbytes < INT_MIN)
 +              p->numbytes = INT_MIN;
 +      else
 +              p->numbytes = p_numbytes;
 +
 +      /*
 +       * If the delay line was empty call transmit_event() now.
 +       * Otherwise, the scheduler will take care of it.
         */
 -    }
 -    /*
 -     * If the delay line was empty call transmit_event() now.
 -     * Otherwise, the scheduler will take care of it.
 -     */
 -    if (p_was_empty)
 -      transmit_event(p, head, tail);
 +      if (p_was_empty)
 +              transmit_event(p, head, tail);
  }
  
  /*
 @@ -924,29 +967,28 @@
  static struct dn_flow_queue *
  create_queue(struct dn_flow_set *fs, int i)
  {
 -    struct dn_flow_queue *q ;
 +      struct dn_flow_queue *q;
  
 -    if (fs->rq_elements > fs->rq_size * dn_max_ratio &&
 +      if (fs->rq_elements > fs->rq_size * dn_max_ratio &&
            expire_queues(fs) == 0) {
 -      /*
 -       * No way to get room, use or create overflow queue.
 -       */
 -      i = fs->rq_size ;
 -      if ( fs->rq[i] != NULL )
 -          return fs->rq[i] ;
 -    }
 -    q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO);
 -    if (q == NULL) {
 -      printf("dummynet: sorry, cannot allocate queue for new flow\n");
 -      return NULL ;
 -    }
 -    q->fs = fs ;
 -    q->hash_slot = i ;
 -    q->next = fs->rq[i] ;
 -    q->S = q->F + 1;   /* hack - mark timestamp as invalid */
 -    fs->rq[i] = q ;
 -    fs->rq_elements++ ;
 -    return q ;
 +              /* No way to get room, use or create overflow queue. */
 +              i = fs->rq_size;
 +              if (fs->rq[i] != NULL)
 +                  return fs->rq[i];
 +      }
 +      q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO);
 +      if (q == NULL) {
 +              printf("dummynet: sorry, cannot allocate queue for new flow\n");
 +              return (NULL);
 +      }
 +      q->fs = fs;
 +      q->hash_slot = i;
 +      q->next = fs->rq[i];
 +      q->S = q->F + 1;        /* hack - mark timestamp as invalid. */
 +      q->numbytes = io_fast ? fs->pipe->bandwidth : 0;
 +      fs->rq[i] = q;
 +      fs->rq_elements++;
 +      return (q);
  }
  
  /*
 @@ -1200,185 +1242,201 @@
   * ifp                the 'ifp' parameter from the caller.
   *            NULL in ip_input, destination interface in ip_output,
   * rule               matching rule, in case of multiple passes
 - *
   */
  static int
 -dummynet_io(struct mbuf *m, int dir, struct ip_fw_args *fwa)
 +dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa)
  {
 -    struct mbuf *head = NULL, *tail = NULL;
 -    struct dn_pkt_tag *pkt;
 -    struct m_tag *mtag;
 -    struct dn_flow_set *fs = NULL;
 -    struct dn_pipe *pipe ;
 -    u_int64_t len = m->m_pkthdr.len ;
 -    struct dn_flow_queue *q = NULL ;
 -    int is_pipe;
 -    ipfw_insn *cmd = ACTION_PTR(fwa->rule);
 -
 -    KASSERT(m->m_nextpkt == NULL,
 -      ("dummynet_io: mbuf queue passed to dummynet"));
 -
 -    if (cmd->opcode == O_LOG)
 -      cmd += F_LEN(cmd);
 -    if (cmd->opcode == O_ALTQ)
 -      cmd += F_LEN(cmd);
 -    if (cmd->opcode == O_TAG)
 -      cmd += F_LEN(cmd);
 -    is_pipe = (cmd->opcode == O_PIPE);
 +      struct mbuf *m = *m0, *head = NULL, *tail = NULL;
 +      struct dn_pkt_tag *pkt;
 +      struct m_tag *mtag;
 +      struct dn_flow_set *fs = NULL;
 +      struct dn_pipe *pipe;
 +      uint64_t len = m->m_pkthdr.len;
 +      struct dn_flow_queue *q = NULL;
 +      int is_pipe;
 +      ipfw_insn *cmd = ACTION_PTR(fwa->rule);
 +
 +      KASSERT(m->m_nextpkt == NULL,
 +          ("dummynet_io: mbuf queue passed to dummynet"));
 +
 +      if (cmd->opcode == O_LOG)
 +              cmd += F_LEN(cmd);
 +      if (cmd->opcode == O_ALTQ)
 +              cmd += F_LEN(cmd);
 +      if (cmd->opcode == O_TAG)
 +              cmd += F_LEN(cmd);
 +      is_pipe = (cmd->opcode == O_PIPE);
  
 -    DUMMYNET_LOCK();
 -    /*
 -     * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule.
 -     *
 -     * XXXGL: probably the pipe->fs and fs->pipe logic here
 -     * below can be simplified.
 -     */
 -    if (is_pipe) {
 -      pipe = locate_pipe(fwa->cookie);
 -      if (pipe != NULL)
 -              fs = &(pipe->fs);
 -    } else
 -      fs = locate_flowset(fwa->cookie);
 +      DUMMYNET_LOCK();
 +      io_pkt++;
 +      /*
 +       * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule.
 +       *
 +       * XXXGL: probably the pipe->fs and fs->pipe logic here
 +       * below can be simplified.
 +       */
 +      if (is_pipe) {
 +              pipe = locate_pipe(fwa->cookie);
 +              if (pipe != NULL)
 +                      fs = &(pipe->fs);
 +      } else
 +              fs = locate_flowset(fwa->cookie);
  
 -    if (fs == NULL)
 -      goto dropit;    /* This queue/pipe does not exist! */
 -    pipe = fs->pipe;
 -    if (pipe == NULL) { /* Must be a queue, try find a matching pipe. */
 -      pipe = locate_pipe(fs->parent_nr);
 -      if (pipe != NULL)
 -          fs->pipe = pipe;
 -      else {
 -          printf("dummynet: no pipe %d for queue %d, drop pkt\n",
 -              fs->parent_nr, fs->fs_nr);
 -          goto dropit ;
 +      if (fs == NULL)
 +              goto dropit;    /* This queue/pipe does not exist! */
 +      pipe = fs->pipe;
 +      if (pipe == NULL) {     /* Must be a queue, try find a matching pipe. */
 +              pipe = locate_pipe(fs->parent_nr);
 +              if (pipe != NULL)
 +                      fs->pipe = pipe;
 +              else {
 +                      printf("dummynet: no pipe %d for queue %d, drop pkt\n",
 +                          fs->parent_nr, fs->fs_nr);
 +                      goto dropit;
 +              }
        }
 -    }
 -    q = find_queue(fs, &(fwa->f_id));
 -    if ( q == NULL )
 -      goto dropit ;           /* cannot allocate queue                */
 -    /*
 -     * update statistics, then check reasons to drop pkt
 -     */
 -    q->tot_bytes += len ;
 -    q->tot_pkts++ ;
 -    if ( fs->plr && random() < fs->plr )
 -      goto dropit ;           /* random pkt drop                      */
 -    if ( fs->flags_fs & DN_QSIZE_IS_BYTES) {
 -      if (q->len_bytes > fs->qsize)
 -          goto dropit ;       /* queue size overflow                  */
 -    } else {
 -      if (q->len >= fs->qsize)
 -          goto dropit ;       /* queue count overflow                 */
 -    }
 -    if ( fs->flags_fs & DN_IS_RED && red_drops(fs, q, len) )
 -      goto dropit ;
 -
 -    /* XXX expensive to zero, see if we can remove it*/
 -    mtag = m_tag_get(PACKET_TAG_DUMMYNET,
 -              sizeof(struct dn_pkt_tag), M_NOWAIT|M_ZERO);
 -    if ( mtag == NULL )
 -      goto dropit ;           /* cannot allocate packet header        */
 -    m_tag_prepend(m, mtag);   /* attach to mbuf chain */
 -
 -    pkt = (struct dn_pkt_tag *)(mtag+1);
 -    /* ok, i can handle the pkt now... */
 -    /* build and enqueue packet + parameters */
 -    pkt->rule = fwa->rule ;
 -    pkt->dn_dir = dir ;
 -
 -    pkt->ifp = fwa->oif;
 +      q = find_queue(fs, &(fwa->f_id));
 +      if (q == NULL)
 +              goto dropit;            /* Cannot allocate queue. */
 +
 +      /* Update statistics, then check reasons to drop pkt. */
 +      q->tot_bytes += len;
 +      q->tot_pkts++;
 +      if (fs->plr && random() < fs->plr)
 +              goto dropit;            /* Random pkt drop. */
 +      if (fs->flags_fs & DN_QSIZE_IS_BYTES) {
 +              if (q->len_bytes > fs->qsize)
 +                      goto dropit;    /* Queue size overflow. */
 +      } else {
 +              if (q->len >= fs->qsize)
 +                      goto dropit;    /* Queue count overflow. */
 +      }
 +      if (fs->flags_fs & DN_IS_RED && red_drops(fs, q, len))
 +              goto dropit;
  
 -    if (q->head == NULL)
 -      q->head = m;
 -    else
 -      q->tail->m_nextpkt = m;
 -    q->tail = m;
 -    q->len++;
 -    q->len_bytes += len ;
 +      /* XXX expensive to zero, see if we can remove it. */
 +      mtag = m_tag_get(PACKET_TAG_DUMMYNET,
 +          sizeof(struct dn_pkt_tag), M_NOWAIT | M_ZERO);
 +      if (mtag == NULL)
 +              goto dropit;            /* Cannot allocate packet header. */
 +      m_tag_prepend(m, mtag);         /* Attach to mbuf chain. */
  
 -    if ( q->head != m )               /* flow was not idle, we are done */
 -      goto done;
 -    /*
 -     * If we reach this point the flow was previously idle, so we need
 -     * to schedule it. This involves different actions for fixed-rate or
 -     * WF2Q queues.
 -     */
 -    if (is_pipe) {
 +      pkt = (struct dn_pkt_tag *)(mtag + 1);
        /*
 -       * Fixed-rate queue: just insert into the ready_heap.
 +       * Ok, i can handle the pkt now...
 +       * Build and enqueue packet + parameters.
         */
 -      dn_key t = 0 ;
 -      if (pipe->bandwidth)
 -          t = SET_TICKS(m, q, pipe);
 -      q->sched_time = curr_time ;
 -      if (t == 0)     /* must process it now */
 -          ready_event(q, &head, &tail);
 +      pkt->rule = fwa->rule;
 +      pkt->dn_dir = dir;
 +
 +      pkt->ifp = fwa->oif;
 +
 +      if (q->head == NULL)
 +              q->head = m;
        else
 -          heap_insert(&ready_heap, curr_time + t , q );
 -    } else {
 -      /*
 -       * WF2Q. First, compute start time S: if the flow was idle (S=F+1)
 -       * set S to the virtual time V for the controlling pipe, and update
 -       * the sum of weights for the pipe; otherwise, remove flow from
 -       * idle_heap and set S to max(F,V).
 -       * Second, compute finish time F = S + len/weight.
 -       * Third, if pipe was idle, update V=max(S, V).
 -       * Fourth, count one more backlogged flow.
 -       */
 -      if (DN_KEY_GT(q->S, q->F)) { /* means timestamps are invalid */
 -          q->S = pipe->V ;
 -          pipe->sum += fs->weight ; /* add weight of new queue */
 -      } else {
 -          heap_extract(&(pipe->idle_heap), q);
 -          q->S = MAX64(q->F, pipe->V ) ;
 -      }
 -      q->F = q->S + ( len<<MY_M )/(u_int64_t) fs->weight;
 +              q->tail->m_nextpkt = m;
 +      q->tail = m;
 +      q->len++;
 +      q->len_bytes += len;
 +
 +      if (q->head != m)               /* Flow was not idle, we are done. */
 +              goto done;
 +
 +      if (q->q_time < curr_time)
 +              q->numbytes = io_fast ? fs->pipe->bandwidth : 0;
 +      q->q_time = curr_time;
  
 -      if (pipe->not_eligible_heap.elements == 0 &&
 -              pipe->scheduler_heap.elements == 0)
 -          pipe->V = MAX64 ( q->S, pipe->V );
 -      fs->backlogged++ ;
        /*
 -       * Look at eligibility. A flow is not eligibile if S>V (when
 -       * this happens, it means that there is some other flow already
 -       * scheduled for the same pipe, so the scheduler_heap cannot be
 -       * empty). If the flow is not eligible we just store it in the
 -       * not_eligible_heap. Otherwise, we store in the scheduler_heap
 -       * and possibly invoke ready_event_wfq() right now if there is
 -       * leftover credit.
 -       * Note that for all flows in scheduler_heap (SCH), S_i <= V,
 -       * and for all flows in not_eligible_heap (NEH), S_i > V .
 -       * So when we need to compute max( V, min(S_i) ) forall i in SCH+NEH,
 -       * we only need to look into NEH.
 +       * If we reach this point the flow was previously idle, so we need
 +       * to schedule it. This involves different actions for fixed-rate or
 +       * WF2Q queues.
         */
 -      if (DN_KEY_GT(q->S, pipe->V) ) { /* not eligible */
 -          if (pipe->scheduler_heap.elements == 0)
 -              printf("dummynet: ++ ouch! not eligible but empty scheduler!\n");
 -          heap_insert(&(pipe->not_eligible_heap), q->S, q);
 +      if (is_pipe) {
 +              /* Fixed-rate queue: just insert into the ready_heap. */
 +              dn_key t = 0;
 +
 +              if (pipe->bandwidth && m->m_pkthdr.len * 8 * hz > q->numbytes)
 +                      t = SET_TICKS(m, q, pipe);
 +              q->sched_time = curr_time;
 +              if (t == 0)             /* Must process it now. */
 +                      ready_event(q, &head, &tail);
 +              else
 +                      heap_insert(&ready_heap, curr_time + t , q);
        } else {
 -          heap_insert(&(pipe->scheduler_heap), q->F, q);
 -          if (pipe->numbytes >= 0) { /* pipe is idle */
 -              if (pipe->scheduler_heap.elements != 1)
 -                  printf("dummynet: OUCH! pipe should have been idle!\n");
 -              DPRINTF(("dummynet: waking up pipe %d at %d\n",
 -                      pipe->pipe_nr, (int)(q->F >> MY_M)));
 -              pipe->sched_time = curr_time ;
 -              ready_event_wfq(pipe, &head, &tail);
 -          }
 +              /*
 +               * WF2Q. First, compute start time S: if the flow was
 +               * idle (S = F + 1) set S to the virtual time V for the
 +               * controlling pipe, and update the sum of weights for the pipe;
 +               * otherwise, remove flow from idle_heap and set S to max(F,V).
 +               * Second, compute finish time F = S + len / weight.
 +               * Third, if pipe was idle, update V = max(S, V).
 +               * Fourth, count one more backlogged flow.
 +               */
 +              if (DN_KEY_GT(q->S, q->F)) { /* Means timestamps are invalid. */
 +                      q->S = pipe->V;
 +                      pipe->sum += fs->weight; /* Add weight of new queue. */
 +              } else {
 +                      heap_extract(&(pipe->idle_heap), q);
 +                      q->S = MAX64(q->F, pipe->V);
 +              }
 +              q->F = q->S + (len << MY_M) / (uint64_t)fs->weight;
 +
 +              if (pipe->not_eligible_heap.elements == 0 &&
 +                  pipe->scheduler_heap.elements == 0)
 +                      pipe->V = MAX64(q->S, pipe->V);
 +              fs->backlogged++;
 +              /*
 +               * Look at eligibility. A flow is not eligible if S>V (when
 +               * this happens, it means that there is some other flow already
 +               * scheduled for the same pipe, so the scheduler_heap cannot be
 +               * empty). If the flow is not eligible we just store it in the
 +               * not_eligible_heap. Otherwise, we store in the scheduler_heap
 +               * and possibly invoke ready_event_wfq() right now if there is
 +               * leftover credit.
 +               * Note that for all flows in scheduler_heap (SCH), S_i <= V,
 +               * and for all flows in not_eligible_heap (NEH), S_i > V.
 +               * So when we need to compute max(V, min(S_i)) forall i in
 +               * SCH+NEH, we only need to look into NEH.
 +               */
 +              if (DN_KEY_GT(q->S, pipe->V)) {         /* Not eligible. */
 +                      if (pipe->scheduler_heap.elements == 0)
 +                              printf("dummynet: ++ ouch! not eligible but empty scheduler!\n");
 +                      heap_insert(&(pipe->not_eligible_heap), q->S, q);
 +              } else {
 +                      heap_insert(&(pipe->scheduler_heap), q->F, q);
 +                      if (pipe->numbytes >= 0) {       /* Pipe is idle. */
 +                              if (pipe->scheduler_heap.elements != 1)
 +                                      printf("dummynet: OUCH! pipe should have been idle!\n");
 +                              DPRINTF(("dummynet: waking up pipe %d at %d\n",
 +                                  pipe->pipe_nr, (int)(q->F >> MY_M)));
 +                              pipe->sched_time = curr_time;
 +                              ready_event_wfq(pipe, &head, &tail);
 +                      }
 +              }
        }
 -    }
  done:
 -    DUMMYNET_UNLOCK();
 -    if (head != NULL)
 -      dummynet_send(head);
 -    return 0;
 +      if (head == m && dir != DN_TO_IFB_FWD && dir != DN_TO_ETH_DEMUX &&
 +          dir != DN_TO_ETH_OUT) {     /* Fast io. */
 +              io_pkt_fast++;
 +              if (m->m_nextpkt != NULL)
 +                      printf("dummynet: fast io: pkt chain detected!\n");
 +              head = m->m_nextpkt = NULL;
 +      } else
 +              *m0 = NULL;             /* Normal io. */
 +
 +      DUMMYNET_UNLOCK();
 +      if (head != NULL)
 +              dummynet_send(head);
 +      return (0);
  
  dropit:
 -    if (q)
 -      q->drops++ ;
 -    DUMMYNET_UNLOCK();
 -    m_freem(m);
 -    return ( (fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS);
 +      io_pkt_drop++;
 +      if (q)
 +              q->drops++;
 +      DUMMYNET_UNLOCK();
 +      m_freem(m);
 +      *m0 = NULL;
 +      return ((fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS);
  }
  
  /*
 @@ -1696,7 +1754,7 @@
                        /* Flush accumulated credit for all queues. */
                        for (i = 0; i <= pipe->fs.rq_size; i++)
                                for (q = pipe->fs.rq[i]; q; q = q->next)
 -                                      q->numbytes = 0;
 +                                      q->numbytes = io_fast ? p->bandwidth : 0;
  
                pipe->bandwidth = p->bandwidth;
                pipe->numbytes = 0;             /* just in case... */
 Index: sys/netinet/ip_fw_pfil.c
 ===================================================================
 RCS file: /home/ncvs/src/sys/netinet/ip_fw_pfil.c,v
 retrieving revision 1.25
 diff -u -r1.25 ip_fw_pfil.c
 --- sys/netinet/ip_fw_pfil.c   7 Oct 2007 20:44:23 -0000       1.25
 +++ sys/netinet/ip_fw_pfil.c   27 Mar 2008 17:19:10 -0000
 @@ -104,16 +104,6 @@
  
        bzero(&args, sizeof(args));
  
 -      dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL);
 -      if (dn_tag != NULL){
 -              struct dn_pkt_tag *dt;
 -
 -              dt = (struct dn_pkt_tag *)(dn_tag+1);
 -              args.rule = dt->rule;
 -
 -              m_tag_delete(*m0, dn_tag);
 -      }
 -
        ng_tag = (struct ng_ipfw_tag *)m_tag_locate(*m0, NGM_IPFW_COOKIE, 0,
            NULL);
        if (ng_tag != NULL) {
 @@ -124,6 +114,16 @@
        }
  
  again:
 +      dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL);
 +      if (dn_tag != NULL){
 +              struct dn_pkt_tag *dt;
 +
 +              dt = (struct dn_pkt_tag *)(dn_tag+1);
 +              args.rule = dt->rule;
 +
 +              m_tag_delete(*m0, dn_tag);
 +      }
 +
        args.m = *m0;
        args.inp = inp;
        ipfw = ipfw_chk(&args);
 @@ -160,10 +160,11 @@
                if (!DUMMYNET_LOADED)
                        goto drop;
                if (mtod(*m0, struct ip *)->ip_v == 4)
 -                      ip_dn_io_ptr(*m0, DN_TO_IP_IN, &args);
 +                      ip_dn_io_ptr(m0, DN_TO_IP_IN, &args);
                else if (mtod(*m0, struct ip *)->ip_v == 6)
 -                      ip_dn_io_ptr(*m0, DN_TO_IP6_IN, &args);
 -              *m0 = NULL;
 +                      ip_dn_io_ptr(m0, DN_TO_IP6_IN, &args);
 +              if (*m0 != NULL)
 +                      goto again;
                return 0;               /* packet consumed */
  
        case IP_FW_TEE:
 @@ -225,16 +226,6 @@
  
        bzero(&args, sizeof(args));
  
 -      dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL);
 -      if (dn_tag != NULL) {
 -              struct dn_pkt_tag *dt;
 -
 -              dt = (struct dn_pkt_tag *)(dn_tag+1);
 -              args.rule = dt->rule;
 -
 -              m_tag_delete(*m0, dn_tag);
 -      }
 -
        ng_tag = (struct ng_ipfw_tag *)m_tag_locate(*m0, NGM_IPFW_COOKIE, 0,
            NULL);
        if (ng_tag != NULL) {
 @@ -245,6 +236,16 @@
        }
  
  again:
 +      dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL);
 +      if (dn_tag != NULL) {
 +              struct dn_pkt_tag *dt;
 +
 +              dt = (struct dn_pkt_tag *)(dn_tag+1);
 +              args.rule = dt->rule;
 +
 +              m_tag_delete(*m0, dn_tag);
 +      }
 +
        args.m = *m0;
        args.oif = ifp;
        args.inp = inp;
 @@ -286,10 +287,11 @@
                if (!DUMMYNET_LOADED)
                        break;
                if (mtod(*m0, struct ip *)->ip_v == 4)
 -                      ip_dn_io_ptr(*m0, DN_TO_IP_OUT, &args);
 +                      ip_dn_io_ptr(m0, DN_TO_IP_OUT, &args);
                else if (mtod(*m0, struct ip *)->ip_v == 6)
 -                      ip_dn_io_ptr(*m0, DN_TO_IP6_OUT, &args);
 -              *m0 = NULL;
 +                      ip_dn_io_ptr(m0, DN_TO_IP6_OUT, &args);
 +              if (*m0 != NULL)
 +                      goto again;
                return 0;               /* packet consumed */
  
                break;
 Index: sys/net/if_bridge.c
 ===================================================================
 RCS file: /home/ncvs/src/sys/net/if_bridge.c,v
 retrieving revision 1.103.2.3
 diff -u -r1.103.2.3 if_bridge.c
 --- sys/net/if_bridge.c        21 Dec 2007 05:29:15 -0000      1.103.2.3
 +++ sys/net/if_bridge.c        27 Mar 2008 17:19:15 -0000
 @@ -2982,7 +2982,7 @@
                         * packet will return to us via bridge_dummynet().
                         */
                        args.oif = ifp;
 -                      ip_dn_io_ptr(*mp, DN_TO_IFB_FWD, &args);
 +                      ip_dn_io_ptr(mp, DN_TO_IFB_FWD, &args);
                        return (error);
                }
  
 Index: sys/net/if_ethersubr.c
 ===================================================================
 RCS file: /home/ncvs/src/sys/net/if_ethersubr.c,v
 retrieving revision 1.236.2.1
 diff -u -r1.236.2.1 if_ethersubr.c
 --- sys/net/if_ethersubr.c     28 Oct 2007 16:24:16 -0000      1.236.2.1
 +++ sys/net/if_ethersubr.c     27 Mar 2008 17:19:18 -0000
 @@ -491,7 +491,7 @@
                         */
                        *m0 = NULL ;
                }
 -              ip_dn_io_ptr(m, dst ? DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args);
 +              ip_dn_io_ptr(&m, dst ? DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args);
                return 0;
        }
        /*
 Index: sbin/ipfw/ipfw.8
 ===================================================================
 RCS file: /home/ncvs/src/sbin/ipfw/ipfw.8,v
 retrieving revision 1.203.2.1
 diff -u -r1.203.2.1 ipfw.8
 --- sbin/ipfw/ipfw.8   29 Nov 2007 18:42:15 -0000      1.203.2.1
 +++ sbin/ipfw/ipfw.8   27 Mar 2008 17:25:32 -0000
 @@ -1756,6 +1756,16 @@
  TCP connection, or from/to a given host, or entire subnet, or a
  protocol type, etc.
  .Pp
 +There are two modes of dummynet operation: normal and fast.
 +Normal mode tries to emulate a real link: the dummynet scheduler ensures that a
 +packet will not leave the pipe faster than it would on a real link with the given
 +bandwidth. Fast mode allows certain packets to bypass the dummynet scheduler (if
 +the packet flow does not exceed the pipe's bandwidth). Thus fast mode requires
 +fewer CPU cycles per packet (on average), but packet latency can be significantly
 +lower compared to a real link with the same bandwidth. The default is normal
 +mode; fast mode can be enabled by setting the net.inet.ip.dummynet.io_fast
 +sysctl(8) variable to a non-zero value.
 +.Pp
  Packets belonging to the same flow are then passed to either of two
  different objects, which implement the traffic regulation:
  .Bl -hang -offset XXXX
 @@ -2120,6 +2130,14 @@
  This value is used when no
  .Cm buckets
  option is specified when configuring a pipe/queue.
 +.It Em net.inet.ip.dummynet.io_fast : No 0
 +If set to a non-zero value, enables the "fast" mode of dummynet operation (see above).
 +.It Em net.inet.ip.dummynet.io_pkt
 +Number of packets passed to dummynet.
 +.It Em net.inet.ip.dummynet.io_pkt_drop
 +Number of packets dropped by dummynet.
 +.It Em net.inet.ip.dummynet.io_pkt_fast
 +Number of packets that bypassed the dummynet scheduler.
  .It Em net.inet.ip.dummynet.max_chain_len : No 16
  Target value for the maximum number of pipes/queues in a hash bucket.
  The product
 
 --rwEMma7ioTxnRzrJ--
 
_______________________________________________
[email protected] mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-ipfw
To unsubscribe, send any mail to "[EMAIL PROTECTED]"

Reply via email to