On 02.02.2012 12:59, Коньков Евгений wrote:
Здравствуйте, Andrey.

Вы писали 2 февраля 2012 г., 8:35:23:

AZ>  On 02.02.2012 5:11, Alexander V. Chernikov wrote:
On 01.02.2012 20:45, Andrey Zonov wrote:
Hi,

I'm trying to tune a machine running 8.2-STABLE for heavy network load and
am now playing with netisr. Could anyone explain to me why only one netisr
thread actually does any work when I set the thread count to 8?

Can you please supply `netstat -Q` output and clarify your usage pattern?
(I mean, is this a router/web server/some kind of traffic receiver/etc.?)
For example, the flow policy does not balance traffic from a single flow
between different CPUs.


AZ>  This is a web server with multiple nginx instances.  5k/sec accepted
AZ>  connections.  Input packet rate is 35kpps, output - 25kpps.

AZ>  I thought of changing policy for IP, but how can I do this (without
AZ>  patching)?  Is it safe?

AZ>  netstat -Q (I turned on direct and forced direct dispatch for now):
AZ>  Configuration:
AZ>  Setting                          Value      Maximum
AZ>  Thread count                         8            8
AZ>  Default queue limit                256        10240
AZ>  Direct dispatch                enabled          n/a
AZ>  Forced direct dispatch         enabled          n/a
AZ>  Threads bound to CPUs          enabled          n/a

AZ>  Protocols:
AZ>  Name   Proto QLimit Policy Flags
AZ>  ip         1   5000   flow   ---
AZ>  igmp       2    256 source   ---
AZ>  rtsock     3    256 source   ---
AZ>  arp        7    256 source   ---
AZ>  ip6       10    256   flow   ---

AZ>  Workstreams:
AZ>  WSID CPU   Name     Len WMark   Disp'd  HDisp'd   QDrops   Queued Handled
AZ>      0   0  ip         0     0  1125716        0        0        0 1125716
AZ>             igmp       0     0        0        0        0        0
AZ>             rtsock     0     1        0        0        0      102 102
AZ>             arp        0     0       27        0        0        0 27
AZ>             ip6        0     0        0        0        0        0
AZ>      1   1  ip         0     0  1222701        0        0        0 1222701
AZ>             igmp       0     0        0        0        0        0
AZ>             rtsock     0     0        0        0        0        0
AZ>             arp        0     0       46        0        0        0 46
AZ>             ip6        0     0        0        0        0        0
AZ>      2   2  ip         0     0  1184381        0        0        0 1184381
AZ>             igmp       0     0        0        0        0        0
AZ>             rtsock     0     0        0        0        0        0
AZ>             arp        0     0       45        0        0        0 45
AZ>             ip6        0     0        0        0        0        0
AZ>      3   3  ip         0     0  1191094        0        0        0 1191094
AZ>             igmp       0     0        0        0        0        0
AZ>             rtsock     0     0        0        0        0        0
AZ>             arp        0     0       54        0        0        0 54
AZ>             ip6        0     0        0        0        0        0
AZ>      4   4  ip         0     0   846165        0        0        0 846165
AZ>             igmp       0     0        0        0        0        0
AZ>             rtsock     0     0        0        0        0        0
AZ>             arp        0     0       19        0        0        0 19
AZ>             ip6        0     0        0        0        0        0
AZ>      5   5  ip         0     0   849478        0        0        0 849478
AZ>             igmp       0     0        0        0        0        0
AZ>             rtsock     0     0        0        0        0        0
AZ>             arp        0     0       27        0        0        0 27
AZ>             ip6        0     0        0        0        0        0
AZ>      6   6  ip         0     0   870836        0        0        0 870836
AZ>             igmp       0     0        0        0        0        0
AZ>             rtsock     0     0        0        0        0        0
AZ>             arp        0     0       29        0        0        0 29
AZ>             ip6        0     0        0        0        0        0
AZ>      7   7  ip         0  5000   594320        5   910862  3453459 4047784
AZ>             igmp       0     0        0        0        0        0
AZ>             rtsock     0     0        0        0        0        0
AZ>             arp        0     5       21        0        0      109 130
AZ>             ip6        0     1        0        0        0        1

I have the same problem. Is it because one netisr thread takes 100% CPU, so
the other threads stop working properly? Or is the packet scheduler
unbalanced, still trying to schedule packets to netisr:7 even though it is
100% busy?

Can you please try an attached patch?

Rebuild the kernel with this patch and set net.isr.dispatch to deferred or hybrid.

P.S. It is also reasonable to set net.isr.bindthreads to 1.







--
WBR, Alexander
Index: sys/netinet/ip_input.c
===================================================================
--- sys/netinet/ip_input.c      (revision 230910)
+++ sys/netinet/ip_input.c      (working copy)
@@ -78,6 +78,11 @@ __FBSDID("$FreeBSD$");
 #include <netinet/ip_ipsec.h>
 #endif /* IPSEC */
 
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/sctp.h>
+#include <libkern/jenkins.h>
+
 #include <sys/socketvar.h>
 
 #include <security/mac/mac_framework.h>
@@ -145,9 +150,13 @@ SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, check_inte
 
 VNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */
 
+static VNET_DEFINE(uint32_t, flow_hashjitter);
+#define        V_flow_hashjitter       VNET(flow_hashjitter)
+static struct mbuf * ip_hash_mbuf(struct mbuf *m, uintptr_t source);
 static struct netisr_handler ip_nh = {
        .nh_name = "ip",
        .nh_handler = ip_input,
+       .nh_m2flow = ip_hash_mbuf,
        .nh_proto = NETISR_IP,
        .nh_policy = NETISR_POLICY_FLOW,
 };
@@ -305,6 +314,9 @@ ip_init(void)
            NULL, UMA_ALIGN_PTR, 0);
        maxnipq_update();
 
+       if (V_flow_hashjitter == 0)
+               V_flow_hashjitter = arc4random();
+
        /* Initialize packet filter hooks. */
        V_inet_pfil_hook.ph_type = PFIL_TYPE_AF;
        V_inet_pfil_hook.ph_af = AF_INET;
@@ -390,6 +402,103 @@ ip_fini(void *xtp)
        callout_stop(&ipport_tick_callout);
 }
 
+/*
+ * Compute a flow ID for an inbound IPv4 packet; installed as the
+ * nh_m2flow callback of the "ip" netisr handler.
+ *
+ * The hash covers the source and destination addresses and, for TCP,
+ * UDP and SCTP, the source and destination ports.
+ *
+ * Returns the mbuf with M_FLOWID set, the mbuf without a flow ID when
+ * the packet is too short to classify, or NULL when m_pullup() fails.
+ * The 'source' argument is unused.
+ */
+static struct mbuf *
+ip_hash_mbuf(struct mbuf *m, uintptr_t source)
+{
+       struct ip *ip;
+       uint8_t proto;
+       int iphlen, offset;
+       uint32_t key[3];
+       struct tcphdr *th;
+       struct udphdr *uh;
+       struct sctphdr *sh;
+       uint16_t sport = 0, dport = 0;
+       uint32_t flowid, pullup_len = 0;
+
+/*
+ * Make 'length' more bytes contiguous at the front of the chain.  The
+ * macro may reassign 'm', so pointers into the packet data must be
+ * reloaded with mtod() after every use.
+ */
+#define        M_CHECK(length) do {                                    \
+       pullup_len += length;                                   \
+       if ((m)->m_pkthdr.len < (pullup_len))                   \
+               return (m);                                     \
+       if ((m)->m_len < (pullup_len) &&                        \
+          (((m) = m_pullup((m),(pullup_len))) == NULL))        \
+               return NULL;                                    \
+} while (0)
+
+       M_CHECK(sizeof(struct ip));
+       ip = mtod(m, struct ip *);
+
+       proto = ip->ip_p;
+       iphlen = ip->ip_hl << 2;
+       if (iphlen < (int)sizeof(struct ip))
+               return (m);     /* Bogus header length: leave unclassified. */
+       /* The transport header follows the IP options, if any. */
+       pullup_len = iphlen;
+
+       key[0] = 0;
+       key[1] = ip->ip_src.s_addr;
+       key[2] = ip->ip_dst.s_addr;
+
+       /*
+        * NOTE(review): fragments are not special-cased; for a non-first
+        * fragment the bytes read below are presumably payload, not ports.
+        */
+       switch (proto) {
+       case IPPROTO_TCP:
+               M_CHECK(sizeof(struct tcphdr));
+               ip = mtod(m, struct ip *);
+               th = (struct tcphdr *)((caddr_t)ip + iphlen);
+               sport = th->th_sport;
+               dport = th->th_dport;
+               break;
+       case IPPROTO_UDP:
+               M_CHECK(sizeof(struct udphdr));
+               ip = mtod(m, struct ip *);
+               uh = (struct udphdr *)((caddr_t)ip + iphlen);
+               sport = uh->uh_sport;
+               dport = uh->uh_dport;
+               break;
+       case IPPROTO_SCTP:
+               M_CHECK(sizeof(struct sctphdr));
+               ip = mtod(m, struct ip *);
+               sh = (struct sctphdr *)((caddr_t)ip + iphlen);
+               sport = sh->src_port;
+               dport = sh->dest_port;
+               break;
+       default:
+               break;
+       }
+
+       /* Ports found: fold them into key[0]; otherwise salt by protocol. */
+       if (sport > 0) {
+               ((uint16_t *)key)[0] = sport;
+               ((uint16_t *)key)[1] = dport;
+               offset = 0;
+       } else
+               offset = V_flow_hashjitter + proto;
+
+       flowid = jenkins_hashword(key, 3, offset);
+       m->m_flags |= M_FLOWID;
+       m->m_pkthdr.flowid = flowid;
+
+       return (m);
+}
+
 /*
  * Ip input routine.  Checksum and byte swap header.  If fragmented
  * try to reassemble.  Process options.  Pass to next level.
_______________________________________________
freebsd-net@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "freebsd-net-unsubscr...@freebsd.org"

Reply via email to