Re: [LARTC] [PATCH] HTB O(1) class lookup

2007-02-03 Thread Konrad Cempura

Simon Lodal napisaƂ(a):
The patch is for 2.6.20-rc6, I have older ones for 2.6.18 and 2.6.19 if anyone 
is interested.


It's working also on 2.6.20-rc7.

I'm testing it and I'm impressed. Good work :)

___
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc


[LARTC] [PATCH] HTB O(1) class lookup

2007-01-31 Thread Simon Lodal

This patch changes HTB's class storage from hash+lists to a two-level linear 
array, so it can do constant time (O(1)) class lookup by classid. It improves 
scalability for large number of classes.

Without the patch, ~14k htb classes can starve a Xeon-3.2 at only 15kpps, 
using most of it's cycles traversing lists in htb_find(). The patch 
eliminates this problem, and has a measurable impact even with a few hundred 
classes.

Previously, scalability could be improved by increasing HTB_HSIZE, modify the 
hash function, and recompile, but this patch works for everyone without 
recompile and scales better too.

The patch is for 2.6.20-rc6, I have older ones for 2.6.18 and 2.6.19 if anyone 
is interested.

Signed-off-by: Simon Lodal [EMAIL PROTECTED]
--- linux-2.6.20-rc6.base/net/sched/sch_htb.c	2007-01-25 03:19:28.0 +0100
+++ linux-2.6.20-rc6/net/sched/sch_htb.c	2007-02-01 05:44:36.0 +0100
@@ -68,16 +68,37 @@
 one less than their parent.
 */
 
-#define HTB_HSIZE 16		/* classid hash size */
-#define HTB_EWMAC 2		/* rate average over HTB_EWMAC*HTB_HSIZE sec */
-#define HTB_RATECM 1		/* whether to use rate computer */
+#define HTB_MAX_CLS		(TC_H_MIN(-1)+1)
+#define HTB_CLS_ARRAY_SIZE	PAGE_SIZE
+#define HTB_CLS_PER_ARRAY	(HTB_CLS_ARRAY_SIZE / sizeof(void*))
+#define HTB_CLS_ARRAYS		(HTB_MAX_CLS / HTB_CLS_PER_ARRAY)
+#define HTB_CLS_ARRAY(CID)	(CID / HTB_CLS_PER_ARRAY)
+#define HTB_CLS_INDEX(CID)	(CID  (HTB_CLS_PER_ARRAY-1))
+
+
 #define HTB_HYSTERESIS 1	/* whether to use mode hysteresis for speedup */
-#define HTB_VER 0x30011		/* major must be matched with number suplied by TC as version */
+#define HTB_VER 0x30012		/* major must be matched with number suplied by TC as version */
 
 #if HTB_VER  16 != TC_HTB_PROTOVER
 #error Mismatched sch_htb.c and pkt_sch.h
 #endif
 
+/* Whether to use rate computer. This is only used for statistics output to
+   userspace (tc -s class show dev ...); if you do not care about that and
+   want the last bit of performance, disable this. */
+#define HTB_RATECM
+#ifdef HTB_RATECM
+/* Time between rate computation updates, in seconds, for each class. */
+#define HTB_RATECM_UPDATE_INTERVAL	16
+/* How many HTB_RATECM_UPDATE_INTERVAL intervals to average over when
+   computing the rate. If set to 1, the computed rate will be exactly the
+   observed rate of the last interval. If set to higher values, the computed
+   rate will converge slower, but also vary less with small, temporary changes
+   in traffic.
+*/
+#define HTB_RATECM_AVERAGE_INTERVALS	2
+#endif
+
 /* used internaly to keep status of single class */
 enum htb_cmode {
 	HTB_CANT_SEND,		/* class can't send and can't borrow */
@@ -104,7 +125,7 @@
 	/* topology */
 	int level;		/* our level (see above) */
 	struct htb_class *parent;	/* parent class */
-	struct hlist_node hlist;	/* classid hash list item */
+	struct list_head clist;		/* classid list item */
 	struct list_head sibling;	/* sibling list item */
 	struct list_head children;	/* children list */
 
@@ -165,9 +186,13 @@
 	return rate-data[slot];
 }
 
+typedef struct htb_class* htb_cls_array[HTB_CLS_PER_ARRAY];
+
 struct htb_sched {
-	struct list_head root;	/* root classes list */
-	struct hlist_head hash[HTB_HSIZE];	/* hashed by classid */
+	struct list_head root;		/* root classes list */
+	struct list_head clist;		/* all classes list */
+	/* all classes arrays for fast lookup */
+	htb_cls_array* classes[HTB_CLS_ARRAYS];
 	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 
 	/* self list - roots of self generating tree */
@@ -198,8 +223,10 @@
 	psched_time_t now;	/* cached dequeue time */
 	struct timer_list timer;	/* send delay timer */
 #ifdef HTB_RATECM
-	struct timer_list rttim;	/* rate computer timer */
-	int recmp_bucket;	/* which hash bucket to recompute next */
+	struct timer_list rttim;/* rate computer timer */
+	int clscnt;		/* total number of classes */
+	struct list_head *rtcur;/* current class to update rate timer for */
+	int rtiter;		/* current iteration (1..UPDATE_INTERVAL) */
 #endif
 
 	/* non shaped skbs; let them go directly thru */
@@ -209,32 +236,22 @@
 	long direct_pkts;
 };
 
-/* compute hash of size HTB_HSIZE for given handle */
-static inline int htb_hash(u32 h)
-{
-#if HTB_HSIZE != 16
-#error Declare new hash for your HTB_HSIZE
-#endif
-	h ^= h  8;		/* stolen from cbq_hash */
-	h ^= h  4;
-	return h  0xf;
-}
-
-/* find class in global hash table using given handle */
+/* find class in class arrays using given handle */
 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct hlist_node *p;
-	struct htb_class *cl;
+	htb_cls_array* a;
+	int minorid;
 
 	if (TC_H_MAJ(handle) != sch-handle)
 		return NULL;
 
-	hlist_for_each_entry(cl, p, q-hash + htb_hash(handle), hlist) {
-		if (cl-classid == handle)
-			return cl;
-	}
-	return NULL;
+	minorid = TC_H_MIN(handle);
+	a = q-classes[HTB_CLS_ARRAY(minorid)];
+	if (a == NULL)
+		return