Module Name:    src
Committed By:   joerg
Date:           Fri Mar 19 16:48:55 UTC 2010

Modified Files:
        src/sys/sys: bitops.h

Log Message:
Add functions for replacing runtime-invariant 32-bit unsigned divisions
with simpler full-width multiplications plus shifts and adds. The main
operations are generally at least 50% faster when serialised, and they
are often better for parallelism as well.


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/sys/bitops.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/sys/bitops.h
diff -u src/sys/sys/bitops.h:1.2 src/sys/sys/bitops.h:1.3
--- src/sys/sys/bitops.h:1.2	Mon Apr 28 20:24:10 2008
+++ src/sys/sys/bitops.h	Fri Mar 19 16:48:55 2010
@@ -1,11 +1,11 @@
-/*	$NetBSD: bitops.h,v 1.2 2008/04/28 20:24:10 martin Exp $	*/
+/*	$NetBSD: bitops.h,v 1.3 2010/03/19 16:48:55 joerg Exp $	*/
 
 /*-
- * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * Copyright (c) 2007, 2010 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
- * by Christos Zoulas.
+ * by Christos Zoulas and Joerg Sonnenberger.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -31,6 +31,8 @@
 #ifndef _SYS_BITOPS_H_
 #define _SYS_BITOPS_H_
 
+#include <sys/stdint.h>
+
 /*
  * Find First Set functions
  */
@@ -256,4 +258,36 @@
 	-1) : ((sizeof(_n) >= 4 ? fls64(_n) : fls32(_n)) - 1) \
 )
 
+/*
+ * Precompute the constants that fast_divide32() and fast_remainder32()
+ * need in order to divide by the runtime-invariant divisor _div using
+ * one 32x32->64 bit multiplication plus shifts and adds.
+ *
+ * _div	divisor; must be non-zero, because it is used as the
+ *		divisor of a 64-bit division below.
+ * _m	out: 32-bit multiplier.
+ * _s1	out: first shift count, min(l, 1).
+ * _s2	out: second shift count, max(l - 1, 0).
+ *
+ * Here l is fls32(_div - 1).  fls32() is defined elsewhere in this
+ * header; presumably it yields the 1-based index of the highest set
+ * bit (0 for an argument of 0), i.e. l = ceil(log2(_div)) -- confirm
+ * against its definition.
+ */
+static inline void
+fast_divide32_prepare(uint32_t _div, uint32_t * __restrict _m,
+    uint8_t *__restrict _s1, uint8_t *__restrict _s2)
+{
+	uint64_t _excess, _scaled;
+	int _bits;
+
+	_bits = fls32(_div - 1);
+
+	/* (2^l - _div) * 2^32, evaluated in 64 bits. */
+	_excess = (1ULL << _bits) - _div;
+	_scaled = _excess << 32;
+	*_m = (uint32_t)(_scaled / _div + 1);
+
+	if (_bits > 1)
+		*_s1 = 1;
+	else
+		*_s1 = (uint8_t)_bits;
+
+	if (_bits == 0)
+		*_s2 = 0;
+	else
+		*_s2 = (uint8_t)(_bits - 1);
+}
+
+/*
+ * Compute the quotient of _v by a runtime-invariant divisor without a
+ * division instruction.
+ *
+ * _v		dividend.
+ * _div		divisor; not referenced by the computation itself.
+ * _m, _s1, _s2	constants expected to come from
+ *		fast_divide32_prepare() called with the same _div.
+ *
+ * The upper 32 bits of the 64-bit product _v * _m are taken, a
+ * correction term derived from the remainder of _v is shifted right
+ * by _s1 and added in, and the sum is shifted right by _s2.
+ */
+static inline uint32_t
+fast_divide32(uint32_t _v, uint32_t _div, uint32_t _m, uint8_t _s1,
+    uint8_t _s2)
+{
+	uint64_t _prod;
+	uint32_t _hi, _corr;
+
+	_prod = (uint64_t)_v * _m;
+	_hi = (uint32_t)(_prod >> 32);
+	_corr = (_v - _hi) >> _s1;
+	return (_hi + _corr) >> _s2;
+}
+
+/*
+ * Compute the remainder of _v by a runtime-invariant divisor as
+ * _v - _div * quotient, with the quotient taken from fast_divide32().
+ *
+ * _v		dividend.
+ * _div		divisor; used here to scale the quotient back up.
+ * _m, _s1, _s2	constants passed through unchanged to fast_divide32();
+ *		expected to come from fast_divide32_prepare() called
+ *		with the same _div.
+ */
+static inline uint32_t
+fast_remainder32(uint32_t _v, uint32_t _div, uint32_t _m, uint8_t _s1,
+    uint8_t _s2)
+{
+	uint32_t _quot;
+
+	_quot = fast_divide32(_v, _div, _m, _s1, _s2);
+	return _v - _div * _quot;
+}
+
 #endif /* _SYS_BITOPS_H_ */

Reply via email to