Module Name:	src
Committed By:	joerg
Date:		Fri Mar 19 16:48:55 UTC 2010

Modified Files:
	src/sys/sys: bitops.h

Log Message:
Add functions for replacing runtime invariant 32bit unsigned divisions
with simpler full width multiplications and shifts + adds.
The main operations are generally at least 50% faster when serialised
and often better for parallelism as well.


To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/sys/bitops.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/sys/bitops.h
diff -u src/sys/sys/bitops.h:1.2 src/sys/sys/bitops.h:1.3
--- src/sys/sys/bitops.h:1.2	Mon Apr 28 20:24:10 2008
+++ src/sys/sys/bitops.h	Fri Mar 19 16:48:55 2010
@@ -1,11 +1,11 @@
-/* $NetBSD: bitops.h,v 1.2 2008/04/28 20:24:10 martin Exp $ */
+/* $NetBSD: bitops.h,v 1.3 2010/03/19 16:48:55 joerg Exp $ */
 
 /*-
- * Copyright (c) 2007 The NetBSD Foundation, Inc.
+ * Copyright (c) 2007, 2010 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
- * by Christos Zoulas.
+ * by Christos Zoulas and Joerg Sonnenberger.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -31,6 +31,8 @@
 #ifndef _SYS_BITOPS_H_
 #define _SYS_BITOPS_H_
 
+#include <sys/stdint.h>
+
 /*
  * Find First Set functions
  */
@@ -256,4 +258,36 @@
 	-1) : ((sizeof(_n) >= 4 ? fls64(_n) : fls32(_n)) - 1) \
 )
 
+static inline void
+fast_divide32_prepare(uint32_t _div, uint32_t * __restrict _m,
+    uint8_t *__restrict _s1, uint8_t *__restrict _s2)
+{
+	uint64_t _mt;
+	int _l;
+
+	_l = fls32(_div - 1);
+	_mt = 0x100000000ULL * ((1ULL << _l) - _div);
+	*_m = _mt / _div + 1;
+	*_s1 = (_l > 1) ? 1 : _l;
+	*_s2 = (_l == 0) ? 0 : _l - 1;
+}
+
+static inline uint32_t
+fast_divide32(uint32_t _v, uint32_t _div, uint32_t _m, uint8_t _s1,
+    uint8_t _s2)
+{
+	uint32_t _t;
+
+	_t = ((uint64_t)_v * _m) >> 32;
+	return (_t + ((_v - _t) >> _s1)) >> _s2;
+}
+
+static inline uint32_t
+fast_remainder32(uint32_t _v, uint32_t _div, uint32_t _m, uint8_t _s1,
+    uint8_t _s2)
+{
+
+	return _v - _div * fast_divide32(_v, _div, _m, _s1, _s2);
+}
+
 #endif /* _SYS_BITOPS_H_ */