From: Yury Norov <[email protected]>

New implementations takes less space in source file (see diffstat)
and in object. For me it's 710 vs 453 bytes of text.

Patch was boot-tested on x86_64 and MIPS (big-endian) machines.
Performance tests were ran on userspace with code like this:

        /* addr[] is filled from /dev/urandom */
        start = clock();
        while (ret < nbits)
                ret = find_next_bit(addr, nbits, ret + 1);

        end = clock();
        printf("%ld\t", (unsigned long) end - start);

On Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz rezults are next:
(for find_next_bit, nbits is 8M, for find_first_bit - 80K)

        find_next_bit:          find_first_bit:
        new     current         new     current
        26932   43151           14777   14925
        26947   43182           14521   15423
        26507   43824           15053   14705
        27329   43759           14473   14777
        26895   43367           14847   15023
        26990   43693           15103   15163
        26775   43299           15067   15232
        27282   42752           14544   15121
        27504   43088           14644   14858
        26761   43856           14699   15193
        26692   43075           14781   14681
        27137   42969           14451   15061
        ...                     ...

find_next_bit performance gain is 35-40%;
find_first_bit - no measurable difference.

Signed-off-by: Yury Norov <[email protected]>
---
 lib/find_last_bit.c |  31 ++-----
 lib/find_next_bit.c | 254 +++++++++++++++-------------------------------------
 2 files changed, 79 insertions(+), 206 deletions(-)

diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
index 91ca09f..e67e970 100644
--- a/lib/find_last_bit.c
+++ b/lib/find_last_bit.c
@@ -4,44 +4,29 @@
  * Written by Rusty Russell <[email protected]>
  * (Inspired by David Howell's find_next_bit implementation)
  *
+ * Rewritten by Yury Norov <[email protected]> to decrease
+ * size and improve performance, 2015.
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/bitops.h>
 #include <linux/export.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
+#include <linux/kernel.h>
 
 #ifndef find_last_bit
 
 unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
 {
-       unsigned long words;
-       unsigned long tmp;
-
-       /* Start at final word. */
-       words = size / BITS_PER_LONG;
-
-       /* Partial final word? */
-       if (size & (BITS_PER_LONG-1)) {
-               tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
-                                        - (size & (BITS_PER_LONG-1)))));
-               if (tmp)
-                       goto found;
-       }
+       unsigned long idx = DIV_ROUND_UP(size, BITS_PER_LONG);
 
-       while (words) {
-               tmp = addr[--words];
-               if (tmp) {
-found:
-                       return words * BITS_PER_LONG + __fls(tmp);
-               }
+       while (idx--) {
+               if (addr[idx])
+                       return min(idx * BITS_PER_LONG + __fls(addr[idx]), 
size);
        }
 
-       /* Not found */
        return size;
 }
 EXPORT_SYMBOL(find_last_bit);
diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
index 0cbfc0b..ebfb3dc 100644
--- a/lib/find_next_bit.c
+++ b/lib/find_next_bit.c
@@ -3,18 +3,45 @@
  * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells ([email protected])
  *
+ * Rewritten by Yury Norov <[email protected]> to decrease
+ * size and improve performance, 2015.
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/bitops.h>
 #include <linux/export.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
+#include <linux/kernel.h>
+
+#define HIGH_BITS_MASK(nr)             (ULONG_MAX << (nr))
+
+#if !defined(find_next_bit) || !defined(find_next_zero_bit)
+static unsigned long _find_next_bit(const unsigned long *addr,
+               unsigned long nbits, unsigned long start, bool set)
+{
+       unsigned long tmp = set ? addr[start / BITS_PER_LONG]
+                       : ~addr[start / BITS_PER_LONG];
+
+       /* Handle 1st word. */
+       if (!IS_ALIGNED(start, BITS_PER_LONG)) {
+               tmp &= HIGH_BITS_MASK(start % BITS_PER_LONG);
+               start = round_down(start, BITS_PER_LONG);
+       }
 
-#define BITOP_WORD(nr)         ((nr) / BITS_PER_LONG)
+       while (!tmp) {
+               start += BITS_PER_LONG;
+               if (start >= nbits)
+                       return nbits;
+
+               tmp = set ? addr[start / BITS_PER_LONG]
+                       : ~addr[start / BITS_PER_LONG];
+       }
+
+       return start + __ffs(tmp);
+}
+#endif
 
 #ifndef find_next_bit
 /*
@@ -23,86 +50,22 @@
 unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
                            unsigned long offset)
 {
-       const unsigned long *p = addr + BITOP_WORD(offset);
-       unsigned long result = offset & ~(BITS_PER_LONG-1);
-       unsigned long tmp;
-
        if (offset >= size)
                return size;
-       size -= result;
-       offset %= BITS_PER_LONG;
-       if (offset) {
-               tmp = *(p++);
-               tmp &= (~0UL << offset);
-               if (size < BITS_PER_LONG)
-                       goto found_first;
-               if (tmp)
-                       goto found_middle;
-               size -= BITS_PER_LONG;
-               result += BITS_PER_LONG;
-       }
-       while (size & ~(BITS_PER_LONG-1)) {
-               if ((tmp = *(p++)))
-                       goto found_middle;
-               result += BITS_PER_LONG;
-               size -= BITS_PER_LONG;
-       }
-       if (!size)
-               return result;
-       tmp = *p;
 
-found_first:
-       tmp &= (~0UL >> (BITS_PER_LONG - size));
-       if (tmp == 0UL)         /* Are any bits set? */
-               return result + size;   /* Nope. */
-found_middle:
-       return result + __ffs(tmp);
+       return min(_find_next_bit(addr, size, offset, 1), size);
 }
 EXPORT_SYMBOL(find_next_bit);
 #endif
 
 #ifndef find_next_zero_bit
-/*
- * This implementation of find_{first,next}_zero_bit was stolen from
- * Linus' asm-alpha/bitops.h.
- */
 unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
                                 unsigned long offset)
 {
-       const unsigned long *p = addr + BITOP_WORD(offset);
-       unsigned long result = offset & ~(BITS_PER_LONG-1);
-       unsigned long tmp;
-
        if (offset >= size)
                return size;
-       size -= result;
-       offset %= BITS_PER_LONG;
-       if (offset) {
-               tmp = *(p++);
-               tmp |= ~0UL >> (BITS_PER_LONG - offset);
-               if (size < BITS_PER_LONG)
-                       goto found_first;
-               if (~tmp)
-                       goto found_middle;
-               size -= BITS_PER_LONG;
-               result += BITS_PER_LONG;
-       }
-       while (size & ~(BITS_PER_LONG-1)) {
-               if (~(tmp = *(p++)))
-                       goto found_middle;
-               result += BITS_PER_LONG;
-               size -= BITS_PER_LONG;
-       }
-       if (!size)
-               return result;
-       tmp = *p;
 
-found_first:
-       tmp |= ~0UL << size;
-       if (tmp == ~0UL)        /* Are any bits zero? */
-               return result + size;   /* Nope. */
-found_middle:
-       return result + ffz(tmp);
+       return min(_find_next_bit(addr, size, offset, 0), size);
 }
 EXPORT_SYMBOL(find_next_zero_bit);
 #endif
@@ -113,24 +76,14 @@ EXPORT_SYMBOL(find_next_zero_bit);
  */
 unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
 {
-       const unsigned long *p = addr;
-       unsigned long result = 0;
-       unsigned long tmp;
+       unsigned long idx;
 
-       while (size & ~(BITS_PER_LONG-1)) {
-               if ((tmp = *(p++)))
-                       goto found;
-               result += BITS_PER_LONG;
-               size -= BITS_PER_LONG;
+       for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+               if (addr[idx])
+                       return min(idx * BITS_PER_LONG + __ffs(addr[idx]), 
size);
        }
-       if (!size)
-               return result;
 
-       tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
-       if (tmp == 0UL)         /* Are any bits set? */
-               return result + size;   /* Nope. */
-found:
-       return result + __ffs(tmp);
+       return size;
 }
 EXPORT_SYMBOL(find_first_bit);
 #endif
@@ -141,24 +94,14 @@ EXPORT_SYMBOL(find_first_bit);
  */
 unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long 
size)
 {
-       const unsigned long *p = addr;
-       unsigned long result = 0;
-       unsigned long tmp;
+       unsigned long idx;
 
-       while (size & ~(BITS_PER_LONG-1)) {
-               if (~(tmp = *(p++)))
-                       goto found;
-               result += BITS_PER_LONG;
-               size -= BITS_PER_LONG;
+       for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+               if (addr[idx] != ULONG_MAX)
+                       return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
        }
-       if (!size)
-               return result;
 
-       tmp = (*p) | (~0UL << size);
-       if (tmp == ~0UL)        /* Are any bits zero? */
-               return result + size;   /* Nope. */
-found:
-       return result + ffz(tmp);
+       return size;
 }
 EXPORT_SYMBOL(find_first_zero_bit);
 #endif
@@ -166,18 +109,6 @@ EXPORT_SYMBOL(find_first_zero_bit);
 #ifdef __BIG_ENDIAN
 
 /* include/linux/byteorder does not support "unsigned long" type */
-static inline unsigned long ext2_swabp(const unsigned long * x)
-{
-#if BITS_PER_LONG == 64
-       return (unsigned long) __swab64p((u64 *) x);
-#elif BITS_PER_LONG == 32
-       return (unsigned long) __swab32p((u32 *) x);
-#else
-#error BITS_PER_LONG not defined
-#endif
-}
-
-/* include/linux/byteorder doesn't support "unsigned long" type */
 static inline unsigned long ext2_swab(const unsigned long y)
 {
 #if BITS_PER_LONG == 64
@@ -189,48 +120,40 @@ static inline unsigned long ext2_swab(const unsigned long 
y)
 #endif
 }
 
+#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
+static unsigned long _find_next_bit_le(const unsigned long *addr,
+               unsigned long nbits, unsigned long start, bool set)
+{
+       unsigned long tmp = set ? addr[start / BITS_PER_LONG]
+                       : ~addr[start / BITS_PER_LONG];
+
+       /* Handle 1st word. */
+       if (!IS_ALIGNED(start, BITS_PER_LONG)) {
+               tmp &= ext2_swab(HIGH_BITS_MASK(start % BITS_PER_LONG));
+               start = round_down(start, BITS_PER_LONG);
+       }
+
+       while (!tmp) {
+               start += BITS_PER_LONG;
+               if (start >= nbits)
+                       return nbits;
+
+               tmp = set ? addr[start / BITS_PER_LONG]
+                       : ~addr[start / BITS_PER_LONG];
+       }
+
+       return start + __ffs(ext2_swab(tmp));
+}
+#endif
+
 #ifndef find_next_zero_bit_le
 unsigned long find_next_zero_bit_le(const void *addr, unsigned
                long size, unsigned long offset)
 {
-       const unsigned long *p = addr;
-       unsigned long result = offset & ~(BITS_PER_LONG - 1);
-       unsigned long tmp;
-
        if (offset >= size)
                return size;
-       p += BITOP_WORD(offset);
-       size -= result;
-       offset &= (BITS_PER_LONG - 1UL);
-       if (offset) {
-               tmp = ext2_swabp(p++);
-               tmp |= (~0UL >> (BITS_PER_LONG - offset));
-               if (size < BITS_PER_LONG)
-                       goto found_first;
-               if (~tmp)
-                       goto found_middle;
-               size -= BITS_PER_LONG;
-               result += BITS_PER_LONG;
-       }
 
-       while (size & ~(BITS_PER_LONG - 1)) {
-               if (~(tmp = *(p++)))
-                       goto found_middle_swap;
-               result += BITS_PER_LONG;
-               size -= BITS_PER_LONG;
-       }
-       if (!size)
-               return result;
-       tmp = ext2_swabp(p);
-found_first:
-       tmp |= ~0UL << size;
-       if (tmp == ~0UL)        /* Are any bits zero? */
-               return result + size; /* Nope. Skip ffz */
-found_middle:
-       return result + ffz(tmp);
-
-found_middle_swap:
-       return result + ffz(ext2_swab(tmp));
+       return min(_find_next_bit_le(addr, size, offset, 0), size);
 }
 EXPORT_SYMBOL(find_next_zero_bit_le);
 #endif
@@ -239,45 +162,10 @@ EXPORT_SYMBOL(find_next_zero_bit_le);
 unsigned long find_next_bit_le(const void *addr, unsigned
                long size, unsigned long offset)
 {
-       const unsigned long *p = addr;
-       unsigned long result = offset & ~(BITS_PER_LONG - 1);
-       unsigned long tmp;
-
        if (offset >= size)
                return size;
-       p += BITOP_WORD(offset);
-       size -= result;
-       offset &= (BITS_PER_LONG - 1UL);
-       if (offset) {
-               tmp = ext2_swabp(p++);
-               tmp &= (~0UL << offset);
-               if (size < BITS_PER_LONG)
-                       goto found_first;
-               if (tmp)
-                       goto found_middle;
-               size -= BITS_PER_LONG;
-               result += BITS_PER_LONG;
-       }
-
-       while (size & ~(BITS_PER_LONG - 1)) {
-               tmp = *(p++);
-               if (tmp)
-                       goto found_middle_swap;
-               result += BITS_PER_LONG;
-               size -= BITS_PER_LONG;
-       }
-       if (!size)
-               return result;
-       tmp = ext2_swabp(p);
-found_first:
-       tmp &= (~0UL >> (BITS_PER_LONG - size));
-       if (tmp == 0UL)         /* Are any bits set? */
-               return result + size; /* Nope. */
-found_middle:
-       return result + __ffs(tmp);
 
-found_middle_swap:
-       return result + __ffs(ext2_swab(tmp));
+       return min(_find_next_bit_le(addr, size, offset, 1), size);
 }
 EXPORT_SYMBOL(find_next_bit_le);
 #endif
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to