We should really just use the CLZ instruction, since we only care about armv7 these days. This is what Bionic used before they removed the assembly implementation completely. This also adopts the "bx lr" from the Bionic code instead of "mov pc, lr", since it is faster on some processors.
ok? Index: lib/libc/arch/arm/string/ffs.S =================================================================== RCS file: /cvs/src/lib/libc/arch/arm/string/ffs.S,v retrieving revision 1.8 diff -u -p -r1.8 ffs.S --- lib/libc/arch/arm/string/ffs.S 19 Jan 2018 11:10:43 -0000 1.8 +++ lib/libc/arch/arm/string/ffs.S 21 Jan 2018 17:50:40 -0000 @@ -31,49 +31,12 @@ #include "DEFS.h" -/* - * ffs - find first set bit, this algorithm isolates the first set - * bit, then multiplies the number by 0x0450fbaf which leaves the top - * 6 bits as an index into the table. This algorithm should be a win - * over the checking each bit in turn as per the C compiled version. - * - * under ARMv5 there's an instruction called CLZ (count leading Zero's) that - * could be used - * - * This is the ffs algorithm devised by d.seal and posted to comp.sys.arm on - * 16 Feb 1994. - */ - - .syntax unified - ENTRY(ffs) /* Standard trick to isolate bottom bit in r0 or 0 if r0 = 0 on entry */ - rsb r1, r0, #0 - ands r0, r0, r1 - /* - * now r0 has at most one set bit, call this X - * if X = 0, all further instructions are skipped - */ - adrne r2, .L_ffs_table - orrne r0, r0, r0, lsl #4 /* r0 = X * 0x11 */ - orrne r0, r0, r0, lsl #6 /* r0 = X * 0x451 */ - rsbne r0, r0, r0, lsl #16 /* r0 = X * 0x0450fbaf */ - - /* now lookup in table indexed on top 6 bits of r0 */ - ldrbne r0, [ r2, r0, lsr #26 ] - - mov pc, lr + rsb r1, r0, #0 + ands r0, r0, r1 + clzne r0, r0 + rsbne r0, r0, #32 + bx lr END(ffs) .protected ffs - -.type .L_ffs_table, _ASM_TYPE_OBJECT; -.L_ffs_table: -/* 0 1 2 3 4 5 6 7 */ - .byte 0, 1, 2, 13, 3, 7, 0, 14 /* 0- 7 */ - .byte 4, 0, 8, 0, 0, 0, 0, 15 /* 8-15 */ - .byte 11, 5, 0, 0, 9, 0, 0, 26 /* 16-23 */ - .byte 0, 0, 0, 0, 0, 22, 28, 16 /* 24-31 */ - .byte 32, 12, 6, 0, 0, 0, 0, 0 /* 32-39 */ - .byte 10, 0, 0, 25, 0, 0, 21, 27 /* 40-47 */ - .byte 31, 0, 0, 0, 0, 24, 0, 20 /* 48-55 */ - .byte 30, 0, 23, 19, 29, 18, 17, 0 /* 56-63 */