https://gcc.gnu.org/g:c564a8be8a15389e8a5119e51d5929f0689044be
commit r17-523-gc564a8be8a15389e8a5119e51d5929f0689044be Author: Jakub Jelinek <[email protected]> Date: Fri May 15 09:50:52 2026 +0200 Add __builtin_bitreverse{8,16,32,64} builtins [PR50481] Future work could optimize this on specific targets: - ARM: lower to RBIT - x86 with GFNI: lower to vgf2p8affineqb https://wunkolo.github.io/post/2020/11/gf2p8affineqb-bit-reversal/ 2026-05-15 Disservin <[email protected]> Jakub Jelinek <[email protected]> PR target/50481 * builtin-types.def (BT_FN_UINT8_UINT8): New. * builtins.def (BUILT_IN_BITREVERSE8, BUILT_IN_BITREVERSE16, BUILT_IN_BITREVERSE32, BUILT_IN_BITREVERSE64): New builtins. * builtins.cc (expand_builtin, is_inexpensive_builtin): Handle bitreverse builtins. * fold-const-call.cc (fold_const_call_ss): Fold bitreverse builtins. * fold-const.cc (tree_call_nonnegative_warnv_p): Handle bitreverse builtins. * optabs.def (bitreverse_optab): New. * optabs.cc (expand_bitreverse): New function. (expand_unop): Use it for bitreverse_optab. * tree-ssa-ccp.cc (evaluate_stmt): Handle bitreverse builtins. * tree-ssa-phiopt.cc (empty_bb_or_one_feeding_into_p, cond_removal_in_builtin_zero_pattern): Likewise. * doc/extend.texi: Document __builtin_bitreverse{8,16,32,64}. * doc/md.texi (bitreverse<mode>2): Document. * gcc.dg/builtin-bitreverse-1.c: New test. * gcc.dg/builtin-bitreverse-2.c: New test. Signed-off-by: Disservin <[email protected]> Diff: --- gcc/builtin-types.def | 1 + gcc/builtins.cc | 14 +++ gcc/builtins.def | 4 + gcc/doc/extend.texi | 19 ++++ gcc/doc/md.texi | 4 + gcc/fold-const-call.cc | 8 ++ gcc/fold-const.cc | 4 + gcc/optabs.cc | 130 ++++++++++++++++++++++++++++ gcc/optabs.def | 1 + gcc/testsuite/gcc.dg/builtin-bitreverse-1.c | 71 +++++++++++++++ gcc/testsuite/gcc.dg/builtin-bitreverse-2.c | 17 ++++ gcc/tree-ssa-ccp.cc | 4 + gcc/tree-ssa-phiopt.cc | 8 ++ 13 files changed, 285 insertions(+) diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def index ab0cbc65cfcf..365badca2aa9 100644 --- a/gcc/builtin-types.def +++ b/gcc/builtin-types.def @@ -412,6 +412,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_INT16_FLOAT, BT_INT16, BT_FLOAT) DEF_FUNCTION_TYPE_1 (BT_FN_UINT32_FLOAT, BT_UINT32, BT_FLOAT) DEF_FUNCTION_TYPE_1 (BT_FN_UINT16_FLOAT, BT_UINT16, BT_FLOAT) DEF_FUNCTION_TYPE_1 (BT_FN_UINT8_FLOAT, BT_UINT8, BT_FLOAT) +DEF_FUNCTION_TYPE_1 (BT_FN_UINT8_UINT8, BT_UINT8, BT_UINT8) DEF_FUNCTION_TYPE_1 (BT_FN_UINT16_UINT16, BT_UINT16, BT_UINT16) DEF_FUNCTION_TYPE_1 (BT_FN_UINT32_UINT32, BT_UINT32, BT_UINT32) DEF_FUNCTION_TYPE_1 (BT_FN_UINT64_UINT64, BT_UINT64, BT_UINT64) diff --git a/gcc/builtins.cc b/gcc/builtins.cc index 692e20088c28..0475bb88a617 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -8184,6 +8184,16 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, return target; break; + case BUILT_IN_BITREVERSE8: + case BUILT_IN_BITREVERSE16: + case BUILT_IN_BITREVERSE32: + case BUILT_IN_BITREVERSE64: + target = expand_builtin_unop (target_mode, exp, target, subtarget, + bitreverse_optab); + if (target) + return target; + break; + CASE_INT_FN (BUILT_IN_FFS): target = expand_builtin_unop (target_mode, exp, target, subtarget, ffs_optab); @@ -12343,6 +12353,10 @@ is_inexpensive_builtin (tree decl) case BUILT_IN_BSWAP32: case BUILT_IN_BSWAP64: case BUILT_IN_BSWAP128: + case BUILT_IN_BITREVERSE8: + case BUILT_IN_BITREVERSE16: + case BUILT_IN_BITREVERSE32: + case BUILT_IN_BITREVERSE64: case BUILT_IN_CLZ: case BUILT_IN_CLZIMAX: case BUILT_IN_CLZL: diff --git a/gcc/builtins.def b/gcc/builtins.def index 8ab0599b17f3..68838de55f13 100644 --- a/gcc/builtins.def +++ b/gcc/builtins.def @@ -1024,6 +1024,10 @@ DEF_GCC_BUILTIN (BUILT_IN_BSWAP16, "bswap16", BT_FN_UINT16_UINT16, ATTR_C DEF_GCC_BUILTIN (BUILT_IN_BSWAP32, "bswap32", BT_FN_UINT32_UINT32, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_GCC_BUILTIN (BUILT_IN_BSWAP64, "bswap64", BT_FN_UINT64_UINT64, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_GCC_BUILTIN (BUILT_IN_BSWAP128, "bswap128", BT_FN_UINT128_UINT128, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_GCC_BUILTIN (BUILT_IN_BITREVERSE8, "bitreverse8", BT_FN_UINT8_UINT8, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_GCC_BUILTIN (BUILT_IN_BITREVERSE16, "bitreverse16", BT_FN_UINT16_UINT16, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_GCC_BUILTIN (BUILT_IN_BITREVERSE32, "bitreverse32", BT_FN_UINT32_UINT32, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_GCC_BUILTIN (BUILT_IN_BITREVERSE64, "bitreverse64", BT_FN_UINT64_UINT64, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_EXT_LIB_BUILTIN (BUILT_IN_CLEAR_CACHE, "__clear_cache", BT_FN_VOID_PTR_PTR, ATTR_NOTHROW_LEAF_LIST) /* [trans-mem]: Adjust BUILT_IN_TM_CALLOC if BUILT_IN_CALLOC is changed. */ diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 42f83b98a05b..96ed8b45f04f 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -16022,6 +16022,25 @@ Similar to @code{__builtin_bswap64}, except the argument and return types are 128-bit. Only supported on targets when 128-bit types are supported. @enddefbuiltin +@defbuiltin{uint8_t __builtin_bitreverse8 (uint8_t @var{x})} +Returns @var{x} with all bits reversed. +@enddefbuiltin + +@defbuiltin{uint16_t __builtin_bitreverse16 (uint16_t @var{x})} +Similar to @code{__builtin_bitreverse8}, except the argument and return types +are 16-bit. +@enddefbuiltin + +@defbuiltin{uint32_t __builtin_bitreverse32 (uint32_t @var{x})} +Similar to @code{__builtin_bitreverse8}, except the argument and return types +are 32-bit. +@enddefbuiltin + +@defbuiltin{uint64_t __builtin_bitreverse64 (uint64_t @var{x})} +Similar to @code{__builtin_bitreverse8}, except the argument and return types +are 64-bit. +@enddefbuiltin + @node CRC Builtins @subsection CRC Builtins @cindex CRC builtins diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index c4fc53458d9d..94d258c3c919 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5273,6 +5273,10 @@ op0 = (narrow) (((wide) op1 + (wide) op2 + 1) >> 1); @item @samp{bswap@var{m}2} Reverse the order of bytes of operand 1 and store the result in operand 0. +@mdindex bitreverse@var{m}2 +@item @samp{bitreverse@var{m}2} +Reverse the order of bits of operand 1 and store the result in operand 0. + @mdindex neg@var{m}2 @mdindex ssneg@var{m}2 @mdindex usneg@var{m}2 diff --git a/gcc/fold-const-call.cc b/gcc/fold-const-call.cc index cc2defcd5bc9..d473b0dd4280 100644 --- a/gcc/fold-const-call.cc +++ b/gcc/fold-const-call.cc @@ -1102,6 +1102,14 @@ fold_const_call_ss (wide_int *result, combined_fn fn, const wide_int_ref &arg, TYPE_SIGN (arg_type))); return true; + case CFN_BUILT_IN_BITREVERSE8: + case CFN_BUILT_IN_BITREVERSE16: + case CFN_BUILT_IN_BITREVERSE32: + case CFN_BUILT_IN_BITREVERSE64: + *result = wi::bitreverse (wide_int::from (arg, precision, + TYPE_SIGN (arg_type))); + return true; + default: return false; } diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc index 56503e570bc4..3bc82a108dbc 100644 --- a/gcc/fold-const.cc +++ b/gcc/fold-const.cc @@ -14984,6 +14984,10 @@ tree_call_nonnegative_warnv_p (tree type, combined_fn fn, tree arg0, tree arg1, case CFN_BUILT_IN_BSWAP32: case CFN_BUILT_IN_BSWAP64: case CFN_BUILT_IN_BSWAP128: + case CFN_BUILT_IN_BITREVERSE8: + case CFN_BUILT_IN_BITREVERSE16: + case CFN_BUILT_IN_BITREVERSE32: + case CFN_BUILT_IN_BITREVERSE64: /* Always true. */ return true; diff --git a/gcc/optabs.cc b/gcc/optabs.cc index d17fbcf12ed6..87c4ea682fc5 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -2959,6 +2959,132 @@ expand_doubleword_bswap (machine_mode mode, rtx op, rtx target) return target; } +/* Try calculating (bitreverse x) using masks and shifts. */ + +static rtx +expand_bitreverse (scalar_int_mode mode, rtx op0, rtx target) +{ + unsigned int precision = GET_MODE_BITSIZE (mode); + rtx_insn *last; + + /* Operation requires at least 4 bits (one nibble swap makes no sense below + that). */ + if (precision < 4) + return NULL_RTX; + + if (target == NULL_RTX + || target == op0 + || reg_overlap_mentioned_p (target, op0)) + target = gen_reg_rtx (mode); + + last = get_last_insn (); + + rtx x, lo, hi; + + /* Step 1: byte-swap (only meaningful for >= 16 bits). */ + if (precision >= 16) + { + x = expand_unop (mode, bswap_optab, op0, NULL_RTX, true); + if (x == NULL_RTX) + goto fail; + } + else + x = op0; + + /* Step 2: swap nibbles within each byte (shift=4, only for >= 8 bits). */ + if (precision >= 8) + { + wide_int mask = wi::zero (precision); + for (unsigned int start = 0; start < precision; start += 8) + mask = wi::bit_or (mask, wi::shifted_mask (start, 4, false, + precision)); + + rtx mask_rtx = immed_wide_int_const (mask, mode); + + hi = expand_simple_binop (mode, LSHIFTRT, x, GEN_INT (4), + NULL_RTX, true, OPTAB_LIB_WIDEN); + if (hi == NULL_RTX) goto fail; + hi = expand_binop (mode, and_optab, hi, mask_rtx, + NULL_RTX, true, OPTAB_LIB_WIDEN); + if (hi == NULL_RTX) goto fail; + + lo = expand_binop (mode, and_optab, x, mask_rtx, + NULL_RTX, true, OPTAB_LIB_WIDEN); + if (lo == NULL_RTX) goto fail; + lo = expand_simple_binop (mode, ASHIFT, lo, GEN_INT (4), + NULL_RTX, true, OPTAB_LIB_WIDEN); + if (lo == NULL_RTX) goto fail; + + x = expand_binop (mode, ior_optab, hi, lo, + NULL_RTX, true, OPTAB_LIB_WIDEN); + if (x == NULL_RTX) goto fail; + } + + /* Step 3: swap pairs of bits within each nibble (shift=2). */ + { + wide_int mask = wi::zero (precision); + for (unsigned int start = 0; start < precision; start += 4) + mask = wi::bit_or (mask, wi::shifted_mask (start, 2, false, precision)); + + rtx mask_rtx = immed_wide_int_const (mask, mode); + + hi = expand_simple_binop (mode, LSHIFTRT, x, GEN_INT (2), + NULL_RTX, true, OPTAB_LIB_WIDEN); + if (hi == NULL_RTX) goto fail; + hi = expand_binop (mode, and_optab, hi, mask_rtx, + NULL_RTX, true, OPTAB_LIB_WIDEN); + if (hi == NULL_RTX) goto fail; + + lo = expand_binop (mode, and_optab, x, mask_rtx, + NULL_RTX, true, OPTAB_LIB_WIDEN); + if (lo == NULL_RTX) goto fail; + lo = expand_simple_binop (mode, ASHIFT, lo, GEN_INT (2), + NULL_RTX, true, OPTAB_LIB_WIDEN); + if (lo == NULL_RTX) goto fail; + + x = expand_binop (mode, ior_optab, hi, lo, + NULL_RTX, true, OPTAB_LIB_WIDEN); + if (x == NULL_RTX) goto fail; + } + + /* Step 4: swap adjacent bits (shift=1). */ + { + wide_int mask = wi::zero (precision); + for (unsigned int start = 0; start < precision; start += 2) + mask = wi::bit_or (mask, wi::shifted_mask (start, 1, false, + precision)); + + rtx mask_rtx = immed_wide_int_const (mask, mode); + + hi = expand_simple_binop (mode, LSHIFTRT, x, GEN_INT (1), + NULL_RTX, true, OPTAB_LIB_WIDEN); + if (hi == NULL_RTX) goto fail; + hi = expand_binop (mode, and_optab, hi, mask_rtx, + NULL_RTX, true, OPTAB_LIB_WIDEN); + if (hi == NULL_RTX) goto fail; + + lo = expand_binop (mode, and_optab, x, mask_rtx, + NULL_RTX, true, OPTAB_LIB_WIDEN); + if (lo == NULL_RTX) goto fail; + lo = expand_simple_binop (mode, ASHIFT, lo, GEN_INT (1), + NULL_RTX, true, OPTAB_LIB_WIDEN); + if (lo == NULL_RTX) goto fail; + + x = expand_binop (mode, ior_optab, hi, lo, + target, true, OPTAB_LIB_WIDEN); + if (x == NULL_RTX) goto fail; + } + + if (x != target) + emit_move_insn (target, x); + + return target; + + fail: + delete_insns_since (last); + return NULL_RTX; +} + /* Try calculating (parity x) as (and (popcount x) 1), where popcount can also be done in a wider mode. */ static rtx @@ -3409,6 +3535,10 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target, goto try_libcall; } + if (unoptab == bitreverse_optab && is_a <scalar_int_mode> (mode, &int_mode)) + if (rtx tem = expand_bitreverse (int_mode, op0, target)) + return tem; + /* Neg should be tried via expand_absneg_bit before widening. */ if (optab_to_code (unoptab) == NEG) { diff --git a/gcc/optabs.def b/gcc/optabs.def index 193f42a728a2..7ccea18543f1 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -184,6 +184,7 @@ OPTAB_VC(absv_optab, "absv$I$a2", ABS) OPTAB_VX(absv_optab, "abs$F$a2") OPTAB_NL(one_cmpl_optab, "one_cmpl$a2", NOT, "one_cmpl", '2', gen_int_libfunc) OPTAB_NC(bswap_optab, "bswap$a2", BSWAP) +OPTAB_NC(bitreverse_optab, "bitreverse$a2", BITREVERSE) OPTAB_NL(ffs_optab, "ffs$a2", FFS, "ffs", '2', gen_int_libfunc) OPTAB_NL(clz_optab, "clz$a2", CLZ, "clz", '2', gen_int_libfunc) OPTAB_NL(ctz_optab, "ctz$a2", CTZ, "ctz", '2', gen_int_libfunc) diff --git a/gcc/testsuite/gcc.dg/builtin-bitreverse-1.c b/gcc/testsuite/gcc.dg/builtin-bitreverse-1.c new file mode 100644 index 000000000000..0047e8d9e22d --- /dev/null +++ b/gcc/testsuite/gcc.dg/builtin-bitreverse-1.c @@ -0,0 +1,71 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +[[gnu::noipa]] static unsigned char +br8 (unsigned char x) +{ + return __builtin_bitreverse8 (x); +} + +[[gnu::noipa]] static unsigned short +br16 (unsigned short x) +{ + return __builtin_bitreverse16 (x); +} + +[[gnu::noipa]] static unsigned +br32 (unsigned x) +{ + return __builtin_bitreverse32 (x); +} + +[[gnu::noipa]] static unsigned long long +br64 (unsigned long long x) +{ + return __builtin_bitreverse64 (x); +} + +int +main () +{ +#if __CHAR_BIT__ == 8 + if (br8 (0x00u) != 0x00u) + __builtin_abort (); + if (br8 (0x01u) != 0x80u) + __builtin_abort (); + if (br8 (0x34u) != 0x2cu) + __builtin_abort (); + if (br8 (0xffu) != 0xffu) + __builtin_abort (); +#if __SIZEOF_SHORT__ == 2 + if (br16 (0x0000u) != 0x0000u) + __builtin_abort (); + if (br16 (0x0001u) != 0x8000u) + __builtin_abort (); + if (br16 (0x1234u) != 0x2c48u) + __builtin_abort (); + if (br16 (0xffffu) != 0xffffu) + __builtin_abort (); +#endif +#if __SIZEOF_INT__ == 4 + if (br32 (0x00000000u) != 0x00000000u) + __builtin_abort (); + if (br32 (0x00000001u) != 0x80000000u) + __builtin_abort (); + if (br32 (0x01234567u) != 0xe6a2c480u) + __builtin_abort (); + if (br32 (0xffffffffu) != 0xffffffffu) + __builtin_abort (); +#endif +#if __SIZEOF_LONG_LONG__ == 8 + if (br64 (0x0000000000000000ull) != 0x0000000000000000ull) + __builtin_abort (); + if (br64 (0x0000000000000001ull) != 0x8000000000000000ull) + __builtin_abort (); + if (br64 (0x0123456789abcdefull) != 0xf7b3d591e6a2c480ull) + __builtin_abort (); + if (br64 (0xffffffffffffffffull) != 0xffffffffffffffffull) + __builtin_abort (); +#endif +#endif +} diff --git a/gcc/testsuite/gcc.dg/builtin-bitreverse-2.c b/gcc/testsuite/gcc.dg/builtin-bitreverse-2.c new file mode 100644 index 000000000000..42a677cae4b1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/builtin-bitreverse-2.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-std=gnu11" } */ + +#if __CHAR_BIT__ == 8 +_Static_assert (__builtin_bitreverse8 (0x01u) == 0x80u, "bitreverse8"); +#if __SIZEOF_SHORT__ == 2 +_Static_assert (__builtin_bitreverse16 (0x0001u) == 0x8000u, "bitreverse16"); +#endif +#if __SIZEOF_INT__ == 4 +_Static_assert (__builtin_bitreverse32 (0x12345678u) == 0x1e6a2c48u, + "bitreverse32"); +#endif +#if __SIZEOF_LONG_LONG__ == 8 +_Static_assert (__builtin_bitreverse64 (0x0123456789abcdefull) + == 0xf7b3d591e6a2c480ull, "bitreverse64"); +#endif +#endif diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc index 952ebf17e7ac..974bef17832f 100644 --- a/gcc/tree-ssa-ccp.cc +++ b/gcc/tree-ssa-ccp.cc @@ -2435,6 +2435,10 @@ evaluate_stmt (gimple *stmt) case BUILT_IN_BSWAP32: case BUILT_IN_BSWAP64: case BUILT_IN_BSWAP128: + case BUILT_IN_BITREVERSE8: + case BUILT_IN_BITREVERSE16: + case BUILT_IN_BITREVERSE32: + case BUILT_IN_BITREVERSE64: val = get_value_for_expr (gimple_call_arg (stmt, 0), true); if (val.lattice_val == UNDEFINED) break; diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc index 324559e6a7de..d9e1edb9b144 100644 --- a/gcc/tree-ssa-phiopt.cc +++ b/gcc/tree-ssa-phiopt.cc @@ -819,6 +819,10 @@ empty_bb_or_one_feeding_into_p (basic_block bb, case CFN_BUILT_IN_BSWAP32: case CFN_BUILT_IN_BSWAP64: case CFN_BUILT_IN_BSWAP128: + case CFN_BUILT_IN_BITREVERSE8: + case CFN_BUILT_IN_BITREVERSE16: + case CFN_BUILT_IN_BITREVERSE32: + case CFN_BUILT_IN_BITREVERSE64: CASE_CFN_FFS: CASE_CFN_PARITY: CASE_CFN_POPCOUNT: @@ -2578,6 +2582,10 @@ cond_removal_in_builtin_zero_pattern (basic_block cond_bb, case CFN_BUILT_IN_BSWAP32: case CFN_BUILT_IN_BSWAP64: case CFN_BUILT_IN_BSWAP128: + case CFN_BUILT_IN_BITREVERSE8: + case CFN_BUILT_IN_BITREVERSE16: + case CFN_BUILT_IN_BITREVERSE32: + case CFN_BUILT_IN_BITREVERSE64: CASE_CFN_FFS: CASE_CFN_PARITY: CASE_CFN_POPCOUNT:
