Hi Bernd,

On Sun, 2019-09-29 at 08:49 +0000, Bernd Edlinger wrote:
> 
> But I cannot tell if the bitfield access generates
> more efficient code or identical code than the
> original variant when no ms bitfields are used.
> That needs closer inspection of the generated
> assembler code, a simple bootstrap / regtest will
> probably not be sufficient.

Thanks for your comment.

The bitfields in this case are used for packing and unpacking the FP
values.  So I gave it a try on RX and SH as examples (with the trunk
compiler).  I've extracted the unpack function for a "double" value and
compared the generated assembly code.  Please see the attached file.

The bitfield code on RX is equivalent.  On SH the bitfield code is
slightly worse, and it gets *a lot* worse when the struct is passed by
reference/pointer; then things get completely out of control.

Theoretically, if the bitfields and the shifts-and-ands extract/pack
the same bits from the underlying base integer value, the resulting
code should be the same in both cases (see RX).  But it depends on what
the backend does, or tries to do (see SH).

> But my thought is if the -mms-bitfields option has such
> an impact on this structure, then it would be good if there
> was a built-in define that can be used to adjust to and/or
> diagnose the problem at compile time.

I think it'd be better to add static_asserts on the expected sizes of
the various float value structs.
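Something like this, right next to the struct definitions (just a
minimal sketch, reusing the double layout from the attached file; the
struct name here is only for illustration):

  struct __attribute__ ((packed)) fp_value_bits
  {
    unsigned long long fraction:52;
    unsigned int exp:11;
    unsigned int sign:1;
  };

  static_assert (sizeof (fp_value_bits) == 8,
                 "unexpected FP value struct layout");

If the layout changes (e.g. with -mms-bitfields, where bitfields of
different declared types don't share a storage unit), this fails at
compile time instead of silently producing broken FP code.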


> 
> I think that is missing right now, but wouldn't it be nice to have
> a define like __MS_BITFIELD_LAYOUT__ ?

Honestly, I'm not a big fan of the whole MS bitfield thing.  On RX it
was put there a long time ago for compatibility with some other
compiler.  I'm not sure whether it's still relevant at all, so I don't
want to add any more to the pile ...

Cheers,
Oleg
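
// Attached file: bitfield vs. shift-and-mask unpacking of a double,
// compiled for RX and SH at -O2 with a trunk compiler.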
typedef unsigned long long fractype;

struct unpacked_double
{
  fractype fraction;
  int exp;
  int sign;
};

struct __attribute__ ((packed))
bits_little_endian
{
  fractype fraction:52 __attribute__ ((packed));
  unsigned int exp:11 __attribute__ ((packed));
  unsigned int sign:1 __attribute__ ((packed));
};

static_assert (sizeof (bits_little_endian) == 8, "");
static_assert (sizeof (long long) == 8, "");


// ------------------------------------------------------------------------

unpacked_double
unpack_d_bitfields (bits_little_endian val)
{
  fractype fraction = val.fraction;
  int exp = val.exp;
  int sign = val.sign;

  return { fraction, exp, sign };
}

/*
RX -O2

__Z18unpack_d_bitfieldsRK18bits_little_endian:
	mov.L	r2, r4	 ; 4	[c=4 l=2]  *movsi_internal/4
	shlr	#20, r2, r3	 ; 9	[c=8 l=3]  lshrsi3/2
	add	#-16, r0	 ; 58	[c=4 l=3]  addsi3_internal/3
	mov.L	#0xfffff, r2	 ; 57	[c=4 l=5]  *movsi_internal/2
	and	r4, r2	 ; 43	[c=4 l=2]  andsi3/0
	and	#0x7ff, r3	 ; 44	[c=4 l=4]  andsi3/3
	shlr	#31, r4	 ; 45	[c=8 l=2]  lshrsi3/1
	rtsd	#16		 ; 61	[c=4 l=2]  deallocate_and_return

SH -O2

__Z18unpack_d_bitfieldsRK18bits_little_endian:
.LFB0:
	.cfi_startproc
	add	#-8,r15	! 85	[c=4 l=2]  *addsi3/0
	.cfi_def_cfa_offset 8
	mov	r2,r0	! 2	[c=4 l=2]  movsi_ie/1
	mov.l	.L3,r3	! 32	[c=10 l=2]  movsi_ie/0
	mov	#-21,r2	! 82	[c=4 l=2]  movsi_ie/2
	mov	r5,r1	! 81	[c=4 l=2]  movsi_ie/1
	add	r1,r1	! 16	[c=4 l=2]  ashlsi3_k/0
	shld	r2,r1	! 71	[c=4 l=2]  lshrsi3_d
	mov	r5,r2	! 83	[c=4 l=2]  movsi_ie/1
	shll	r2		! 72	[c=0 l=2]  shll
	mov.l	r4,@r0	! 44	[c=4 l=2]  movsi_ie/8
	movt	r2		! 73	[c=4 l=2]  movt
	mov.l	r1,@(8,r0)	! 35	[c=4 l=2]  movsi_ie/8
	and	r3,r5	! 33	[c=4 l=2]  *andsi_compact/3
	mov.l	r2,@(12,r0)	! 36	[c=4 l=2]  movsi_ie/8
	mov.l	r5,@(4,r0)	! 45	[c=4 l=2]  movsi_ie/8
	rts			! 91	[c=0 l=2]  *return_i
	add	#8,r15	! 90	[c=4 l=2]  *addsi3/0
	.cfi_endproc

*/

// ------------------------------------------------------------------------

unpacked_double
unpack_d_no_bitfields (long long val)
{
  fractype fraction = val & ((((fractype)1) << 52) - 1);
  int exp = ((int)(val >> 52)) & ((1 << 11) - 1);
  int sign = ((int)(val >> (52 + 11))) & 1;

  return { fraction, exp, sign };
}

/*
RX -O2
	mov.L	r2, r4	 ; 4	[c=4 l=2]  *movsi_internal/4
	shar	#20, r2, r3	 ; 13	[c=8 l=3]  ashrsi3/2
	add	#-16, r0	 ; 56	[c=4 l=3]  addsi3_internal/3
	mov.L	#0xfffff, r2	 ; 55	[c=4 l=5]  *movsi_internal/2
	and	r4, r2	 ; 42	[c=4 l=2]  andsi3/0
	and	#0x7ff, r3	 ; 43	[c=4 l=4]  andsi3/3
	shlr	#31, r4	 ; 44	[c=8 l=2]  lshrsi3/1
	rtsd	#16		 ; 59	[c=4 l=2]  deallocate_and_return

SH -O2
	mov	r2,r0	! 2	[c=4 l=2]  movsi_ie/1
	mov.l	.L6,r1	! 10	[c=10 l=2]  movsi_ie/0
	mov.l	r4,@r2	! 9	[c=4 l=2]  movsi_ie/8
	and	r5,r1	! 11	[c=4 l=2]  *andsi_compact/3
	mov.l	r1,@(4,r2)	! 12	[c=4 l=2]  movsi_ie/8
	mov	#-21,r2	! 69	[c=4 l=2]  movsi_ie/2
	mov	r5,r1	! 68	[c=4 l=2]  movsi_ie/1
	shll	r5		! 59	[c=0 l=2]  shll
	add	r1,r1	! 18	[c=4 l=2]  ashlsi3_k/0
	shld	r2,r1	! 58	[c=4 l=2]  lshrsi3_d
	movt	r5		! 60	[c=4 l=2]  movt
	mov.l	r1,@(8,r0)	! 20	[c=4 l=2]  movsi_ie/8
	rts			! 74	[c=0 l=2]  *return_i
	mov.l	r5,@(12,r0)	! 28	[c=4 l=2]  movsi_ie/8
.L7:
	.align 2
.L6:
	.long	1048575

*/
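
// Summary (based on the listings above): on RX both variants compile to
// equivalent 8-instruction sequences.  On SH the bitfield variant needs
// a stack adjustment and a few extra moves/shifts compared to the
// shift-and-mask variant.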
