In case you are still interested, here are 3 versions of pack-float. The
union version of pack-float should run faster. The code is simpler, the
dependencies are easier.

But it may be less accurate or even wrong, because for signed integers (x>>2)
and (x/4) are not the same. Consider x = -1: (-1 >> 2) is typically -1
(arithmetic shift), while (-1 / 4) is 0.

You may try pack_float_good, which gives the same asm as v3, but without
warnings.

- Mikhail, respectfully

On Thu, Sep 08, 2016 at 01:29:36PM +0500, Andrew Borodin wrote:
> >autoconf check for IEEE 754 floats
> The Autoconf manual says the following:
> >it is safe to assume IEEE-754 in most portable code these days
> https://www.gnu.org/software/autoconf/manual/autoconf.html#Floating-Point-Portability
> 
> > A union might be more readable
> Here is the union version of the patch. It's 10% slower than the original cube
> and dereference version. I have no idea why.
> Select performance is improved as in v3.
> 
#include <stdint.h>

/*
 * Overlay of a float and an int, used to read or write the bit pattern of a
 * float as an integer.
 * NOTE(review): assumes int and float have the same size (32 bits) -- confirm
 * for every target platform.
 */
typedef union
{
  float fp;
  int i;
} U;

/*
 * Pack v and r into one float: the bit pattern of v is shifted right by two
 * and r * (INT32_MAX / 4) is added to the resulting integer, which is then
 * returned reinterpreted as a float.
 *
 * r is presumably a realm number in 0..3 -- confirm against callers.
 *
 * The right shift is applied to a signed int.  For negative bit patterns
 * (negative v) its result is implementation-defined in C and, with an
 * arithmetic shift, rounds toward minus infinity, unlike division by 4.
 */
float pack_float(const float v, const int r)
{
  U bits;

  bits.fp = v;
  bits.i >>= 2;
  bits.i += r * (INT32_MAX / 4);

  return bits.fp;
}

float pack_float_av(float v, int r)
{
  U buf;

  buf.fp = v;
  buf.i = (buf.i >> 2) + (INT32_MAX / 4) * r;

  return buf.fp;
}

/*
 * Pack a float value and a realm number into a single float.
 *
 * The bit pattern of actualValue is read as an int, divided by 4 (integer
 * division, truncating toward zero) and offset by realm * (INT32_MAX / 4);
 * the resulting integer is returned reinterpreted as a float.
 *
 * The reinterpretation is done through a union instead of casting the
 * address of a float to int * (and back): accessing an object through an
 * lvalue of an incompatible type breaks C's aliasing rules, is undefined
 * behaviour, and draws strict-aliasing warnings from the compiler.
 *
 * NOTE(review): assumes int and float are both 32 bits wide and that realm
 * is one of the 4 realms (0..3) -- confirm against callers.
 */
float
pack_float_v3(float actualValue, int realm)
{
  union
  {
    float fp;
    int   i;
  } pun;

  pun.fp = actualValue;

  /* two bits for realm, others for value; we have 4 realms */
  pun.i = realm * (INT32_MAX / 4) + pun.i / 4;

  return pun.fp;
}

/*
 * Union-based packing of v and r into one float, using division.
 *
 * The bit pattern of v is read as an int through U and divided by 4
 * (truncating toward zero, also for negative patterns); r * (INT32_MAX / 4)
 * is added and the sum is returned reinterpreted as a float.
 *
 * r is presumably a realm number in 0..3 -- confirm against callers.
 */
float pack_float_good(const float v, const int r)
{
  U word;

  word.fp = v;
  word.i = word.i / 4 + r * (INT32_MAX / 4);

  return word.fp;
}

	.file	"pack-float.c"
	.text
	# pack_float: shifts the 32-bit pattern taken from %xmm0 right by two
	# (arithmetic shift) and adds %edi * (2^29 - 1); 2^29 - 1 == INT32_MAX / 4.
	# NOTE(review): %xmm0 / %edi presumably carry the float and int arguments
	# (System V x86-64 calling convention) -- confirm.
	.p2align 4,,15
	.globl	pack_float
	.type	pack_float, @function
pack_float:
.LFB0:
	.cfi_startproc
	movd	%xmm0, %eax	# eax = raw 32 bits held in xmm0
	movl	%edi, %edx	# edx = r
	sall	$29, %edx	# edx = r << 29
	sarl	$2, %eax	# eax = bits >> 2, sign-propagating
	subl	%edi, %edx	# edx = (r << 29) - r = r * (2^29 - 1)
	addl	%edx, %eax	# eax = shifted bits + realm offset
	movl	%eax, -4(%rsp)	# store the integer result in a scratch slot below rsp
	movss	-4(%rsp), %xmm0	# reload the same 4 bytes into xmm0 as the float result
	ret
	.cfi_endproc
.LFE0:
	.size	pack_float, .-pack_float
	# pack_float_av: shifts the 32-bit pattern taken from %xmm0 right by two
	# (arithmetic shift) and adds %edi * (2^29 - 1); 2^29 - 1 == INT32_MAX / 4.
	# NOTE(review): %xmm0 / %edi presumably carry the float and int arguments
	# (System V x86-64 calling convention) -- confirm.
	.p2align 4,,15
	.globl	pack_float_av
	.type	pack_float_av, @function
pack_float_av:
.LFB1:
	.cfi_startproc
	movd	%xmm0, %eax	# eax = raw 32 bits held in xmm0
	movl	%edi, %edx	# edx = r
	sall	$29, %edx	# edx = r << 29
	sarl	$2, %eax	# eax = bits >> 2, sign-propagating
	subl	%edi, %edx	# edx = (r << 29) - r = r * (2^29 - 1)
	addl	%edx, %eax	# eax = shifted bits + realm offset
	movl	%eax, -4(%rsp)	# store the integer result in a scratch slot below rsp
	movss	-4(%rsp), %xmm0	# reload the same 4 bytes into xmm0 as the float result
	ret
	.cfi_endproc
.LFE1:
	.size	pack_float_av, .-pack_float_av
	# pack_float_v3: divides the 32-bit pattern taken from %xmm0 by 4 with
	# truncation toward zero (bias of 3 for negative values, then arithmetic
	# shift) and adds %edi * (2^29 - 1); 2^29 - 1 == INT32_MAX / 4.
	# NOTE(review): %xmm0 / %edi presumably carry the float and int arguments
	# (System V x86-64 calling convention) -- confirm.
	.p2align 4,,15
	.globl	pack_float_v3
	.type	pack_float_v3, @function
pack_float_v3:
.LFB2:
	.cfi_startproc
	movd	%xmm0, %edx	# edx = raw 32 bits held in xmm0
	leal	3(%rdx), %eax	# eax = bits + 3 (biased copy for the negative case)
	testl	%edx, %edx	# set the sign flag from bits
	cmovns	%edx, %eax	# bits >= 0: use the unbiased value instead
	sarl	$2, %eax	# eax = bits / 4, rounded toward zero thanks to the bias
	movl	%eax, %edx	# edx = quotient
	movl	%edi, %eax	# eax = realm
	sall	$29, %eax	# eax = realm << 29
	subl	%edi, %eax	# eax = (realm << 29) - realm = realm * (2^29 - 1)
	addl	%edx, %eax	# eax = realm offset + quotient
	movl	%eax, -4(%rsp)	# store the integer result in a scratch slot below rsp
	movss	-4(%rsp), %xmm0	# reload the same 4 bytes into xmm0 as the float result
	ret
	.cfi_endproc
.LFE2:
	.size	pack_float_v3, .-pack_float_v3
	# pack_float_good: divides the 32-bit pattern taken from %xmm0 by 4 with
	# truncation toward zero (bias of 3 for negative values, then arithmetic
	# shift) and adds %edi * (2^29 - 1); 2^29 - 1 == INT32_MAX / 4.
	# NOTE(review): %xmm0 / %edi presumably carry the float and int arguments
	# (System V x86-64 calling convention) -- confirm.
	.p2align 4,,15
	.globl	pack_float_good
	.type	pack_float_good, @function
pack_float_good:
.LFB3:
	.cfi_startproc
	movd	%xmm0, %edx	# edx = raw 32 bits held in xmm0
	leal	3(%rdx), %eax	# eax = bits + 3 (biased copy for the negative case)
	testl	%edx, %edx	# set the sign flag from bits
	cmovns	%edx, %eax	# bits >= 0: use the unbiased value instead
	sarl	$2, %eax	# eax = bits / 4, rounded toward zero thanks to the bias
	movl	%eax, %edx	# edx = quotient
	movl	%edi, %eax	# eax = r
	sall	$29, %eax	# eax = r << 29
	subl	%edi, %eax	# eax = (r << 29) - r = r * (2^29 - 1)
	addl	%edx, %eax	# eax = realm offset + quotient
	movl	%eax, -4(%rsp)	# store the integer result in a scratch slot below rsp
	movss	-4(%rsp), %xmm0	# reload the same 4 bytes into xmm0 as the float result
	ret
	.cfi_endproc
.LFE3:
	.size	pack_float_good, .-pack_float_good
	.ident	"GCC: (GNU) 6.1.1 20160802"
	.section	.note.GNU-stack,"",@progbits

Attachment: signature.asc
Description: PGP signature

Reply via email to