Re: [HACKERS] GiST penalty functions [PoC]

Михаил Бахтерев Fri, 09 Sep 2016 06:50:19 -0700

Yes, you are right: ANSI C allows only load-time initializers. The attached
ANSI-compatible version leads to the same assembly.

And let me suggest a bit-twiddling version as well. It gives 12
instructions instead of 13. 12 is better, as a modern x86 CPU will fetch
them in at most 3 cycles, one less than for 13 instructions. Also, this
bit-twiddling is more parallel at the instruction level.

And for ARM, which is unsurpassed at bit-twiddling, this code is way
better.

Of course speed is influenced by a lot of factors as always, so it needs
to be tested on some datasets.

- Mikhail, respectfully

On Fri, Sep 09, 2016 at 08:50:53AM +0500, Andrey Borodin wrote:
> Thank you for your attention to details, Mikhail.
> 
> pack_float_good() looks good. But I'm not sure inline struct initialization
> is allowed under ANSI C. Converting to the regular, older form b.fp = v;
> wouldn't change the compiled result, would it?
> 
> Regards, Andrey Borodin.

#include <stdint.h>

/* Overlay of a float and an int, used below to read and write a float's bit pattern. */
typedef union { float fp; int i; } U;

/*
 * Pack value v and realm r into a single float.
 *
 * The bit pattern of v is shifted right by two and r * (INT32_MAX / 4)
 * is added to it; the sum is reinterpreted as a float and returned.
 *
 * Note: '>>' applied to a negative int is implementation-defined in C.
 */
float pack_float(const float v, const int r)
{
  const int realm_base = r * (INT32_MAX / 4);
  U pun;

  pun.fp = v;
  pun.i = (pun.i >> 2) + realm_base;

  return pun.fp;
}

float pack_float_av(float v, int r)
{
  U buf;

  buf.fp = v;
  buf.i = (buf.i >> 2) + (INT32_MAX / 4) * r;

  return buf.fp;
}

/*
 * Pack actualValue and realm into a single float.
 *
 * The bit pattern of actualValue is divided by 4 (integer division) and
 * realm * (INT32_MAX / 4) is added; the sum is reinterpreted as a float.
 *
 * The float <-> int reinterpretation goes through a union.  Dereferencing
 * a float object through an int pointer (and vice versa) violates the C
 * aliasing rules and is undefined behaviour, so an optimising compiler is
 * free to miscompile it.
 */
float
pack_float_v3(float actualValue, int realm)
{
  /* two bits for realm, others for value */
  /* we have 4 realms           */
  union { float asFloat; int asInt; } pun;
  int realmAdjustment;
  int realCode;

  pun.asFloat = actualValue;
  realmAdjustment = pun.asInt / 4;
  realCode = realm * (INT32_MAX/4) + realmAdjustment;
  pun.asInt = realCode;
  return pun.asFloat;
}

/*
 * Pack value v and realm r into a single float.
 *
 * The bit pattern of v is divided by 4 using integer division (not a
 * shift), r * (INT32_MAX / 4) is added, and the sum is reinterpreted as
 * a float.
 */
float pack_float_good(const float v, const int r)
{
  U pun;
  int quarter;

  pun.fp = v;
  quarter = pun.i / 4;
  pun.i = quarter + r * (INT32_MAX / 4);

  return pun.fp;
}

/*
 * Pack value v and realm r into a single float, without designated
 * initializers.
 *
 * Computes (bits of v) / 4 + r * (INT32_MAX / 4) on the integer view of
 * a local float/int union and returns the float view.
 */
float pack_float_ansi(const float v, const int r)
{
  union { float f; int i; } pun;
  int quarter;

  pun.f = v;
  quarter = pun.i / 4;
  pun.i = quarter + r * (INT32_MAX / 4);

  return pun.f;
}

/*
 * Pack value v and realm r into a single float using bit-fields.
 *
 * The float is overlaid with two bit-field views: one with a 31-bit
 * value plus a 1-bit sign, and one with a 29-bit value, a 2-bit realm
 * and a 1-bit sign.  The 31-bit value is shifted right by two into the
 * 29-bit field and r is stored in the realm field; the sign field is
 * never written.
 *
 * Note: how bit-fields are allocated within a storage unit is
 * implementation-defined.
 */
float pack_float_bits(const float v, const int r)
{
  union {
    float f;
    struct { unsigned value:31, sign:1; } vbits;
    struct { unsigned value:29, realm:2, sign:1; } rbits;
  } pun;
  unsigned magnitude;

  pun.f = v;

  magnitude = pun.vbits.value;
  pun.rbits.value = magnitude >> 2;
  pun.rbits.realm = r;

  return pun.f;
}

	# x86-64 listing (AT&T operand order: source first, destination last).
	.file	"pack-float.c"
	.text
	# pack_float: v is read from %xmm0 and r from %edi; the result is
	# left in %xmm0.  Computes (bits(v) >> 2) + r * (INT32_MAX / 4).
	.p2align 4,,15
	.globl	pack_float
	.type	pack_float, @function
pack_float:
.LFB0:
	.cfi_startproc
	# eax = raw bit pattern of v
	movd	%xmm0, %eax
	# edx = r << 29 (completed by the subl below)
	movl	%edi, %edx
	sall	$29, %edx
	# arithmetic shift: eax = bits(v) >> 2, sign bit replicated
	sarl	$2, %eax
	# edx = (r << 29) - r = r * (2^29 - 1) = r * (INT32_MAX / 4)
	subl	%edi, %edx
	addl	%edx, %eax
	# the integer result is bounced through the 4 bytes just below %rsp
	# to get it back into %xmm0 as a float
	movl	%eax, -4(%rsp)
	movss	-4(%rsp), %xmm0
	ret
	.cfi_endproc
.LFE0:
	.size	pack_float, .-pack_float
	# pack_float_av: v in %xmm0, r in %edi, result in %xmm0.
	# Instruction-for-instruction the same sequence as pack_float:
	# (bits(v) >> 2) + r * (INT32_MAX / 4).
	.p2align 4,,15
	.globl	pack_float_av
	.type	pack_float_av, @function
pack_float_av:
.LFB1:
	.cfi_startproc
	# eax = raw bit pattern of v
	movd	%xmm0, %eax
	movl	%edi, %edx
	sall	$29, %edx
	# arithmetic shift right by two
	sarl	$2, %eax
	# edx = (r << 29) - r = r * (INT32_MAX / 4)
	subl	%edi, %edx
	addl	%edx, %eax
	# move the integer result back to %xmm0 via the stack slot below %rsp
	movl	%eax, -4(%rsp)
	movss	-4(%rsp), %xmm0
	ret
	.cfi_endproc
.LFE1:
	.size	pack_float_av, .-pack_float_av
	# pack_float_v3: v in %xmm0, realm in %edi, result in %xmm0.
	# Computes bits(v) / 4 + realm * (INT32_MAX / 4); the signed division
	# costs extra instructions compared with the plain shift in pack_float.
	.p2align 4,,15
	.globl	pack_float_v3
	.type	pack_float_v3, @function
pack_float_v3:
.LFB2:
	.cfi_startproc
	# edx = raw bit pattern of v
	movd	%xmm0, %edx
	# signed divide by 4, rounding toward zero: use bits + 3 when bits is
	# negative, bits itself otherwise, then shift arithmetically
	leal	3(%rdx), %eax
	testl	%edx, %edx
	cmovns	%edx, %eax
	sarl	$2, %eax
	# edx = quotient; eax is reused for the realm term
	movl	%eax, %edx
	movl	%edi, %eax
	# eax = (realm << 29) - realm = realm * (INT32_MAX / 4)
	sall	$29, %eax
	subl	%edi, %eax
	addl	%edx, %eax
	# move the integer result back to %xmm0 via the stack slot below %rsp
	movl	%eax, -4(%rsp)
	movss	-4(%rsp), %xmm0
	ret
	.cfi_endproc
.LFE2:
	.size	pack_float_v3, .-pack_float_v3
	# pack_float_good: v in %xmm0, r in %edi, result in %xmm0.
	# Same 13-instruction sequence as pack_float_v3:
	# bits(v) / 4 + r * (INT32_MAX / 4).
	.p2align 4,,15
	.globl	pack_float_good
	.type	pack_float_good, @function
pack_float_good:
.LFB3:
	.cfi_startproc
	# edx = raw bit pattern of v
	movd	%xmm0, %edx
	# signed divide by 4 toward zero: add 3 first only when bits < 0
	leal	3(%rdx), %eax
	testl	%edx, %edx
	cmovns	%edx, %eax
	sarl	$2, %eax
	movl	%eax, %edx
	# eax = (r << 29) - r = r * (INT32_MAX / 4)
	movl	%edi, %eax
	sall	$29, %eax
	subl	%edi, %eax
	addl	%edx, %eax
	# move the integer result back to %xmm0 via the stack slot below %rsp
	movl	%eax, -4(%rsp)
	movss	-4(%rsp), %xmm0
	ret
	.cfi_endproc
.LFE3:
	.size	pack_float_good, .-pack_float_good
	# pack_float_ansi: v in %xmm0, r in %edi, result in %xmm0.
	# The instruction sequence is identical to pack_float_good, i.e. the
	# assignment-style source compiles to the same code as the
	# designated-initializer version.
	.p2align 4,,15
	.globl	pack_float_ansi
	.type	pack_float_ansi, @function
pack_float_ansi:
.LFB4:
	.cfi_startproc
	# edx = raw bit pattern of v
	movd	%xmm0, %edx
	# signed divide by 4 toward zero: add 3 first only when bits < 0
	leal	3(%rdx), %eax
	testl	%edx, %edx
	cmovns	%edx, %eax
	sarl	$2, %eax
	movl	%eax, %edx
	# eax = (r << 29) - r = r * (INT32_MAX / 4)
	movl	%edi, %eax
	sall	$29, %eax
	subl	%edi, %eax
	addl	%edx, %eax
	# move the integer result back to %xmm0 via the stack slot below %rsp
	movl	%eax, -4(%rsp)
	movss	-4(%rsp), %xmm0
	ret
	.cfi_endproc
.LFE4:
	.size	pack_float_ansi, .-pack_float_ansi
	# pack_float_bits: v in %xmm0, r in %edi, result in %xmm0.
	# Bit-field version, 12 instructions: keeps the sign bit, shifts the
	# low 31 bits right by two, and ORs the 2-bit realm into bits 29-30.
	.p2align 4,,15
	.globl	pack_float_bits
	.type	pack_float_bits, @function
pack_float_bits:
.LFB5:
	.cfi_startproc
	# two independent copies of the bit pattern: edx for the value, eax for the sign
	movd	%xmm0, %edx
	movd	%xmm0, %eax
	# realm = r & 3, moved up to bits 29-30
	andl	$3, %edi
	sall	$29, %edi
	# edx = low 31 bits (sign cleared); eax = sign bit only
	andl	$2147483647, %edx
	andl	$-2147483648, %eax
	# logical shift: the 31-bit value becomes a 29-bit value in bits 0-28
	shrl	$2, %edx
	# combine sign | value | realm
	orl	%edx, %eax
	orl	%edi, %eax
	# move the integer result back to %xmm0 via the stack slot below %rsp
	movl	%eax, -4(%rsp)
	movss	-4(%rsp), %xmm0
	ret
	.cfi_endproc
.LFE5:
	.size	pack_float_bits, .-pack_float_bits
	.ident	"GCC: (GNU) 6.1.1 20160802"
	.section	.note.GNU-stack,"",@progbits

	@ ARM (armv7-a, ARM state, unified syntax) listing for pack-float.c.
	.arch armv7-a
	.eabi_attribute 28, 1
	.eabi_attribute 20, 1
	.eabi_attribute 21, 1
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 2
	.eabi_attribute 30, 2
	.eabi_attribute 34, 1
	.eabi_attribute 18, 4
	.file	"pack-float.c"
	.text
	@ pack_float: v is read from s0 and r from r0; the result is left in s0.
	@ Computes (bits(v) >> 2) + r * (INT32_MAX / 4) in two ALU instructions.
	.align	2
	.global	pack_float
	.syntax unified
	.arm
	.fpu vfpv3-d16
	.type	pack_float, %function
pack_float:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	@ r3 = raw bit pattern of v
	vmov	r3, s0	@ int
	@ r0 = (r << 29) - r = r * (2^29 - 1) = r * (INT32_MAX / 4)
	rsb	r0, r0, r0, lsl #29
	@ r0 += bits(v) >> 2 (arithmetic shift folded into the add)
	add	r0, r0, r3, asr #2
	@ return the integer result as a float in s0
	vmov	s0, r0
	bx	lr
	.size	pack_float, .-pack_float
	@ pack_float_av: v in s0, r in r0, result in s0.
	@ Same instruction sequence as pack_float:
	@ (bits(v) >> 2) + r * (INT32_MAX / 4).
	.align	2
	.global	pack_float_av
	.syntax unified
	.arm
	.fpu vfpv3-d16
	.type	pack_float_av, %function
pack_float_av:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	@ r3 = raw bit pattern of v
	vmov	r3, s0	@ int
	@ r0 = (r << 29) - r = r * (INT32_MAX / 4)
	rsb	r0, r0, r0, lsl #29
	@ r0 += bits(v) >> 2 (arithmetic)
	add	r0, r0, r3, asr #2
	vmov	s0, r0
	bx	lr
	.size	pack_float_av, .-pack_float_av
	@ pack_float_v3: v in s0, realm in r0, result in s0.
	@ Computes bits(v) / 4 + realm * (INT32_MAX / 4); the signed division
	@ needs three more instructions than the plain shift in pack_float.
	.align	2
	.global	pack_float_v3
	.syntax unified
	.arm
	.fpu vfpv3-d16
	.type	pack_float_v3, %function
pack_float_v3:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	@ r3 = raw bit pattern of v
	vmov	r3, s0	@ int
	@ r0 = (realm << 29) - realm = realm * (INT32_MAX / 4)
	rsb	r0, r0, r0, lsl #29
	@ signed divide by 4 toward zero: substitute bits + 3 when bits < 0
	add	r2, r3, #3
	cmp	r3, #0
	movlt	r3, r2
	@ r3 = realm term + (adjusted bits >> 2)
	add	r3, r0, r3, asr #2
	vmov	s0, r3
	bx	lr
	.size	pack_float_v3, .-pack_float_v3
	@ pack_float_good: v in s0, r in r0, result in s0.
	@ Same instruction sequence as pack_float_v3:
	@ bits(v) / 4 + r * (INT32_MAX / 4).
	.align	2
	.global	pack_float_good
	.syntax unified
	.arm
	.fpu vfpv3-d16
	.type	pack_float_good, %function
pack_float_good:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	@ r3 = raw bit pattern of v
	vmov	r3, s0	@ int
	@ r0 = (r << 29) - r = r * (INT32_MAX / 4)
	rsb	r0, r0, r0, lsl #29
	@ signed divide by 4 toward zero: substitute bits + 3 when bits < 0
	add	r2, r3, #3
	cmp	r3, #0
	movlt	r3, r2
	add	r3, r0, r3, asr #2
	vmov	s0, r3
	bx	lr
	.size	pack_float_good, .-pack_float_good
	@ pack_float_ansi: v in s0, r in r0, result in s0.
	@ The instruction sequence is identical to pack_float_good.
	.align	2
	.global	pack_float_ansi
	.syntax unified
	.arm
	.fpu vfpv3-d16
	.type	pack_float_ansi, %function
pack_float_ansi:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	@ r3 = raw bit pattern of v
	vmov	r3, s0	@ int
	@ r0 = (r << 29) - r = r * (INT32_MAX / 4)
	rsb	r0, r0, r0, lsl #29
	@ signed divide by 4 toward zero: substitute bits + 3 when bits < 0
	add	r2, r3, #3
	cmp	r3, #0
	movlt	r3, r2
	add	r3, r0, r3, asr #2
	vmov	s0, r3
	bx	lr
	.size	pack_float_ansi, .-pack_float_ansi
	@ pack_float_bits: v in s0, r in r0, result in s0.
	@ Bit-field version: three bit-field instructions replace the
	@ shift/add arithmetic; bit 31 (the sign) is never modified.
	.align	2
	.global	pack_float_bits
	.syntax unified
	.arm
	.fpu vfpv3-d16
	.type	pack_float_bits, %function
pack_float_bits:
	@ args = 0, pretend = 0, frame = 0
	@ frame_needed = 0, uses_anonymous_args = 0
	@ link register save eliminated.
	@ r3 = raw bit pattern of v
	vmov	r3, s0	@ int
	@ r2 = bits 2..30 of the pattern (29 bits), i.e. the 31-bit value >> 2
	ubfx	r2, r3, #2, #29
	@ write those 29 bits into bits 0..28
	bfi	r3, r2, #0, #29
	@ write the low 2 bits of r into bits 29..30 (the realm field)
	bfi	r3, r0, #29, #2
	vmov	s0, r3
	bx	lr
	.size	pack_float_bits, .-pack_float_bits
	.ident	"GCC: (GNU) 6.1.1 20160802"
	.section	.note.GNU-stack,"",%progbits

signature.asc
Description: PGP signature

Re: [HACKERS] GiST penalty functions [PoC]

Reply via email to