Yes, you are right: ANSI C allows only load-time initializers. The attached ANSI-compatible version compiles to the same assembly.
Let me also suggest a bit-twiddling version. It compiles to 12 instructions instead of 13. Twelve is better, because a modern x86 CPU will fetch them in at most 3 cycles, one fewer than for 13 instructions. The bit-twiddling version also exposes more instruction-level parallelism. And on ARM, which is unsurpassed at bit-twiddling, this code is far better. Of course, speed is influenced by many factors, as always, so it needs to be tested on some datasets.

- Mikhail, respectfully

On Fri, Sep 09, 2016 at 08:50:53AM +0500, Andrey Borodin wrote:
> Thank you for your attention to details, Mikhail.
>
> pack_float_good() looks good. But I'm not sure inline strict init is allowed
> under ansi C. Converting to regular ancient form b.fp = v; won't change
> compile result, would it?
>
> Regards, Andrey Borodin.
#include <stdint.h>
#include <string.h>

/*
 * Overlay used to reinterpret the bits of a float as an int and back.
 * The code below assumes that both float and int are 32 bits wide.
 */
typedef union { float fp; int i; } U;

/*
 * Pack value v into "realm" r: the bit pattern of v is shifted right by two
 * and r * (INT32_MAX / 4) is added to it; the resulting bit pattern is
 * returned as a float.
 *
 * Note: ">>" on a negative signed int is implementation-defined in C, so for
 * negative v the result of the shift depends on the compiler.
 */
float pack_float(const float v, const int r)
{
    const U a = { .fp = v };
    const U b = { .i = (a.i >> 2) + r * (INT32_MAX / 4) };

    return b.fp;
}

/*
 * Same computation as pack_float(), written with plain assignments to a
 * single union instead of designated initializers.
 */
float pack_float_av(float v, int r)
{
    U buf;

    buf.fp = v;
    buf.i = (buf.i >> 2) + (INT32_MAX / 4) * r;
    return buf.fp;
}

/*
 * Variant that divides the bit pattern by 4 (rounding toward zero) instead
 * of shifting it.
 *
 * The bits are moved between float and int with memcpy() rather than by
 * dereferencing a casted pointer: accessing a float object through an int
 * lvalue (and vice versa) breaks C's effective-type (strict aliasing) rule.
 */
float pack_float_v3(float actualValue, int realm)
{
    /* two bits for realm, others for value */
    /* we have 4 realms */
    int valueBits;
    int realmAdjustment;
    int realCode;

    memcpy(&valueBits, &actualValue, sizeof valueBits);
    realmAdjustment = valueBits / 4;
    realCode = realm * (INT32_MAX / 4) + realmAdjustment;
    memcpy(&actualValue, &realCode, sizeof actualValue);
    return actualValue;
}

/*
 * Division-based variant using union U with designated initializers.
 * "/ 4" rounds toward zero, unlike the shift used by pack_float().
 */
float pack_float_good(const float v, const int r)
{
    const U a = { .fp = v };
    const U b = { .i = a.i / 4 + r * (INT32_MAX / 4) };

    return b.fp;
}

/*
 * Division-based variant that avoids designated initializers: the union is
 * filled by ordinary assignment.
 */
float pack_float_ansi(const float v, const int r)
{
    union { float f; int i; } a;

    a.f = v;
    a.i = a.i / 4 + r * (INT32_MAX / 4);
    return a.f;
}

/*
 * Bit-field variant: the 31-bit value field is shifted right by two and
 * stored into the low 29 bits, r is stored into the 2-bit realm field, and
 * the sign field is not written.
 *
 * Bit-field allocation order is implementation-defined; this layout assumes
 * the first declared field occupies the least significant bits.
 */
float pack_float_bits(const float v, const int r)
{
    union {
        float f;
        struct { unsigned value:31, sign:1; } vbits;
        struct { unsigned value:29, realm:2, sign:1; } rbits;
    } a;

    a.f = v;
    a.rbits.value = a.vbits.value >> 2;
    a.rbits.realm = r;
    return a.f;
}
# GCC output for pack-float.c: x86-64, AT&T syntax (source operand first).
# In every function below the float argument is read from %xmm0, the int
# argument from %edi, and the result is returned in %xmm0. The result is
# moved from %eax to %xmm0 through a 4-byte slot at -4(%rsp).
        .file "pack-float.c"
        .text

# pack_float: (bits(v) >> 2) + r * 0x1FFFFFFF -- 9 instructions.
        .p2align 4,,15
        .globl pack_float
        .type pack_float, @function
pack_float:
.LFB0:
        .cfi_startproc
        movd %xmm0, %eax                # eax = bit pattern of v
        movl %edi, %edx
        sall $29, %edx                  # edx = r << 29
        sarl $2, %eax                   # arithmetic shift: eax = bits >> 2
        subl %edi, %edx                 # edx = (r << 29) - r = r * 0x1FFFFFFF
        addl %edx, %eax
        movl %eax, -4(%rsp)
        movss -4(%rsp), %xmm0
        ret
        .cfi_endproc
.LFE0:
        .size pack_float, .-pack_float

# pack_float_av: same instruction sequence as pack_float.
        .p2align 4,,15
        .globl pack_float_av
        .type pack_float_av, @function
pack_float_av:
.LFB1:
        .cfi_startproc
        movd %xmm0, %eax
        movl %edi, %edx
        sall $29, %edx
        sarl $2, %eax
        subl %edi, %edx
        addl %edx, %eax
        movl %eax, -4(%rsp)
        movss -4(%rsp), %xmm0
        ret
        .cfi_endproc
.LFE1:
        .size pack_float_av, .-pack_float_av

# pack_float_v3: bits(v) / 4 (rounding toward zero) + r * 0x1FFFFFFF
# -- 13 instructions.
        .p2align 4,,15
        .globl pack_float_v3
        .type pack_float_v3, @function
pack_float_v3:
.LFB2:
        .cfi_startproc
        movd %xmm0, %edx                # edx = bit pattern of v
        leal 3(%rdx), %eax              # eax = bits + 3 (bias for negative values)
        testl %edx, %edx
        cmovns %edx, %eax               # non-negative: use the unbiased bits
        sarl $2, %eax                   # eax = bits / 4, rounded toward zero
        movl %eax, %edx
        movl %edi, %eax
        sall $29, %eax
        subl %edi, %eax                 # eax = r * 0x1FFFFFFF
        addl %edx, %eax
        movl %eax, -4(%rsp)
        movss -4(%rsp), %xmm0
        ret
        .cfi_endproc
.LFE2:
        .size pack_float_v3, .-pack_float_v3

# pack_float_good: same instruction sequence as pack_float_v3.
        .p2align 4,,15
        .globl pack_float_good
        .type pack_float_good, @function
pack_float_good:
.LFB3:
        .cfi_startproc
        movd %xmm0, %edx
        leal 3(%rdx), %eax
        testl %edx, %edx
        cmovns %edx, %eax
        sarl $2, %eax
        movl %eax, %edx
        movl %edi, %eax
        sall $29, %eax
        subl %edi, %eax
        addl %edx, %eax
        movl %eax, -4(%rsp)
        movss -4(%rsp), %xmm0
        ret
        .cfi_endproc
.LFE3:
        .size pack_float_good, .-pack_float_good

# pack_float_ansi: same instruction sequence as pack_float_v3.
        .p2align 4,,15
        .globl pack_float_ansi
        .type pack_float_ansi, @function
pack_float_ansi:
.LFB4:
        .cfi_startproc
        movd %xmm0, %edx
        leal 3(%rdx), %eax
        testl %edx, %edx
        cmovns %edx, %eax
        sarl $2, %eax
        movl %eax, %edx
        movl %edi, %eax
        sall $29, %eax
        subl %edi, %eax
        addl %edx, %eax
        movl %eax, -4(%rsp)
        movss -4(%rsp), %xmm0
        ret
        .cfi_endproc
.LFE4:
        .size pack_float_ansi, .-pack_float_ansi

# pack_float_bits: mask-and-merge version -- 12 instructions.
# Result = sign bit of v | (r & 3) << 29 | (low 31 bits of v) >> 2.
        .p2align 4,,15
        .globl pack_float_bits
        .type pack_float_bits, @function
pack_float_bits:
.LFB5:
        .cfi_startproc
        movd %xmm0, %edx                # edx = bit pattern of v (for the value)
        movd %xmm0, %eax                # eax = bit pattern of v (for the sign)
        andl $3, %edi                   # keep the low two bits of r
        sall $29, %edi                  # move them to bits 29..30
        andl $2147483647, %edx          # clear the sign bit (mask 0x7FFFFFFF)
        andl $-2147483648, %eax         # keep only the sign bit (mask 0x80000000)
        shrl $2, %edx                   # logical shift: value >> 2
        orl %edx, %eax
        orl %edi, %eax
        movl %eax, -4(%rsp)
        movss -4(%rsp), %xmm0
        ret
        .cfi_endproc
.LFE5:
        .size pack_float_bits, .-pack_float_bits
        .ident "GCC: (GNU) 6.1.1 20160802"
        .section .note.GNU-stack,"",@progbits
@ GCC output for pack-float.c: ARMv7-A, ARM state, unified syntax.
@ In every function below the float argument is read from s0, the int
@ argument from r0, and the result is written back to s0.
        .arch armv7-a
        .eabi_attribute 28, 1
        .eabi_attribute 20, 1
        .eabi_attribute 21, 1
        .eabi_attribute 23, 3
        .eabi_attribute 24, 1
        .eabi_attribute 25, 1
        .eabi_attribute 26, 2
        .eabi_attribute 30, 2
        .eabi_attribute 34, 1
        .eabi_attribute 18, 4
        .file "pack-float.c"
        .text

@ pack_float: (bits(v) >> 2) + r * 0x1FFFFFFF -- 5 instructions.
        .align 2
        .global pack_float
        .syntax unified
        .arm
        .fpu vfpv3-d16
        .type pack_float, %function
pack_float:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        vmov r3, s0 @ int
        @ r0 = (r << 29) - r = r * 0x1FFFFFFF
        rsb r0, r0, r0, lsl #29
        @ r0 += bits >> 2 (arithmetic shift folded into the add)
        add r0, r0, r3, asr #2
        vmov s0, r0
        bx lr
        .size pack_float, .-pack_float

@ pack_float_av: same instruction sequence as pack_float.
        .align 2
        .global pack_float_av
        .syntax unified
        .arm
        .fpu vfpv3-d16
        .type pack_float_av, %function
pack_float_av:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        vmov r3, s0 @ int
        rsb r0, r0, r0, lsl #29
        add r0, r0, r3, asr #2
        vmov s0, r0
        bx lr
        .size pack_float_av, .-pack_float_av

@ pack_float_v3: bits(v) / 4 (rounding toward zero) + r * 0x1FFFFFFF
@ -- 8 instructions.
        .align 2
        .global pack_float_v3
        .syntax unified
        .arm
        .fpu vfpv3-d16
        .type pack_float_v3, %function
pack_float_v3:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        vmov r3, s0 @ int
        @ r0 = r * 0x1FFFFFFF
        rsb r0, r0, r0, lsl #29
        @ r2 = bits + 3, used only when bits < 0 so the shift rounds toward zero
        add r2, r3, #3
        cmp r3, #0
        movlt r3, r2
        add r3, r0, r3, asr #2
        vmov s0, r3
        bx lr
        .size pack_float_v3, .-pack_float_v3

@ pack_float_good: same instruction sequence as pack_float_v3.
        .align 2
        .global pack_float_good
        .syntax unified
        .arm
        .fpu vfpv3-d16
        .type pack_float_good, %function
pack_float_good:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        vmov r3, s0 @ int
        rsb r0, r0, r0, lsl #29
        add r2, r3, #3
        cmp r3, #0
        movlt r3, r2
        add r3, r0, r3, asr #2
        vmov s0, r3
        bx lr
        .size pack_float_good, .-pack_float_good

@ pack_float_ansi: same instruction sequence as pack_float_v3.
        .align 2
        .global pack_float_ansi
        .syntax unified
        .arm
        .fpu vfpv3-d16
        .type pack_float_ansi, %function
pack_float_ansi:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        vmov r3, s0 @ int
        rsb r0, r0, r0, lsl #29
        add r2, r3, #3
        cmp r3, #0
        movlt r3, r2
        add r3, r0, r3, asr #2
        vmov s0, r3
        bx lr
        .size pack_float_ansi, .-pack_float_ansi

@ pack_float_bits: bit-field extract/insert version -- 6 instructions.
@ Bit 31 of the input pattern is left unchanged.
        .align 2
        .global pack_float_bits
        .syntax unified
        .arm
        .fpu vfpv3-d16
        .type pack_float_bits, %function
pack_float_bits:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        vmov r3, s0 @ int
        @ r2 = bits 2..30 of the pattern, i.e. the 31-bit value shifted right by 2
        ubfx r2, r3, #2, #29
        @ store it into bits 0..28
        bfi r3, r2, #0, #29
        @ store the low two bits of r into bits 29..30
        bfi r3, r0, #29, #2
        vmov s0, r3
        bx lr
        .size pack_float_bits, .-pack_float_bits
        .ident "GCC: (GNU) 6.1.1 20160802"
        .section .note.GNU-stack,"",%progbits
signature.asc
Description: PGP signature