I assume your template uses untyped parameters, as in
template eqabs(a, b: untyped): bool = a * a == b * b
(Indeed I should have used 64 bit int for the C code.)
It is great that we get the same optimized assembly as in C (at least for a
template, though not for an inline proc).
But indeed comparing the squares seems to give no real advantage -- Nim's and
C's abs() is already fully optimized.
#include <stdint.h>
#include <stdlib.h>
/* eqabs(a, b): nonzero when a*a equals b*b, i.e. a and b have the same
 * magnitude (as long as neither square overflows).
 *
 * Every parameter use and the whole expansion are parenthesized so the macro
 * behaves like a single expression: eqabs(1 + 2, 3) and !eqabs(x, y) now
 * mean what they appear to mean.
 *
 * Caveats: each argument is evaluated twice, so avoid arguments with side
 * effects; for signed operands the multiplication can overflow. */
#define eqabs(a, b) (((a) * (a)) == ((b) * (b)))
/* eq(a, b): nonzero when the absolute value of a (via llabs) equals b.
 *
 * The second parameter and the whole expansion are parenthesized so that
 * low-precedence operators in b (e.g. `5 & 4`) and operators applied to the
 * macro result (e.g. `!eq(x, 9)`) bind as expected. */
#define eq(a, b) (llabs(a) == (b))
/* t1: returns nonzero when x squared equals 9 squared or 5 squared,
 * i.e. when |x| is 9 or 5, using the eqabs macro (square comparison). */
int8_t t1(int64_t x) {
    /* `||` rather than `or`: the alternative spelling is only available in C
     * through <iso646.h>, whereas `||` is valid in both C and C++. */
    return eqabs(x, 9) || eqabs(x, 5);
}
/* t2: returns nonzero when |x| is 9 or 5, using the eq macro (llabs-based
 * comparison). Counterpart of t1 for comparing the generated code. */
int8_t t2(int64_t x) {
    /* `||` rather than `or`: the alternative spelling is only available in C
     * through <iso646.h>, whereas `||` is valid in both C and C++. */
    return eq(x, 9) || eq(x, 5);
}
/* a1: hand-written absolute value of x.
 * Note: negating INT64_MIN overflows, so that input is not supported. */
int64_t a1(int64_t x) {
    if (x < 0) {
        return -x;
    }
    return x;
}
/* a2: absolute value of x obtained from the standard library's llabs. */
int64_t a2(int64_t x) {
    const long long magnitude = llabs(x);
    return magnitude;
}
; t1: compiler output for the square-comparison variant (Intel syntax).
; In:  rdi = x  (NOTE(review): assumes the System V AMD64 ABI — confirm target)
; Out: al  = 1 if x*x is 81 or 25, otherwise 0
t1(long):
imul rdi, rdi ; rdi = x * x, computed once and reused for both comparisons
cmp rdi, 81 ; 81 = 9*9, folded to a constant by the compiler
sete al ; al = (x*x == 81)
cmp rdi, 25 ; 25 = 5*5, folded to a constant by the compiler
sete dl ; dl = (x*x == 25)
or eax, edx ; merge both results; only al matters for the int8_t return value
ret
; t2: compiler output for the llabs-based variant (Intel syntax).
; In:  rdi = x  (NOTE(review): assumes the System V AMD64 ABI — confirm target)
; Out: al  = 1 if |x| is 5 or 9, otherwise 0
t2(long):
mov rax, rdi
sar rax, 63 ; rax = sign mask of x: 0 if x >= 0, -1 if x < 0
xor rdi, rax ; with the sub below: rdi = (x ^ mask) - mask = |x|, no branch
sub rdi, rax
sub rdi, 5 ; rdi = |x| - 5, which is 0 for |x| == 5 and 4 for |x| == 9
test rdi, -5 ; -5 == ~4: ZF is set only when no bit other than bit 2 is set, i.e. rdi is 0 or 4
sete al ; al = (|x| == 5 or |x| == 9), both comparisons folded into one test
ret
; a1: compiler output for the hand-written `x < 0 ? -x : x` (Intel syntax).
; In:  rdi = x  (NOTE(review): assumes the System V AMD64 ABI — confirm target)
; Out: rax = |x|
a1(long):
mov rdx, rdi
mov rax, rdi
sar rdx, 63 ; rdx = sign mask of x: 0 if x >= 0, -1 if x < 0
xor rax, rdx ; with the sub below: rax = (x ^ mask) - mask = |x|, no branch
sub rax, rdx
ret
; a2: compiler output for `llabs(x)` (Intel syntax); the call is expanded inline.
; In:  rdi = x  (NOTE(review): assumes the System V AMD64 ABI — confirm target)
; Out: rax = |x|
a2(long):
mov rdx, rdi
mov rax, rdi
sar rdx, 63 ; rdx = sign mask of x: 0 if x >= 0, -1 if x < 0
xor rax, rdx ; with the sub below: rax = (x ^ mask) - mask = |x|, no branch
sub rax, rdx
ret
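Function t1 has one instruction fewer than t2, but that does not mean that it
is faster: instruction count alone says little, and the multiply in t1
typically has a higher latency than the simple shift/xor/sub operations in t2.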