I assume your template uses untyped parameters, as in
    
    
    template eqabs(a, b: untyped): bool =
      ## Compares the square of `a` with the square of `b`.
      ## Both parameters are untyped, so each argument expression is
      ## substituted into the body twice.
      (a * a) == (b * b)
    

(Indeed, I should have used a 64-bit int for the C code.)

It is great that we get the same optimized assembly as in C (for a template, 
but not for an inline proc)

But indeed, comparing the squares seems to give no real advantage -- Nim's and 
C's abs() are already fully optimized.
    
    
    #include <stdint.h>
    #include <stdlib.h>
    /* eqabs(a, b): nonzero when a*a equals b*b.
       eq(a, b):    nonzero when llabs(a) equals b.
       Every parameter and each whole expansion is parenthesized so the macros
       keep their meaning with compound arguments (e.g. eqabs(x + 1, 3)) and
       inside larger expressions (e.g. !eq(x, 5)). Note that eqabs still
       evaluates each of its arguments twice. */
    #define eqabs(a, b) (((a) * (a)) == ((b) * (b)))
    #define eq(a, b) (llabs(a) == (b))
    
    /* Returns 1 when x*x equals 81 or 25 (i.e. |x| is 9 or 5, barring
       overflow of x*x), otherwise 0. Uses the eqabs macro, which compares
       squares instead of calling an abs function. The logical OR is spelled
       `||` because `or` is only available in C through <iso646.h>. */
    int8_t t1(int64_t x) {
        return eqabs(x, 9) || eqabs(x, 5);
    }
    
    /* Returns 1 when llabs(x) equals 9 or 5, otherwise 0. Uses the eq macro,
       which goes through llabs() rather than comparing squares. The logical
       OR is spelled `||` because `or` is only available in C through
       <iso646.h>. */
    int8_t t2(int64_t x) {
        return eq(x, 9) || eq(x, 5);
    }
    
    /* Hand-written absolute value: a negative x is negated, any other value
       is returned unchanged. */
    int64_t a1(int64_t x) {
        if (x < 0) {
            return -x;
        }
        return x;
    }
    
    /* Absolute value of x, delegated to the C library's llabs(). */
    int64_t a2(int64_t x) {
        const long long magnitude = llabs(x);
        return (int64_t)magnitude;
    }
    
    
    
    t1(long):                ; compiled t1: x is read from rdi, result is left in al
      imul rdi, rdi          ; rdi = x * x
      cmp rdi, 81            ; x * x == 9 * 9 ?
      sete al                ; al = 1 if equal, else 0
      cmp rdi, 25            ; x * x == 5 * 5 ?
      sete dl                ; dl = 1 if equal, else 0
      or eax, edx            ; merge both tests; the low byte (al) is the int8_t result
      ret
    t2(long):                ; compiled t2: x is read from rdi, result is left in al
      mov rax, rdi           ; rax = x
      sar rax, 63            ; rax = sign mask: 0 if x >= 0, -1 (all ones) if x < 0
      xor rdi, rax           ; together with the sub below: rdi = (x ^ mask) - mask = |x|
      sub rdi, rax
      sub rdi, 5             ; rdi = |x| - 5
      test rdi, -5           ; -5 == ~4, so ZF is set only when |x| - 5 is 0 or 4 (|x| is 5 or 9)
      sete al                ; al = 1 if |x| is 5 or 9, else 0 (both comparisons folded into one test)
      ret
    a1(long):                ; compiled a1: x is read from rdi, |x| is returned in rax
      mov rdx, rdi           ; rdx = x (becomes the sign mask)
      mov rax, rdi           ; rax = x (becomes the result)
      sar rdx, 63            ; rdx = 0 if x >= 0, -1 (all ones) if x < 0
      xor rax, rdx           ; flips every bit of rax when x is negative
      sub rax, rdx           ; subtracting -1 adds 1, completing the negation; no-op when mask is 0
      ret
    a2(long):                ; compiled a2: the llabs() call was inlined; same instructions as a1
      mov rdx, rdi           ; rdx = x (becomes the sign mask)
      mov rax, rdi           ; rax = x (becomes the result)
      sar rdx, 63            ; rdx = 0 if x >= 0, -1 (all ones) if x < 0
      xor rax, rdx           ; flips every bit of rax when x is negative
      sub rax, rdx           ; subtracting -1 adds 1, completing the negation; no-op when mask is 0
      ret
    

Function t1 has one fewer instruction, but that does not mean that it is faster 
than t2.

Reply via email to