I made a thorough comparison using multiple compilers; a summary of the findings follows. In short, there is a runtime overhead in non-optimized builds.

I reduced the code to cut out the imports and made two versions with equivalent semantic content. positive0.d contains the hand-written specializations of the abs function. positive.d contains the solution with function templates / static type analysis.

///////

/* positive0.d:

Compile & execute:
$ dmd positive0.d; ./positive0; echo $?
$ ldc2 positive0.d; ./positive0; echo $?

generate ASM source:
$ dmd positive0.d; gobjdump -d positive0.o > positive0.dmd.s
$ ldc2 positive0.d -output-s

*/

// Hand-written "specialization" of abs for an argument the caller already
// knows to be non-negative: returns n unchanged, without any sign test.
// Note: main in this file never calls this function; every check there,
// including the ones on square(...) results, goes through abs(int).
int absPositive(int n) {
  return n;
}

// General absolute value: returns n when n >= 0, otherwise -n.
// NOTE(review): for int.min the negation presumably wraps back to int.min,
// so the result would still be negative -- confirm if that input matters.
int abs(int n) {
  return (n>=0) ? n : -n;
}

// Returns x multiplied by itself as a plain int; the return type carries no
// information that the result is non-negative.
int square(int x) {
  return x * x;
}

// Runs four checks of abs (two on literals, two on square results called
// via UFCS as square(n).abs) and negates the combined result, so the
// process exit status is 0 when every check holds and 1 otherwise.
int main() {
  return !((abs(-16) == 16)
    && (abs(3) == 3)
    && (square(5).abs == 25)
    && (square(-4).abs == 16));
}

///////

/* positive.d:

Compile & execute:
$ dmd positive.d; ./positive; echo $?
$ ldc2 positive.d; ./positive; echo $?

generate ASM source:
$ dmd positive.d; gobjdump -d positive.o > positive.dmd.s
$ ldc2 positive.d -output-s

*/
// Thin wrapper used to tag an int as "already non-negative" in the type
// system. The struct itself performs no check on the stored value.
struct Positive {
  // The wrapped integer.
  int num;
  // Lets a Positive be used where an int is expected, e.g. in the
  // `== 16` comparisons in main.
  alias num this;
}

// Absolute value as a function template; the result is always a Positive.
// `static if` picks one branch per instantiation at compile time:
//   - T is Positive: the argument is returned unchanged (no sign test);
//   - any other T:   the usual (n >= 0) ? n : -n, wrapped in Positive.
Positive abs(T)(T n) {
  static if (is(T == Positive)) {
    return n;
  } else {
    return Positive((n >= 0) ? n : -n);
  }
}

// Returns x * x wrapped in Positive, so a following .abs call instantiates
// abs with T == Positive and takes the pass-through branch.
Positive square(int x) {
  return Positive(x * x);
}

// Same four checks as in positive0.d. abs(-16) and abs(3) instantiate the
// template with int; square(n).abs instantiates it with Positive. The
// combined result is negated, so the process exit status is 0 when every
// check holds and 1 otherwise.
int main() {
  return !((abs(-16) == 16)
    && (abs(3) == 3)
    && (square(5).abs == 25)
    && (square(-4).abs == 16));
}

///////

I compared the generated assembly. The assembly was substantially longer for the template-based version (positive.d) than for the version with hand-written specializations (positive0.d).

The 'optimized' versions of the abs function were equivalent, but the 'non-optimized' versions show the runtime overhead for both dmd and ldc2: two 'mov' instructions instead of a single one.

The assembly generated from the hand-written code was roughly half the size for both compilers:

File sizes:
ldc:
2678 positive0.s
4313 positive.s

dmd:
3442 positive0.dmd.s
8701 positive.dmd.s

You can see the abs functions below, and you can spot the double 'mov' operations:

positive.dmd.s:
0000000000000230 <_D8positive10__T3absTiZ3absFNaNbNiNfiZS8positive8Positive>:
 230:   55                      push   %rbp
 231:   48 8b ec                mov    %rsp,%rbp
 234:   48 83 ec 10             sub    $0x10,%rsp
 238:   85 ff                   test   %edi,%edi
 23a:   78 02                   js     23e <_D8positive10__T3absTiZ3absFNaNbNiNfiZS8positive8Positive+0xe>
 23c:   eb 02                   jmp    240 <_D8positive10__T3absTiZ3absFNaNbNiNfiZS8positive8Positive+0x10>
 23e:   f7 df                   neg    %edi
 240:   89 7d f0                mov    %edi,-0x10(%rbp)
 243:   48 89 f8                mov    %rdi,%rax
 246:   c9                      leaveq
 247:   c3                      retq

0000000000000248 <_D8positive28__T3absTS8positive8PositiveZ3absFNaNbNiNfS8positive8PositiveZS8positive8Positive>:
 248:   55                      push   %rbp
 249:   48 8b ec                mov    %rsp,%rbp
 24c:   48 83 ec 10             sub    $0x10,%rsp
 250:   48 89 f8                mov    %rdi,%rax
 253:   c9                      leaveq
 254:   c3                      retq
 255:   0f 1f 00                nopl   (%rax)



positive0.dmd.s:
00000000000000a0 <_D9positive011absPositiveFiZi>:
  a0:   55                      push   %rbp
  a1:   48 8b ec                mov    %rsp,%rbp
  a4:   48 83 ec 10             sub    $0x10,%rsp
  a8:   48 89 f8                mov    %rdi,%rax
  ab:   c9                      leaveq
  ac:   c3                      retq
  ad:   0f 1f 00                nopl   (%rax)

00000000000000b0 <_D9positive03absFiZi>:
  b0:   55                      push   %rbp
  b1:   48 8b ec                mov    %rsp,%rbp
  b4:   48 83 ec 10             sub    $0x10,%rsp
  b8:   85 ff                   test   %edi,%edi
  ba:   78 05                   js     c1 <_D9positive03absFiZi+0x11>
  bc:   48 89 f8                mov    %rdi,%rax
  bf:   eb 05                   jmp    c6 <_D9positive03absFiZi+0x16>
  c1:   48 89 f8                mov    %rdi,%rax
  c4:   f7 d8                   neg    %eax
  c6:   c9                      leaveq
  c7:   c3                      retq


ldc2:
positive.s:

# ldc2 output (no optimization flag in the build command) for the abs
# template instantiated with int, per the mangled name, from positive.d.
# In:  %edi = n.  Out: %eax = result.
__D8positive10__T3absTiZ3absFNaNbNiNfiZS8positive8Positive:
        .cfi_startproc
        # Spill the argument to a stack slot and test its sign from memory.
        movl    %edi, -4(%rsp)
        cmpl    $0, -4(%rsp)
        jl      LBB2_2
        # n >= 0: remember the address of the spilled n.
        leaq    -4(%rsp), %rax
        movq    %rax, -16(%rsp)
        jmp     LBB2_3
LBB2_2:
        # n < 0: compute 0 - n into a temporary slot and remember its address.
        leaq    -20(%rsp), %rax
        xorl    %ecx, %ecx
        subl    -4(%rsp), %ecx
        movl    %ecx, -20(%rsp)
        movq    %rax, -16(%rsp)
LBB2_3:
        # Load the selected value into %ecx, store it to one more stack slot,
        # then copy it to %eax. The abs(int) listing in positive0.s instead
        # loads the selected value straight into %eax with a single movl.
        movq    -16(%rsp), %rax
        movl    (%rax), %ecx
        movl    %ecx, -8(%rsp)
        movl    %ecx, %eax
        retq
        .cfi_endproc

        .globl  __D8positive28__T3absTS8positive8PositiveZ3absFNaNbNiNfS8positive8PositiveZS8positive8Positive
        .weak_definition        __D8positive28__T3absTS8positive8PositiveZ3absFNaNbNiNfS8positive8PositiveZS8positive8Positive
        .align  4, 0x90
# ldc2 output for the abs template instantiated with Positive, per the
# mangled name, from positive.d: the value arriving in %edi is returned
# unchanged in %eax, with no sign test.
__D8positive28__T3absTS8positive8PositiveZ3absFNaNbNiNfS8positive8PositiveZS8positive8Positive:
        .cfi_startproc
        # The stack slot written here is not read back within this function.
        movl    %edi, -8(%rsp)
        movl    %edi, %eax
        retq
        .cfi_endproc

        .section        __TEXT,__text,regular,pure_instructions
        .align  4, 0x90


positive0.s:
# ldc2 output for absPositive(int) from positive0.d: returns the argument.
# In:  %edi = n.  Out: %eax = n.
__D9positive011absPositiveFiZi:
        .cfi_startproc
        # The argument is stored to a stack slot and reloaded from it into %eax.
        movl    %edi, -4(%rsp)
        movl    -4(%rsp), %eax
        retq
        .cfi_endproc

        .globl  __D9positive03absFiZi
        .align  4, 0x90
# ldc2 output (no optimization flag in the build command) for the
# hand-written abs(int) from positive0.d.
# In:  %edi = n.  Out: %eax = result.
__D9positive03absFiZi:
        .cfi_startproc
        # Spill the argument to a stack slot and test its sign from memory.
        movl    %edi, -4(%rsp)
        cmpl    $0, -4(%rsp)
        jl      LBB1_2
        # n >= 0: remember the address of the spilled n.
        leaq    -4(%rsp), %rax
        movq    %rax, -16(%rsp)
        jmp     LBB1_3
LBB1_2:
        # n < 0: compute 0 - n into a temporary slot and remember its address.
        leaq    -20(%rsp), %rax
        xorl    %ecx, %ecx
        subl    -4(%rsp), %ecx
        movl    %ecx, -20(%rsp)
        movq    %rax, -16(%rsp)
LBB1_3:
        # Load the selected value straight into %eax and return; there is no
        # extra store or register copy here, unlike the template instance
        # listed under positive.s.
        movq    -16(%rsp), %rax
        movl    (%rax), %eax
        retq
        .cfi_endproc

        .globl  __D9positive06squareFiZi
        .align  4, 0x90


my compilers:

$ ldc2 -version
LDC - the LLVM D compiler (6d3923):
  based on DMD v2.066.1 and LLVM 3.6.1
  Default target: x86_64-apple-darwin14.4.0
  Host CPU: core-avx2

$ dmd --version
DMD64 D Compiler v2.067


Reply via email to