I have asked to llvm devs, it seems llvm already has an optimization pass that 
removes duplicated functions. But it's disabled on default, you can use it with 
LDC with "-mergefunc".

Its implementation, probably written by nlewycky:
https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_25/lib/Transforms/IPO/MergeFunctions.cpp

--------------------

I have done few tests in D1 with LDC, the first one, here -mergefunc (and no 
other optimizations) leaves a single function:

import tango.stdc.stdio: printf;
import tango.stdc.stdlib: atoi;

int factorial1(int X) {
  if (X == 0) return 1;
  return X*factorial1(X-1);
}
int factorial2(int X) {
  if (X == 0) return 1;
  return X*factorial2(X-1);
}

void main(char[][] args) {
  printf("%d\n", factorial1(atoi(args[1].ptr)));
  printf("%d\n", factorial2(atoi(args[1].ptr)));
}

--------------------

Second test I've done:

import tango.stdc.stdio: printf;
import tango.stdc.stdlib: atoi;

T factorial(T)(T x) {
  if (x == 0)
    return 1;
  else
    return x * factorial(x - 1);
}

void main(char[][] args) {
  printf("%d\n", factorial(atoi(args[1].ptr)));
  printf("%d\n", factorial(cast(uint)atoi(args[1].ptr)));
}


The resulting asm with -mergefunc shows some duplication left (a person says 
this is a bug that can be fixed, I have to talk with another developer):

__unnamed_1:
        subl    $4, %esp
        movl    %eax, (%esp)
        testl   %eax, %eax
        jne     .LBB2_2
        movl    $1, %eax
        addl    $4, %esp
        ret
.LBB2_2:
        movl    (%esp), %eax
        decl    %eax
        call    factorial!uint
        imull   (%esp), %eax
        addl    $4, %esp
        ret

factorial!int:
        subl    $4, %esp
        call    __unnamed_1
        addl    $4, %esp
        ret

factorial!uint:
        subl    $4, %esp
        call    __unnamed_1
        addl    $4, %esp
        ret

_Dmain:
        subl    $28, %esp
        movl    36(%esp), %eax
        movl    %eax, 20(%esp)
        movl    32(%esp), %eax
        movl    %eax, 16(%esp)
        cmpl    $1, 16(%esp)
        jbe     .LBB1_3
        movl    20(%esp), %eax
        movl    12(%eax), %eax
        movl    %eax, (%esp)
        call    atoi
        call    factorial!int
        movl    %eax, 4(%esp)
        movl    $.str, (%esp)
        call    printf
        cmpl    $1, 16(%esp)
        jbe     .LBB1_4
        movl    20(%esp), %eax
        movl    12(%eax), %eax
        movl    %eax, (%esp)
        call    atoi
        call    factorial!uint
        movl    %eax, 4(%esp)
        movl    $.str2, (%esp)
        call    printf
        xorl    %eax, %eax
        addl    $28, %esp
        ret     $8
.LBB1_3:
        movl    .modulefilename+4, %eax
        movl    %eax, 4(%esp)
        movl    .modulefilename, %eax
        movl    %eax, (%esp)
        movl    $12, 8(%esp)
        call    _d_array_bounds
.LBB1_4:
        movl    .modulefilename+4, %eax
        movl    %eax, 4(%esp)
        movl    .modulefilename, %eax
        movl    %eax, (%esp)
        movl    $13, 8(%esp)
        call    _d_array_bounds

----------------------------

My third experiment:

import tango.stdc.stdio: printf;
import tango.stdc.stdlib: atoi;

int factorial1(int X) {
  if (X == 0) return 1;
  return X*factorial1(X-1);
}
int factorial2(int X) {
  if (X == 0) return 1;
  return X*factorial2(X-1);
}

int caller(int function(int) func, int x) {
    return func(x);
}

void main() {
  printf("%d\n", caller(&factorial1, atoi("10")));
  printf("%d\n", caller(&factorial2, atoi("10")));
}


Its asm without mergefunc:

_D5temp310factorial1FiZi:
        subl    $4, %esp
        movl    %eax, (%esp)
        movl    (%esp), %eax
        cmpl    $0, %eax
        jne     .LBB1_2
        movl    $1, %eax
        addl    $4, %esp
        ret
.LBB1_2:
        movl    (%esp), %eax
        subl    $1, %eax
        call    _D5temp310factorial1FiZi
        movl    %eax, %ecx
        movl    (%esp), %eax
        imull   %ecx, %eax
        addl    $4, %esp
        ret

_D5temp310factorial2FiZi:
        subl    $4, %esp
        movl    %eax, (%esp)
        movl    (%esp), %eax
        cmpl    $0, %eax
        jne     .LBB2_2
        movl    $1, %eax
        addl    $4, %esp
        ret
.LBB2_2:
        movl    (%esp), %eax
        subl    $1, %eax
        call    _D5temp310factorial2FiZi
        movl    %eax, %ecx
        movl    (%esp), %eax
        imull   %ecx, %eax
        addl    $4, %esp
        ret

_D5temp36callerFPFiZiiZi:
        subl    $12, %esp
        movl    16(%esp), %ecx
        movl    %ecx, 8(%esp)
        movl    %eax, 4(%esp)
        movl    8(%esp), %ecx
        movl    4(%esp), %eax
        call    *%ecx
        addl    $12, %esp
        ret     $4

_Dmain:
        subl    $12, %esp
        leal    .str1, %eax
        movl    %eax, (%esp)
        call    atoi
        movl    $_D5temp310factorial1FiZi, (%esp)
        call    _D5temp36callerFPFiZiiZi
        subl    $4, %esp
        movl    $.str, (%esp)
        movl    %eax, 4(%esp)
        call    printf
        leal    .str3, %eax
        movl    %eax, (%esp)
        call    atoi
        movl    $_D5temp310factorial2FiZi, (%esp)
        call    _D5temp36callerFPFiZiiZi
        subl    $4, %esp
        movl    $.str2, (%esp)
        movl    %eax, 4(%esp)
        call    printf
        xorl    %eax, %eax
        addl    $12, %esp
        ret     $8


Its asm with mergefunc:

_D5temp310factorial1FiZi:
        subl    $4, %esp
        movl    %eax, (%esp)
        testl   %eax, %eax
        jne     .LBB1_2
        movl    $1, %eax
        addl    $4, %esp
        ret
.LBB1_2:
        movl    (%esp), %eax
        decl    %eax
        call    _D5temp310factorial1FiZi
        imull   (%esp), %eax
        addl    $4, %esp
        ret

_D5temp310factorial2FiZi:
        subl    $4, %esp
        call    _D5temp310factorial1FiZi
        addl    $4, %esp
        ret

_D5temp36callerFPFiZiiZi:
        subl    $12, %esp
        movl    16(%esp), %ecx
        movl    %ecx, 8(%esp)
        movl    %eax, 4(%esp)
        call    *8(%esp)
        addl    $12, %esp
        ret     $4
        .size   _D5temp36callerFPFiZiiZi, .-_D5temp36callerFPFiZiiZi

_Dmain:
        subl    $12, %esp
        movl    $.str1, (%esp)
        call    atoi
        movl    $_D5temp310factorial1FiZi, (%esp)
        call    _D5temp36callerFPFiZiiZi
        subl    $4, %esp
        movl    %eax, 4(%esp)
        movl    $.str, (%esp)
        call    printf
        movl    $.str3, (%esp)
        call    atoi
        movl    $_D5temp310factorial2FiZi, (%esp)
        call    _D5temp36callerFPFiZiiZi
        subl    $4, %esp
        movl    %eax, 4(%esp)
        movl    $.str2, (%esp)
        call    printf
        xorl    %eax, %eax
        addl    $12, %esp
        ret     $8


llvm is not able to replace all function pointers factorial2 with the ones to 
factorial1, so turns factorial2 into a stump that just calls the factorial1. In 
C/D you can compare function pointers so in this case LLVM has done what it 
can. But can't here llvm replace factorial2 with just a jump instruction? I am 
not expert enough yet to answer this. If someone has an answer for me I'd like 
to hear it :-)

Bye,
bearophile

Reply via email to