On Thursday, 28 February 2013 at 17:00:51 UTC, Walter Bright wrote:
On 2/28/2013 6:31 AM, Steven Schveighoffer wrote:
If this doesn't translate to the same code, I don't know why not.

Try it and see with your favorite C compiler.

Then try the lookahead cases I also posted.

You are being stubborn here.

You claim that this is significantly faster, but give no numbers.
You claim that the compiler can't do some optimization, when LLVM does it already.

See sample code bellow, which is a typical structure find in lexer.c :

int bar(int n) {
        switch(n) {
                case 0:
                        return 0; // Bad case !
                
                case 25:
                        return 75;
                
                case 42:
                        return 69;
                
                case 666:
                        return 999;
                
                default:
                        return -1;
        }
}

int foo(int n) {
        switch(n) {
                case 1:
                        return 23;
                
                case 2:
                case 3:
                        return n;
                        
                default:
                        return bar(n);
        }
}

As we see, foo don't check for 0, and bar does, but only when necessary, as bar isn't called. Or this is what you think. Let's see how SDC compile this :

define i32 @_D3barFiZi(i32 %arg.n) {
  %n = alloca i32
  store i32 %arg.n, i32* %n
  br label %body

body:                                             ; preds = %0
  %1 = load i32* %n
  switch i32 %1, label %default [
    i32 0, label %.case
    i32 25, label %.case1
    i32 42, label %.case2
    i32 666, label %.case3
  ]

switchstart: ; No predecessors!
  br label %.case

.case: ; preds = %body, %switchstart
  ret i32 0

.case1:                                           ; preds = %body
  ret i32 75

.case2:                                           ; preds = %body
  ret i32 69

.case3:                                           ; preds = %body
  ret i32 999

default:                                          ; preds = %body
  ret i32 -1

switchend: ; No predecessors!
  unreachable
}

define i32 @_D3fooFiZi(i32 %arg.n) {
  %n = alloca i32
  store i32 %arg.n, i32* %n
  br label %body

body:                                             ; preds = %0
  %1 = load i32* %n
  switch i32 %1, label %default [
    i32 1, label %.case
    i32 2, label %.case1
    i32 3, label %.case2
  ]

switchstart: ; No predecessors!
  br label %.case

.case: ; preds = %body, %switchstart
  ret i32 23

.case1:                                           ; preds = %body
  br label %.case2

.case2: ; preds = %body, %.case1
  %2 = load i32* %n
  ret i32 %2

default:                                          ; preds = %body
  %3 = load i32* %n
  %4 = call i32 @_D3barFiZi(i32 %3)
  ret i32 %4

switchend: ; No predecessors!
  unreachable
}

Here is the raw result of the frontend in LLVM IR. Now here is what the optimizer give us :

define i32 @_D3barFiZi(i32 %arg.n) nounwind readnone {
body:
  switch i32 %arg.n, label %default [
    i32 0, label %.case
    i32 25, label %.case1
    i32 42, label %.case2
    i32 666, label %.case3
  ]

.case: ; preds = %default, %.case3, %.case2, %.case1, %body %merge = phi i32 [ 0, %body ], [ 75, %.case1 ], [ 69, %.case2 ], [ 999, %.case3 ], [ -1, %default ]
  ret i32 %merge

.case1:                                           ; preds = %body
  br label %.case

.case2:                                           ; preds = %body
  br label %.case

.case3:                                           ; preds = %body
  br label %.case

default:                                          ; preds = %body
  br label %.case
}

define i32 @_D3fooFiZi(i32 %arg.n) nounwind readnone {
body:
  switch i32 %arg.n, label %default.i [
    i32 1, label %.case
    i32 2, label %.case2
    i32 3, label %.case2
    i32 0, label %_D3barFiZi.exit
    i32 25, label %.case1.i
    i32 42, label %.case2.i
    i32 666, label %.case3.i
  ]

.case: ; preds = %body, %_D3barFiZi.exit, %.case2 %merge = phi i32 [ 23, %body ], [ %arg.n, %.case2 ], [ %merge.i, %_D3barFiZi.exit ]
  ret i32 %merge

.case2: ; preds = %body, %body
  br label %.case

.case1.i:                                         ; preds = %body
  br label %_D3barFiZi.exit

.case2.i:                                         ; preds = %body
  br label %_D3barFiZi.exit

.case3.i:                                         ; preds = %body
  br label %_D3barFiZi.exit

default.i:                                        ; preds = %body
  br label %_D3barFiZi.exit

_D3barFiZi.exit: ; preds = %body, %.case1.i, %.case2.i, %.case3.i, %default.i %merge.i = phi i32 [ 75, %.case1.i ], [ 69, %.case2.i ], [ 999, %.case3.i ], [ -1, %default.i ], [ 0, %body ]
  br label %.case
}

See, foo don't even call bar anymore. No let's include the null check, as it would have been done if checking for empty on a regular range (that happen to internally use a sentinel) :

int foo(int n) {
        if(n != 0) {
                switch(n) {
                        case 1:
                                return 23;
                
                        case 2:
                        case 3:
                                return n;
                        
                        default:
                                return bar(n);
                }
        }
        
        return bar(n);
}

I didn't repeated bar as it doesn't change.

Which is optimized as :
define i32 @_D3fooFiZi(i32 %arg.n) nounwind readnone {
body:
  switch i32 %arg.n, label %default.i [
    i32 0, label %_D3barFiZi.exit8
    i32 1, label %.case
    i32 2, label %.case2
    i32 3, label %.case2
    i32 666, label %.case3.i
    i32 25, label %_D3barFiZi.exit
    i32 42, label %.case2.i
  ]

.case: ; preds = %body, %_D3barFiZi.exit8, %_D3barFiZi.exit, %.case2 %merge = phi i32 [ %arg.n, %.case2 ], [ %merge.i, %_D3barFiZi.exit ], [ 0, %_D3barFiZi.exit8 ], [ 23, %body ]
  ret i32 %merge

.case2: ; preds = %body, %body
  br label %.case

.case2.i:                                         ; preds = %body
  br label %_D3barFiZi.exit

.case3.i:                                         ; preds = %body
  br label %_D3barFiZi.exit

default.i:                                        ; preds = %body
  br label %_D3barFiZi.exit

_D3barFiZi.exit: ; preds = %body, %.case2.i, %.case3.i, %default.i %merge.i = phi i32 [ 69, %.case2.i ], [ 999, %.case3.i ], [ -1, %default.i ], [ 75, %body ]
  br label %.case

_D3barFiZi.exit8:                                 ; preds = %body
  br label %.case
}

The exact damn same thing ! Let's try with a different schemes, we check first an early return :

int foo(int n) {
        if(n == 0) {
                return bar(n);  
        }
        
        switch(n) {
                case 1:
                        return 23;
        
                case 2:
                case 3:
                        return n;
                
                default:
                        return bar(n);
        }
}

Guess what ? It is optimized as :
define i32 @_D3fooFiZi(i32 %arg.n) nounwind readnone {
body:
  switch i32 %arg.n, label %default.i7 [
    i32 0, label %_D3barFiZi.exit
    i32 1, label %.case
    i32 2, label %.case2
    i32 3, label %.case2
    i32 666, label %.case3.i6
    i32 25, label %_D3barFiZi.exit8
    i32 42, label %.case2.i5
  ]

_D3barFiZi.exit: ; preds = %body, %_D3barFiZi.exit8, %.case2, %.case %merge9 = phi i32 [ 0, %body ], [ 23, %.case ], [ %arg.n, %.case2 ], [ %merge.i3, %_D3barFiZi.exit8 ]
  ret i32 %merge9

.case:                                            ; preds = %body
  br label %_D3barFiZi.exit

.case2: ; preds = %body, %body
  br label %_D3barFiZi.exit

.case2.i5:                                        ; preds = %body
  br label %_D3barFiZi.exit8

.case3.i6:                                        ; preds = %body
  br label %_D3barFiZi.exit8

default.i7:                                       ; preds = %body
  br label %_D3barFiZi.exit8

_D3barFiZi.exit8: ; preds = %body, %.case2.i5, %.case3.i6, %default.i7 %merge.i3 = phi i32 [ 69, %.case2.i5 ], [ 999, %.case3.i6 ], [ -1, %default.i7 ], [ 75, %body ]
  br label %_D3barFiZi.exit
}

Yet again the same thing.

A range using a sentinel internally is clearly beneficial for a lexer (like C strings). Defining explicitly a range that does so is plain useless.

Reply via email to