On Mon, Nov 17, 2003 at 02:23:50PM +1030, Alan Modra wrote:
> On Sun, Nov 16, 2003 at 09:41:58PM -0500, Albert Cahalan wrote:
> > Using 16-byte stack alignment? More?
> 
> We've had 16 byte stack alignment on ppc64 forever.
> 
> > Keeping .text in the low 2 GB? Other stuff?
> 
> -maddr32 does the text and static data in -2G..2G thing.  Not in
>  mainline gcc because the powerpc maintainers didn't like it.  Anton is
>  messing around with it in the kernel at the moment, but results so far
>  are discouraging.  We're probably hitting some cache problems due to
>  mapping the kernel at -2G as well as 0xc << 60.  Then again, it might
>  be that David Edelsohn was right all along, and -maddr32 doesn't give
>  us any performance increase.
> 
> > Keeping anything in a 48-bit range?
> > (One less opcode than full 64-bit)
> 
> No.  ppc64 loads 64 bit addresses from the TOC.  We don't build up 64
> bit addresses from multiple insns each setting 16 bits at a time.

What does it do about floating point constants?

Recent versions of GCC have an annoying bug which makes many
(but not all) floating-point constant loads take 3 instructions
in 32-bit mode (I have never tried 64-bit; I have no hardware):

        lis rx,const@ha
        la ry,const@l(rx)
        lfd frz,0(ry)

I have an older version (2.95.4) which at least managed to
combine the last two instructions as often as possible. Changing 
optimization options between -O2, -O3, and -Os does not fundamentally 
change the result.

IMHO, the compiler should decide to allocate one register to 
point to the constant pool in these cases. But I would not
even know where to start in GCC's code.

Now try with the -fpic or -fPIC option (or -mrelocatable) and
try not to vomit when looking at the disgustingly bloated code.

I have appended an example extracted from a rather small function
(which is not a worst case because I use an inlined subroutine
to evaluate polynomials).

> In some ways the TOC is a good thing.  Think of it as a compiler
> generated GOT.  Then realize that the compiler can do better than the
> linker in placing entries for good cache performance.  On other Linux
> architectures, ld generates the GOT via a hash table traversal, which
> means entries are fairly well randomized.
> 
> > Still using "funtion pointers" that aren't?
> 
> I'm not sure what you mean here.

Function descriptors instead of a single pointer to the first
instruction, I believe. I'm not sure that it's that bad either.

> 
> > Anything left to improve?
> 
> Plenty.  For starters, I'd like to get rid of those pesky dot symbols.
> ie. A ".foo" symbol defined as well as a "foo" symbol for each function


        .file   "ndtri.c"
        .section        .rodata
        .align 3
[snipped]       
        .section        .rodata.cst8,"aM",@progbits,8
        # Pool of eleven 8-byte constants, .LC0 through .LC10, each emitted as
        # two 32-bit words and aligned to 8 bytes (.align 3).
        # The decoded values below read each pair as an IEEE-754 double with
        # the most significant word first -- assumes a big-endian target,
        # TODO confirm.  The three inexact values are rounded to 4 digits.
        .align 3
.LC0:
        .long   1072409429      # 0x3FEBAB55 (high word)
        .long   1460674445      # value is about 0.8647; presumably 1 - exp(-2)
        .align 3
.LC1:
        .long   1069634218      # 0x3FC152AA (high word)
        .long   -1547730484     # value is about 0.1353; presumably exp(-2)
        .align 3
.LC2:
        .long   1074007443      # 0x40040D93 (high word)
        .long   536225542       # value is about 2.5066; presumably sqrt(2*pi)
        .align 3
.LC3:
        .long   0               # 0x00000000
        .long   0               # value 0.0
        .align 3
.LC4:
        .long   1072693248      # 0x3FF00000
        .long   0               # value 1.0
        .align 3
.LC5:
        .long   1071644672      # 0x3FE00000
        .long   0               # value 0.5
        .align 3
.LC6:
        .long   -1073741824     # 0xC0000000
        .long   0               # value -2.0
        .align 3
.LC7:
        .long   1075838976      # 0x40200000
        .long   0               # value 8.0
        .align 3
.LC8:
        .long   -1048576        # 0xFFF00000
        .long   0               # value -infinity
        .align 3
.LC9:
        .long   2146435072      # 0x7FF00000
        .long   0               # value +infinity
        .align 3
.LC10:
        .long   2146959360      # 0x7FF80000
        .long   0               # a quiet NaN (exponent all ones, top fraction bit set)
        .section        ".text"
        .align 2
        .globl ndtri
        .type   ndtri, @function
# ndtri -- compiler output quoted to show how floating-point constants are
# fetched.  Reading notes:
#   * "1->", "2->", "3->" in the left margin tag the three instructions
#     (lis / la / lfd) spent on loading a single constant.  They are the
#     poster's annotations, not assembler syntax.
#   * Every "[EMAIL PROTECTED]" operand is what the list archive's address
#     scrubber left behind.  Presumably each was a ".LCn@ha" or ".LCn@l"
#     style reference to a constant or coefficient table; which symbol each
#     one named cannot be recovered from this listing.
#   * Registers as used below: f1 holds the argument on entry and the result
#     on return; f31 keeps a value across the calls; f30 holds the constant
#     fetched by the second load; r31 is a 0/1 flag that selects the final
#     negation; r9 is a scratch address; CTR counts polynomial-loop iterations.
ndtri:
        # Prologue: open a 48-byte frame and save f31, f30, r31 and LR.  The
        # first constant load and compare are interleaved with the saves.
1->     lis 9,[EMAIL PROTECTED]
        stwu 1,-48(1)
2->     la 9,[EMAIL PROTECTED](9)
        stfd 31,40(1)
3->     lfd 0,0(9)
        mflr 0
        stfd 30,32(1)
        # f31 = copy of the argument; it is read again after the calls below.
        fmr 31,1
        fcmpu 0,1,0
        stw 31,28(1)
        # LR goes to offset 52 from the new r1, i.e. just above this frame.
        stw 0,52(1)
        # Not (f1 > f0), which includes the unordered case: go to .L2.
        bng- 0,.L2
1->     lis 9,[EMAIL PROTECTED]
2->     la 9,[EMAIL PROTECTED](9)
3->     lfd 30,0(9)
        fcmpu 0,1,30
        # Not (f1 < f30): go to .L2 as well.
        bnl- 0,.L2
        lis 9,[EMAIL PROTECTED] # Not that bad here
        lfd 0,[EMAIL PROTECTED](9)
        # r31 = 1 unless f1 > f0, in which case f1 = f30 - f1 and r31 = 0.
        li 31,1
        fcmpu 0,1,0
        bng- 0,.L5
        fsub 1,30,1
        li 31,0
.L5:
        lis 9,[EMAIL PROTECTED] # Again acceptable
        lfd 0,[EMAIL PROTECTED](9)
        fcmpu 0,1,0
        # Not (f1 > f0): take the log/sqrt path at .L7.
        bng- 0,.L7
        # Rational-approximation path: f1 -= constant, f12 = f1*f1, then
        # result = (f1 + f1 * f12*P(f12)/Q(f12)) * constant.
1->     lis 9,[EMAIL PROTECTED]
2->     la 9,[EMAIL PROTECTED](9)
3->     lfd 0,0(9)
        # CTR = 4 iterations for the first polynomial.
        li 0,4
        mtctr 0
        lis 9,[EMAIL PROTECTED]
        fsub 1,1,0
        # f13 = first table entry; r9 = address of the second entry.
        lfd 13,[EMAIL PROTECTED](9)
        la 9,[EMAIL PROTECTED](9)
        addi 9,9,8
        fmul 12,1,1
.L55:
        # One step per pass: f13 = f13*f12 + next table entry; r9 advances 8.
        lfd 0,0(9)
        addi 9,9,8
        fmadd 13,13,12,0
        bdnz .L55
        lis 9,[EMAIL PROTECTED]
        # f11 = f12 * (first polynomial).
        fmul 11,12,13
        lfd 0,[EMAIL PROTECTED](9)
        # CTR = 7 iterations for the second polynomial.
        li 0,7
        mtctr 0
        la 9,[EMAIL PROTECTED](9)
        # Seeded with f12 + first entry, i.e. the leading coefficient is 1.
        fadd 13,12,0
        addi 9,9,8
.L54:
        lfd 0,0(9)
        addi 9,9,8
        fmadd 13,13,12,0
        bdnz .L54
        # f0 = f11 / f13; f31 = f1*f0 + f1; result f1 = f31 * loaded constant.
        fdiv 0,11,13
        lis 9,[EMAIL PROTECTED] # Ok
        fmadd 31,1,0,1
        lfd 0,[EMAIL PROTECTED](9)      # here
        fmul 1,31,0
        b .L1
.L7:
        # Log/sqrt path: f31 = sqrt(constant * log(f1)), then log(f31).
        bl log
1->     lis 9,[EMAIL PROTECTED]
2->     la 9,[EMAIL PROTECTED](9)
3->     lfd 0,0(9)
        fmul 1,1,0
        bl sqrt
        fmr 31,1
        bl log
1->     lis 9,[EMAIL PROTECTED]
2->     la 9,[EMAIL PROTECTED](9)
        # f1 = log(f31) / f31.
        fdiv 1,1,31
3->     lfd 0,0(9)
        fcmpu 0,31,0
        # f12 = f30 / f31 is the variable of the polynomials that follow.
        fdiv 12,30,31
        # f1 = f31 - log(f31)/f31.
        fsub 1,31,1
        # Not (f31 < f0): run the second pair of polynomial loops at .L19.
        bnl- 0,.L19
        # First pair of loops: 8 iterations, then 7 (second seeded f12 + entry).
        li 0,8
        mtctr 0
        lis 9,[EMAIL PROTECTED]
        lfd 13,[EMAIL PROTECTED](9)
        la 9,[EMAIL PROTECTED](9)
        addi 9,9,8
.L53:
        lfd 0,0(9)
        addi 9,9,8
        fmadd 13,13,12,0
        bdnz .L53
        lis 9,[EMAIL PROTECTED]
        fmul 11,12,13
        lfd 0,[EMAIL PROTECTED](9)
        li 0,7
        mtctr 0
        la 9,[EMAIL PROTECTED](9)
        fadd 13,12,0
        addi 9,9,8
.L52:
        lfd 0,0(9)
        addi 9,9,8
        fmadd 13,13,12,0
        bdnz .L52
        b .L56
.L19:
        # Second pair of loops: same instruction sequence as the first pair;
        # presumably other coefficient tables (operands scrubbed).
        li 0,8
        mtctr 0
        lis 9,[EMAIL PROTECTED]
        lfd 13,[EMAIL PROTECTED](9)
        la 9,[EMAIL PROTECTED](9)
        addi 9,9,8
.L51:
        lfd 0,0(9)
        addi 9,9,8
        fmadd 13,13,12,0
        bdnz .L51
        lis 9,[EMAIL PROTECTED]
        fmul 11,12,13
        lfd 0,[EMAIL PROTECTED](9)
        li 0,7
        mtctr 0
        la 9,[EMAIL PROTECTED](9)
        fadd 13,12,0
        addi 9,9,8
.L50:
        lfd 0,0(9)
        addi 9,9,8
        fmadd 13,13,12,0
        bdnz .L50
.L56:
        # Both pairs join here: f31 = f1 - f11/f13, negated when r31 != 0.
        fdiv 0,11,13
        cmpwi 0,31,0
        fsub 31,1,0
        beq- 0,.L42
        fneg 31,31
.L42:
        fmr 1,31
        b .L1
.L2:
        # Out-of-range argument.  If f31 does not compare equal to itself
        # (a NaN), return f31 + f31; otherwise continue at .L44.
        fcmpu 0,31,31
        beq- 0,.L44
        fadd 1,31,31
        b .L1
.L44:
        # r3 = pointer returned by __errno_location; the stores below write
        # through it.
        bl __errno_location
1->     lis 9,[EMAIL PROTECTED]
2->     la 9,[EMAIL PROTECTED](9)
3->     lfd 0,0(9)
        fcmpu 0,31,0
        bne+ 0,.L46
        # Argument equals this constant: store 34 through r3 (presumably
        # errno = ERANGE -- TODO confirm), set FPSCR bit 3 from inline asm,
        # and return the constant r9 is pointed at.
        li 0,34
        stw 0,0(3)
#APP
        mtfsb1 3
#NO_APP
        lis 9,[EMAIL PROTECTED]
        la 9,[EMAIL PROTECTED](9)
        b .L57
.L46:
        # Same test against another loaded constant, with the same handling.
1->     lis 9,[EMAIL PROTECTED]
2->     la 9,[EMAIL PROTECTED](9)
3->     lfd 0,0(9)
        fcmpu 0,31,0
        bne+ 0,.L48
        li 0,34
        stw 0,0(3)
#APP
        mtfsb1 3
#NO_APP
        lis 9,[EMAIL PROTECTED] # See below
        la 9,[EMAIL PROTECTED](9)
        b .L57
.L48:
        # Remaining case: store 33 through r3 (presumably errno = EDOM --
        # TODO confirm) and call feraiseexcept(0x20000000); that mask is
        # presumably FE_INVALID on this target -- verify.
        li 0,33
        stw 0,0(3)
        lis 3,0x2000
        bl feraiseexcept
        lis 9,[EMAIL PROTECTED] # See below
        la 9,[EMAIL PROTECTED](9)
.L57:
        lfd 1,0(9)      # Ok, returning the value pointed to by r9
# (It may not be the best implementation but it's an error path
# marked unlikely so I don't care very much)
.L1:
        # Epilogue: restore LR, r31, f30 and f31, release the frame, return.
        lwz 0,52(1)
        lwz 31,28(1)
        lfd 30,32(1)
        mtlr 0
        lfd 31,40(1)
        addi 1,1,48
        blr
        .size   ndtri, .-ndtri
        .ident  "GCC: (GNU) 3.3.2 20030908 (Debian prerelease)"

Reply via email to