On Thu, Oct 13, 2011 at 11:43:35AM -0700, Richard Henderson wrote: > On 10/13/2011 11:36 AM, David Edelsohn wrote: > > Are there testcases in the GCC testsuite that exercise these patterns? > > I thought the vectorizer would use them. E.g. gcc.dg/vect/vect-shift-3.c. > > I see that I should have added ppc to > check_effective_target_vect_shift_scalar, > though, to enable even more testing.
I tried this patch on trunk, and I'm not seeing any changes in the code. I'll include the test case and asm as attachments. This is due to the code I put into tree-vect-generic.c (in expand_vector_operations_1) that converts between vector shift by vector and vector shift by scalar. Note that AMD's XOP shifts are also vector/vector shifts. The code shifting by a scalar is pretty bad in that it recalculates the splat of the shift element every time in the loop, rather than doing the splat once before the loop. We also have the problem we've had for a couple of years that if the type is signed char or signed short, the compiler wants to promote the items to int and does this by several unpacks and repacks. -- Michael Meissner, IBM 5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA meiss...@linux.vnet.ibm.com fax +1 (978) 399-6899
/* Element-wise shift loops over three global arrays.

   The element type and array geometry are configurable at compile time:
     UNS   - optional signedness prefix (defaults to nothing),
     TYPE  - element type (defaults to int),
     SIZE  - number of elements per array (defaults to 1024),
     ALIGN - byte alignment requested for each array (defaults to 32).

   Every function reads b (and, for the *_vect variants, c) and stores the
   shifted result into a.  */

#include <stddef.h>
#include <stdlib.h>

#ifndef UNS
#define UNS
#endif

#ifndef TYPE
#define TYPE int
#endif

#ifndef SIZE
#define SIZE 1024
#endif

#ifndef ALIGN
#define ALIGN 32
#endif

/* a: destination, b: value being shifted, c: per-element shift counts.  */
UNS TYPE a[SIZE] __attribute__((__aligned__(ALIGN)));
UNS TYPE b[SIZE] __attribute__((__aligned__(ALIGN)));
UNS TYPE c[SIZE] __attribute__((__aligned__(ALIGN)));

/* a[k] = b[k] << 1 for every element (constant shift count).  */
void shift_left_1(void)
{
    size_t idx;

    for (idx = 0; idx < SIZE; ++idx) {
        a[idx] = b[idx] << 1;
    }
}

/* a[k] = b[k] << 2 for every element (constant shift count).  */
void shift_left_2(void)
{
    size_t idx;

    for (idx = 0; idx < SIZE; ++idx) {
        a[idx] = b[idx] << 2;
    }
}

/* a[k] = b[k] << n, where the same run-time count n is used for every
   element.  */
void shift_left_scalar(int n)
{
    size_t idx;

    for (idx = 0; idx < SIZE; ++idx) {
        a[idx] = b[idx] << n;
    }
}

/* a[k] = b[k] << c[k]: each element has its own shift count taken from c.
   The parameter n is not used by the body.  */
void shift_left_vect(int n)
{
    size_t idx;

    for (idx = 0; idx < SIZE; ++idx) {
        a[idx] = b[idx] << c[idx];
    }
}

/* a[k] = b[k] >> 1 for every element (constant shift count).  */
void shift_right_1(void)
{
    size_t idx;

    for (idx = 0; idx < SIZE; ++idx) {
        a[idx] = b[idx] >> 1;
    }
}

/* a[k] = b[k] >> 2 for every element (constant shift count).  */
void shift_right_2(void)
{
    size_t idx;

    for (idx = 0; idx < SIZE; ++idx) {
        a[idx] = b[idx] >> 2;
    }
}

/* a[k] = b[k] >> n, where the same run-time count n is used for every
   element.  */
void shift_right_scalar(int n)
{
    size_t idx;

    for (idx = 0; idx < SIZE; ++idx) {
        a[idx] = b[idx] >> n;
    }
}

/* a[k] = b[k] >> c[k]: each element has its own shift count taken from c.
   The parameter n is not used by the body.  */
void shift_right_vect(int n)
{
    size_t idx;

    for (idx = 0; idx < SIZE; ++idx) {
        a[idx] = b[idx] >> c[idx];
    }
}
.file "foo.c" .section ".toc","aw" .section ".text" .section ".toc","aw" .LC1: .tc a[TC],a .LC2: .tc b[TC],b .section ".text" .align 2 .p2align 4,,15 .globl shift_left_1 .section ".opd","aw" .align 3 shift_left_1: .quad .L.shift_left_1,.TOC.@tocbase,0 .previous .type shift_left_1, @function .L.shift_left_1: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,256 mtctr 0 addis 11,2,.LC1@toc@ha addis 9,2,.LC2@toc@ha ld 10,.LC1@toc@l(11) vspltisw 1,1 ld 11,.LC2@toc@l(9) li 9,0 .p2align 5,,31 .L2: lxvw4x 45,11,9 vslw 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L2 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_1,.-.L.shift_left_1 .section ".toc","aw" .set .LC3,.LC1 .set .LC4,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_left_2 .section ".opd","aw" .align 3 shift_left_2: .quad .L.shift_left_2,.TOC.@tocbase,0 .previous .type shift_left_2, @function .L.shift_left_2: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,256 mtctr 0 addis 11,2,.LC3@toc@ha addis 9,2,.LC4@toc@ha ld 10,.LC3@toc@l(11) vspltisw 1,2 ld 11,.LC4@toc@l(9) li 9,0 .p2align 5,,31 .L6: lxvw4x 45,11,9 vslw 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L6 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_2,.-.L.shift_left_2 .section ".toc","aw" .set .LC5,.LC1 .set .LC6,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_left_scalar .section ".opd","aw" .align 3 shift_left_scalar: .quad .L.shift_left_scalar,.TOC.@tocbase,0 .previous .type shift_left_scalar, @function .L.shift_left_scalar: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc000 mtvrsave 0 li 8,256 mtctr 8 addis 11,2,.LC5@toc@ha addis 9,2,.LC6@toc@ha ld 10,.LC5@toc@l(11) li 0,48 ld 11,.LC6@toc@l(9) li 9,0 .p2align 4,,15 .L9: stw 3,-32(1) addi 8,1,-80 lvewx 0,8,0 xxspltw 32,32,0 lxvw4x 33,11,9 vslw 0,1,0 stxvw4x 32,10,9 addi 9,9,16 bdnz .L9 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_scalar,.-.L.shift_left_scalar .section ".toc","aw" .set .LC7,.LC1 .set 
.LC8,.LC2 .LC9: .tc c[TC],c .section ".text" .align 2 .p2align 4,,15 .globl shift_left_vect .section ".opd","aw" .align 3 shift_left_vect: .quad .L.shift_left_vect,.TOC.@tocbase,0 .previous .type shift_left_vect, @function .L.shift_left_vect: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,256 mtctr 0 addis 10,2,.LC7@toc@ha addis 11,2,.LC8@toc@ha addis 9,2,.LC9@toc@ha ld 8,.LC7@toc@l(10) ld 10,.LC8@toc@l(11) ld 11,.LC9@toc@l(9) li 9,0 .p2align 5,,31 .L12: lxvw4x 33,10,9 lxvw4x 45,11,9 vslw 0,1,13 stxvw4x 32,8,9 addi 9,9,16 bdnz .L12 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_vect,.-.L.shift_left_vect .section ".toc","aw" .set .LC10,.LC1 .set .LC11,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_1 .section ".opd","aw" .align 3 shift_right_1: .quad .L.shift_right_1,.TOC.@tocbase,0 .previous .type shift_right_1, @function .L.shift_right_1: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,256 mtctr 0 addis 11,2,.LC10@toc@ha addis 9,2,.LC11@toc@ha ld 10,.LC10@toc@l(11) vspltisw 1,1 ld 11,.LC11@toc@l(9) li 9,0 .p2align 5,,31 .L15: lxvw4x 45,11,9 vsraw 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L15 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_1,.-.L.shift_right_1 .section ".toc","aw" .set .LC12,.LC1 .set .LC13,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_2 .section ".opd","aw" .align 3 shift_right_2: .quad .L.shift_right_2,.TOC.@tocbase,0 .previous .type shift_right_2, @function .L.shift_right_2: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,256 mtctr 0 addis 11,2,.LC12@toc@ha addis 9,2,.LC13@toc@ha ld 10,.LC12@toc@l(11) vspltisw 1,2 ld 11,.LC13@toc@l(9) li 9,0 .p2align 5,,31 .L18: lxvw4x 45,11,9 vsraw 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L18 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_2,.-.L.shift_right_2 .section ".toc","aw" .set .LC14,.LC1 .set .LC15,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl 
shift_right_scalar .section ".opd","aw" .align 3 shift_right_scalar: .quad .L.shift_right_scalar,.TOC.@tocbase,0 .previous .type shift_right_scalar, @function .L.shift_right_scalar: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc000 mtvrsave 0 li 8,256 mtctr 8 addis 11,2,.LC14@toc@ha addis 9,2,.LC15@toc@ha ld 10,.LC14@toc@l(11) li 0,48 ld 11,.LC15@toc@l(9) li 9,0 .p2align 4,,15 .L21: stw 3,-32(1) addi 8,1,-80 lvewx 0,8,0 xxspltw 32,32,0 lxvw4x 33,11,9 vsraw 0,1,0 stxvw4x 32,10,9 addi 9,9,16 bdnz .L21 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_scalar,.-.L.shift_right_scalar .section ".toc","aw" .set .LC16,.LC1 .set .LC17,.LC2 .set .LC18,.LC9 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_vect .section ".opd","aw" .align 3 shift_right_vect: .quad .L.shift_right_vect,.TOC.@tocbase,0 .previous .type shift_right_vect, @function .L.shift_right_vect: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,256 mtctr 0 addis 10,2,.LC16@toc@ha addis 11,2,.LC17@toc@ha addis 9,2,.LC18@toc@ha ld 8,.LC16@toc@l(10) ld 10,.LC17@toc@l(11) ld 11,.LC18@toc@l(9) li 9,0 .p2align 5,,31 .L24: lxvw4x 33,10,9 lxvw4x 45,11,9 vsraw 0,1,13 stxvw4x 32,8,9 addi 9,9,16 bdnz .L24 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_vect,.-.L.shift_right_vect .comm c,4096,32 .comm b,4096,32 .comm a,4096,32 .ident "GCC: (GNU) 4.7.0 20111014 (experimental)"
.file "foo.c" .section ".toc","aw" .section ".text" .section ".toc","aw" .LC1: .tc a[TC],a .LC2: .tc b[TC],b .section ".text" .align 2 .p2align 4,,15 .globl shift_left_1 .section ".opd","aw" .align 3 shift_left_1: .quad .L.shift_left_1,.TOC.@tocbase,0 .previous .type shift_left_1, @function .L.shift_left_1: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,256 mtctr 0 addis 11,2,.LC1@toc@ha addis 9,2,.LC2@toc@ha ld 10,.LC1@toc@l(11) vspltisw 1,1 ld 11,.LC2@toc@l(9) li 9,0 .p2align 5,,31 .L2: lxvw4x 45,11,9 vslw 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L2 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_1,.-.L.shift_left_1 .section ".toc","aw" .set .LC3,.LC1 .set .LC4,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_left_2 .section ".opd","aw" .align 3 shift_left_2: .quad .L.shift_left_2,.TOC.@tocbase,0 .previous .type shift_left_2, @function .L.shift_left_2: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,256 mtctr 0 addis 11,2,.LC3@toc@ha addis 9,2,.LC4@toc@ha ld 10,.LC3@toc@l(11) vspltisw 1,2 ld 11,.LC4@toc@l(9) li 9,0 .p2align 5,,31 .L6: lxvw4x 45,11,9 vslw 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L6 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_2,.-.L.shift_left_2 .section ".toc","aw" .set .LC5,.LC1 .set .LC6,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_left_scalar .section ".opd","aw" .align 3 shift_left_scalar: .quad .L.shift_left_scalar,.TOC.@tocbase,0 .previous .type shift_left_scalar, @function .L.shift_left_scalar: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc000 mtvrsave 0 li 8,256 mtctr 8 addis 11,2,.LC5@toc@ha addis 9,2,.LC6@toc@ha ld 10,.LC5@toc@l(11) li 0,48 ld 11,.LC6@toc@l(9) li 9,0 .p2align 4,,15 .L9: stw 3,-32(1) addi 8,1,-80 lvewx 0,8,0 xxspltw 32,32,0 lxvw4x 33,11,9 vslw 0,1,0 stxvw4x 32,10,9 addi 9,9,16 bdnz .L9 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_scalar,.-.L.shift_left_scalar .section ".toc","aw" .set .LC7,.LC1 .set 
.LC8,.LC2 .LC9: .tc c[TC],c .section ".text" .align 2 .p2align 4,,15 .globl shift_left_vect .section ".opd","aw" .align 3 shift_left_vect: .quad .L.shift_left_vect,.TOC.@tocbase,0 .previous .type shift_left_vect, @function .L.shift_left_vect: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,256 mtctr 0 addis 10,2,.LC7@toc@ha addis 11,2,.LC8@toc@ha addis 9,2,.LC9@toc@ha ld 8,.LC7@toc@l(10) ld 10,.LC8@toc@l(11) ld 11,.LC9@toc@l(9) li 9,0 .p2align 5,,31 .L12: lxvw4x 33,10,9 lxvw4x 45,11,9 vslw 0,1,13 stxvw4x 32,8,9 addi 9,9,16 bdnz .L12 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_vect,.-.L.shift_left_vect .section ".toc","aw" .set .LC10,.LC1 .set .LC11,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_1 .section ".opd","aw" .align 3 shift_right_1: .quad .L.shift_right_1,.TOC.@tocbase,0 .previous .type shift_right_1, @function .L.shift_right_1: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,256 mtctr 0 addis 11,2,.LC10@toc@ha addis 9,2,.LC11@toc@ha ld 10,.LC10@toc@l(11) vspltisw 1,1 ld 11,.LC11@toc@l(9) li 9,0 .p2align 5,,31 .L15: lxvw4x 45,11,9 vsrw 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L15 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_1,.-.L.shift_right_1 .section ".toc","aw" .set .LC12,.LC1 .set .LC13,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_2 .section ".opd","aw" .align 3 shift_right_2: .quad .L.shift_right_2,.TOC.@tocbase,0 .previous .type shift_right_2, @function .L.shift_right_2: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,256 mtctr 0 addis 11,2,.LC12@toc@ha addis 9,2,.LC13@toc@ha ld 10,.LC12@toc@l(11) vspltisw 1,2 ld 11,.LC13@toc@l(9) li 9,0 .p2align 5,,31 .L18: lxvw4x 45,11,9 vsrw 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L18 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_2,.-.L.shift_right_2 .section ".toc","aw" .set .LC14,.LC1 .set .LC15,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl 
shift_right_scalar .section ".opd","aw" .align 3 shift_right_scalar: .quad .L.shift_right_scalar,.TOC.@tocbase,0 .previous .type shift_right_scalar, @function .L.shift_right_scalar: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc000 mtvrsave 0 li 8,256 mtctr 8 addis 11,2,.LC14@toc@ha addis 9,2,.LC15@toc@ha ld 10,.LC14@toc@l(11) li 0,48 ld 11,.LC15@toc@l(9) li 9,0 .p2align 4,,15 .L21: stw 3,-32(1) addi 8,1,-80 lvewx 0,8,0 xxspltw 32,32,0 lxvw4x 33,11,9 vsrw 0,1,0 stxvw4x 32,10,9 addi 9,9,16 bdnz .L21 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_scalar,.-.L.shift_right_scalar .section ".toc","aw" .set .LC16,.LC1 .set .LC17,.LC2 .set .LC18,.LC9 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_vect .section ".opd","aw" .align 3 shift_right_vect: .quad .L.shift_right_vect,.TOC.@tocbase,0 .previous .type shift_right_vect, @function .L.shift_right_vect: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,256 mtctr 0 addis 10,2,.LC16@toc@ha addis 11,2,.LC17@toc@ha addis 9,2,.LC18@toc@ha ld 8,.LC16@toc@l(10) ld 10,.LC17@toc@l(11) ld 11,.LC18@toc@l(9) li 9,0 .p2align 5,,31 .L24: lxvw4x 33,10,9 lxvw4x 45,11,9 vsrw 0,1,13 stxvw4x 32,8,9 addi 9,9,16 bdnz .L24 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_vect,.-.L.shift_right_vect .comm c,4096,32 .comm b,4096,32 .comm a,4096,32 .ident "GCC: (GNU) 4.7.0 20111014 (experimental)"
.file "foo.c" .section ".toc","aw" .section ".text" .section ".toc","aw" .LC1: .tc b[TC],b .LC2: .tc a[TC],a .section ".text" .align 2 .p2align 4,,15 .globl shift_left_1 .section ".opd","aw" .align 3 shift_left_1: .quad .L.shift_left_1,.TOC.@tocbase,0 .previous .type shift_left_1, @function .L.shift_left_1: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,128 mtctr 0 addis 11,2,.LC1@toc@ha addis 9,2,.LC2@toc@ha ld 10,.LC1@toc@l(11) vspltisw 1,1 ld 11,.LC2@toc@l(9) li 9,0 .p2align 4,,15 .L2: lxvw4x 32,10,9 vupkhsh 13,0 vupklsh 0,0 vslw 13,13,1 vslw 0,0,1 vpkuwum 0,13,0 stxvw4x 32,11,9 addi 9,9,16 bdnz .L2 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_1,.-.L.shift_left_1 .section ".toc","aw" .set .LC3,.LC1 .set .LC4,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_left_2 .section ".opd","aw" .align 3 shift_left_2: .quad .L.shift_left_2,.TOC.@tocbase,0 .previous .type shift_left_2, @function .L.shift_left_2: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,128 mtctr 0 addis 11,2,.LC3@toc@ha addis 9,2,.LC4@toc@ha ld 10,.LC3@toc@l(11) vspltisw 1,2 ld 11,.LC4@toc@l(9) li 9,0 .p2align 4,,15 .L6: lxvw4x 32,10,9 vupkhsh 13,0 vupklsh 0,0 vslw 13,13,1 vslw 0,0,1 vpkuwum 0,13,0 stxvw4x 32,11,9 addi 9,9,16 bdnz .L6 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_2,.-.L.shift_left_2 .section ".toc","aw" .set .LC5,.LC1 .set .LC6,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_left_scalar .section ".opd","aw" .align 3 shift_left_scalar: .quad .L.shift_left_scalar,.TOC.@tocbase,0 .previous .type shift_left_scalar, @function .L.shift_left_scalar: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc00c mtvrsave 0 li 7,128 mtctr 7 addis 11,2,.LC5@toc@ha addis 9,2,.LC6@toc@ha ld 10,.LC5@toc@l(11) li 8,48 ld 11,.LC6@toc@l(9) li 0,64 li 9,0 .p2align 4,,15 .L9: stw 3,-48(1) stw 3,-32(1) addi 7,1,-96 lvewx 1,7,8 lvewx 0,7,0 xxspltw 33,33,0 xxspltw 32,32,0 lxvw4x 45,10,9 vupkhsh 12,13 vupklsh 13,13 vslw 1,12,1 vslw 
0,13,0 vpkuwum 0,1,0 stxvw4x 32,11,9 addi 9,9,16 bdnz .L9 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_scalar,.-.L.shift_left_scalar .section ".toc","aw" .set .LC7,.LC1 .LC8: .tc c[TC],c .set .LC9,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_left_vect .section ".opd","aw" .align 3 shift_left_vect: .quad .L.shift_left_vect,.TOC.@tocbase,0 .previous .type shift_left_vect, @function .L.shift_left_vect: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc00c mtvrsave 0 li 0,128 mtctr 0 addis 10,2,.LC7@toc@ha addis 11,2,.LC8@toc@ha addis 9,2,.LC9@toc@ha ld 8,.LC7@toc@l(10) ld 10,.LC8@toc@l(11) ld 11,.LC9@toc@l(9) li 9,0 .p2align 4,,15 .L12: lxvw4x 33,8,9 lxvw4x 32,10,9 vupkhsh 12,1 vupkhsh 13,0 vupklsh 1,1 vupklsh 0,0 vslw 13,12,13 vslw 0,1,0 vpkuwum 0,13,0 stxvw4x 32,11,9 addi 9,9,16 bdnz .L12 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_vect,.-.L.shift_left_vect .section ".toc","aw" .set .LC10,.LC2 .set .LC11,.LC1 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_1 .section ".opd","aw" .align 3 shift_right_1: .quad .L.shift_right_1,.TOC.@tocbase,0 .previous .type shift_right_1, @function .L.shift_right_1: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,128 mtctr 0 addis 11,2,.LC10@toc@ha addis 9,2,.LC11@toc@ha ld 10,.LC10@toc@l(11) vspltish 1,1 ld 11,.LC11@toc@l(9) li 9,0 .p2align 5,,31 .L15: lxvw4x 45,11,9 vsrah 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L15 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_1,.-.L.shift_right_1 .section ".toc","aw" .set .LC12,.LC2 .set .LC13,.LC1 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_2 .section ".opd","aw" .align 3 shift_right_2: .quad .L.shift_right_2,.TOC.@tocbase,0 .previous .type shift_right_2, @function .L.shift_right_2: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,128 mtctr 0 addis 11,2,.LC12@toc@ha addis 9,2,.LC13@toc@ha ld 10,.LC12@toc@l(11) vspltish 1,2 ld 11,.LC13@toc@l(9) li 9,0 .p2align 
5,,31 .L18: lxvw4x 45,11,9 vsrah 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L18 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_2,.-.L.shift_right_2 .section ".toc","aw" .set .LC14,.LC1 .set .LC15,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_scalar .section ".opd","aw" .align 3 shift_right_scalar: .quad .L.shift_right_scalar,.TOC.@tocbase,0 .previous .type shift_right_scalar, @function .L.shift_right_scalar: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc00c mtvrsave 0 li 7,128 mtctr 7 addis 11,2,.LC14@toc@ha addis 9,2,.LC15@toc@ha ld 10,.LC14@toc@l(11) li 8,48 ld 11,.LC15@toc@l(9) li 0,64 li 9,0 .p2align 4,,15 .L21: stw 3,-48(1) stw 3,-32(1) addi 7,1,-96 lvewx 1,7,8 lvewx 0,7,0 xxspltw 33,33,0 xxspltw 32,32,0 lxvw4x 45,10,9 vupkhsh 12,13 vupklsh 13,13 vsraw 1,12,1 vsraw 0,13,0 vpkuwum 0,1,0 stxvw4x 32,11,9 addi 9,9,16 bdnz .L21 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_scalar,.-.L.shift_right_scalar .section ".toc","aw" .set .LC16,.LC1 .set .LC17,.LC8 .set .LC18,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_vect .section ".opd","aw" .align 3 shift_right_vect: .quad .L.shift_right_vect,.TOC.@tocbase,0 .previous .type shift_right_vect, @function .L.shift_right_vect: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc00c mtvrsave 0 li 0,128 mtctr 0 addis 10,2,.LC16@toc@ha addis 11,2,.LC17@toc@ha addis 9,2,.LC18@toc@ha ld 8,.LC16@toc@l(10) ld 10,.LC17@toc@l(11) ld 11,.LC18@toc@l(9) li 9,0 .p2align 4,,15 .L24: lxvw4x 33,8,9 lxvw4x 32,10,9 vupkhsh 12,1 vupkhsh 13,0 vupklsh 1,1 vupklsh 0,0 vsraw 13,12,13 vsraw 0,1,0 vpkuwum 0,13,0 stxvw4x 32,11,9 addi 9,9,16 bdnz .L24 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_vect,.-.L.shift_right_vect .comm c,2048,32 .comm b,2048,32 .comm a,2048,32 .ident "GCC: (GNU) 4.7.0 20111014 (experimental)"
.file "foo.c" .section ".toc","aw" .section ".text" .section ".toc","aw" .LC1: .tc a[TC],a .LC2: .tc b[TC],b .section ".text" .align 2 .p2align 4,,15 .globl shift_left_1 .section ".opd","aw" .align 3 shift_left_1: .quad .L.shift_left_1,.TOC.@tocbase,0 .previous .type shift_left_1, @function .L.shift_left_1: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,128 mtctr 0 addis 11,2,.LC1@toc@ha addis 9,2,.LC2@toc@ha ld 10,.LC1@toc@l(11) vspltish 1,1 ld 11,.LC2@toc@l(9) li 9,0 .p2align 5,,31 .L2: lxvw4x 45,11,9 vslh 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L2 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_1,.-.L.shift_left_1 .section ".toc","aw" .set .LC3,.LC1 .set .LC4,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_left_2 .section ".opd","aw" .align 3 shift_left_2: .quad .L.shift_left_2,.TOC.@tocbase,0 .previous .type shift_left_2, @function .L.shift_left_2: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,128 mtctr 0 addis 11,2,.LC3@toc@ha addis 9,2,.LC4@toc@ha ld 10,.LC3@toc@l(11) vspltish 1,2 ld 11,.LC4@toc@l(9) li 9,0 .p2align 5,,31 .L6: lxvw4x 45,11,9 vslh 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L6 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_2,.-.L.shift_left_2 .section ".toc","aw" .set .LC5,.LC2 .set .LC6,.LC1 .LC8: .tc .LC7[TC],.LC7 .LC10: .tc .LC9[TC],.LC9 .section ".text" .align 2 .p2align 4,,15 .globl shift_left_scalar .section ".opd","aw" .align 3 shift_left_scalar: .quad .L.shift_left_scalar,.TOC.@tocbase,0 .previous .type shift_left_scalar, @function .L.shift_left_scalar: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc07c mtvrsave 0 li 7,128 mtctr 7 addis 11,2,.LC8@toc@ha addis 9,2,.LC10@toc@ha ld 11,.LC8@toc@l(11) addis 10,2,.LC5@toc@ha ld 9,.LC10@toc@l(9) vspltisw 12,0 addis 8,2,.LC6@toc@ha ld 10,.LC5@toc@l(10) lxvw4x 41,0,11 li 0,64 ld 11,.LC6@toc@l(8) li 8,48 lxvw4x 42,0,9 li 9,0 .p2align 4,,15 .L9: stw 3,-48(1) stw 3,-32(1) addi 7,1,-96 lvewx 1,7,8 lvewx 0,7,0 xxspltw 33,33,0 
xxspltw 32,32,0 lxvw4x 45,10,9 vperm 11,13,12,9 vperm 13,13,12,10 vslw 1,11,1 vslw 0,13,0 vpkuwum 0,1,0 stxvw4x 32,11,9 addi 9,9,16 bdnz .L9 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_scalar,.-.L.shift_left_scalar .section ".toc","aw" .set .LC11,.LC2 .LC12: .tc c[TC],c .set .LC13,.LC1 .set .LC14,.LC8 .set .LC15,.LC10 .section ".text" .align 2 .p2align 4,,15 .globl shift_left_vect .section ".opd","aw" .align 3 shift_left_vect: .quad .L.shift_left_vect,.TOC.@tocbase,0 .previous .type shift_left_vect, @function .L.shift_left_vect: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc07c mtvrsave 0 addis 11,2,.LC14@toc@ha li 0,128 ld 11,.LC14@toc@l(11) addis 9,2,.LC15@toc@ha mtctr 0 ld 9,.LC15@toc@l(9) addis 8,2,.LC11@toc@ha addis 10,2,.LC12@toc@ha addis 7,2,.LC13@toc@ha lxvw4x 43,0,11 ld 8,.LC11@toc@l(8) vspltisw 0,0 ld 10,.LC12@toc@l(10) ld 11,.LC13@toc@l(7) lxvw4x 44,0,9 li 9,0 .p2align 4,,15 .L12: lxvw4x 45,8,9 lxvw4x 33,10,9 vperm 9,13,0,11 vperm 10,1,0,11 vperm 13,13,0,12 vperm 1,1,0,12 vslw 10,9,10 vslw 1,13,1 vpkuwum 1,10,1 stxvw4x 33,11,9 addi 9,9,16 bdnz .L12 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_vect,.-.L.shift_left_vect .section ".toc","aw" .set .LC16,.LC1 .set .LC17,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_1 .section ".opd","aw" .align 3 shift_right_1: .quad .L.shift_right_1,.TOC.@tocbase,0 .previous .type shift_right_1, @function .L.shift_right_1: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,128 mtctr 0 addis 11,2,.LC16@toc@ha addis 9,2,.LC17@toc@ha ld 10,.LC16@toc@l(11) vspltish 1,1 ld 11,.LC17@toc@l(9) li 9,0 .p2align 5,,31 .L15: lxvw4x 45,11,9 vsrh 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L15 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_1,.-.L.shift_right_1 .section ".toc","aw" .set .LC18,.LC1 .set .LC19,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_2 .section ".opd","aw" .align 3 shift_right_2: .quad 
.L.shift_right_2,.TOC.@tocbase,0 .previous .type shift_right_2, @function .L.shift_right_2: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,128 mtctr 0 addis 11,2,.LC18@toc@ha addis 9,2,.LC19@toc@ha ld 10,.LC18@toc@l(11) vspltish 1,2 ld 11,.LC19@toc@l(9) li 9,0 .p2align 5,,31 .L18: lxvw4x 45,11,9 vsrh 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L18 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_2,.-.L.shift_right_2 .section ".toc","aw" .set .LC20,.LC2 .set .LC21,.LC1 .set .LC22,.LC8 .set .LC23,.LC10 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_scalar .section ".opd","aw" .align 3 shift_right_scalar: .quad .L.shift_right_scalar,.TOC.@tocbase,0 .previous .type shift_right_scalar, @function .L.shift_right_scalar: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc07c mtvrsave 0 li 7,128 mtctr 7 addis 11,2,.LC22@toc@ha addis 9,2,.LC23@toc@ha ld 11,.LC22@toc@l(11) addis 10,2,.LC20@toc@ha ld 9,.LC23@toc@l(9) vspltisw 12,0 addis 8,2,.LC21@toc@ha ld 10,.LC20@toc@l(10) lxvw4x 41,0,11 li 0,64 ld 11,.LC21@toc@l(8) li 8,48 lxvw4x 42,0,9 li 9,0 .p2align 4,,15 .L21: stw 3,-48(1) stw 3,-32(1) addi 7,1,-96 lvewx 1,7,8 lvewx 0,7,0 xxspltw 33,33,0 xxspltw 32,32,0 lxvw4x 45,10,9 vperm 11,13,12,9 vperm 13,13,12,10 vsraw 1,11,1 vsraw 0,13,0 vpkuwum 0,1,0 stxvw4x 32,11,9 addi 9,9,16 bdnz .L21 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_scalar,.-.L.shift_right_scalar .section ".toc","aw" .set .LC24,.LC2 .set .LC25,.LC12 .set .LC26,.LC1 .set .LC27,.LC8 .set .LC28,.LC10 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_vect .section ".opd","aw" .align 3 shift_right_vect: .quad .L.shift_right_vect,.TOC.@tocbase,0 .previous .type shift_right_vect, @function .L.shift_right_vect: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc07c mtvrsave 0 addis 11,2,.LC27@toc@ha li 0,128 ld 11,.LC27@toc@l(11) addis 9,2,.LC28@toc@ha mtctr 0 ld 9,.LC28@toc@l(9) addis 8,2,.LC24@toc@ha addis 10,2,.LC25@toc@ha addis 7,2,.LC26@toc@ha lxvw4x 43,0,11 ld 
8,.LC24@toc@l(8) vspltisw 0,0 ld 10,.LC25@toc@l(10) ld 11,.LC26@toc@l(7) lxvw4x 44,0,9 li 9,0 .p2align 4,,15 .L24: lxvw4x 45,8,9 lxvw4x 33,10,9 vperm 9,13,0,11 vperm 10,1,0,11 vperm 13,13,0,12 vperm 1,1,0,12 vsraw 10,9,10 vsraw 1,13,1 vpkuwum 1,10,1 stxvw4x 33,11,9 addi 9,9,16 bdnz .L24 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_vect,.-.L.shift_right_vect .comm c,2048,32 .comm b,2048,32 .comm a,2048,32 .section .rodata.cst16,"aM",@progbits,16 .align 4 .LC7: .byte 16 .byte 17 .byte 0 .byte 1 .byte 16 .byte 17 .byte 2 .byte 3 .byte 16 .byte 17 .byte 4 .byte 5 .byte 16 .byte 17 .byte 6 .byte 7 .LC9: .byte 16 .byte 17 .byte 8 .byte 9 .byte 16 .byte 17 .byte 10 .byte 11 .byte 16 .byte 17 .byte 12 .byte 13 .byte 16 .byte 17 .byte 14 .byte 15 .ident "GCC: (GNU) 4.7.0 20111014 (experimental)"
.file "foo.c" .section ".toc","aw" .section ".text" .section ".toc","aw" .LC1: .tc b[TC],b .LC2: .tc a[TC],a .section ".text" .align 2 .p2align 4,,15 .globl shift_left_1 .section ".opd","aw" .align 3 shift_left_1: .quad .L.shift_left_1,.TOC.@tocbase,0 .previous .type shift_left_1, @function .L.shift_left_1: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc01c mtvrsave 0 li 0,64 mtctr 0 addis 11,2,.LC1@toc@ha addis 9,2,.LC2@toc@ha ld 10,.LC1@toc@l(11) vspltisw 0,1 ld 11,.LC2@toc@l(9) li 9,0 .p2align 4,,15 .L2: lxvw4x 33,10,9 vupkhsb 13,1 vupklsb 1,1 vupkhsh 11,13 vupkhsh 12,1 vupklsh 13,13 vupklsh 1,1 vslw 11,11,0 vslw 13,13,0 vslw 12,12,0 vslw 1,1,0 vpkuwum 13,11,13 vpkuwum 1,12,1 vpkuhum 1,13,1 stxvw4x 33,11,9 addi 9,9,16 bdnz .L2 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_1,.-.L.shift_left_1 .section ".toc","aw" .set .LC3,.LC1 .set .LC4,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_left_2 .section ".opd","aw" .align 3 shift_left_2: .quad .L.shift_left_2,.TOC.@tocbase,0 .previous .type shift_left_2, @function .L.shift_left_2: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc01c mtvrsave 0 li 0,64 mtctr 0 addis 11,2,.LC3@toc@ha addis 9,2,.LC4@toc@ha ld 10,.LC3@toc@l(11) vspltisw 0,2 ld 11,.LC4@toc@l(9) li 9,0 .p2align 4,,15 .L6: lxvw4x 33,10,9 vupkhsb 13,1 vupklsb 1,1 vupkhsh 11,13 vupkhsh 12,1 vupklsh 13,13 vupklsh 1,1 vslw 11,11,0 vslw 13,13,0 vslw 12,12,0 vslw 1,1,0 vpkuwum 13,11,13 vpkuwum 1,12,1 vpkuhum 1,13,1 stxvw4x 33,11,9 addi 9,9,16 bdnz .L6 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_2,.-.L.shift_left_2 .section ".toc","aw" .set .LC5,.LC1 .set .LC6,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_left_scalar .section ".opd","aw" .align 3 shift_left_scalar: .quad .L.shift_left_scalar,.TOC.@tocbase,0 .previous .type shift_left_scalar, @function .L.shift_left_scalar: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc0fc mtvrsave 0 li 7,64 mtctr 7 addis 11,2,.LC5@toc@ha addis 9,2,.LC6@toc@ha ld 10,.LC5@toc@l(11) 
li 6,48 ld 11,.LC6@toc@l(9) li 8,80 li 9,0 li 0,96 .p2align 4,,15 .L9: stw 3,-80(1) stw 3,-64(1) stw 3,-48(1) stw 3,-32(1) addi 5,1,-128 lvewx 12,5,6 lvewx 13,5,7 lvewx 1,5,8 lvewx 0,5,0 xxspltw 44,44,0 xxspltw 45,45,0 xxspltw 33,33,0 xxspltw 32,32,0 lxvw4x 43,10,9 vupkhsb 10,11 vupklsb 11,11 vupkhsh 8,10 vupkhsh 9,11 vupklsh 10,10 vupklsh 11,11 vslw 12,8,12 vslw 13,10,13 vslw 1,9,1 vslw 0,11,0 vpkuwum 13,12,13 vpkuwum 0,1,0 vpkuhum 0,13,0 stxvw4x 32,11,9 addi 9,9,16 bdnz .L9 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_scalar,.-.L.shift_left_scalar .section ".toc","aw" .set .LC7,.LC1 .LC8: .tc c[TC],c .set .LC9,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_left_vect .section ".opd","aw" .align 3 shift_left_vect: .quad .L.shift_left_vect,.TOC.@tocbase,0 .previous .type shift_left_vect, @function .L.shift_left_vect: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc0fc mtvrsave 0 li 0,64 mtctr 0 addis 10,2,.LC7@toc@ha addis 11,2,.LC8@toc@ha addis 9,2,.LC9@toc@ha ld 8,.LC7@toc@l(10) ld 10,.LC8@toc@l(11) ld 11,.LC9@toc@l(9) li 9,0 .p2align 4,,15 .L12: lxvw4x 33,8,9 lxvw4x 32,10,9 vupkhsb 12,1 vupkhsb 13,0 vupklsb 1,1 vupklsb 0,0 vupkhsh 8,12 vupkhsh 10,13 vupkhsh 9,1 vupkhsh 11,0 vupklsh 12,12 vupklsh 13,13 vupklsh 1,1 vupklsh 0,0 vslw 10,8,10 vslw 13,12,13 vslw 11,9,11 vslw 0,1,0 vpkuwum 13,10,13 vpkuwum 0,11,0 vpkuhum 0,13,0 stxvw4x 32,11,9 addi 9,9,16 bdnz .L12 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_vect,.-.L.shift_left_vect .section ".toc","aw" .set .LC10,.LC2 .set .LC11,.LC1 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_1 .section ".opd","aw" .align 3 shift_right_1: .quad .L.shift_right_1,.TOC.@tocbase,0 .previous .type shift_right_1, @function .L.shift_right_1: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,64 mtctr 0 addis 11,2,.LC10@toc@ha addis 9,2,.LC11@toc@ha ld 10,.LC10@toc@l(11) vspltisb 1,1 ld 11,.LC11@toc@l(9) li 9,0 .p2align 5,,31 .L15: lxvw4x 45,11,9 vsrab 0,13,1 
stxvw4x 32,10,9 addi 9,9,16 bdnz .L15 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_1,.-.L.shift_right_1 .section ".toc","aw" .set .LC12,.LC2 .set .LC13,.LC1 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_2 .section ".opd","aw" .align 3 shift_right_2: .quad .L.shift_right_2,.TOC.@tocbase,0 .previous .type shift_right_2, @function .L.shift_right_2: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,64 mtctr 0 addis 11,2,.LC12@toc@ha addis 9,2,.LC13@toc@ha ld 10,.LC12@toc@l(11) vspltisb 1,2 ld 11,.LC13@toc@l(9) li 9,0 .p2align 5,,31 .L18: lxvw4x 45,11,9 vsrab 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L18 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_2,.-.L.shift_right_2 .section ".toc","aw" .set .LC14,.LC1 .set .LC15,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_scalar .section ".opd","aw" .align 3 shift_right_scalar: .quad .L.shift_right_scalar,.TOC.@tocbase,0 .previous .type shift_right_scalar, @function .L.shift_right_scalar: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc0fc mtvrsave 0 li 7,64 mtctr 7 addis 11,2,.LC14@toc@ha addis 9,2,.LC15@toc@ha ld 10,.LC14@toc@l(11) li 6,48 ld 11,.LC15@toc@l(9) li 8,80 li 9,0 li 0,96 .p2align 4,,15 .L21: stw 3,-80(1) stw 3,-64(1) stw 3,-48(1) stw 3,-32(1) addi 5,1,-128 lvewx 12,5,6 lvewx 13,5,7 lvewx 1,5,8 lvewx 0,5,0 xxspltw 44,44,0 xxspltw 45,45,0 xxspltw 33,33,0 xxspltw 32,32,0 lxvw4x 43,10,9 vupkhsb 10,11 vupklsb 11,11 vupkhsh 8,10 vupkhsh 9,11 vupklsh 10,10 vupklsh 11,11 vsraw 12,8,12 vsraw 13,10,13 vsraw 1,9,1 vsraw 0,11,0 vpkuwum 13,12,13 vpkuwum 0,1,0 vpkuhum 0,13,0 stxvw4x 32,11,9 addi 9,9,16 bdnz .L21 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_scalar,.-.L.shift_right_scalar .section ".toc","aw" .set .LC16,.LC1 .set .LC17,.LC8 .set .LC18,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_vect .section ".opd","aw" .align 3 shift_right_vect: .quad .L.shift_right_vect,.TOC.@tocbase,0 
.previous .type shift_right_vect, @function .L.shift_right_vect: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc0fc mtvrsave 0 li 0,64 mtctr 0 addis 10,2,.LC16@toc@ha addis 11,2,.LC17@toc@ha addis 9,2,.LC18@toc@ha ld 8,.LC16@toc@l(10) ld 10,.LC17@toc@l(11) ld 11,.LC18@toc@l(9) li 9,0 .p2align 4,,15 .L24: lxvw4x 33,8,9 lxvw4x 32,10,9 vupkhsb 12,1 vupkhsb 13,0 vupklsb 1,1 vupklsb 0,0 vupkhsh 8,12 vupkhsh 10,13 vupkhsh 9,1 vupkhsh 11,0 vupklsh 12,12 vupklsh 13,13 vupklsh 1,1 vupklsh 0,0 vsraw 10,8,10 vsraw 13,12,13 vsraw 11,9,11 vsraw 0,1,0 vpkuwum 13,10,13 vpkuwum 0,11,0 vpkuhum 0,13,0 stxvw4x 32,11,9 addi 9,9,16 bdnz .L24 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_vect,.-.L.shift_right_vect .comm c,1024,32 .comm b,1024,32 .comm a,1024,32 .ident "GCC: (GNU) 4.7.0 20111014 (experimental)"
.file "foo.c" .section ".toc","aw" .section ".text" .section ".toc","aw" .LC1: .tc a[TC],a .LC2: .tc b[TC],b .section ".text" .align 2 .p2align 4,,15 .globl shift_left_1 .section ".opd","aw" .align 3 shift_left_1: .quad .L.shift_left_1,.TOC.@tocbase,0 .previous .type shift_left_1, @function .L.shift_left_1: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,64 mtctr 0 addis 11,2,.LC1@toc@ha addis 9,2,.LC2@toc@ha ld 10,.LC1@toc@l(11) vspltisb 1,1 ld 11,.LC2@toc@l(9) li 9,0 .p2align 5,,31 .L2: lxvw4x 45,11,9 vslb 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L2 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_1,.-.L.shift_left_1 .section ".toc","aw" .set .LC3,.LC1 .set .LC4,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_left_2 .section ".opd","aw" .align 3 shift_left_2: .quad .L.shift_left_2,.TOC.@tocbase,0 .previous .type shift_left_2, @function .L.shift_left_2: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,64 mtctr 0 addis 11,2,.LC3@toc@ha addis 9,2,.LC4@toc@ha ld 10,.LC3@toc@l(11) vspltisb 1,2 ld 11,.LC4@toc@l(9) li 9,0 .p2align 5,,31 .L6: lxvw4x 45,11,9 vslb 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L6 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_2,.-.L.shift_left_2 .section ".toc","aw" .set .LC5,.LC2 .LC7: .tc .LC6[TC],.LC6 .LC9: .tc .LC8[TC],.LC8 .set .LC10,.LC1 .LC12: .tc .LC11[TC],.LC11 .LC14: .tc .LC13[TC],.LC13 .section ".text" .align 2 .p2align 4,,15 .globl shift_left_scalar .section ".opd","aw" .align 3 shift_left_scalar: .quad .L.shift_left_scalar,.TOC.@tocbase,0 .previous .type shift_left_scalar, @function .L.shift_left_scalar: mfvrsave 0 stw 0,-4(1) oris 0,0,0xfffc mtvrsave 0 addis 11,2,.LC12@toc@ha ld 11,.LC12@toc@l(11) addis 7,2,.LC10@toc@ha addis 8,2,.LC7@toc@ha addis 10,2,.LC9@toc@ha addis 9,2,.LC14@toc@ha ld 8,.LC7@toc@l(8) lxvw4x 39,0,11 ld 11,.LC10@toc@l(7) li 7,64 addis 6,2,.LC5@toc@ha mtctr 7 ld 10,.LC9@toc@l(10) ld 9,.LC14@toc@l(9) li 0,96 vspltish 6,0 lxvw4x 34,0,8 
li 8,80 vspltisw 0,0 lxvw4x 35,0,10 lxvw4x 40,0,9 ld 10,.LC5@toc@l(6) li 9,0 li 6,48 .p2align 4,,15 .L9: stw 3,-80(1) stw 3,-64(1) stw 3,-48(1) stw 3,-32(1) addi 5,1,-128 lvewx 11,5,6 lvewx 12,5,7 lvewx 13,5,8 lvewx 1,5,0 xxspltw 43,43,0 xxspltw 44,44,0 xxspltw 45,45,0 xxspltw 33,33,0 lxvw4x 42,10,9 vperm 9,10,6,2 vperm 10,10,6,3 vperm 4,9,0,7 vperm 5,10,0,7 vperm 9,9,0,8 vperm 10,10,0,8 vslw 11,4,11 vslw 12,9,12 vslw 13,5,13 vslw 1,10,1 vpkuwum 12,11,12 vpkuwum 1,13,1 vpkuhum 1,12,1 stxvw4x 33,11,9 addi 9,9,16 bdnz .L9 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_scalar,.-.L.shift_left_scalar .section ".toc","aw" .set .LC15,.LC2 .set .LC16,.LC7 .set .LC17,.LC9 .LC18: .tc c[TC],c .set .LC19,.LC1 .set .LC20,.LC12 .set .LC21,.LC14 .section ".text" .align 2 .p2align 4,,15 .globl shift_left_vect .section ".opd","aw" .align 3 shift_left_vect: .quad .L.shift_left_vect,.TOC.@tocbase,0 .previous .type shift_left_vect, @function .L.shift_left_vect: mfvrsave 0 stw 0,-4(1) oris 0,0,0xfffc mtvrsave 0 addis 8,2,.LC16@toc@ha addis 10,2,.LC17@toc@ha addis 11,2,.LC20@toc@ha ld 8,.LC16@toc@l(8) li 0,64 ld 10,.LC17@toc@l(10) ld 11,.LC20@toc@l(11) addis 9,2,.LC21@toc@ha mtctr 0 ld 9,.LC21@toc@l(9) addis 5,2,.LC15@toc@ha lxvw4x 38,0,8 addis 6,2,.LC18@toc@ha addis 7,2,.LC19@toc@ha lxvw4x 39,0,10 lxvw4x 45,0,11 vspltish 12,0 ld 8,.LC15@toc@l(5) ld 10,.LC18@toc@l(6) vspltisw 0,0 ld 11,.LC19@toc@l(7) lxvw4x 33,0,9 li 9,0 .p2align 4,,15 .L12: lxvw4x 42,8,9 lxvw4x 43,10,9 vperm 8,10,12,6 vperm 9,11,12,6 vperm 10,10,12,7 vperm 11,11,12,7 vperm 2,8,0,13 vperm 4,9,0,13 vperm 3,10,0,13 vperm 5,11,0,13 vperm 8,8,0,1 vperm 9,9,0,1 vperm 10,10,0,1 vperm 11,11,0,1 vslw 4,2,4 vslw 9,8,9 vslw 5,3,5 vslw 11,10,11 vpkuwum 9,4,9 vpkuwum 11,5,11 vpkuhum 11,9,11 stxvw4x 43,11,9 addi 9,9,16 bdnz .L12 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_left_vect,.-.L.shift_left_vect .section ".toc","aw" .set .LC22,.LC1 .set .LC23,.LC2 .section ".text" .align 
2 .p2align 4,,15 .globl shift_right_1 .section ".opd","aw" .align 3 shift_right_1: .quad .L.shift_right_1,.TOC.@tocbase,0 .previous .type shift_right_1, @function .L.shift_right_1: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,64 mtctr 0 addis 11,2,.LC22@toc@ha addis 9,2,.LC23@toc@ha ld 10,.LC22@toc@l(11) vspltisb 1,1 ld 11,.LC23@toc@l(9) li 9,0 .p2align 5,,31 .L15: lxvw4x 45,11,9 vsrb 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L15 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_1,.-.L.shift_right_1 .section ".toc","aw" .set .LC24,.LC1 .set .LC25,.LC2 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_2 .section ".opd","aw" .align 3 shift_right_2: .quad .L.shift_right_2,.TOC.@tocbase,0 .previous .type shift_right_2, @function .L.shift_right_2: mfvrsave 0 stw 0,-4(1) oris 0,0,0xc004 mtvrsave 0 li 0,64 mtctr 0 addis 11,2,.LC24@toc@ha addis 9,2,.LC25@toc@ha ld 10,.LC24@toc@l(11) vspltisb 1,2 ld 11,.LC25@toc@l(9) li 9,0 .p2align 5,,31 .L18: lxvw4x 45,11,9 vsrb 0,13,1 stxvw4x 32,10,9 addi 9,9,16 bdnz .L18 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_2,.-.L.shift_right_2 .section ".toc","aw" .set .LC26,.LC2 .set .LC27,.LC7 .set .LC28,.LC9 .set .LC29,.LC1 .set .LC30,.LC12 .set .LC31,.LC14 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_scalar .section ".opd","aw" .align 3 shift_right_scalar: .quad .L.shift_right_scalar,.TOC.@tocbase,0 .previous .type shift_right_scalar, @function .L.shift_right_scalar: mfvrsave 0 stw 0,-4(1) oris 0,0,0xfffc mtvrsave 0 addis 11,2,.LC30@toc@ha ld 11,.LC30@toc@l(11) addis 7,2,.LC29@toc@ha addis 8,2,.LC27@toc@ha addis 10,2,.LC28@toc@ha addis 9,2,.LC31@toc@ha ld 8,.LC27@toc@l(8) lxvw4x 39,0,11 ld 11,.LC29@toc@l(7) li 7,64 addis 6,2,.LC26@toc@ha mtctr 7 ld 10,.LC28@toc@l(10) ld 9,.LC31@toc@l(9) li 0,96 vspltish 6,0 lxvw4x 34,0,8 li 8,80 vspltisw 0,0 lxvw4x 35,0,10 lxvw4x 40,0,9 ld 10,.LC26@toc@l(6) li 9,0 li 6,48 .p2align 4,,15 .L21: stw 3,-80(1) stw 
3,-64(1) stw 3,-48(1) stw 3,-32(1) addi 5,1,-128 lvewx 11,5,6 lvewx 12,5,7 lvewx 13,5,8 lvewx 1,5,0 xxspltw 43,43,0 xxspltw 44,44,0 xxspltw 45,45,0 xxspltw 33,33,0 lxvw4x 42,10,9 vperm 9,10,6,2 vperm 10,10,6,3 vperm 4,9,0,7 vperm 5,10,0,7 vperm 9,9,0,8 vperm 10,10,0,8 vsraw 11,4,11 vsraw 12,9,12 vsraw 13,5,13 vsraw 1,10,1 vpkuwum 12,11,12 vpkuwum 1,13,1 vpkuhum 1,12,1 stxvw4x 33,11,9 addi 9,9,16 bdnz .L21 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_scalar,.-.L.shift_right_scalar .section ".toc","aw" .set .LC32,.LC2 .set .LC33,.LC7 .set .LC34,.LC9 .set .LC35,.LC18 .set .LC36,.LC1 .set .LC37,.LC12 .set .LC38,.LC14 .section ".text" .align 2 .p2align 4,,15 .globl shift_right_vect .section ".opd","aw" .align 3 shift_right_vect: .quad .L.shift_right_vect,.TOC.@tocbase,0 .previous .type shift_right_vect, @function .L.shift_right_vect: mfvrsave 0 stw 0,-4(1) oris 0,0,0xfffc mtvrsave 0 addis 8,2,.LC33@toc@ha addis 10,2,.LC34@toc@ha addis 11,2,.LC37@toc@ha ld 8,.LC33@toc@l(8) li 0,64 ld 10,.LC34@toc@l(10) ld 11,.LC37@toc@l(11) addis 9,2,.LC38@toc@ha mtctr 0 ld 9,.LC38@toc@l(9) addis 5,2,.LC32@toc@ha lxvw4x 38,0,8 addis 6,2,.LC35@toc@ha addis 7,2,.LC36@toc@ha lxvw4x 39,0,10 lxvw4x 45,0,11 vspltish 12,0 ld 8,.LC32@toc@l(5) ld 10,.LC35@toc@l(6) vspltisw 0,0 ld 11,.LC36@toc@l(7) lxvw4x 33,0,9 li 9,0 .p2align 4,,15 .L24: lxvw4x 42,8,9 lxvw4x 43,10,9 vperm 8,10,12,6 vperm 9,11,12,6 vperm 10,10,12,7 vperm 11,11,12,7 vperm 2,8,0,13 vperm 4,9,0,13 vperm 3,10,0,13 vperm 5,11,0,13 vperm 8,8,0,1 vperm 9,9,0,1 vperm 10,10,0,1 vperm 11,11,0,1 vsraw 4,2,4 vsraw 9,8,9 vsraw 5,3,5 vsraw 11,10,11 vpkuwum 9,4,9 vpkuwum 11,5,11 vpkuhum 11,9,11 stxvw4x 43,11,9 addi 9,9,16 bdnz .L24 lwz 12,-4(1) mtvrsave 12 blr .long 0 .byte 0,0,0,0,0,0,0,0 .size shift_right_vect,.-.L.shift_right_vect .comm c,1024,32 .comm b,1024,32 .comm a,1024,32 .section .rodata.cst16,"aM",@progbits,16 .align 4 .LC6: .byte 16 .byte 0 .byte 16 .byte 1 .byte 16 .byte 2 .byte 16 .byte 3 .byte 16 
.byte 4 .byte 16 .byte 5 .byte 16 .byte 6 .byte 16 .byte 7 .LC8: .byte 16 .byte 8 .byte 16 .byte 9 .byte 16 .byte 10 .byte 16 .byte 11 .byte 16 .byte 12 .byte 16 .byte 13 .byte 16 .byte 14 .byte 16 .byte 15 .LC11: .byte 16 .byte 17 .byte 0 .byte 1 .byte 16 .byte 17 .byte 2 .byte 3 .byte 16 .byte 17 .byte 4 .byte 5 .byte 16 .byte 17 .byte 6 .byte 7 .LC13: .byte 16 .byte 17 .byte 8 .byte 9 .byte 16 .byte 17 .byte 10 .byte 11 .byte 16 .byte 17 .byte 12 .byte 13 .byte 16 .byte 17 .byte 14 .byte 15 .ident "GCC: (GNU) 4.7.0 20111014 (experimental)"