On Thu, Oct 13, 2011 at 11:43:35AM -0700, Richard Henderson wrote:
> On 10/13/2011 11:36 AM, David Edelsohn wrote:
> > Are there testcases in the GCC testsuite that exercise these patterns?
>
> I thought the vectorizer would use them. E.g. gcc.dg/vect/vect-shift-3.c.
>
> I see that I should have added ppc to
> check_effective_target_vect_shift_scalar,
> though, to enable even more testing.
I tried this patch on trunk, and I'm not seeing any changes in the code. I'll
include the test case and asm as attachments.
This is due to the code I put into tree-vect-generic.c (in
expand_vector_operations_1) that converts between vector shift by vector and
vector shift by scalar. Note that AMD's XOP shifts are also vector/vector
shifts.
The code shifting by a scalar is pretty bad in that it recalculates the splat of
the shift element every time in the loop, rather than doing the splat once
before the loop. We also still have the problem, which we've had for a couple
of years, that if the type is signed char or signed short, the compiler wants to
promote the items to int and does this with several unpacks and repacks.
--
Michael Meissner, IBM
5 Technology Place Drive, M/S 2757, Westford, MA 01886-3141, USA
[email protected] fax +1 (978) 399-6899
#include <stddef.h>
#include <stdlib.h>
/* Build-time knobs.  Each one can be overridden from the command line
   (e.g. -DTYPE=short); the values below are only the defaults.  */

/* UNS is a prefix placed in front of TYPE in the array declarations;
   it defaults to nothing.  */
#ifndef UNS
#define UNS
#endif
/* TYPE is the element type of the three arrays.  */
#ifndef TYPE
#define TYPE int
#endif
/* SIZE is the number of elements in each array and the trip count of
   every loop in this file.  */
#ifndef SIZE
#define SIZE 1024
#endif
/* ALIGN is the alignment, in bytes, requested for each array.  */
#ifndef ALIGN
#define ALIGN 32
#endif
/* a is the destination of every loop; b is the value being shifted;
   c supplies per-element shift counts for the *_vect functions.  */
UNS TYPE a[SIZE] __attribute__((__aligned__(ALIGN)));
UNS TYPE b[SIZE] __attribute__((__aligned__(ALIGN)));
UNS TYPE c[SIZE] __attribute__((__aligned__(ALIGN)));
/* Store b[k] << 1 into a[k] for every index k in [0, SIZE).  */
void
shift_left_1 (void)
{
  size_t k = 0;

  while (k < SIZE)
    {
      a[k] = b[k] << 1;
      k++;
    }
}
/* Store b[k] << 2 into a[k] for every index k in [0, SIZE).  */
void
shift_left_2 (void)
{
  size_t k = 0;

  while (k < SIZE)
    {
      a[k] = b[k] << 2;
      k++;
    }
}
/* Store b[k] << n into a[k] for every index k in [0, SIZE).  The same
   run-time count n is applied to every element.  */
void
shift_left_scalar (int n)
{
  size_t k = 0;

  while (k < SIZE)
    {
      a[k] = b[k] << n;
      k++;
    }
}
/* Store b[k] << c[k] into a[k] for every index k in [0, SIZE).  Each
   element gets its own count from c; the parameter n is not used.  */
void
shift_left_vect (int n)
{
  size_t k = 0;

  while (k < SIZE)
    {
      a[k] = b[k] << c[k];
      k++;
    }
}
/* Store b[k] >> 1 into a[k] for every index k in [0, SIZE).  */
void
shift_right_1 (void)
{
  size_t k = 0;

  while (k < SIZE)
    {
      a[k] = b[k] >> 1;
      k++;
    }
}
/* Store b[k] >> 2 into a[k] for every index k in [0, SIZE).  */
void
shift_right_2 (void)
{
  size_t k = 0;

  while (k < SIZE)
    {
      a[k] = b[k] >> 2;
      k++;
    }
}
/* Store b[k] >> n into a[k] for every index k in [0, SIZE).  The same
   run-time count n is applied to every element.  */
void
shift_right_scalar (int n)
{
  size_t k = 0;

  while (k < SIZE)
    {
      a[k] = b[k] >> n;
      k++;
    }
}
/* Store b[k] >> c[k] into a[k] for every index k in [0, SIZE).  Each
   element gets its own count from c; the parameter n is not used.  */
void
shift_right_vect (int n)
{
  size_t k = 0;

  while (k < SIZE)
    {
      a[k] = b[k] >> c[k];
      k++;
    }
}
.file "foo.c"
# shift_left_1: a[i] = b[i] << 1, vectorized on 32-bit words.
# Each iteration handles 16 bytes; CTR is 256, so 4096 bytes in total.
.section ".toc","aw"
.section ".text"
.section ".toc","aw"
# TOC entries holding the addresses of a and b.
.LC1:
.tc a[TC],a
.LC2:
.tc b[TC],b
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_1
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_1:
.quad .L.shift_left_1,.TOC.@tocbase,0
.previous
.type shift_left_1, @function
.L.shift_left_1:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count.
li 0,256
mtctr 0
# r10 = address of a, r11 = address of b, both fetched through the TOC (r2).
addis 11,2,.LC1@toc@ha
addis 9,2,.LC2@toc@ha
ld 10,.LC1@toc@l(11)
# v1 = shift count 1 in every word; built once, before the loop.
vspltisw 1,1
ld 11,.LC2@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 5,,31
.L2:
# v13 (VSX register 45) = next 16 bytes of b.
lxvw4x 45,11,9
vslw 0,13,1
# v0 (VSX register 32) goes to a at the same offset.
stxvw4x 32,10,9
addi 9,9,16
bdnz .L2
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_1,.-.L.shift_left_1
# shift_left_2: a[i] = b[i] << 2, vectorized on 32-bit words.
# Each iteration handles 16 bytes; CTR is 256, so 4096 bytes in total.
.section ".toc","aw"
# .LC3/.LC4 alias the TOC entries for a and b.
.set .LC3,.LC1
.set .LC4,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_2
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_2:
.quad .L.shift_left_2,.TOC.@tocbase,0
.previous
.type shift_left_2, @function
.L.shift_left_2:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count.
li 0,256
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC3@toc@ha
addis 9,2,.LC4@toc@ha
ld 10,.LC3@toc@l(11)
# v1 = shift count 2 in every word; built once, before the loop.
vspltisw 1,2
ld 11,.LC4@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 5,,31
.L6:
# Load 16 bytes of b into v13, shift each word left, store v0 to a.
lxvw4x 45,11,9
vslw 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L6
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_2,.-.L.shift_left_2
# shift_left_scalar: a[i] = b[i] << n, with n arriving in r3.
# The splat of n into a vector is rebuilt on every iteration although r3
# never changes inside the loop.
.section ".toc","aw"
# .LC5/.LC6 alias the TOC entries for a and b.
.set .LC5,.LC1
.set .LC6,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_scalar
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_scalar:
.quad .L.shift_left_scalar,.TOC.@tocbase,0
.previous
.type shift_left_scalar, @function
.L.shift_left_scalar:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0 and v1.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc000
mtvrsave 0
# CTR = iteration count (256 iterations of 16 bytes).
li 8,256
mtctr 8
# r10 = address of a, r11 = address of b.
addis 11,2,.LC5@toc@ha
addis 9,2,.LC6@toc@ha
ld 10,.LC5@toc@l(11)
# r0 = 48, the index register for the lvewx below.
li 0,48
ld 11,.LC6@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 4,,15
.L9:
# Loop-invariant work: spill n to -32(r1), reload it into v0 with lvewx
# (r8 + r0 = r1 - 80 + 48 = r1 - 32) and replicate word 0 across v0.
stw 3,-32(1)
addi 8,1,-80
lvewx 0,8,0
xxspltw 32,32,0
# v1 = next 16 bytes of b; v0 = v1 << n per word; store v0 to a.
lxvw4x 33,11,9
vslw 0,1,0
stxvw4x 32,10,9
addi 9,9,16
bdnz .L9
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_scalar,.-.L.shift_left_scalar
# shift_left_vect: a[i] = b[i] << c[i], vectorized on 32-bit words.
# The incoming argument register r3 is never read.
.section ".toc","aw"
# .LC7/.LC8 alias the TOC entries for a and b; .LC9 is the entry for c.
.set .LC7,.LC1
.set .LC8,.LC2
.LC9:
.tc c[TC],c
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_vect
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_vect:
.quad .L.shift_left_vect,.TOC.@tocbase,0
.previous
.type shift_left_vect, @function
.L.shift_left_vect:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count (256 iterations of 16 bytes).
li 0,256
mtctr 0
# r8 = address of a, r10 = address of b, r11 = address of c.
addis 10,2,.LC7@toc@ha
addis 11,2,.LC8@toc@ha
addis 9,2,.LC9@toc@ha
ld 8,.LC7@toc@l(10)
ld 10,.LC8@toc@l(11)
ld 11,.LC9@toc@l(9)
# r9 = byte offset shared by both loads and the store.
li 9,0
.p2align 5,,31
.L12:
# v1 = 16 bytes of b, v13 = 16 bytes of c (the per-word shift counts).
lxvw4x 33,10,9
lxvw4x 45,11,9
vslw 0,1,13
stxvw4x 32,8,9
addi 9,9,16
bdnz .L12
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_vect,.-.L.shift_left_vect
# shift_right_1: a[i] = b[i] >> 1 on 32-bit words, using the arithmetic
# (sign-propagating) shift vsraw.
.section ".toc","aw"
# .LC10/.LC11 alias the TOC entries for a and b.
.set .LC10,.LC1
.set .LC11,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_1
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_1:
.quad .L.shift_right_1,.TOC.@tocbase,0
.previous
.type shift_right_1, @function
.L.shift_right_1:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count (256 iterations of 16 bytes).
li 0,256
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC10@toc@ha
addis 9,2,.LC11@toc@ha
ld 10,.LC10@toc@l(11)
# v1 = shift count 1 in every word; built once, before the loop.
vspltisw 1,1
ld 11,.LC11@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 5,,31
.L15:
# Load 16 bytes of b into v13, shift each word right, store v0 to a.
lxvw4x 45,11,9
vsraw 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L15
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_1,.-.L.shift_right_1
# shift_right_2: a[i] = b[i] >> 2 on 32-bit words, using the arithmetic
# (sign-propagating) shift vsraw.
.section ".toc","aw"
# .LC12/.LC13 alias the TOC entries for a and b.
.set .LC12,.LC1
.set .LC13,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_2
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_2:
.quad .L.shift_right_2,.TOC.@tocbase,0
.previous
.type shift_right_2, @function
.L.shift_right_2:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count (256 iterations of 16 bytes).
li 0,256
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC12@toc@ha
addis 9,2,.LC13@toc@ha
ld 10,.LC12@toc@l(11)
# v1 = shift count 2 in every word; built once, before the loop.
vspltisw 1,2
ld 11,.LC13@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 5,,31
.L18:
# Load 16 bytes of b into v13, shift each word right, store v0 to a.
lxvw4x 45,11,9
vsraw 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L18
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_2,.-.L.shift_right_2
# shift_right_scalar: a[i] = b[i] >> n (arithmetic, vsraw), n in r3.
# The splat of n into a vector is rebuilt on every iteration although r3
# never changes inside the loop.
.section ".toc","aw"
# .LC14/.LC15 alias the TOC entries for a and b.
.set .LC14,.LC1
.set .LC15,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_scalar
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_scalar:
.quad .L.shift_right_scalar,.TOC.@tocbase,0
.previous
.type shift_right_scalar, @function
.L.shift_right_scalar:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0 and v1.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc000
mtvrsave 0
# CTR = iteration count (256 iterations of 16 bytes).
li 8,256
mtctr 8
# r10 = address of a, r11 = address of b.
addis 11,2,.LC14@toc@ha
addis 9,2,.LC15@toc@ha
ld 10,.LC14@toc@l(11)
# r0 = 48, the index register for the lvewx below.
li 0,48
ld 11,.LC15@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 4,,15
.L21:
# Loop-invariant work: spill n to -32(r1), reload it into v0 with lvewx
# (r8 + r0 = r1 - 80 + 48 = r1 - 32) and replicate word 0 across v0.
stw 3,-32(1)
addi 8,1,-80
lvewx 0,8,0
xxspltw 32,32,0
# v1 = next 16 bytes of b; v0 = v1 >> n per word; store v0 to a.
lxvw4x 33,11,9
vsraw 0,1,0
stxvw4x 32,10,9
addi 9,9,16
bdnz .L21
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_scalar,.-.L.shift_right_scalar
# shift_right_vect: a[i] = b[i] >> c[i] on 32-bit words (arithmetic, vsraw).
# The incoming argument register r3 is never read.
.section ".toc","aw"
# .LC16/.LC17/.LC18 alias the TOC entries for a, b and c.
.set .LC16,.LC1
.set .LC17,.LC2
.set .LC18,.LC9
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_vect
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_vect:
.quad .L.shift_right_vect,.TOC.@tocbase,0
.previous
.type shift_right_vect, @function
.L.shift_right_vect:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count (256 iterations of 16 bytes).
li 0,256
mtctr 0
# r8 = address of a, r10 = address of b, r11 = address of c.
addis 10,2,.LC16@toc@ha
addis 11,2,.LC17@toc@ha
addis 9,2,.LC18@toc@ha
ld 8,.LC16@toc@l(10)
ld 10,.LC17@toc@l(11)
ld 11,.LC18@toc@l(9)
# r9 = byte offset shared by both loads and the store.
li 9,0
.p2align 5,,31
.L24:
# v1 = 16 bytes of b, v13 = 16 bytes of c (the per-word shift counts).
lxvw4x 33,10,9
lxvw4x 45,11,9
vsraw 0,1,13
stxvw4x 32,8,9
addi 9,9,16
bdnz .L24
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_vect,.-.L.shift_right_vect
.comm c,4096,32
.comm b,4096,32
.comm a,4096,32
.ident "GCC: (GNU) 4.7.0 20111014 (experimental)"
.file "foo.c"
# shift_left_1: a[i] = b[i] << 1, vectorized on 32-bit words.
# Each iteration handles 16 bytes; CTR is 256, so 4096 bytes in total.
.section ".toc","aw"
.section ".text"
.section ".toc","aw"
# TOC entries holding the addresses of a and b.
.LC1:
.tc a[TC],a
.LC2:
.tc b[TC],b
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_1
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_1:
.quad .L.shift_left_1,.TOC.@tocbase,0
.previous
.type shift_left_1, @function
.L.shift_left_1:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count.
li 0,256
mtctr 0
# r10 = address of a, r11 = address of b, both fetched through the TOC (r2).
addis 11,2,.LC1@toc@ha
addis 9,2,.LC2@toc@ha
ld 10,.LC1@toc@l(11)
# v1 = shift count 1 in every word; built once, before the loop.
vspltisw 1,1
ld 11,.LC2@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 5,,31
.L2:
# Load 16 bytes of b into v13, shift each word left, store v0 to a.
lxvw4x 45,11,9
vslw 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L2
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_1,.-.L.shift_left_1
# shift_left_2: a[i] = b[i] << 2, vectorized on 32-bit words.
# Each iteration handles 16 bytes; CTR is 256, so 4096 bytes in total.
.section ".toc","aw"
# .LC3/.LC4 alias the TOC entries for a and b.
.set .LC3,.LC1
.set .LC4,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_2
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_2:
.quad .L.shift_left_2,.TOC.@tocbase,0
.previous
.type shift_left_2, @function
.L.shift_left_2:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count.
li 0,256
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC3@toc@ha
addis 9,2,.LC4@toc@ha
ld 10,.LC3@toc@l(11)
# v1 = shift count 2 in every word; built once, before the loop.
vspltisw 1,2
ld 11,.LC4@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 5,,31
.L6:
# Load 16 bytes of b into v13, shift each word left, store v0 to a.
lxvw4x 45,11,9
vslw 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L6
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_2,.-.L.shift_left_2
# shift_left_scalar: a[i] = b[i] << n, with n arriving in r3.
# The splat of n into a vector is rebuilt on every iteration although r3
# never changes inside the loop.
.section ".toc","aw"
# .LC5/.LC6 alias the TOC entries for a and b.
.set .LC5,.LC1
.set .LC6,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_scalar
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_scalar:
.quad .L.shift_left_scalar,.TOC.@tocbase,0
.previous
.type shift_left_scalar, @function
.L.shift_left_scalar:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0 and v1.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc000
mtvrsave 0
# CTR = iteration count (256 iterations of 16 bytes).
li 8,256
mtctr 8
# r10 = address of a, r11 = address of b.
addis 11,2,.LC5@toc@ha
addis 9,2,.LC6@toc@ha
ld 10,.LC5@toc@l(11)
# r0 = 48, the index register for the lvewx below.
li 0,48
ld 11,.LC6@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 4,,15
.L9:
# Loop-invariant work: spill n to -32(r1), reload it into v0 with lvewx
# (r8 + r0 = r1 - 80 + 48 = r1 - 32) and replicate word 0 across v0.
stw 3,-32(1)
addi 8,1,-80
lvewx 0,8,0
xxspltw 32,32,0
# v1 = next 16 bytes of b; v0 = v1 << n per word; store v0 to a.
lxvw4x 33,11,9
vslw 0,1,0
stxvw4x 32,10,9
addi 9,9,16
bdnz .L9
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_scalar,.-.L.shift_left_scalar
# shift_left_vect: a[i] = b[i] << c[i], vectorized on 32-bit words.
# The incoming argument register r3 is never read.
.section ".toc","aw"
# .LC7/.LC8 alias the TOC entries for a and b; .LC9 is the entry for c.
.set .LC7,.LC1
.set .LC8,.LC2
.LC9:
.tc c[TC],c
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_vect
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_vect:
.quad .L.shift_left_vect,.TOC.@tocbase,0
.previous
.type shift_left_vect, @function
.L.shift_left_vect:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count (256 iterations of 16 bytes).
li 0,256
mtctr 0
# r8 = address of a, r10 = address of b, r11 = address of c.
addis 10,2,.LC7@toc@ha
addis 11,2,.LC8@toc@ha
addis 9,2,.LC9@toc@ha
ld 8,.LC7@toc@l(10)
ld 10,.LC8@toc@l(11)
ld 11,.LC9@toc@l(9)
# r9 = byte offset shared by both loads and the store.
li 9,0
.p2align 5,,31
.L12:
# v1 = 16 bytes of b, v13 = 16 bytes of c (the per-word shift counts).
lxvw4x 33,10,9
lxvw4x 45,11,9
vslw 0,1,13
stxvw4x 32,8,9
addi 9,9,16
bdnz .L12
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_vect,.-.L.shift_left_vect
# shift_right_1: a[i] = b[i] >> 1 on 32-bit words, using the logical
# (zero-filling) shift vsrw.
.section ".toc","aw"
# .LC10/.LC11 alias the TOC entries for a and b.
.set .LC10,.LC1
.set .LC11,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_1
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_1:
.quad .L.shift_right_1,.TOC.@tocbase,0
.previous
.type shift_right_1, @function
.L.shift_right_1:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count (256 iterations of 16 bytes).
li 0,256
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC10@toc@ha
addis 9,2,.LC11@toc@ha
ld 10,.LC10@toc@l(11)
# v1 = shift count 1 in every word; built once, before the loop.
vspltisw 1,1
ld 11,.LC11@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 5,,31
.L15:
# Load 16 bytes of b into v13, shift each word right, store v0 to a.
lxvw4x 45,11,9
vsrw 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L15
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_1,.-.L.shift_right_1
# shift_right_2: a[i] = b[i] >> 2 on 32-bit words, using the logical
# (zero-filling) shift vsrw.
.section ".toc","aw"
# .LC12/.LC13 alias the TOC entries for a and b.
.set .LC12,.LC1
.set .LC13,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_2
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_2:
.quad .L.shift_right_2,.TOC.@tocbase,0
.previous
.type shift_right_2, @function
.L.shift_right_2:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count (256 iterations of 16 bytes).
li 0,256
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC12@toc@ha
addis 9,2,.LC13@toc@ha
ld 10,.LC12@toc@l(11)
# v1 = shift count 2 in every word; built once, before the loop.
vspltisw 1,2
ld 11,.LC13@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 5,,31
.L18:
# Load 16 bytes of b into v13, shift each word right, store v0 to a.
lxvw4x 45,11,9
vsrw 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L18
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_2,.-.L.shift_right_2
# shift_right_scalar: a[i] = b[i] >> n (logical, vsrw), n in r3.
# The splat of n into a vector is rebuilt on every iteration although r3
# never changes inside the loop.
.section ".toc","aw"
# .LC14/.LC15 alias the TOC entries for a and b.
.set .LC14,.LC1
.set .LC15,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_scalar
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_scalar:
.quad .L.shift_right_scalar,.TOC.@tocbase,0
.previous
.type shift_right_scalar, @function
.L.shift_right_scalar:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0 and v1.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc000
mtvrsave 0
# CTR = iteration count (256 iterations of 16 bytes).
li 8,256
mtctr 8
# r10 = address of a, r11 = address of b.
addis 11,2,.LC14@toc@ha
addis 9,2,.LC15@toc@ha
ld 10,.LC14@toc@l(11)
# r0 = 48, the index register for the lvewx below.
li 0,48
ld 11,.LC15@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 4,,15
.L21:
# Loop-invariant work: spill n to -32(r1), reload it into v0 with lvewx
# (r8 + r0 = r1 - 80 + 48 = r1 - 32) and replicate word 0 across v0.
stw 3,-32(1)
addi 8,1,-80
lvewx 0,8,0
xxspltw 32,32,0
# v1 = next 16 bytes of b; v0 = v1 >> n per word; store v0 to a.
lxvw4x 33,11,9
vsrw 0,1,0
stxvw4x 32,10,9
addi 9,9,16
bdnz .L21
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_scalar,.-.L.shift_right_scalar
# shift_right_vect: a[i] = b[i] >> c[i] on 32-bit words (logical, vsrw).
# The incoming argument register r3 is never read.
.section ".toc","aw"
# .LC16/.LC17/.LC18 alias the TOC entries for a, b and c.
.set .LC16,.LC1
.set .LC17,.LC2
.set .LC18,.LC9
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_vect
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_vect:
.quad .L.shift_right_vect,.TOC.@tocbase,0
.previous
.type shift_right_vect, @function
.L.shift_right_vect:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count (256 iterations of 16 bytes).
li 0,256
mtctr 0
# r8 = address of a, r10 = address of b, r11 = address of c.
addis 10,2,.LC16@toc@ha
addis 11,2,.LC17@toc@ha
addis 9,2,.LC18@toc@ha
ld 8,.LC16@toc@l(10)
ld 10,.LC17@toc@l(11)
ld 11,.LC18@toc@l(9)
# r9 = byte offset shared by both loads and the store.
li 9,0
.p2align 5,,31
.L24:
# v1 = 16 bytes of b, v13 = 16 bytes of c (the per-word shift counts).
lxvw4x 33,10,9
lxvw4x 45,11,9
vsrw 0,1,13
stxvw4x 32,8,9
addi 9,9,16
bdnz .L24
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_vect,.-.L.shift_right_vect
.comm c,4096,32
.comm b,4096,32
.comm a,4096,32
.ident "GCC: (GNU) 4.7.0 20111014 (experimental)"
.file "foo.c"
# shift_left_1: a[i] = b[i] << 1 on halfword elements.
# The shift is not done at halfword width: each 16-byte vector is unpacked
# (sign-extended) into two vectors of words, shifted with vslw, and packed
# back to halfwords.  CTR is 128, so 128 * 16 = 2048 bytes are processed.
.section ".toc","aw"
.section ".text"
.section ".toc","aw"
# TOC entries: here .LC1 is b and .LC2 is a.
.LC1:
.tc b[TC],b
.LC2:
.tc a[TC],a
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_1
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_1:
.quad .L.shift_left_1,.TOC.@tocbase,0
.previous
.type shift_left_1, @function
.L.shift_left_1:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count.
li 0,128
mtctr 0
# r10 = address of b, r11 = address of a.
addis 11,2,.LC1@toc@ha
addis 9,2,.LC2@toc@ha
ld 10,.LC1@toc@l(11)
# v1 = shift count 1 in every word (word-sized, to match vslw).
vspltisw 1,1
ld 11,.LC2@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 4,,15
.L2:
# v0 = 16 bytes (eight halfwords) of b.
lxvw4x 32,10,9
# Widen: v13 = high four halfwords as words, v0 = low four as words.
vupkhsh 13,0
vupklsh 0,0
vslw 13,13,1
vslw 0,0,1
# Narrow: keep the low halfword of each word from v13 then v0.
vpkuwum 0,13,0
stxvw4x 32,11,9
addi 9,9,16
bdnz .L2
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_1,.-.L.shift_left_1
# shift_left_2: a[i] = b[i] << 2 on halfword elements.
# Each vector is unpacked (sign-extended) to words, shifted with vslw and
# packed back to halfwords.  CTR is 128, so 2048 bytes are processed.
.section ".toc","aw"
# .LC3 aliases the TOC entry for b, .LC4 the entry for a.
.set .LC3,.LC1
.set .LC4,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_2
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_2:
.quad .L.shift_left_2,.TOC.@tocbase,0
.previous
.type shift_left_2, @function
.L.shift_left_2:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count.
li 0,128
mtctr 0
# r10 = address of b, r11 = address of a.
addis 11,2,.LC3@toc@ha
addis 9,2,.LC4@toc@ha
ld 10,.LC3@toc@l(11)
# v1 = shift count 2 in every word (word-sized, to match vslw).
vspltisw 1,2
ld 11,.LC4@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 4,,15
.L6:
# v0 = 16 bytes (eight halfwords) of b.
lxvw4x 32,10,9
# Widen: v13 = high four halfwords as words, v0 = low four as words.
vupkhsh 13,0
vupklsh 0,0
vslw 13,13,1
vslw 0,0,1
# Narrow: keep the low halfword of each word from v13 then v0.
vpkuwum 0,13,0
stxvw4x 32,11,9
addi 9,9,16
bdnz .L6
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_2,.-.L.shift_left_2
# shift_left_scalar: a[i] = b[i] << n on halfword elements, n in r3.
# Two costs are visible: the halfwords are widened to words and packed back,
# and the splat of n is rebuilt twice on every iteration (once per widened
# half) although r3 never changes inside the loop.
.section ".toc","aw"
# .LC5 aliases the TOC entry for b, .LC6 the entry for a.
.set .LC5,.LC1
.set .LC6,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_scalar
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_scalar:
.quad .L.shift_left_scalar,.TOC.@tocbase,0
.previous
.type shift_left_scalar, @function
.L.shift_left_scalar:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v12, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc00c
mtvrsave 0
# CTR = iteration count (128 iterations of 16 bytes).
li 7,128
mtctr 7
# r10 = address of b, r11 = address of a.
addis 11,2,.LC5@toc@ha
addis 9,2,.LC6@toc@ha
ld 10,.LC5@toc@l(11)
# r8 = 48 and r0 = 64 are the index registers for the two lvewx below.
li 8,48
ld 11,.LC6@toc@l(9)
li 0,64
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 4,,15
.L9:
# Loop-invariant work: spill n to -48(r1) and -32(r1), reload each copy
# (r7 + r8 = r1 - 48, r7 + r0 = r1 - 32) and replicate word 0 in v1 and v0.
stw 3,-48(1)
stw 3,-32(1)
addi 7,1,-96
lvewx 1,7,8
lvewx 0,7,0
xxspltw 33,33,0
xxspltw 32,32,0
# v13 = 16 bytes (eight halfwords) of b.
lxvw4x 45,10,9
# Widen to words: v12 = high four halfwords, v13 = low four.
vupkhsh 12,13
vupklsh 13,13
vslw 1,12,1
vslw 0,13,0
# Narrow back to halfwords and store to a.
vpkuwum 0,1,0
stxvw4x 32,11,9
addi 9,9,16
bdnz .L9
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_scalar,.-.L.shift_left_scalar
# shift_left_vect: a[i] = b[i] << c[i] on halfword elements.
# Both the values and the counts are widened to words, shifted with vslw
# and packed back to halfwords.  The argument register r3 is never read.
.section ".toc","aw"
# .LC7 aliases b, .LC8 is the TOC entry for c, .LC9 aliases a.
.set .LC7,.LC1
.LC8:
.tc c[TC],c
.set .LC9,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_vect
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_vect:
.quad .L.shift_left_vect,.TOC.@tocbase,0
.previous
.type shift_left_vect, @function
.L.shift_left_vect:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v12, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc00c
mtvrsave 0
# CTR = iteration count (128 iterations of 16 bytes).
li 0,128
mtctr 0
# r8 = address of b, r10 = address of c, r11 = address of a.
addis 10,2,.LC7@toc@ha
addis 11,2,.LC8@toc@ha
addis 9,2,.LC9@toc@ha
ld 8,.LC7@toc@l(10)
ld 10,.LC8@toc@l(11)
ld 11,.LC9@toc@l(9)
# r9 = byte offset shared by both loads and the store.
li 9,0
.p2align 4,,15
.L12:
# v1 = 16 bytes of b, v0 = 16 bytes of c.
lxvw4x 33,8,9
lxvw4x 32,10,9
# Widen high halves (v12 from b, v13 from c), then low halves in place.
vupkhsh 12,1
vupkhsh 13,0
vupklsh 1,1
vupklsh 0,0
vslw 13,12,13
vslw 0,1,0
# Narrow back to halfwords and store to a.
vpkuwum 0,13,0
stxvw4x 32,11,9
addi 9,9,16
bdnz .L12
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_vect,.-.L.shift_left_vect
# shift_right_1: a[i] = b[i] >> 1 on halfword elements.
# Here the shift is done directly at halfword width with the arithmetic
# shift vsrah; no widening or packing is needed.
.section ".toc","aw"
# .LC10 aliases the TOC entry for a, .LC11 the entry for b.
.set .LC10,.LC2
.set .LC11,.LC1
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_1
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_1:
.quad .L.shift_right_1,.TOC.@tocbase,0
.previous
.type shift_right_1, @function
.L.shift_right_1:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count (128 iterations of 16 bytes).
li 0,128
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC10@toc@ha
addis 9,2,.LC11@toc@ha
ld 10,.LC10@toc@l(11)
# v1 = shift count 1 in every halfword; built once, before the loop.
vspltish 1,1
ld 11,.LC11@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 5,,31
.L15:
# Load 16 bytes of b into v13, shift each halfword right, store v0 to a.
lxvw4x 45,11,9
vsrah 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L15
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_1,.-.L.shift_right_1
# shift_right_2: a[i] = b[i] >> 2 on halfword elements.
# The shift is done directly at halfword width with the arithmetic shift
# vsrah; no widening or packing is needed.
.section ".toc","aw"
# .LC12 aliases the TOC entry for a, .LC13 the entry for b.
.set .LC12,.LC2
.set .LC13,.LC1
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_2
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_2:
.quad .L.shift_right_2,.TOC.@tocbase,0
.previous
.type shift_right_2, @function
.L.shift_right_2:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count (128 iterations of 16 bytes).
li 0,128
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC12@toc@ha
addis 9,2,.LC13@toc@ha
ld 10,.LC12@toc@l(11)
# v1 = shift count 2 in every halfword; built once, before the loop.
vspltish 1,2
ld 11,.LC13@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 5,,31
.L18:
# Load 16 bytes of b into v13, shift each halfword right, store v0 to a.
lxvw4x 45,11,9
vsrah 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L18
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_2,.-.L.shift_right_2
# shift_right_scalar: a[i] = b[i] >> n on halfword elements, n in r3.
# The halfwords are sign-extended to words, shifted with vsraw and packed
# back; the splat of n is rebuilt twice on every iteration although r3
# never changes inside the loop.
.section ".toc","aw"
# .LC14 aliases the TOC entry for b, .LC15 the entry for a.
.set .LC14,.LC1
.set .LC15,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_scalar
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_scalar:
.quad .L.shift_right_scalar,.TOC.@tocbase,0
.previous
.type shift_right_scalar, @function
.L.shift_right_scalar:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v12, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc00c
mtvrsave 0
# CTR = iteration count (128 iterations of 16 bytes).
li 7,128
mtctr 7
# r10 = address of b, r11 = address of a.
addis 11,2,.LC14@toc@ha
addis 9,2,.LC15@toc@ha
ld 10,.LC14@toc@l(11)
# r8 = 48 and r0 = 64 are the index registers for the two lvewx below.
li 8,48
ld 11,.LC15@toc@l(9)
li 0,64
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 4,,15
.L21:
# Loop-invariant work: spill n to -48(r1) and -32(r1), reload each copy
# (r7 + r8 = r1 - 48, r7 + r0 = r1 - 32) and replicate word 0 in v1 and v0.
stw 3,-48(1)
stw 3,-32(1)
addi 7,1,-96
lvewx 1,7,8
lvewx 0,7,0
xxspltw 33,33,0
xxspltw 32,32,0
# v13 = 16 bytes (eight halfwords) of b.
lxvw4x 45,10,9
# Widen to words: v12 = high four halfwords, v13 = low four.
vupkhsh 12,13
vupklsh 13,13
vsraw 1,12,1
vsraw 0,13,0
# Narrow back to halfwords and store to a.
vpkuwum 0,1,0
stxvw4x 32,11,9
addi 9,9,16
bdnz .L21
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_scalar,.-.L.shift_right_scalar
# shift_right_vect: a[i] = b[i] >> c[i] on halfword elements.
# Both the values and the counts are widened to words, shifted with vsraw
# and packed back to halfwords.  The argument register r3 is never read.
.section ".toc","aw"
# .LC16 aliases b, .LC17 aliases c, .LC18 aliases a.
.set .LC16,.LC1
.set .LC17,.LC8
.set .LC18,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_vect
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_vect:
.quad .L.shift_right_vect,.TOC.@tocbase,0
.previous
.type shift_right_vect, @function
.L.shift_right_vect:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v12, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc00c
mtvrsave 0
# CTR = iteration count (128 iterations of 16 bytes).
li 0,128
mtctr 0
# r8 = address of b, r10 = address of c, r11 = address of a.
addis 10,2,.LC16@toc@ha
addis 11,2,.LC17@toc@ha
addis 9,2,.LC18@toc@ha
ld 8,.LC16@toc@l(10)
ld 10,.LC17@toc@l(11)
ld 11,.LC18@toc@l(9)
# r9 = byte offset shared by both loads and the store.
li 9,0
.p2align 4,,15
.L24:
# v1 = 16 bytes of b, v0 = 16 bytes of c.
lxvw4x 33,8,9
lxvw4x 32,10,9
# Widen high halves (v12 from b, v13 from c), then low halves in place.
vupkhsh 12,1
vupkhsh 13,0
vupklsh 1,1
vupklsh 0,0
vsraw 13,12,13
vsraw 0,1,0
# Narrow back to halfwords and store to a.
vpkuwum 0,13,0
stxvw4x 32,11,9
addi 9,9,16
bdnz .L24
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_vect,.-.L.shift_right_vect
.comm c,2048,32
.comm b,2048,32
.comm a,2048,32
.ident "GCC: (GNU) 4.7.0 20111014 (experimental)"
.file "foo.c"
# shift_left_1: a[i] = b[i] << 1 on halfword elements, done directly at
# halfword width with vslh.  CTR is 128, so 128 * 16 = 2048 bytes.
.section ".toc","aw"
.section ".text"
.section ".toc","aw"
# TOC entries holding the addresses of a and b.
.LC1:
.tc a[TC],a
.LC2:
.tc b[TC],b
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_1
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_1:
.quad .L.shift_left_1,.TOC.@tocbase,0
.previous
.type shift_left_1, @function
.L.shift_left_1:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count.
li 0,128
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC1@toc@ha
addis 9,2,.LC2@toc@ha
ld 10,.LC1@toc@l(11)
# v1 = shift count 1 in every halfword; built once, before the loop.
vspltish 1,1
ld 11,.LC2@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 5,,31
.L2:
# Load 16 bytes of b into v13, shift each halfword left, store v0 to a.
lxvw4x 45,11,9
vslh 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L2
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_1,.-.L.shift_left_1
# shift_left_2: a[i] = b[i] << 2 on halfword elements, done directly at
# halfword width with vslh.  CTR is 128, so 128 * 16 = 2048 bytes.
.section ".toc","aw"
# .LC3/.LC4 alias the TOC entries for a and b.
.set .LC3,.LC1
.set .LC4,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_2
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_2:
.quad .L.shift_left_2,.TOC.@tocbase,0
.previous
.type shift_left_2, @function
.L.shift_left_2:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count.
li 0,128
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC3@toc@ha
addis 9,2,.LC4@toc@ha
ld 10,.LC3@toc@l(11)
# v1 = shift count 2 in every halfword; built once, before the loop.
vspltish 1,2
ld 11,.LC4@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 5,,31
.L6:
# Load 16 bytes of b into v13, shift each halfword left, store v0 to a.
lxvw4x 45,11,9
vslh 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L6
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_2,.-.L.shift_left_2
# shift_left_scalar: a[i] = b[i] << n on halfword elements, n in r3.
# The halfwords are zero-extended to words with vperm (control vectors
# .LC7 and .LC9, second source all zeros), shifted with vslw and packed
# back.  The splat of n is rebuilt twice on every iteration although r3
# never changes inside the loop.
.section ".toc","aw"
# .LC5 aliases b, .LC6 aliases a; .LC8/.LC10 hold the addresses of the
# permute control vectors .LC7/.LC9.
.set .LC5,.LC2
.set .LC6,.LC1
.LC8:
.tc .LC7[TC],.LC7
.LC10:
.tc .LC9[TC],.LC9
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_scalar
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_scalar:
.quad .L.shift_left_scalar,.TOC.@tocbase,0
.previous
.type shift_left_scalar, @function
.L.shift_left_scalar:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v9-v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc07c
mtvrsave 0
# CTR = iteration count (128 iterations of 16 bytes).
li 7,128
mtctr 7
# r11 = address of .LC7 and r9 = address of .LC9 (r11 is reused for a below).
addis 11,2,.LC8@toc@ha
addis 9,2,.LC10@toc@ha
ld 11,.LC8@toc@l(11)
addis 10,2,.LC5@toc@ha
ld 9,.LC10@toc@l(9)
# v12 = all zeros, the second vperm source.
vspltisw 12,0
addis 8,2,.LC6@toc@ha
# r10 = address of b.
ld 10,.LC5@toc@l(10)
# v9 (VSX register 41) = control vector for the high four halfwords.
lxvw4x 41,0,11
li 0,64
# r11 = address of a.
ld 11,.LC6@toc@l(8)
li 8,48
# v10 (VSX register 42) = control vector for the low four halfwords.
lxvw4x 42,0,9
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 4,,15
.L9:
# Loop-invariant work: spill n to -48(r1) and -32(r1), reload each copy
# (r7 + r8 = r1 - 48, r7 + r0 = r1 - 32) and replicate word 0 in v1 and v0.
stw 3,-48(1)
stw 3,-32(1)
addi 7,1,-96
lvewx 1,7,8
lvewx 0,7,0
xxspltw 33,33,0
xxspltw 32,32,0
# v13 = 16 bytes (eight halfwords) of b.
lxvw4x 45,10,9
# Zero-extend to words: v11 = high four halfwords, v13 = low four.
vperm 11,13,12,9
vperm 13,13,12,10
vslw 1,11,1
vslw 0,13,0
# Narrow back to halfwords and store to a.
vpkuwum 0,1,0
stxvw4x 32,11,9
addi 9,9,16
bdnz .L9
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_scalar,.-.L.shift_left_scalar
# shift_left_vect: a[i] = b[i] << c[i] on halfword elements.
# Values and counts are both zero-extended to words with vperm (second
# source all zeros), shifted with vslw and packed back to halfwords.
# The argument register r3 is never read.
.section ".toc","aw"
# .LC11 aliases b, .LC12 is the TOC entry for c, .LC13 aliases a;
# .LC14/.LC15 alias the entries for the permute control vectors.
.set .LC11,.LC2
.LC12:
.tc c[TC],c
.set .LC13,.LC1
.set .LC14,.LC8
.set .LC15,.LC10
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_vect
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_vect:
.quad .L.shift_left_vect,.TOC.@tocbase,0
.previous
.type shift_left_vect, @function
.L.shift_left_vect:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v9-v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc07c
mtvrsave 0
# r11 and r9 first hold the addresses of the two control vectors.
addis 11,2,.LC14@toc@ha
# CTR = iteration count (128 iterations of 16 bytes).
li 0,128
ld 11,.LC14@toc@l(11)
addis 9,2,.LC15@toc@ha
mtctr 0
ld 9,.LC15@toc@l(9)
addis 8,2,.LC11@toc@ha
addis 10,2,.LC12@toc@ha
addis 7,2,.LC13@toc@ha
# v11 (VSX register 43) = control vector for the high four halfwords.
lxvw4x 43,0,11
# r8 = address of b.
ld 8,.LC11@toc@l(8)
# v0 = all zeros, the second vperm source.
vspltisw 0,0
# r10 = address of c, r11 = address of a.
ld 10,.LC12@toc@l(10)
ld 11,.LC13@toc@l(7)
# v12 (VSX register 44) = control vector for the low four halfwords.
lxvw4x 44,0,9
# r9 = byte offset shared by both loads and the store.
li 9,0
.p2align 4,,15
.L12:
# v13 = 16 bytes of b, v1 = 16 bytes of c.
lxvw4x 45,8,9
lxvw4x 33,10,9
# Zero-extend: v9/v10 = high halves of b/c, v13/v1 = low halves of b/c.
vperm 9,13,0,11
vperm 10,1,0,11
vperm 13,13,0,12
vperm 1,1,0,12
vslw 10,9,10
vslw 1,13,1
# Narrow back to halfwords and store v1 to a.
vpkuwum 1,10,1
stxvw4x 33,11,9
addi 9,9,16
bdnz .L12
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_vect,.-.L.shift_left_vect
# shift_right_1: a[i] = b[i] >> 1 on halfword elements, done directly at
# halfword width with the logical (zero-filling) shift vsrh.
.section ".toc","aw"
# .LC16/.LC17 alias the TOC entries for a and b.
.set .LC16,.LC1
.set .LC17,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_1
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_1:
.quad .L.shift_right_1,.TOC.@tocbase,0
.previous
.type shift_right_1, @function
.L.shift_right_1:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count (128 iterations of 16 bytes).
li 0,128
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC16@toc@ha
addis 9,2,.LC17@toc@ha
ld 10,.LC16@toc@l(11)
# v1 = shift count 1 in every halfword; built once, before the loop.
vspltish 1,1
ld 11,.LC17@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 5,,31
.L15:
# Load 16 bytes of b into v13, shift each halfword right, store v0 to a.
lxvw4x 45,11,9
vsrh 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L15
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_1,.-.L.shift_right_1
# shift_right_2: a[i] = b[i] >> 2 on halfword elements, done directly at
# halfword width with the logical (zero-filling) shift vsrh.
.section ".toc","aw"
# .LC18/.LC19 alias the TOC entries for a and b.
.set .LC18,.LC1
.set .LC19,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_2
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_2:
.quad .L.shift_right_2,.TOC.@tocbase,0
.previous
.type shift_right_2, @function
.L.shift_right_2:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count (128 iterations of 16 bytes).
li 0,128
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC18@toc@ha
addis 9,2,.LC19@toc@ha
ld 10,.LC18@toc@l(11)
# v1 = shift count 2 in every halfword; built once, before the loop.
vspltish 1,2
ld 11,.LC19@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 5,,31
.L18:
# Load 16 bytes of b into v13, shift each halfword right, store v0 to a.
lxvw4x 45,11,9
vsrh 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L18
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_2,.-.L.shift_right_2
# shift_right_scalar: a[i] = b[i] >> n on halfword elements, n in r3.
# The halfwords are zero-extended to words with vperm (second source all
# zeros), shifted with vsraw and packed back.  The splat of n is rebuilt
# twice on every iteration although r3 never changes inside the loop.
.section ".toc","aw"
# .LC20 aliases b, .LC21 aliases a; .LC22/.LC23 alias the entries for the
# permute control vectors.
.set .LC20,.LC2
.set .LC21,.LC1
.set .LC22,.LC8
.set .LC23,.LC10
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_scalar
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_scalar:
.quad .L.shift_right_scalar,.TOC.@tocbase,0
.previous
.type shift_right_scalar, @function
.L.shift_right_scalar:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v9-v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc07c
mtvrsave 0
# CTR = iteration count (128 iterations of 16 bytes).
li 7,128
mtctr 7
# r11 and r9 first hold the addresses of the two control vectors.
addis 11,2,.LC22@toc@ha
addis 9,2,.LC23@toc@ha
ld 11,.LC22@toc@l(11)
addis 10,2,.LC20@toc@ha
ld 9,.LC23@toc@l(9)
# v12 = all zeros, the second vperm source.
vspltisw 12,0
addis 8,2,.LC21@toc@ha
# r10 = address of b.
ld 10,.LC20@toc@l(10)
# v9 (VSX register 41) = control vector for the high four halfwords.
lxvw4x 41,0,11
li 0,64
# r11 = address of a.
ld 11,.LC21@toc@l(8)
li 8,48
# v10 (VSX register 42) = control vector for the low four halfwords.
lxvw4x 42,0,9
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 4,,15
.L21:
# Loop-invariant work: spill n to -48(r1) and -32(r1), reload each copy
# (r7 + r8 = r1 - 48, r7 + r0 = r1 - 32) and replicate word 0 in v1 and v0.
stw 3,-48(1)
stw 3,-32(1)
addi 7,1,-96
lvewx 1,7,8
lvewx 0,7,0
xxspltw 33,33,0
xxspltw 32,32,0
# v13 = 16 bytes (eight halfwords) of b.
lxvw4x 45,10,9
# Zero-extend to words: v11 = high four halfwords, v13 = low four.
vperm 11,13,12,9
vperm 13,13,12,10
vsraw 1,11,1
vsraw 0,13,0
# Narrow back to halfwords and store to a.
vpkuwum 0,1,0
stxvw4x 32,11,9
addi 9,9,16
bdnz .L21
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_scalar,.-.L.shift_right_scalar
# shift_right_vect: a[i] = b[i] >> c[i] on halfword elements.
# Values and counts are both zero-extended to words with vperm (second
# source all zeros), shifted with vsraw and packed back to halfwords.
# The argument register r3 is never read.
.section ".toc","aw"
# .LC24 aliases b, .LC25 aliases c, .LC26 aliases a; .LC27/.LC28 alias the
# entries for the permute control vectors.
.set .LC24,.LC2
.set .LC25,.LC12
.set .LC26,.LC1
.set .LC27,.LC8
.set .LC28,.LC10
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_vect
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_vect:
.quad .L.shift_right_vect,.TOC.@tocbase,0
.previous
.type shift_right_vect, @function
.L.shift_right_vect:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v9-v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc07c
mtvrsave 0
# r11 and r9 first hold the addresses of the two control vectors.
addis 11,2,.LC27@toc@ha
# CTR = iteration count (128 iterations of 16 bytes).
li 0,128
ld 11,.LC27@toc@l(11)
addis 9,2,.LC28@toc@ha
mtctr 0
ld 9,.LC28@toc@l(9)
addis 8,2,.LC24@toc@ha
addis 10,2,.LC25@toc@ha
addis 7,2,.LC26@toc@ha
# v11 (VSX register 43) = control vector for the high four halfwords.
lxvw4x 43,0,11
# r8 = address of b.
ld 8,.LC24@toc@l(8)
# v0 = all zeros, the second vperm source.
vspltisw 0,0
# r10 = address of c, r11 = address of a.
ld 10,.LC25@toc@l(10)
ld 11,.LC26@toc@l(7)
# v12 (VSX register 44) = control vector for the low four halfwords.
lxvw4x 44,0,9
# r9 = byte offset shared by both loads and the store.
li 9,0
.p2align 4,,15
.L24:
# v13 = 16 bytes of b, v1 = 16 bytes of c.
lxvw4x 45,8,9
lxvw4x 33,10,9
# Zero-extend: v9/v10 = high halves of b/c, v13/v1 = low halves of b/c.
vperm 9,13,0,11
vperm 10,1,0,11
vperm 13,13,0,12
vperm 1,1,0,12
vsraw 10,9,10
vsraw 1,13,1
# Narrow back to halfwords and store v1 to a.
vpkuwum 1,10,1
stxvw4x 33,11,9
addi 9,9,16
bdnz .L24
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_vect,.-.L.shift_right_vect
.comm c,2048,32
.comm b,2048,32
.comm a,2048,32
# Two 16-byte vperm control vectors used to zero-extend halfwords to words.
# In each group of four bytes, selectors 16 and 17 pick bytes 0-1 of the
# second vperm source (which the callers set to all zeros) and the other two
# selectors pick one halfword of the first source.
.section .rodata.cst16,"aM",@progbits,16
.align 4
# .LC7: widens halfwords 0-3 (source bytes 0-7).
.LC7:
.byte 16
.byte 17
.byte 0
.byte 1
.byte 16
.byte 17
.byte 2
.byte 3
.byte 16
.byte 17
.byte 4
.byte 5
.byte 16
.byte 17
.byte 6
.byte 7
# .LC9: widens halfwords 4-7 (source bytes 8-15).
.LC9:
.byte 16
.byte 17
.byte 8
.byte 9
.byte 16
.byte 17
.byte 10
.byte 11
.byte 16
.byte 17
.byte 12
.byte 13
.byte 16
.byte 17
.byte 14
.byte 15
.ident "GCC: (GNU) 4.7.0 20111014 (experimental)"
.file "foo.c"
# shift_left_1: a[i] = b[i] << 1 on byte elements.
# The shift is not done at byte width: each 16-byte vector is sign-extended
# bytes -> halfwords -> words (four word vectors), shifted with vslw, then
# packed words -> halfwords -> bytes.  CTR is 64, so 64 * 16 = 1024 bytes.
.section ".toc","aw"
.section ".text"
.section ".toc","aw"
# TOC entries: here .LC1 is b and .LC2 is a.
.LC1:
.tc b[TC],b
.LC2:
.tc a[TC],a
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_1
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_1:
.quad .L.shift_left_1,.TOC.@tocbase,0
.previous
.type shift_left_1, @function
.L.shift_left_1:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v11-v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc01c
mtvrsave 0
# CTR = iteration count.
li 0,64
mtctr 0
# r10 = address of b, r11 = address of a.
addis 11,2,.LC1@toc@ha
addis 9,2,.LC2@toc@ha
ld 10,.LC1@toc@l(11)
# v0 = shift count 1 in every word (word-sized, to match vslw).
vspltisw 0,1
ld 11,.LC2@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 4,,15
.L2:
# v1 = 16 bytes of b.
lxvw4x 33,10,9
# Bytes -> halfwords: v13 = high eight, v1 = low eight.
vupkhsb 13,1
vupklsb 1,1
# Halfwords -> words: v11, v13, v12, v1 hold four words each.
vupkhsh 11,13
vupkhsh 12,1
vupklsh 13,13
vupklsh 1,1
vslw 11,11,0
vslw 13,13,0
vslw 12,12,0
vslw 1,1,0
# Words -> halfwords -> bytes, then store v1 to a.
vpkuwum 13,11,13
vpkuwum 1,12,1
vpkuhum 1,13,1
stxvw4x 33,11,9
addi 9,9,16
bdnz .L2
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_1,.-.L.shift_left_1
# shift_left_2: a[i] = b[i] << 2 on byte elements.
# Each vector is sign-extended bytes -> halfwords -> words, shifted with
# vslw, then packed back down to bytes.  CTR is 64, so 1024 bytes.
.section ".toc","aw"
# .LC3 aliases the TOC entry for b, .LC4 the entry for a.
.set .LC3,.LC1
.set .LC4,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_2
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_2:
.quad .L.shift_left_2,.TOC.@tocbase,0
.previous
.type shift_left_2, @function
.L.shift_left_2:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v11-v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc01c
mtvrsave 0
# CTR = iteration count.
li 0,64
mtctr 0
# r10 = address of b, r11 = address of a.
addis 11,2,.LC3@toc@ha
addis 9,2,.LC4@toc@ha
ld 10,.LC3@toc@l(11)
# v0 = shift count 2 in every word (word-sized, to match vslw).
vspltisw 0,2
ld 11,.LC4@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 4,,15
.L6:
# v1 = 16 bytes of b.
lxvw4x 33,10,9
# Bytes -> halfwords: v13 = high eight, v1 = low eight.
vupkhsb 13,1
vupklsb 1,1
# Halfwords -> words: v11, v13, v12, v1 hold four words each.
vupkhsh 11,13
vupkhsh 12,1
vupklsh 13,13
vupklsh 1,1
vslw 11,11,0
vslw 13,13,0
vslw 12,12,0
vslw 1,1,0
# Words -> halfwords -> bytes, then store v1 to a.
vpkuwum 13,11,13
vpkuwum 1,12,1
vpkuhum 1,13,1
stxvw4x 33,11,9
addi 9,9,16
bdnz .L6
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_2,.-.L.shift_left_2
# shift_left_scalar: a[i] = b[i] << n on byte elements, n in r3.
# The bytes are sign-extended to four word vectors and packed back, and the
# splat of n is rebuilt four times on every iteration (once per word
# vector) although r3 never changes inside the loop.
.section ".toc","aw"
# .LC5 aliases the TOC entry for b, .LC6 the entry for a.
.set .LC5,.LC1
.set .LC6,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_scalar
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_scalar:
.quad .L.shift_left_scalar,.TOC.@tocbase,0
.previous
.type shift_left_scalar, @function
.L.shift_left_scalar:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v8-v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc0fc
mtvrsave 0
# CTR = iteration count; r7 keeps the value 64 and doubles as an lvewx index.
li 7,64
mtctr 7
# r10 = address of b, r11 = address of a.
addis 11,2,.LC5@toc@ha
addis 9,2,.LC6@toc@ha
ld 10,.LC5@toc@l(11)
# r6 = 48, r8 = 80, r0 = 96: the other lvewx index registers.
li 6,48
ld 11,.LC6@toc@l(9)
li 8,80
# r9 = byte offset shared by the load and the store.
li 9,0
li 0,96
.p2align 4,,15
.L9:
# Loop-invariant work: spill n to four stack slots, reload each one
# (r5 = r1 - 128 plus 48/64/80/96) and replicate word 0 in v12, v13, v1, v0.
stw 3,-80(1)
stw 3,-64(1)
stw 3,-48(1)
stw 3,-32(1)
addi 5,1,-128
lvewx 12,5,6
lvewx 13,5,7
lvewx 1,5,8
lvewx 0,5,0
xxspltw 44,44,0
xxspltw 45,45,0
xxspltw 33,33,0
xxspltw 32,32,0
# v11 = 16 bytes of b.
lxvw4x 43,10,9
# Bytes -> halfwords -> words: v8, v10, v9, v11 hold four words each.
vupkhsb 10,11
vupklsb 11,11
vupkhsh 8,10
vupkhsh 9,11
vupklsh 10,10
vupklsh 11,11
vslw 12,8,12
vslw 13,10,13
vslw 1,9,1
vslw 0,11,0
# Words -> halfwords -> bytes, then store v0 to a.
vpkuwum 13,12,13
vpkuwum 0,1,0
vpkuhum 0,13,0
stxvw4x 32,11,9
addi 9,9,16
bdnz .L9
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_scalar,.-.L.shift_left_scalar
# shift_left_vect: a[i] = b[i] << c[i] on byte elements.
# Values and counts are both sign-extended bytes -> halfwords -> words,
# shifted with vslw and packed back down to bytes.  The argument register
# r3 is never read.
.section ".toc","aw"
# .LC7 aliases b, .LC8 is the TOC entry for c, .LC9 aliases a.
.set .LC7,.LC1
.LC8:
.tc c[TC],c
.set .LC9,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_vect
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_left_vect:
.quad .L.shift_left_vect,.TOC.@tocbase,0
.previous
.type shift_left_vect, @function
.L.shift_left_vect:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v8-v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc0fc
mtvrsave 0
# CTR = iteration count (64 iterations of 16 bytes).
li 0,64
mtctr 0
# r8 = address of b, r10 = address of c, r11 = address of a.
addis 10,2,.LC7@toc@ha
addis 11,2,.LC8@toc@ha
addis 9,2,.LC9@toc@ha
ld 8,.LC7@toc@l(10)
ld 10,.LC8@toc@l(11)
ld 11,.LC9@toc@l(9)
# r9 = byte offset shared by both loads and the store.
li 9,0
.p2align 4,,15
.L12:
# v1 = 16 bytes of b, v0 = 16 bytes of c.
lxvw4x 33,8,9
lxvw4x 32,10,9
# Bytes -> halfwords for both inputs.
vupkhsb 12,1
vupkhsb 13,0
vupklsb 1,1
vupklsb 0,0
# Halfwords -> words: b in v8, v12, v9, v1; c in v10, v13, v11, v0.
vupkhsh 8,12
vupkhsh 10,13
vupkhsh 9,1
vupkhsh 11,0
vupklsh 12,12
vupklsh 13,13
vupklsh 1,1
vupklsh 0,0
vslw 10,8,10
vslw 13,12,13
vslw 11,9,11
vslw 0,1,0
# Words -> halfwords -> bytes, then store v0 to a.
vpkuwum 13,10,13
vpkuwum 0,11,0
vpkuhum 0,13,0
stxvw4x 32,11,9
addi 9,9,16
bdnz .L12
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_vect,.-.L.shift_left_vect
# shift_right_1: a[i] = b[i] >> 1 on byte elements.
# Here the shift is done directly at byte width with the arithmetic shift
# vsrab; no widening or packing is needed.
.section ".toc","aw"
# .LC10 aliases the TOC entry for a, .LC11 the entry for b.
.set .LC10,.LC2
.set .LC11,.LC1
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_1
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_1:
.quad .L.shift_right_1,.TOC.@tocbase,0
.previous
.type shift_right_1, @function
.L.shift_right_1:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count (64 iterations of 16 bytes).
li 0,64
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC10@toc@ha
addis 9,2,.LC11@toc@ha
ld 10,.LC10@toc@l(11)
# v1 = shift count 1 in every byte; built once, before the loop.
vspltisb 1,1
ld 11,.LC11@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 5,,31
.L15:
# Load 16 bytes of b into v13, shift each byte right, store v0 to a.
lxvw4x 45,11,9
vsrab 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L15
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_1,.-.L.shift_right_1
# shift_right_2: a[i] = b[i] >> 2 on byte elements.
# The shift is done directly at byte width with the arithmetic shift
# vsrab; no widening or packing is needed.
.section ".toc","aw"
# .LC12 aliases the TOC entry for a, .LC13 the entry for b.
.set .LC12,.LC2
.set .LC13,.LC1
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_2
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_2:
.quad .L.shift_right_2,.TOC.@tocbase,0
.previous
.type shift_right_2, @function
.L.shift_right_2:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = iteration count (64 iterations of 16 bytes).
li 0,64
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC12@toc@ha
addis 9,2,.LC13@toc@ha
ld 10,.LC12@toc@l(11)
# v1 = shift count 2 in every byte; built once, before the loop.
vspltisb 1,2
ld 11,.LC13@toc@l(9)
# r9 = byte offset shared by the load and the store.
li 9,0
.p2align 5,,31
.L18:
# Load 16 bytes of b into v13, shift each byte right, store v0 to a.
lxvw4x 45,11,9
vsrab 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L18
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_2,.-.L.shift_right_2
# shift_right_scalar: a[i] = b[i] >> n on byte elements, n in r3.
# The bytes are sign-extended to four word vectors, shifted with vsraw and
# packed back; the splat of n is rebuilt four times on every iteration
# although r3 never changes inside the loop.
.section ".toc","aw"
# .LC14 aliases the TOC entry for b, .LC15 the entry for a.
.set .LC14,.LC1
.set .LC15,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_scalar
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_scalar:
.quad .L.shift_right_scalar,.TOC.@tocbase,0
.previous
.type shift_right_scalar, @function
.L.shift_right_scalar:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v8-v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc0fc
mtvrsave 0
# CTR = iteration count; r7 keeps the value 64 and doubles as an lvewx index.
li 7,64
mtctr 7
# r10 = address of b, r11 = address of a.
addis 11,2,.LC14@toc@ha
addis 9,2,.LC15@toc@ha
ld 10,.LC14@toc@l(11)
# r6 = 48, r8 = 80, r0 = 96: the other lvewx index registers.
li 6,48
ld 11,.LC15@toc@l(9)
li 8,80
# r9 = byte offset shared by the load and the store.
li 9,0
li 0,96
.p2align 4,,15
.L21:
# Loop-invariant work: spill n to four stack slots, reload each one
# (r5 = r1 - 128 plus 48/64/80/96) and replicate word 0 in v12, v13, v1, v0.
stw 3,-80(1)
stw 3,-64(1)
stw 3,-48(1)
stw 3,-32(1)
addi 5,1,-128
lvewx 12,5,6
lvewx 13,5,7
lvewx 1,5,8
lvewx 0,5,0
xxspltw 44,44,0
xxspltw 45,45,0
xxspltw 33,33,0
xxspltw 32,32,0
# v11 = 16 bytes of b.
lxvw4x 43,10,9
# Bytes -> halfwords -> words: v8, v10, v9, v11 hold four words each.
vupkhsb 10,11
vupklsb 11,11
vupkhsh 8,10
vupkhsh 9,11
vupklsh 10,10
vupklsh 11,11
vsraw 12,8,12
vsraw 13,10,13
vsraw 1,9,1
vsraw 0,11,0
# Words -> halfwords -> bytes, then store v0 to a.
vpkuwum 13,12,13
vpkuwum 0,1,0
vpkuhum 0,13,0
stxvw4x 32,11,9
addi 9,9,16
bdnz .L21
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_scalar,.-.L.shift_right_scalar
# shift_right_vect: a[i] = b[i] >> c[i] on byte elements.
# Values and counts are both sign-extended bytes -> halfwords -> words,
# shifted with vsraw and packed back down to bytes.  The argument register
# r3 is never read.
.section ".toc","aw"
# .LC16 aliases b, .LC17 aliases c, .LC18 aliases a.
.set .LC16,.LC1
.set .LC17,.LC8
.set .LC18,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_vect
# Function descriptor: code entry point, TOC base, 0.
.section ".opd","aw"
.align 3
shift_right_vect:
.quad .L.shift_right_vect,.TOC.@tocbase,0
.previous
.type shift_right_vect, @function
.L.shift_right_vect:
# Save the caller's VRSAVE at -4(r1) and OR in the mask for v0, v1, v8-v13.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc0fc
mtvrsave 0
# CTR = iteration count (64 iterations of 16 bytes).
li 0,64
mtctr 0
# r8 = address of b, r10 = address of c, r11 = address of a.
addis 10,2,.LC16@toc@ha
addis 11,2,.LC17@toc@ha
addis 9,2,.LC18@toc@ha
ld 8,.LC16@toc@l(10)
ld 10,.LC17@toc@l(11)
ld 11,.LC18@toc@l(9)
# r9 = byte offset shared by both loads and the store.
li 9,0
.p2align 4,,15
.L24:
# v1 = 16 bytes of b, v0 = 16 bytes of c.
lxvw4x 33,8,9
lxvw4x 32,10,9
# Bytes -> halfwords for both inputs.
vupkhsb 12,1
vupkhsb 13,0
vupklsb 1,1
vupklsb 0,0
# Halfwords -> words: b in v8, v12, v9, v1; c in v10, v13, v11, v0.
vupkhsh 8,12
vupkhsh 10,13
vupkhsh 9,1
vupkhsh 11,0
vupklsh 12,12
vupklsh 13,13
vupklsh 1,1
vupklsh 0,0
vsraw 10,8,10
vsraw 13,12,13
vsraw 11,9,11
vsraw 0,1,0
# Words -> halfwords -> bytes, then store v0 to a.
vpkuwum 13,10,13
vpkuwum 0,11,0
vpkuhum 0,13,0
stxvw4x 32,11,9
addi 9,9,16
bdnz .L24
# Restore VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table - confirm against the ABI.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_vect,.-.L.shift_right_vect
# Common (uninitialized) symbols c, b and a: 1024 bytes each, 32-byte aligned.
.comm c,1024,32
.comm b,1024,32
.comm a,1024,32
.ident "GCC: (GNU) 4.7.0 20111014 (experimental)"
.file "foo.c"
.section ".toc","aw"
.section ".text"
# TOC entries holding the addresses of the global arrays a (.LC1) and b (.LC2).
.section ".toc","aw"
.LC1:
.tc a[TC],a
.LC2:
.tc b[TC],b
# shift_left_1 -- a[i] = b[i] << 1 on byte elements, one quadword (16 bytes)
# per iteration, 64 iterations (1024 bytes in total).
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_1
.section ".opd","aw"
.align 3
# Function descriptor: entry point address, TOC base, zero.
shift_left_1:
.quad .L.shift_left_1,.TOC.@tocbase,0
.previous
.type shift_left_1, @function
.L.shift_left_1:
# Save the caller's VRSAVE at -4(r1) and mark v0, v1 and v13 as live.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = 64 iterations.
li 0,64
mtctr 0
# r10 = address of a (via .LC1), r11 = address of b (via .LC2).
addis 11,2,.LC1@toc@ha
addis 9,2,.LC2@toc@ha
ld 10,.LC1@toc@l(11)
# v1 = shift count 1 replicated into every byte.
vspltisb 1,1
ld 11,.LC2@toc@l(9)
# r9 = running byte offset.
li 9,0
.p2align 5,,31
.L2:
# Load a quadword of b, shift each byte left by v1, store to a.
lxvw4x 45,11,9
vslb 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L2
# Restore the caller's VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table GCC emits after each
# function -- confirm.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_1,.-.L.shift_left_1
# shift_left_2 -- a[i] = b[i] << 2 on byte elements, one quadword (16 bytes)
# per iteration, 64 iterations.
# .LC3/.LC4 alias the TOC entries .LC1/.LC2 (addresses of a and b).
.section ".toc","aw"
.set .LC3,.LC1
.set .LC4,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_2
.section ".opd","aw"
.align 3
# Function descriptor: entry point address, TOC base, zero.
shift_left_2:
.quad .L.shift_left_2,.TOC.@tocbase,0
.previous
.type shift_left_2, @function
.L.shift_left_2:
# Save the caller's VRSAVE at -4(r1) and mark v0, v1 and v13 as live.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = 64 iterations.
li 0,64
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC3@toc@ha
addis 9,2,.LC4@toc@ha
ld 10,.LC3@toc@l(11)
# v1 = shift count 2 replicated into every byte.
vspltisb 1,2
ld 11,.LC4@toc@l(9)
# r9 = running byte offset.
li 9,0
.p2align 5,,31
.L6:
# Load a quadword of b, shift each byte left by v1, store to a.
lxvw4x 45,11,9
vslb 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L6
# Restore the caller's VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table GCC emits after each
# function -- confirm.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_2,.-.L.shift_left_2
#-----------------------------------------------------------------------
# shift_left_scalar -- a[i] = b[i] << n on byte elements.
#   In:    r3 = shift count n (low 32 bits used)
#   Work:  64 iterations, one quadword (16 bytes) per iteration.  Each byte
#          is zero-extended to a 32-bit word, shifted, and truncated back.
#   Clobb: r0, r5-r12, CTR, v0-v14
#          (VRSAVE is saved at -4(r1) and restored on exit).
#   Stack: one scratch word at -80(r1), below the stack pointer.
#
# The splat of n is loop-invariant, so it is built once before the loop
# (in v14) instead of being stored, reloaded and re-splatted on every
# iteration.
#
# TOC entries: .LC7/.LC9/.LC12/.LC14 hold the addresses of the vperm
# control vectors .LC6/.LC8/.LC11/.LC13; .LC5 and .LC10 alias the TOC
# entries for b and a.
#-----------------------------------------------------------------------
	.section ".toc","aw"
	.set .LC5,.LC2
.LC7:
	.tc .LC6[TC],.LC6
.LC9:
	.tc .LC8[TC],.LC8
	.set .LC10,.LC1
.LC12:
	.tc .LC11[TC],.LC11
.LC14:
	.tc .LC13[TC],.LC13
	.section ".text"
	.align 2
	.p2align 4,,15
	.globl shift_left_scalar
	.section ".opd","aw"
	.align 3
shift_left_scalar:
	.quad .L.shift_left_scalar,.TOC.@tocbase,0
	.previous
	.type shift_left_scalar, @function
.L.shift_left_scalar:
	# Save the caller's VRSAVE and mark v0-v14 as live.
	mfvrsave 0
	stw 0,-4(1)
	oris 0,0,0xfffe
	mtvrsave 0
	# Load the four vperm control vectors:
	#   v7 = .LC11, v2 = .LC6, v3 = .LC8, v8 = .LC13.
	addis 11,2,.LC12@toc@ha
	ld 11,.LC12@toc@l(11)
	addis 7,2,.LC10@toc@ha
	addis 8,2,.LC7@toc@ha
	addis 10,2,.LC9@toc@ha
	addis 9,2,.LC14@toc@ha
	ld 8,.LC7@toc@l(8)
	lxvw4x 39,0,11
	# r11 = address of a (destination).
	ld 11,.LC10@toc@l(7)
	# CTR = 64 loop iterations.
	li 7,64
	addis 6,2,.LC5@toc@ha
	mtctr 7
	ld 10,.LC9@toc@l(10)
	ld 9,.LC14@toc@l(9)
	# v6 and v0 = all zeros, the fill source for the zero-extending vperms.
	vspltish 6,0
	lxvw4x 34,0,8
	vspltisw 0,0
	lxvw4x 35,0,10
	lxvw4x 40,0,9
	# r10 = address of b (source).
	ld 10,.LC5@toc@l(6)
	# v14 = n replicated into all four words: bounce n through a
	# quadword-aligned stack word (r1 is quadword aligned, so the word
	# lands in element 0) and splat that element.
	stw 3,-80(1)
	li 5,-80
	lvewx 14,1,5
	xxspltw 46,46,0
	# r9 = running byte offset into both arrays.
	li 9,0
	.p2align 4,,15
.L9:
	# v10 = 16 source bytes.
	lxvw4x 42,10,9
	# Zero-extend bytes to halfwords: v9 = bytes 0-7, v10 = bytes 8-15.
	vperm 9,10,6,2
	vperm 10,10,6,3
	# Zero-extend halfwords to words:
	#   v4 = bytes 0-3, v9 = bytes 4-7, v5 = bytes 8-11, v10 = bytes 12-15.
	vperm 4,9,0,7
	vperm 5,10,0,7
	vperm 9,9,0,8
	vperm 10,10,0,8
	# Shift every word left by n.
	vslw 11,4,14
	vslw 12,9,14
	vslw 13,5,14
	vslw 1,10,14
	# Truncate words -> halfwords -> bytes, restoring the original order.
	vpkuwum 12,11,12
	vpkuwum 1,13,1
	vpkuhum 1,12,1
	# Store the 16 result bytes and advance.
	stxvw4x 33,11,9
	addi 9,9,16
	bdnz .L9
	# Restore the caller's VRSAVE and return.
	lwz 12,-4(1)
	mtvrsave 12
	blr
	# NOTE(review): presumably the traceback table GCC emits after each
	# function -- confirm.
	.long 0
	.byte 0,0,0,0,0,0,0,0
	.size shift_left_scalar,.-.L.shift_left_scalar
# shift_left_vect -- a[i] = b[i] << c[i] on byte elements, one quadword
# (16 bytes) per iteration, 64 iterations.  r3 is not read.
# TOC: .LC18 holds the address of c; the .set aliases refer to the entries
# for b (.LC15), a (.LC19) and the four vperm control vectors
# (.LC16 -> .LC6, .LC17 -> .LC8, .LC20 -> .LC11, .LC21 -> .LC13).
.section ".toc","aw"
.set .LC15,.LC2
.set .LC16,.LC7
.set .LC17,.LC9
.LC18:
.tc c[TC],c
.set .LC19,.LC1
.set .LC20,.LC12
.set .LC21,.LC14
.section ".text"
.align 2
.p2align 4,,15
.globl shift_left_vect
.section ".opd","aw"
.align 3
# Function descriptor: entry point address, TOC base, zero.
shift_left_vect:
.quad .L.shift_left_vect,.TOC.@tocbase,0
.previous
.type shift_left_vect, @function
.L.shift_left_vect:
# Save the caller's VRSAVE at -4(r1) and mark v0-v13 as live.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xfffc
mtvrsave 0
# Load the vperm control vectors: v6 = .LC6, v7 = .LC8, v13 = .LC11,
# v1 = .LC13.  Then r8 = address of b, r10 = address of c, r11 = address
# of a.  CTR = 64 iterations.
addis 8,2,.LC16@toc@ha
addis 10,2,.LC17@toc@ha
addis 11,2,.LC20@toc@ha
ld 8,.LC16@toc@l(8)
li 0,64
ld 10,.LC17@toc@l(10)
ld 11,.LC20@toc@l(11)
addis 9,2,.LC21@toc@ha
mtctr 0
ld 9,.LC21@toc@l(9)
addis 5,2,.LC15@toc@ha
lxvw4x 38,0,8
addis 6,2,.LC18@toc@ha
addis 7,2,.LC19@toc@ha
lxvw4x 39,0,10
lxvw4x 45,0,11
# v12 and v0 = all zeros, the fill source for the zero-extending vperms.
vspltish 12,0
ld 8,.LC15@toc@l(5)
ld 10,.LC18@toc@l(6)
vspltisw 0,0
ld 11,.LC19@toc@l(7)
lxvw4x 33,0,9
# r9 = running byte offset.
li 9,0
.p2align 4,,15
.L12:
# v10 = quadword of b (values), v11 = quadword of c (shift counts).
lxvw4x 42,8,9
lxvw4x 43,10,9
# Zero-extend bytes to halfwords: v8/v9 = bytes 0-7 of b/c,
# v10/v11 = bytes 8-15 of b/c.
vperm 8,10,12,6
vperm 9,11,12,6
vperm 10,10,12,7
vperm 11,11,12,7
# Zero-extend halfwords to words, giving value/count vector pairs:
# (v2,v4) bytes 0-3, (v8,v9) bytes 4-7, (v3,v5) bytes 8-11,
# (v10,v11) bytes 12-15.
vperm 2,8,0,13
vperm 4,9,0,13
vperm 3,10,0,13
vperm 5,11,0,13
vperm 8,8,0,1
vperm 9,9,0,1
vperm 10,10,0,1
vperm 11,11,0,1
# Per-word left shift.
vslw 4,2,4
vslw 9,8,9
vslw 5,3,5
vslw 11,10,11
# Truncate words -> halfwords -> bytes, restoring the original order.
vpkuwum 9,4,9
vpkuwum 11,5,11
vpkuhum 11,9,11
# Store the result quadword to a and advance.
stxvw4x 43,11,9
addi 9,9,16
bdnz .L12
# Restore the caller's VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table GCC emits after each
# function -- confirm.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_left_vect,.-.L.shift_left_vect
# shift_right_1 -- a[i] = b[i] >> 1 (logical) on byte elements, one quadword
# (16 bytes) per iteration, 64 iterations.
# .LC22/.LC23 alias the TOC entries .LC1/.LC2 (addresses of a and b).
.section ".toc","aw"
.set .LC22,.LC1
.set .LC23,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_1
.section ".opd","aw"
.align 3
# Function descriptor: entry point address, TOC base, zero.
shift_right_1:
.quad .L.shift_right_1,.TOC.@tocbase,0
.previous
.type shift_right_1, @function
.L.shift_right_1:
# Save the caller's VRSAVE at -4(r1) and mark v0, v1 and v13 as live.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = 64 iterations.
li 0,64
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC22@toc@ha
addis 9,2,.LC23@toc@ha
ld 10,.LC22@toc@l(11)
# v1 = shift count 1 replicated into every byte.
vspltisb 1,1
ld 11,.LC23@toc@l(9)
# r9 = running byte offset.
li 9,0
.p2align 5,,31
.L15:
# Load a quadword of b, shift each byte right (logical) by v1, store to a.
lxvw4x 45,11,9
vsrb 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L15
# Restore the caller's VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table GCC emits after each
# function -- confirm.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_1,.-.L.shift_right_1
# shift_right_2 -- a[i] = b[i] >> 2 (logical) on byte elements, one quadword
# (16 bytes) per iteration, 64 iterations.
# .LC24/.LC25 alias the TOC entries .LC1/.LC2 (addresses of a and b).
.section ".toc","aw"
.set .LC24,.LC1
.set .LC25,.LC2
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_2
.section ".opd","aw"
.align 3
# Function descriptor: entry point address, TOC base, zero.
shift_right_2:
.quad .L.shift_right_2,.TOC.@tocbase,0
.previous
.type shift_right_2, @function
.L.shift_right_2:
# Save the caller's VRSAVE at -4(r1) and mark v0, v1 and v13 as live.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xc004
mtvrsave 0
# CTR = 64 iterations.
li 0,64
mtctr 0
# r10 = address of a, r11 = address of b.
addis 11,2,.LC24@toc@ha
addis 9,2,.LC25@toc@ha
ld 10,.LC24@toc@l(11)
# v1 = shift count 2 replicated into every byte.
vspltisb 1,2
ld 11,.LC25@toc@l(9)
# r9 = running byte offset.
li 9,0
.p2align 5,,31
.L18:
# Load a quadword of b, shift each byte right (logical) by v1, store to a.
lxvw4x 45,11,9
vsrb 0,13,1
stxvw4x 32,10,9
addi 9,9,16
bdnz .L18
# Restore the caller's VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table GCC emits after each
# function -- confirm.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_2,.-.L.shift_right_2
#-----------------------------------------------------------------------
# shift_right_scalar -- a[i] = b[i] >> n on byte elements.
#   In:    r3 = shift count n (low 32 bits used)
#   Work:  64 iterations, one quadword (16 bytes) per iteration.  Each byte
#          is zero-extended to a 32-bit word, shifted right (vsraw on a
#          zero-extended, hence non-negative, word), and truncated back.
#   Clobb: r0, r5-r12, CTR, v0-v14
#          (VRSAVE is saved at -4(r1) and restored on exit).
#   Stack: one scratch word at -80(r1), below the stack pointer.
#
# The splat of n is loop-invariant, so it is built once before the loop
# (in v14) instead of being stored, reloaded and re-splatted on every
# iteration.
#
# TOC aliases: .LC26 -> b, .LC29 -> a; .LC27/.LC28/.LC30/.LC31 -> the
# entries holding the addresses of the vperm control vectors
# .LC6/.LC8/.LC11/.LC13.
#-----------------------------------------------------------------------
	.section ".toc","aw"
	.set .LC26,.LC2
	.set .LC27,.LC7
	.set .LC28,.LC9
	.set .LC29,.LC1
	.set .LC30,.LC12
	.set .LC31,.LC14
	.section ".text"
	.align 2
	.p2align 4,,15
	.globl shift_right_scalar
	.section ".opd","aw"
	.align 3
shift_right_scalar:
	.quad .L.shift_right_scalar,.TOC.@tocbase,0
	.previous
	.type shift_right_scalar, @function
.L.shift_right_scalar:
	# Save the caller's VRSAVE and mark v0-v14 as live.
	mfvrsave 0
	stw 0,-4(1)
	oris 0,0,0xfffe
	mtvrsave 0
	# Load the four vperm control vectors:
	#   v7 = .LC11, v2 = .LC6, v3 = .LC8, v8 = .LC13.
	addis 11,2,.LC30@toc@ha
	ld 11,.LC30@toc@l(11)
	addis 7,2,.LC29@toc@ha
	addis 8,2,.LC27@toc@ha
	addis 10,2,.LC28@toc@ha
	addis 9,2,.LC31@toc@ha
	ld 8,.LC27@toc@l(8)
	lxvw4x 39,0,11
	# r11 = address of a (destination).
	ld 11,.LC29@toc@l(7)
	# CTR = 64 loop iterations.
	li 7,64
	addis 6,2,.LC26@toc@ha
	mtctr 7
	ld 10,.LC28@toc@l(10)
	ld 9,.LC31@toc@l(9)
	# v6 and v0 = all zeros, the fill source for the zero-extending vperms.
	vspltish 6,0
	lxvw4x 34,0,8
	vspltisw 0,0
	lxvw4x 35,0,10
	lxvw4x 40,0,9
	# r10 = address of b (source).
	ld 10,.LC26@toc@l(6)
	# v14 = n replicated into all four words: bounce n through a
	# quadword-aligned stack word (r1 is quadword aligned, so the word
	# lands in element 0) and splat that element.
	stw 3,-80(1)
	li 5,-80
	lvewx 14,1,5
	xxspltw 46,46,0
	# r9 = running byte offset into both arrays.
	li 9,0
	.p2align 4,,15
.L21:
	# v10 = 16 source bytes.
	lxvw4x 42,10,9
	# Zero-extend bytes to halfwords: v9 = bytes 0-7, v10 = bytes 8-15.
	vperm 9,10,6,2
	vperm 10,10,6,3
	# Zero-extend halfwords to words:
	#   v4 = bytes 0-3, v9 = bytes 4-7, v5 = bytes 8-11, v10 = bytes 12-15.
	vperm 4,9,0,7
	vperm 5,10,0,7
	vperm 9,9,0,8
	vperm 10,10,0,8
	# Shift every word right by n.
	vsraw 11,4,14
	vsraw 12,9,14
	vsraw 13,5,14
	vsraw 1,10,14
	# Truncate words -> halfwords -> bytes, restoring the original order.
	vpkuwum 12,11,12
	vpkuwum 1,13,1
	vpkuhum 1,12,1
	# Store the 16 result bytes and advance.
	stxvw4x 33,11,9
	addi 9,9,16
	bdnz .L21
	# Restore the caller's VRSAVE and return.
	lwz 12,-4(1)
	mtvrsave 12
	blr
	# NOTE(review): presumably the traceback table GCC emits after each
	# function -- confirm.
	.long 0
	.byte 0,0,0,0,0,0,0,0
	.size shift_right_scalar,.-.L.shift_right_scalar
# shift_right_vect -- a[i] = b[i] >> c[i] on byte elements, one quadword
# (16 bytes) per iteration, 64 iterations.  r3 is not read.
# TOC aliases: .LC32 -> b, .LC35 -> c, .LC36 -> a; .LC33/.LC34/.LC37/.LC38
# -> the entries holding the addresses of the vperm control vectors
# .LC6/.LC8/.LC11/.LC13.
.section ".toc","aw"
.set .LC32,.LC2
.set .LC33,.LC7
.set .LC34,.LC9
.set .LC35,.LC18
.set .LC36,.LC1
.set .LC37,.LC12
.set .LC38,.LC14
.section ".text"
.align 2
.p2align 4,,15
.globl shift_right_vect
.section ".opd","aw"
.align 3
# Function descriptor: entry point address, TOC base, zero.
shift_right_vect:
.quad .L.shift_right_vect,.TOC.@tocbase,0
.previous
.type shift_right_vect, @function
.L.shift_right_vect:
# Save the caller's VRSAVE at -4(r1) and mark v0-v13 as live.
mfvrsave 0
stw 0,-4(1)
oris 0,0,0xfffc
mtvrsave 0
# Load the vperm control vectors: v6 = .LC6, v7 = .LC8, v13 = .LC11,
# v1 = .LC13.  Then r8 = address of b, r10 = address of c, r11 = address
# of a.  CTR = 64 iterations.
addis 8,2,.LC33@toc@ha
addis 10,2,.LC34@toc@ha
addis 11,2,.LC37@toc@ha
ld 8,.LC33@toc@l(8)
li 0,64
ld 10,.LC34@toc@l(10)
ld 11,.LC37@toc@l(11)
addis 9,2,.LC38@toc@ha
mtctr 0
ld 9,.LC38@toc@l(9)
addis 5,2,.LC32@toc@ha
lxvw4x 38,0,8
addis 6,2,.LC35@toc@ha
addis 7,2,.LC36@toc@ha
lxvw4x 39,0,10
lxvw4x 45,0,11
# v12 and v0 = all zeros, the fill source for the zero-extending vperms.
vspltish 12,0
ld 8,.LC32@toc@l(5)
ld 10,.LC35@toc@l(6)
vspltisw 0,0
ld 11,.LC36@toc@l(7)
lxvw4x 33,0,9
# r9 = running byte offset.
li 9,0
.p2align 4,,15
.L24:
# v10 = quadword of b (values), v11 = quadword of c (shift counts).
lxvw4x 42,8,9
lxvw4x 43,10,9
# Zero-extend bytes to halfwords: v8/v9 = bytes 0-7 of b/c,
# v10/v11 = bytes 8-15 of b/c.
vperm 8,10,12,6
vperm 9,11,12,6
vperm 10,10,12,7
vperm 11,11,12,7
# Zero-extend halfwords to words, giving value/count vector pairs:
# (v2,v4) bytes 0-3, (v8,v9) bytes 4-7, (v3,v5) bytes 8-11,
# (v10,v11) bytes 12-15.
vperm 2,8,0,13
vperm 4,9,0,13
vperm 3,10,0,13
vperm 5,11,0,13
vperm 8,8,0,1
vperm 9,9,0,1
vperm 10,10,0,1
vperm 11,11,0,1
# Per-word right shift (vsraw on zero-extended, hence non-negative, words).
vsraw 4,2,4
vsraw 9,8,9
vsraw 5,3,5
vsraw 11,10,11
# Truncate words -> halfwords -> bytes, restoring the original order.
vpkuwum 9,4,9
vpkuwum 11,5,11
vpkuhum 11,9,11
# Store the result quadword to a and advance.
stxvw4x 43,11,9
addi 9,9,16
bdnz .L24
# Restore the caller's VRSAVE and return.
lwz 12,-4(1)
mtvrsave 12
blr
# NOTE(review): presumably the traceback table GCC emits after each
# function -- confirm.
.long 0
.byte 0,0,0,0,0,0,0,0
.size shift_right_vect,.-.L.shift_right_vect
# Common (uninitialized) symbols c, b and a: 1024 bytes each, 32-byte aligned.
.comm c,1024,32
.comm b,1024,32
.comm a,1024,32
# Mergeable section of quadword (16-byte) constants, quadword aligned.
.section .rodata.cst16,"aM",@progbits,16
.align 4
# .LC6: vperm control vector.  Selector 16 picks byte 0 of vperm's second
# source; selectors 0..7 pick bytes 0..7 of the first source.  The functions
# in this file pass an all-zero vector as the second source, so the result
# is bytes 0..7 of the data zero-extended to halfwords.
.LC6:
.byte 16
.byte 0
.byte 16
.byte 1
.byte 16
.byte 2
.byte 16
.byte 3
.byte 16
.byte 4
.byte 16
.byte 5
.byte 16
.byte 6
.byte 16
.byte 7
# .LC8: vperm control vector.  Selector 16 picks byte 0 of vperm's second
# source; selectors 8..15 pick bytes 8..15 of the first source.  The
# functions in this file pass an all-zero vector as the second source, so
# the result is bytes 8..15 of the data zero-extended to halfwords.
.LC8:
.byte 16
.byte 8
.byte 16
.byte 9
.byte 16
.byte 10
.byte 16
.byte 11
.byte 16
.byte 12
.byte 16
.byte 13
.byte 16
.byte 14
.byte 16
.byte 15
# .LC11: vperm control vector.  Selectors 16,17 pick bytes 0-1 of vperm's
# second source; selectors 0..7 pick bytes 0..7 of the first source in
# pairs.  The functions in this file pass an all-zero vector as the second
# source, so the result is halfwords 0..3 of the data zero-extended to words.
.LC11:
.byte 16
.byte 17
.byte 0
.byte 1
.byte 16
.byte 17
.byte 2
.byte 3
.byte 16
.byte 17
.byte 4
.byte 5
.byte 16
.byte 17
.byte 6
.byte 7
# .LC13: vperm control vector.  Selectors 16,17 pick bytes 0-1 of vperm's
# second source; selectors 8..15 pick bytes 8..15 of the first source in
# pairs.  The functions in this file pass an all-zero vector as the second
# source, so the result is halfwords 4..7 of the data zero-extended to words.
.LC13:
.byte 16
.byte 17
.byte 8
.byte 9
.byte 16
.byte 17
.byte 10
.byte 11
.byte 16
.byte 17
.byte 12
.byte 13
.byte 16
.byte 17
.byte 14
.byte 15
.ident "GCC: (GNU) 4.7.0 20111014 (experimental)"