Re: [Mesa-dev] [PATCH] R600/SI: Split global vector loads with more than 4 elements
On Mon, Feb 10, 2014 at 02:35:03PM -0800, Matt Arsenault wrote: Why would you want to do this for the small types? You should be able to load those in fewer loads and then promote them. We only custom lower v*i32 loads, so this code won't be executed on smaller types. -Tom On 02/10/2014 01:32 PM, Tom Stellard wrote: From: Tom Stellard thomas.stell...@amd.com --- lib/Target/R600/SIISelLowering.cpp | 8 +- test/CodeGen/R600/load.ll | 178 +++-- 2 files changed, 98 insertions(+), 88 deletions(-) diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 9537405..eb08a13 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -478,9 +478,11 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::LOAD: { LoadSDNode *Load = dyn_cast<LoadSDNode>(Op); -if ((Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || - Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && -Op.getValueType().isVector()) { +if (Op.getValueType().isVector() && +(Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || + Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS || + (Load->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && + Op.getValueType().getVectorNumElements() > 4))) { SDValue MergedValues[2] = { SplitVectorLoad(Op, DAG), Load->getChain() diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll index 0153524..1486c4d 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/R600/load.ll @@ -1,16 +1,15 @@ -; RUN: llc %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s +; RUN: llc %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK 
--check-prefix=FUNC %s +; RUN: llc %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s ;======; ; GLOBAL ADDRESS SPACE ;======; ; Load an i8 value from the global address space. -; R600-CHECK-LABEL: @load_i8 +; FUNC-LABEL: @load_i8 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK-LABEL: @load_i8 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { %1 = load i8 addrspace(1)* %in @@ -19,13 +18,12 @@ define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { ret void } -; R600-CHECK-LABEL: @load_i8_sext +; FUNC-LABEL: @load_i8_sext ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] ; R600-CHECK: 24 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] ; R600-CHECK: 24 -; SI-CHECK-LABEL: @load_i8_sext ; SI-CHECK: BUFFER_LOAD_SBYTE define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { entry: @@ -35,10 +33,9 @@ entry: ret void } -; R600-CHECK-LABEL: @load_v2i8 +; FUNC-LABEL: @load_v2i8 ; R600-CHECK: VTX_READ_8 ; R600-CHECK: VTX_READ_8 -; SI-CHECK-LABEL: @load_v2i8 ; SI-CHECK: BUFFER_LOAD_UBYTE ; SI-CHECK: BUFFER_LOAD_UBYTE define void @load_v2i8(2 x i32 addrspace(1)* %out, 2 x i8 addrspace(1)* %in) { @@ -49,7 +46,7 @@ entry: ret void } -; R600-CHECK-LABEL: @load_v2i8_sext +; FUNC-LABEL: @load_v2i8_sext ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] @@ -60,7 +57,6 @@ entry: ; R600-CHECK-DAG: 24 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] ; R600-CHECK-DAG: 24 -; SI-CHECK-LABEL: @load_v2i8_sext ; SI-CHECK: BUFFER_LOAD_SBYTE ; SI-CHECK: BUFFER_LOAD_SBYTE define void @load_v2i8_sext(2 x i32 addrspace(1)* %out, 2 x i8 addrspace(1)* %in) { @@ -71,12 +67,11 
@@ entry: ret void } -; R600-CHECK-LABEL: @load_v4i8 +; FUNC-LABEL: @load_v4i8 ; R600-CHECK: VTX_READ_8 ; R600-CHECK: VTX_READ_8 ; R600-CHECK: VTX_READ_8 ; R600-CHECK: VTX_READ_8 -; SI-CHECK-LABEL: @load_v4i8 ; SI-CHECK: BUFFER_LOAD_UBYTE ; SI-CHECK: BUFFER_LOAD_UBYTE ; SI-CHECK: BUFFER_LOAD_UBYTE @@ -89,7 +84,7
[Mesa-dev] [PATCH] R600/SI: Split global vector loads with more than 4 elements
From: Tom Stellard thomas.stell...@amd.com --- lib/Target/R600/SIISelLowering.cpp | 8 +- test/CodeGen/R600/load.ll | 178 +++-- 2 files changed, 98 insertions(+), 88 deletions(-) diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 9537405..eb08a13 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -478,9 +478,11 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::LOAD: { LoadSDNode *Load = dyn_cast<LoadSDNode>(Op); -if ((Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || - Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && -Op.getValueType().isVector()) { +if (Op.getValueType().isVector() && +(Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || + Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS || + (Load->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && + Op.getValueType().getVectorNumElements() > 4))) { SDValue MergedValues[2] = { SplitVectorLoad(Op, DAG), Load->getChain() diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll index 0153524..1486c4d 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/R600/load.ll @@ -1,16 +1,15 @@ -; RUN: llc %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s +; RUN: llc %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s +; RUN: llc %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s ;======; ; GLOBAL ADDRESS SPACE ;======; ; Load an i8 value from the global address space. 
-; R600-CHECK-LABEL: @load_i8 +; FUNC-LABEL: @load_i8 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK-LABEL: @load_i8 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { %1 = load i8 addrspace(1)* %in @@ -19,13 +18,12 @@ define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { ret void } -; R600-CHECK-LABEL: @load_i8_sext +; FUNC-LABEL: @load_i8_sext ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] ; R600-CHECK: 24 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] ; R600-CHECK: 24 -; SI-CHECK-LABEL: @load_i8_sext ; SI-CHECK: BUFFER_LOAD_SBYTE define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { entry: @@ -35,10 +33,9 @@ entry: ret void } -; R600-CHECK-LABEL: @load_v2i8 +; FUNC-LABEL: @load_v2i8 ; R600-CHECK: VTX_READ_8 ; R600-CHECK: VTX_READ_8 -; SI-CHECK-LABEL: @load_v2i8 ; SI-CHECK: BUFFER_LOAD_UBYTE ; SI-CHECK: BUFFER_LOAD_UBYTE define void @load_v2i8(2 x i32 addrspace(1)* %out, 2 x i8 addrspace(1)* %in) { @@ -49,7 +46,7 @@ entry: ret void } -; R600-CHECK-LABEL: @load_v2i8_sext +; FUNC-LABEL: @load_v2i8_sext ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] @@ -60,7 +57,6 @@ entry: ; R600-CHECK-DAG: 24 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] ; R600-CHECK-DAG: 24 -; SI-CHECK-LABEL: @load_v2i8_sext ; SI-CHECK: BUFFER_LOAD_SBYTE ; SI-CHECK: BUFFER_LOAD_SBYTE define void @load_v2i8_sext(2 x i32 addrspace(1)* %out, 2 x i8 addrspace(1)* %in) { @@ -71,12 +67,11 @@ entry: ret void } -; R600-CHECK-LABEL: @load_v4i8 +; FUNC-LABEL: @load_v4i8 ; R600-CHECK: VTX_READ_8 ; R600-CHECK: VTX_READ_8 ; R600-CHECK: VTX_READ_8 ; R600-CHECK: VTX_READ_8 -; SI-CHECK-LABEL: @load_v4i8 ; SI-CHECK: 
BUFFER_LOAD_UBYTE ; SI-CHECK: BUFFER_LOAD_UBYTE ; SI-CHECK: BUFFER_LOAD_UBYTE @@ -89,7 +84,7 @@ entry: ret void } -; R600-CHECK-LABEL: @load_v4i8_sext +; FUNC-LABEL: @load_v4i8_sext ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] ; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] @@ -110,7 +105,6 @@ entry: ; R600-CHECK-DAG: 24 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]] ; R600-CHECK-DAG: 24 -; SI-CHECK-LABEL: @load_v4i8_sext ; SI-CHECK: BUFFER_LOAD_SBYTE ; SI-CHECK: BUFFER_LOAD_SBYTE ; SI-CHECK: BUFFER_LOAD_SBYTE @@ -124,9 +118,8 @@ entry: } ; Load an i16 value from the global
Re: [Mesa-dev] [PATCH] R600/SI: Split global vector loads with more than 4 elements
Why would you want to do this for the small types? You should be able to load those in fewer loads and then promote them. On 02/10/2014 01:32 PM, Tom Stellard wrote: From: Tom Stellard thomas.stell...@amd.com --- lib/Target/R600/SIISelLowering.cpp | 8 +- test/CodeGen/R600/load.ll | 178 +++-- 2 files changed, 98 insertions(+), 88 deletions(-) diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 9537405..eb08a13 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -478,9 +478,11 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::LOAD: { LoadSDNode *Load = dyn_cast<LoadSDNode>(Op); -if ((Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || - Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && -Op.getValueType().isVector()) { +if (Op.getValueType().isVector() && +(Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || + Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS || + (Load->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && + Op.getValueType().getVectorNumElements() > 4))) { SDValue MergedValues[2] = { SplitVectorLoad(Op, DAG), Load->getChain() diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll index 0153524..1486c4d 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/R600/load.ll @@ -1,16 +1,15 @@ -; RUN: llc %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s +; RUN: llc %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s +; RUN: llc %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s ;======; ; GLOBAL 
ADDRESS SPACE ;======; ; Load an i8 value from the global address space. -; R600-CHECK-LABEL: @load_i8 +; FUNC-LABEL: @load_i8 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK-LABEL: @load_i8 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { %1 = load i8 addrspace(1)* %in @@ -19,13 +18,12 @@ define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { ret void } -; R600-CHECK-LABEL: @load_i8_sext +; FUNC-LABEL: @load_i8_sext ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] ; R600-CHECK: 24 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] ; R600-CHECK: 24 -; SI-CHECK-LABEL: @load_i8_sext ; SI-CHECK: BUFFER_LOAD_SBYTE define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { entry: @@ -35,10 +33,9 @@ entry: ret void } -; R600-CHECK-LABEL: @load_v2i8 +; FUNC-LABEL: @load_v2i8 ; R600-CHECK: VTX_READ_8 ; R600-CHECK: VTX_READ_8 -; SI-CHECK-LABEL: @load_v2i8 ; SI-CHECK: BUFFER_LOAD_UBYTE ; SI-CHECK: BUFFER_LOAD_UBYTE define void @load_v2i8(2 x i32 addrspace(1)* %out, 2 x i8 addrspace(1)* %in) { @@ -49,7 +46,7 @@ entry: ret void } -; R600-CHECK-LABEL: @load_v2i8_sext +; FUNC-LABEL: @load_v2i8_sext ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] @@ -60,7 +57,6 @@ entry: ; R600-CHECK-DAG: 24 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] ; R600-CHECK-DAG: 24 -; SI-CHECK-LABEL: @load_v2i8_sext ; SI-CHECK: BUFFER_LOAD_SBYTE ; SI-CHECK: BUFFER_LOAD_SBYTE define void @load_v2i8_sext(2 x i32 addrspace(1)* %out, 2 x i8 addrspace(1)* %in) { @@ -71,12 +67,11 @@ entry: ret void } -; R600-CHECK-LABEL: @load_v4i8 +; FUNC-LABEL: @load_v4i8 ; R600-CHECK: VTX_READ_8 ; R600-CHECK: VTX_READ_8 ; R600-CHECK: VTX_READ_8 ; 
R600-CHECK: VTX_READ_8 -; SI-CHECK-LABEL: @load_v4i8 ; SI-CHECK: BUFFER_LOAD_UBYTE ; SI-CHECK: BUFFER_LOAD_UBYTE ; SI-CHECK: BUFFER_LOAD_UBYTE @@ -89,7 +84,7 @@ entry: ret void } -; R600-CHECK-LABEL: @load_v4i8_sext +; FUNC-LABEL: @load_v4i8_sext ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] ; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]] @@ -110,7 +105,6 @@ entry: ; R600-CHECK-DAG: 24 ; R600-CHECK-DAG: ASHR {{[*
Re: [Mesa-dev] [PATCH] R600/SI: Split global vector loads with more than 4 elements
Hi Tom, This definitely fixes some issues that I've been seeing with int8/16 vload8() and vload16() in CL. vstore8/vstore16 are still broken, but at least the loads are working now (I've only tested int, but I can give a full test run if you want/need). For reference, the tests that failed before were in: piglit/test/cl/program/execute/vload-int.cl comment out the vload3 tests to get the rest to work. Previously int3, int8, and int16 all failed, now just the int3 tests fail to build. If you're curious, the vstore issues start like the following and can be reproduced with the attached test case: LLVM ERROR: Cannot select: 0x12fa1b0: v4i32 = extract_subvector 0x1c55240, 0x12f8ba0 [ORD=28] [ID=38] 0x1c55240: v8i32 = BUILD_VECTOR 0x1c55140, 0x12f7a90, 0x12f7f90, 0x12f8490, 0x12f8aa0, 0x12f8fa0, 0x12f94a0, 0x12f9ab0 [ORD=24] [ID=35] --Aaron On Mon, Feb 10, 2014 at 3:32 PM, Tom Stellard t...@stellard.net wrote: From: Tom Stellard thomas.stell...@amd.com --- lib/Target/R600/SIISelLowering.cpp | 8 +- test/CodeGen/R600/load.ll | 178 +++-- 2 files changed, 98 insertions(+), 88 deletions(-) diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 9537405..eb08a13 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -478,9 +478,11 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::LOAD: { LoadSDNode *Load = dyn_cast<LoadSDNode>(Op); -if ((Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || - Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && -Op.getValueType().isVector()) { +if (Op.getValueType().isVector() && +(Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || + Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS || + (Load->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && + Op.getValueType().getVectorNumElements() > 4))) { SDValue MergedValues[2] = { SplitVectorLoad(Op, DAG), Load->getChain() diff --git 
a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll index 0153524..1486c4d 100644 --- a/test/CodeGen/R600/load.ll +++ b/test/CodeGen/R600/load.ll @@ -1,16 +1,15 @@ -; RUN: llc %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK %s -; RUN: llc %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s +; RUN: llc %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s +; RUN: llc %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s +; RUN: llc %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s ;======; ; GLOBAL ADDRESS SPACE ;======; ; Load an i8 value from the global address space. -; R600-CHECK-LABEL: @load_i8 +; FUNC-LABEL: @load_i8 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}} -; SI-CHECK-LABEL: @load_i8 ; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { %1 = load i8 addrspace(1)* %in @@ -19,13 +18,12 @@ define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { ret void } -; R600-CHECK-LABEL: @load_i8_sext +; FUNC-LABEL: @load_i8_sext ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]] ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]] ; R600-CHECK: 24 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]] ; R600-CHECK: 24 -; SI-CHECK-LABEL: @load_i8_sext ; SI-CHECK: BUFFER_LOAD_SBYTE define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { entry: @@ -35,10 +33,9 @@ entry: ret void } -; R600-CHECK-LABEL: @load_v2i8 +; FUNC-LABEL: @load_v2i8 ; R600-CHECK: VTX_READ_8 ; R600-CHECK: VTX_READ_8 -; SI-CHECK-LABEL: @load_v2i8 ; SI-CHECK: BUFFER_LOAD_UBYTE ; SI-CHECK: BUFFER_LOAD_UBYTE define void @load_v2i8(2 x i32 addrspace(1)* %out, 2 x i8 addrspace(1)* %in) { @@ -49,7 +46,7 @@ entry: ret void } 
-; R600-CHECK-LABEL: @load_v2i8_sext +; FUNC-LABEL: @load_v2i8_sext ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]] ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]] ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]] @@ -60,7 +57,6 @@ entry: ; R600-CHECK-DAG: 24 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]] ; R600-CHECK-DAG: 24 -; SI-CHECK-LABEL: @load_v2i8_sext ; SI-CHECK: BUFFER_LOAD_SBYTE ; SI-CHECK: BUFFER_LOAD_SBYTE define void @load_v2i8_sext(2 x i32