--- lib/Target/R600/AMDGPUCallingConv.td | 15 ++++- lib/Target/R600/R600ISelLowering.cpp | 55 ++++++---------- lib/Target/R600/R600Instructions.td | 2 +- lib/Target/R600/R600Intrinsics.td | 8 ++- test/CodeGen/R600/big_alu.ll | 85 ++++++++++++------------- test/CodeGen/R600/complex-folding.ll | 9 +-- test/CodeGen/R600/floor.ll | 14 ++-- test/CodeGen/R600/fmad.ll | 20 +++--- test/CodeGen/R600/fmax.ll | 13 ++-- test/CodeGen/R600/fmin.ll | 13 ++-- test/CodeGen/R600/llvm.AMDGPU.mul.ll | 16 ++--- test/CodeGen/R600/llvm.cos.ll | 12 ++-- test/CodeGen/R600/llvm.pow.ll | 16 ++--- test/CodeGen/R600/llvm.sin.ll | 12 ++-- test/CodeGen/R600/load-input-fold.ll | 29 ++++----- test/CodeGen/R600/max-literals.ll | 25 ++++---- test/CodeGen/R600/pv-packing.ll | 25 +++----- test/CodeGen/R600/pv.ll | 61 +++++++++--------- test/CodeGen/R600/r600-encoding.ll | 15 +++-- test/CodeGen/R600/r600-export-fix.ll | 14 ++-- test/CodeGen/R600/r600cfg.ll | 14 ++-- test/CodeGen/R600/reciprocal.ll | 13 ++-- test/CodeGen/R600/rv7x0_count3.ll | 19 +++--- test/CodeGen/R600/schedule-fs-loop-nested-if.ll | 13 ++-- test/CodeGen/R600/schedule-vs-if-nested-loop.ll | 14 ++-- test/CodeGen/R600/shared-op-cycle.ll | 16 ++--- test/CodeGen/R600/swizzle-export.ll | 32 ++++------ test/CodeGen/R600/tex-clause-antidep.ll | 13 ++-- test/CodeGen/R600/texture-input-merge.ll | 13 ++-- 29 files changed, 285 insertions(+), 321 deletions(-)
diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td index a194e6d..bb7d6f8 100644 --- a/lib/Target/R600/AMDGPUCallingConv.td +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -42,6 +42,17 @@ def CC_SI : CallingConv<[ ]>; +// Calling convention for R600 +def CC_R600 : CallingConv<[ + CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[ + T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW, + T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW, + T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW, + T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW, + T30_XYZW, T31_XYZW, T32_XYZW + ]>>> +]>; + // Calling convention for compute kernels def CC_AMDGPU_Kernel : CallingConv<[ CCIfType<[v4i32, v4f32], CCAssignToStack <16, 16>>, @@ -61,5 +72,7 @@ def CC_AMDGPU : CallingConv<[ "State.getMachineFunction().getInfo<R600MachineFunctionInfo>()->" "ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>, CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"# - ".getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>> + ".getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>, + CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"# + ".getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_R600>> ]>; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 3c2e388..deab985 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -554,51 +554,23 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const SDLoc DL(Op); switch(IntrinsicID) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); - case AMDGPUIntrinsic::R600_load_input: { - int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - MRI.addLiveIn(Reg); - return DAG.getCopyFromReg(DAG.getEntryNode(), - SDLoc(DAG.getEntryNode()), Reg, VT); - } - - case AMDGPUIntrinsic::R600_interp_input: { + case AMDGPUIntrinsic::R600_interp_xy: + case AMDGPUIntrinsic::R600_interp_zw: { int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue(); MachineSDNode *interp; - if (ijb < 0) { - const MachineFunction &MF = DAG.getMachineFunction(); - const R600InstrInfo *TII = - static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo()); - interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL, - MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32)); - return DAG.getTargetExtractSubreg( - TII->getRegisterInfo().getSubRegFromChannel(slot % 4), - DL, MVT::f32, SDValue(interp, 0)); - } + SDValue RegisterINode = Op.getOperand(2); + SDValue RegisterJNode = Op.getOperand(3); - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb); - unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1); - MRI.addLiveIn(RegisterI); - MRI.addLiveIn(RegisterJ); - SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(), - SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32); - SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(), - SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32); - - if (slot % 4 < 2) + if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy) interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL, - MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32), + MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32), RegisterJNode, RegisterINode); else interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL, - MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32), + MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32), RegisterJNode, RegisterINode); - return SDValue(interp, slot % 2); + return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32, + SDValue(interp, 0), SDValue(interp, 1)); } case AMDGPUIntrinsic::R600_tex: case AMDGPUIntrinsic::R600_texc: @@ -1339,6 +1311,8 @@ SDValue R600TargetLowering::LowerFormalArguments( SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); + MachineFunction &MF = DAG.getMachineFunction(); + unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType; AnalyzeFormalArguments(CCInfo, Ins); @@ -1346,6 +1320,13 @@ SDValue R600TargetLowering::LowerFormalArguments( CCValAssign &VA = ArgLocs[i]; EVT VT = VA.getLocVT(); + if (ShaderType != ShaderType::COMPUTE) { + unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass); + SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT); + InVals.push_back(Register); + continue; + } + PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), AMDGPUAS::CONSTANT_BUFFER_0); diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 63f95f6..7fc07a6 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -411,7 +411,7 @@ def INTERP_VEC_LOAD : AMDGPUShaderInst < (outs R600_Reg128:$dst), (ins i32imm:$src0), "INTERP_LOAD $src0 : $dst", - []>; + [(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>; def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> { let bank_swizzle = 5; diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td index b5cb369..cd0b419 100644 --- a/lib/Target/R600/R600Intrinsics.td +++ b/lib/Target/R600/R600Intrinsics.td @@ -39,10 +39,14 @@ let TargetPrefix = "R600", isTarget = 1 in { llvm_i32_ty // coord_type_w ], [IntrNoMem]>; - def int_R600_load_input : - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; def int_R600_interp_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_R600_interp_const : + Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_R600_interp_xy : + Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; +def int_R600_interp_zw : + Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_R600_load_texbuf : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_R600_tex : TextureIntrinsicFloatInput; diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll index 75f2458..6b68376 100644 --- a/test/CodeGen/R600/big_alu.ll +++ b/test/CodeGen/R600/big_alu.ll @@ -4,54 +4,54 @@ ;This test ensures that R600 backend can handle ifcvt properly ;and do not generate ALU clauses with more than 128 instructions. -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7, <4 x float> inreg %reg8, <4 x float> inreg %reg9) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 0) - %1 = call float @llvm.R600.load.input(i32 1) - %2 = call float @llvm.R600.load.input(i32 2) - %3 = call float @llvm.R600.load.input(i32 3) - %4 = call float @llvm.R600.load.input(i32 4) - %5 = call float @llvm.R600.load.input(i32 36) - %6 = call float @llvm.R600.load.input(i32 32) + %0 = extractelement <4 x float> %reg0, i32 0 + %1 = extractelement <4 x float> %reg0, i32 1 + %2 = extractelement <4 x float> %reg0, i32 2 + %3 = extractelement <4 x float> %reg0, i32 3 + %4 = extractelement <4 x float> %reg1, i32 0 + %5 = extractelement <4 x float> %reg9, i32 0 + %6 = extractelement <4 x float> %reg8, i32 0 %7 = fcmp ugt float %6, 0.000000e+00 %8 = select i1 %7, float %4, float %5 - %9 = call float @llvm.R600.load.input(i32 5) - %10 = call float @llvm.R600.load.input(i32 37) - %11 = call float @llvm.R600.load.input(i32 32) + %9 = extractelement <4 x float> %reg1, i32 1 + %10 = extractelement <4 x float> %reg9, i32 1 + %11 = extractelement <4 x float> %reg8, i32 0 %12 = fcmp ugt float %11, 0.000000e+00 %13 = select i1 %12, float %9, float %10 - %14 = call float @llvm.R600.load.input(i32 6) - %15 = call float @llvm.R600.load.input(i32 38) - %16 = call float @llvm.R600.load.input(i32 32) + %14 = extractelement <4 x float> %reg1, i32 2 + %15 = extractelement <4 x float> %reg9, i32 2 + %16 = extractelement <4 x float> %reg8, i32 0 %17 = fcmp ugt float %16, 0.000000e+00 %18 = select i1 %17, float %14, float %15 - %19 = call float @llvm.R600.load.input(i32 7) - %20 = call float @llvm.R600.load.input(i32 39) - %21 = call float @llvm.R600.load.input(i32 32) - %22 = call float @llvm.R600.load.input(i32 8) - %23 = call float @llvm.R600.load.input(i32 9) - %24 = call float @llvm.R600.load.input(i32 10) - %25 = call float @llvm.R600.load.input(i32 11) - %26 = call float @llvm.R600.load.input(i32 12) - %27 = call float @llvm.R600.load.input(i32 13) - %28 = call float @llvm.R600.load.input(i32 14) - %29 = call float @llvm.R600.load.input(i32 15) - %30 = call float @llvm.R600.load.input(i32 16) - %31 = call float @llvm.R600.load.input(i32 17) - %32 = call float @llvm.R600.load.input(i32 18) - %33 = call float @llvm.R600.load.input(i32 19) - %34 = call float @llvm.R600.load.input(i32 20) - %35 = call float @llvm.R600.load.input(i32 21) - %36 = call float @llvm.R600.load.input(i32 22) - %37 = call float @llvm.R600.load.input(i32 23) - %38 = call float @llvm.R600.load.input(i32 24) - %39 = call float @llvm.R600.load.input(i32 25) - %40 = call float @llvm.R600.load.input(i32 26) - %41 = call float @llvm.R600.load.input(i32 27) - %42 = call float @llvm.R600.load.input(i32 28) - %43 = call float @llvm.R600.load.input(i32 29) - %44 = call float @llvm.R600.load.input(i32 30) - %45 = call float @llvm.R600.load.input(i32 31) + %19 = extractelement <4 x float> %reg1, i32 3 + %20 = extractelement <4 x float> %reg9, i32 3 + %21 = extractelement <4 x float> %reg8, i32 0 + %22 = extractelement <4 x float> %reg2, i32 0 + %23 = extractelement <4 x float> %reg2, i32 1 + %24 = extractelement <4 x float> %reg2, i32 2 + %25 = extractelement <4 x float> %reg2, i32 3 + %26 = extractelement <4 x float> %reg3, i32 0 + %27 = extractelement <4 x float> %reg3, i32 1 + %28 = extractelement <4 x float> %reg3, i32 2 + %29 = extractelement <4 x float> %reg3, i32 3 + %30 = extractelement <4 x float> %reg4, i32 0 + %31 = extractelement <4 x float> %reg4, i32 1 + %32 = extractelement <4 x float> %reg4, i32 2 + %33 = extractelement <4 x float> %reg4, i32 3 + %34 = extractelement <4 x float> %reg5, i32 0 + %35 = extractelement <4 x float> %reg5, i32 1 + %36 = extractelement <4 x float> %reg5, i32 2 + %37 = extractelement <4 x float> %reg5, i32 3 + %38 = extractelement <4 x float> %reg6, i32 0 + %39 = extractelement <4 x float> %reg6, i32 1 + %40 = extractelement <4 x float> %reg6, i32 2 + %41 = extractelement <4 x float> %reg6, i32 3 + %42 = extractelement <4 x float> %reg7, i32 0 + %43 = extractelement <4 x float> %reg7, i32 1 + %44 = extractelement <4 x float> %reg7, i32 2 + %45 = extractelement <4 x float> %reg7, i32 3 %46 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) %47 = extractelement <4 x float> %46, i32 0 %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) @@ -1147,9 +1147,6 @@ ENDIF178: ; preds = %ENDIF175, %IF179 } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - -; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readnone diff --git a/test/CodeGen/R600/complex-folding.ll b/test/CodeGen/R600/complex-folding.ll index 8dcd450..99f0d99 100644 --- a/test/CodeGen/R600/complex-folding.ll +++ b/test/CodeGen/R600/complex-folding.ll @@ -2,9 +2,9 @@ ; CHECK: @main ; CHECK-NOT: MOV -define void @main() { +define void @main(<4 x float> inreg %reg0) #0 { entry: - %0 = call float @llvm.R600.load.input(i32 0) + %0 = extractelement <4 x float> %reg0, i32 0 %1 = call float @fabs(float %0) %2 = fptoui float %1 to i32 %3 = bitcast i32 %2 to float @@ -13,6 +13,7 @@ entry: ret void } -declare float @llvm.R600.load.input(i32) readnone declare float @fabs(float ) readnone -declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) \ No newline at end of file +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } \ No newline at end of file diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll index 877d69a..67e86c4 100644 --- a/test/CodeGen/R600/floor.ll +++ b/test/CodeGen/R600/floor.ll @@ -2,15 +2,15 @@ ;CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = call float @floor(float %r0) - call void @llvm.AMDGPU.store.output(float %r1, i32 0) + %vec = insertelement <4 x float> undef, float %r1, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @floor(float) readonly +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } \ No newline at end of file diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll index 75e65d8..935e351 100644 --- a/test/CodeGen/R600/fmad.ll +++ b/test/CodeGen/R600/fmad.ll @@ -2,18 +2,18 @@ ;CHECK: MULADD_IEEE * {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) - %r2 = call float @llvm.R600.load.input(i32 2) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 + %r2 = extractelement <4 x float> %reg0, i32 2 %r3 = fmul float %r0, %r1 - %r4 = fadd float %r3, %r2 - call void @llvm.AMDGPU.store.output(float %r4, i32 0) + %r4 = fadd float %r3, %r2 + %vec = insertelement <4 x float> undef, float %r4, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @fabs(float ) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } \ No newline at end of file diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll index be25c9c..d7127f4 100644 --- a/test/CodeGen/R600/fmax.ll +++ b/test/CodeGen/R600/fmax.ll @@ -2,15 +2,16 @@ ;CHECK: MAX * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 %r2 = fcmp oge float %r0, %r1 %r3 = select i1 %r2, float %r0, float %r1 - call void @llvm.AMDGPU.store.output(float %r3, i32 0) + %vec = insertelement <4 x float> undef, float %r3, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" } \ No newline at end of file diff --git a/test/CodeGen/R600/fmin.ll b/test/CodeGen/R600/fmin.ll index 5e34b7c..defa8c0 100644 --- a/test/CodeGen/R600/fmin.ll +++ b/test/CodeGen/R600/fmin.ll @@ -2,15 +2,16 @@ ;CHECK: MIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 %r2 = fcmp uge float %r0, %r1 %r3 = select i1 %r2, float %r1, float %r0 - call void @llvm.AMDGPU.store.output(float %r3, i32 0) + %vec = insertelement <4 x float> undef, float %r3, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" } \ No newline at end of file diff --git a/test/CodeGen/R600/llvm.AMDGPU.mul.ll b/test/CodeGen/R600/llvm.AMDGPU.mul.ll index cc0732b..83b56a5 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.mul.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.mul.ll @@ -2,16 +2,16 @@ ;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 %r2 = call float @llvm.AMDGPU.mul( float %r0, float %r1) - call void @llvm.AMDGPU.store.output(float %r2, i32 0) + %vec = insertelement <4 x float> undef, float %r2, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @llvm.AMDGPU.mul(float ,float ) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } \ No newline at end of file diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll index 8fb4559..aaf2305 100644 --- a/test/CodeGen/R600/llvm.cos.ll +++ b/test/CodeGen/R600/llvm.cos.ll @@ -5,15 +5,15 @@ ;CHECK: ADD * ;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = call float @llvm.cos.f32(float %r0) - call void @llvm.AMDGPU.store.output(float %r1, i32 0) + %vec = insertelement <4 x float> undef, float %r1, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } declare float @llvm.cos.f32(float) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll index 0f51cf4..b587d2b 100644 --- a/test/CodeGen/R600/llvm.pow.ll +++ b/test/CodeGen/R600/llvm.pow.ll @@ -4,16 +4,16 @@ ;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}} ;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) - %r1 = call float @llvm.R600.load.input(i32 1) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 %r2 = call float @llvm.pow.f32( float %r0, float %r1) - call void @llvm.AMDGPU.store.output(float %r2, i32 0) + %vec = insertelement <4 x float> undef, float %r2, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) - declare float @llvm.pow.f32(float ,float ) readonly +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll index e94c2ba..9eb9983 100644 --- a/test/CodeGen/R600/llvm.sin.ll +++ b/test/CodeGen/R600/llvm.sin.ll @@ -5,15 +5,15 @@ ;CHECK: ADD * ;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = call float @llvm.sin.f32( float %r0) - call void @llvm.AMDGPU.store.output(float %r1, i32 0) + %vec = insertelement <4 x float> undef, float %r1, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } declare float @llvm.sin.f32(float) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare float @llvm.R600.load.input(i32) readnone - -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/load-input-fold.ll b/test/CodeGen/R600/load-input-fold.ll index aff2a6e..ca86d0e 100644 --- a/test/CodeGen/R600/load-input-fold.ll +++ b/test/CodeGen/R600/load-input-fold.ll @@ -1,20 +1,20 @@ ;RUN: llc < %s -march=r600 -mcpu=cayman ;REQUIRES: asserts -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) - %4 = call float @llvm.R600.load.input(i32 8) - %5 = call float @llvm.R600.load.input(i32 9) - %6 = call float @llvm.R600.load.input(i32 10) - %7 = call float @llvm.R600.load.input(i32 11) - %8 = call float @llvm.R600.load.input(i32 12) - %9 = call float @llvm.R600.load.input(i32 13) - %10 = call float @llvm.R600.load.input(i32 14) - %11 = call float @llvm.R600.load.input(i32 15) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = extractelement <4 x float> %reg2, i32 0 + %5 = extractelement <4 x float> %reg2, i32 1 + %6 = extractelement <4 x float> %reg2, i32 2 + %7 = extractelement <4 x float> %reg2, i32 3 + %8 = extractelement <4 x float> %reg3, i32 0 + %9 = extractelement <4 x float> %reg3, i32 1 + %10 = extractelement <4 x float> %reg3, i32 2 + %11 = extractelement <4 x float> %reg3, i32 3 %12 = load <4 x float> addrspace(8)* null %13 = extractelement <4 x float> %12, i32 0 %14 = fmul float %0, %13 @@ -96,9 +96,6 @@ main_body: } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - -; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readonly diff --git a/test/CodeGen/R600/max-literals.ll b/test/CodeGen/R600/max-literals.ll index c31b7c0..65a6d2b 100644 --- a/test/CodeGen/R600/max-literals.ll +++ b/test/CodeGen/R600/max-literals.ll @@ -3,13 +3,13 @@ ; CHECK: @main ; CHECK: ADD * -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) - %4 = call float @llvm.R600.load.input(i32 8) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = extractelement <4 x float> %reg2, i32 0 %5 = fadd float %0, 2.0 %6 = fadd float %1, 3.0 %7 = fadd float %2, 4.0 @@ -32,13 +32,13 @@ main_body: ; CHECK: @main ; CHECK-NOT: ADD * -define void @main2() #0 { +define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) - %4 = call float @llvm.R600.load.input(i32 8) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = extractelement <4 x float> %reg2, i32 0 %5 = fadd float %0, 2.0 %6 = fadd float %1, 3.0 %7 = fadd float %2, 4.0 @@ -59,7 +59,6 @@ main_body: } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) diff --git a/test/CodeGen/R600/pv-packing.ll b/test/CodeGen/R600/pv-packing.ll index 03fc204..e5615b9 100644 --- a/test/CodeGen/R600/pv-packing.ll +++ b/test/CodeGen/R600/pv-packing.ll @@ -3,17 +3,17 @@ ;CHECK: DOT4 T{{[0-9]\.X}} ;CHECK: MULADD_IEEE * T{{[0-9]\.W}} -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 8) - %4 = call float @llvm.R600.load.input(i32 9) - %5 = call float @llvm.R600.load.input(i32 10) - %6 = call float @llvm.R600.load.input(i32 12) - %7 = call float @llvm.R600.load.input(i32 13) - %8 = call float @llvm.R600.load.input(i32 14) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg2, i32 0 + %4 = extractelement <4 x float> %reg2, i32 1 + %5 = extractelement <4 x float> %reg2, i32 2 + %6 = extractelement <4 x float> %reg3, i32 0 + %7 = extractelement <4 x float> %reg3, i32 1 + %8 = extractelement <4 x float> %reg3, i32 2 %9 = load <4 x float> addrspace(8)* null %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %11 = call float @llvm.AMDGPU.dp4(<4 x float> %9, <4 x float> %9) @@ -36,9 +36,6 @@ main_body: } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - -; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 @@ -46,5 +43,3 @@ declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } attributes #1 = { readnone } -attributes #2 = { readonly } -attributes #3 = { nounwind readonly } diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll index 6d9396c..5a930b2 100644 --- a/test/CodeGen/R600/pv.ll +++ b/test/CodeGen/R600/pv.ll @@ -3,36 +3,36 @@ ;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED) ;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) - %4 = call float @llvm.R600.load.input(i32 8) - %5 = call float @llvm.R600.load.input(i32 9) - %6 = call float @llvm.R600.load.input(i32 10) - %7 = call float @llvm.R600.load.input(i32 11) - %8 = call float @llvm.R600.load.input(i32 12) - %9 = call float @llvm.R600.load.input(i32 13) - %10 = call float @llvm.R600.load.input(i32 14) - %11 = call float @llvm.R600.load.input(i32 15) - %12 = call float @llvm.R600.load.input(i32 16) - %13 = call float @llvm.R600.load.input(i32 17) - %14 = call float @llvm.R600.load.input(i32 18) - %15 = call float @llvm.R600.load.input(i32 19) - %16 = call float @llvm.R600.load.input(i32 20) - %17 = call float @llvm.R600.load.input(i32 21) - %18 = call float @llvm.R600.load.input(i32 22) - %19 = call float @llvm.R600.load.input(i32 23) - %20 = call float @llvm.R600.load.input(i32 24) - %21 = call float @llvm.R600.load.input(i32 25) - %22 = call float @llvm.R600.load.input(i32 26) - %23 = call float @llvm.R600.load.input(i32 27) - %24 = call float @llvm.R600.load.input(i32 28) - %25 = call float @llvm.R600.load.input(i32 29) - %26 = call float @llvm.R600.load.input(i32 30) - %27 = call float @llvm.R600.load.input(i32 31) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 + %4 = extractelement <4 x float> %reg2, i32 0 + %5 = extractelement <4 x float> %reg2, i32 1 + %6 = extractelement <4 x float> %reg2, i32 2 + %7 = extractelement <4 x float> %reg2, i32 3 + %8 = extractelement <4 x float> %reg3, i32 0 + %9 = extractelement <4 x float> %reg3, i32 1 + %10 = extractelement <4 x float> %reg3, i32 2 + %11 = extractelement <4 x float> %reg3, i32 3 + %12 = extractelement <4 x float> %reg4, i32 0 + %13 = extractelement <4 x float> %reg4, i32 1 + %14 = extractelement <4 x float> %reg4, i32 2 + %15 = extractelement <4 x float> %reg4, i32 3 + %16 = extractelement <4 x float> %reg5, i32 0 + %17 = extractelement <4 x float> %reg5, i32 1 + %18 = extractelement <4 x float> %reg5, i32 2 + %19 = extractelement <4 x float> %reg5, i32 3 + %20 = extractelement <4 x float> %reg6, i32 0 + %21 = extractelement <4 x float> %reg6, i32 1 + %22 = extractelement <4 x float> %reg6, i32 2 + %23 = extractelement <4 x float> %reg6, i32 3 + %24 = extractelement <4 x float> %reg7, i32 0 + %25 = extractelement <4 x float> %reg7, i32 1 + %26 = extractelement <4 x float> %reg7, i32 2 + %27 = extractelement <4 x float> %reg7, i32 3 %28 = load <4 x float> addrspace(8)* null %29 = extractelement <4 x float> %28, i32 0 %30 = fmul float %0, %29 @@ -219,9 +219,6 @@ main_body: } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - -; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readonly diff --git a/test/CodeGen/R600/r600-encoding.ll b/test/CodeGen/R600/r600-encoding.ll index 6ef3c31..b760c88 100644 --- a/test/CodeGen/R600/r600-encoding.ll +++ b/test/CodeGen/R600/r600-encoding.ll @@ -10,15 +10,16 @@ ; R600-CHECK: @test ; R600-CHECK: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}] -define void @test() { +define void @test(<4 x float> inreg %reg0) #0 { entry: - %0 = call float @llvm.R600.load.input(i32 0) - %1 = call float @llvm.R600.load.input(i32 1) - %2 = fmul float %0, %1 - call void @llvm.AMDGPU.store.output(float %2, i32 0) + %r0 = extractelement <4 x float> %reg0, i32 0 + %r1 = extractelement <4 x float> %reg0, i32 1 + %r2 = fmul float %r0, %r1 + %vec = insertelement <4 x float> undef, float %r2, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare void @llvm.AMDGPU.store.output(float, i32) +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/r600-export-fix.ll b/test/CodeGen/R600/r600-export-fix.ll index 78c703b..73bc063 100644 --- a/test/CodeGen/R600/r600-export-fix.ll +++ b/test/CodeGen/R600/r600-export-fix.ll @@ -10,12 +10,12 @@ ;CHECK: EXPORT T{{[0-9]}}.0000 -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %5 = extractelement <4 x float> %4, i32 0 %6 = fmul float %5, %0 @@ -137,10 +137,6 @@ main_body: ret void } -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } -attributes #1 = { readnone } diff --git a/test/CodeGen/R600/r600cfg.ll b/test/CodeGen/R600/r600cfg.ll index 895ad5e..6dee3ef 100644 --- a/test/CodeGen/R600/r600cfg.ll +++ b/test/CodeGen/R600/r600cfg.ll @@ -1,12 +1,12 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood ;REQUIRES: asserts -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = bitcast float %0 to i32 %5 = icmp eq i32 %4, 0 %6 = sext i1 %5 to i32 @@ -113,12 +113,8 @@ ENDIF48: ; preds = %LOOP47 br label %LOOP47 } -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32) declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } -attributes #1 = { readnone } diff --git a/test/CodeGen/R600/reciprocal.ll b/test/CodeGen/R600/reciprocal.ll index 2783929..b4ac47a 100644 --- a/test/CodeGen/R600/reciprocal.ll +++ b/test/CodeGen/R600/reciprocal.ll @@ -2,15 +2,14 @@ ;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define void @test() { - %r0 = call float @llvm.R600.load.input(i32 0) +define void @test(<4 x float> inreg %reg0) #0 { + %r0 = extractelement <4 x float> %reg0, i32 0 %r1 = fdiv float 1.0, %r0 - call void @llvm.AMDGPU.store.output(float %r1, i32 0) + %vec = insertelement <4 x float> undef, float %r1, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0) ret void } -declare float @llvm.R600.load.input(i32) readnone +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -declare void @llvm.AMDGPU.store.output(float, i32) - -declare float @llvm.AMDGPU.rcp(float ) readnone +attributes #0 = { "ShaderType"="0" } diff --git a/test/CodeGen/R600/rv7x0_count3.ll b/test/CodeGen/R600/rv7x0_count3.ll index 474d6ba..c3fd923 100644 --- a/test/CodeGen/R600/rv7x0_count3.ll +++ b/test/CodeGen/R600/rv7x0_count3.ll @@ -1,12 +1,12 @@ ; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rv710 | FileCheck %s -; CHECK: TEX 9 @4 ; encoding: [0x04,0x00,0x00,0x00,0x00,0x04,0x88,0x80] +; CHECK: TEX 9 @6 ; encoding: [0x06,0x00,0x00,0x00,0x00,0x04,0x88,0x80] -define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { - %1 = call float @llvm.R600.load.input(i32 4) - %2 = call float @llvm.R600.load.input(i32 5) - %3 = call float @llvm.R600.load.input(i32 6) - %4 = call float @llvm.R600.load.input(i32 7) +define void @test(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { + %1 = extractelement <4 x float> %reg1, i32 0 + %2 = extractelement <4 x float> %reg1, i32 1 + %3 = extractelement <4 x float> %reg1, i32 2 + %4 = extractelement <4 x float> %reg1, i32 3 %5 = insertelement <4 x float> undef, float %1, i32 0 %6 = insertelement <4 x float> %5, float %2, i32 1 %7 = insertelement <4 x float> %6, float %3, i32 2 @@ -36,9 +36,6 @@ define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - - declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -attributes #1 = { readnone } + +attributes #0 = { "ShaderType"="1" } diff --git a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll index 2a66094..11e8f51 100644 --- a/test/CodeGen/R600/schedule-fs-loop-nested-if.ll +++ b/test/CodeGen/R600/schedule-fs-loop-nested-if.ll @@ -1,12 +1,12 @@ ;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs ;REQUIRES: asserts -define void @main() { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #1 { main_body: - %0 = call float @llvm.R600.interp.input(i32 0, i32 0) - %1 = call float @llvm.R600.interp.input(i32 1, i32 0) - %2 = call float @llvm.R600.interp.input(i32 2, i32 0) - %3 = call float @llvm.R600.interp.input(i32 3, i32 0) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = fcmp ult float %1, 0.000000e+00 %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00 %6 = fsub float -0.000000e+00, %5 @@ -74,10 +74,9 @@ ELSE17: ; preds = %ELSE br label %ENDIF } -declare float @llvm.R600.interp.input(i32, i32) #0 - declare float @llvm.AMDIL.clamp.(float, float, float) #0 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { readnone } +attributes #1 = { "ShaderType"="1" } diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll index 44b7c2f..33b20d3 100644 --- a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll +++ b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll @@ -1,12 +1,12 @@ ;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched ;REQUIRES: asserts -define void @main() { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = fcmp ult float %0, 0.000000e+00 %5 = select i1 %4, float 1.000000e+00, float 0.000000e+00 %6 = fsub float -0.000000e+00, %5 @@ -127,8 +127,6 @@ ENDIF19: ; preds = %ENDIF16 br label %LOOP } -declare float @llvm.R600.load.input(i32) #0 - declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -attributes #0 = { readnone } +attributes #0 = { "ShaderType"="1" } diff --git a/test/CodeGen/R600/shared-op-cycle.ll b/test/CodeGen/R600/shared-op-cycle.ll index c49b5f4..0484fc9 100644 --- a/test/CodeGen/R600/shared-op-cycle.ll +++ b/test/CodeGen/R600/shared-op-cycle.ll @@ -4,10 +4,10 @@ ; CHECK: MULADD_IEEE * ; CHECK-NOT: MULADD_IEEE * -define void @main() { - %w0 = call float @llvm.R600.load.input(i32 3) - %w1 = call float @llvm.R600.load.input(i32 7) - %w2 = call float @llvm.R600.load.input(i32 11) +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 { + %w0 = extractelement <4 x float> %reg0, i32 3 + %w1 = extractelement <4 x float> %reg1, i32 3 + %w2 = extractelement <4 x float> %reg2, i32 3 %sq0 = fmul float %w0, %w0 %r0 = fadd float %sq0, 2.0 %sq1 = fmul float %w1, %w1 @@ -24,15 +24,9 @@ define void @main() { } ; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - -; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 - declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } -attributes #1 = { readnone } -attributes #2 = { readonly } -attributes #3 = { nounwind readonly } \ No newline at end of file +attributes #1 = { readnone } \ No newline at end of file diff --git a/test/CodeGen/R600/swizzle-export.ll b/test/CodeGen/R600/swizzle-export.ll index 9a58f66..16c3f19 100644 --- a/test/CodeGen/R600/swizzle-export.ll +++ b/test/CodeGen/R600/swizzle-export.ll @@ -6,12 +6,12 @@ ;EG-CHECK: EXPORT T{{[0-9]+}}.XXWX ;EG-CHECK: EXPORT T{{[0-9]+}}.XXXW -define void @main() #0 { +define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = extractelement <4 x float> %reg1, i32 2 + %3 = extractelement <4 x float> %reg1, i32 3 %4 = load <4 x float> addrspace(8)* null %5 = extractelement <4 x float> %4, i32 1 %6 = load <4 x float> addrspace(8)* null @@ -96,12 +96,12 @@ main_body: ; EG-CHECK: T{{[0-9]+}}.XY__ ; EG-CHECK: T{{[0-9]+}}.YXZ0 -define void @main2() #0 { +define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 { main_body: - %0 = call float @llvm.R600.load.input(i32 4) - %1 = call float @llvm.R600.load.input(i32 5) - %2 = call float @llvm.R600.load.input(i32 6) - %3 = call float @llvm.R600.load.input(i32 7) + %0 = extractelement <4 x float> %reg1, i32 0 + %1 = extractelement <4 x float> %reg1, i32 1 + %2 = fadd float %0, 2.5 + %3 = fmul float %1, 3.5 %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %5 = extractelement <4 x float> %4, i32 0 %6 = call float @llvm.cos.f32(float %5) @@ -109,8 +109,8 @@ main_body: %8 = extractelement <4 x float> %7, i32 0 %9 = load <4 x float> addrspace(8)* null %10 = extractelement <4 x float> %9, i32 1 - %11 = insertelement <4 x float> undef, float %0, i32 0 - %12 = insertelement <4 x float> %11, float %1, i32 1 + %11 = insertelement <4 x float> undef, float %2, i32 0 + %12 = insertelement <4 x float> %11, float %3, i32 1 call void @llvm.R600.store.swizzle(<4 x float> %12, i32 60, i32 1) %13 = insertelement <4 x float> undef, float %6, i32 0 %14 = insertelement <4 x float> %13, float %8, i32 1 @@ -120,14 +120,10 @@ main_body: ret void } -; Function Attrs: readnone -declare float @llvm.R600.load.input(i32) #1 - ; Function Attrs: nounwind readonly -declare float @llvm.cos.f32(float) #2 +declare float @llvm.cos.f32(float) #1 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } -attributes #1 = { readnone } -attributes #2 = { nounwind readonly } +attributes #1 = { nounwind readonly } diff --git a/test/CodeGen/R600/tex-clause-antidep.ll b/test/CodeGen/R600/tex-clause-antidep.ll index 5979609..cbb9c50 100644 --- a/test/CodeGen/R600/tex-clause-antidep.ll +++ b/test/CodeGen/R600/tex-clause-antidep.ll @@ -3,11 +3,11 @@ ;CHECK: TEX ;CHECK-NEXT: ALU -define void @test() { - %1 = call float @llvm.R600.load.input(i32 0) - %2 = call float @llvm.R600.load.input(i32 1) - %3 = call float @llvm.R600.load.input(i32 2) - %4 = call float @llvm.R600.load.input(i32 3) +define void @test(<4 x float> inreg %reg0) #0 { + %1 = extractelement <4 x float> %reg0, i32 0 + %2 = extractelement <4 x float> %reg0, i32 1 + %3 = extractelement <4 x float> %reg0, i32 2 + %4 = extractelement <4 x float> %reg0, i32 3 %5 = insertelement <4 x float> undef, float %1, i32 0 %6 = insertelement <4 x float> %5, float %2, i32 1 %7 = insertelement <4 x float> %6, float %3, i32 2 @@ -19,6 +19,7 @@ define void @test() { ret void } -declare float @llvm.R600.load.input(i32) readnone declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="1" } \ No newline at end of file diff --git a/test/CodeGen/R600/texture-input-merge.ll b/test/CodeGen/R600/texture-input-merge.ll index 5d0ecef..789538a 100644 --- a/test/CodeGen/R600/texture-input-merge.ll +++ b/test/CodeGen/R600/texture-input-merge.ll @@ -2,11 +2,11 @@ ;CHECK-NOT: MOV -define void @test() { - %1 = call float @llvm.R600.load.input(i32 0) - %2 = call float @llvm.R600.load.input(i32 1) - %3 = call float @llvm.R600.load.input(i32 2) - %4 = call float @llvm.R600.load.input(i32 3) +define void @test(<4 x float> inreg %reg0) #0 { + %1 = extractelement <4 x float> %reg0, i32 0 + %2 = extractelement <4 x float> %reg0, i32 1 + %3 = extractelement <4 x float> %reg0, i32 2 + %4 = extractelement <4 x float> %reg0, i32 3 %5 = fmul float %1, 3.0 %6 = fmul float %2, 3.0 %7 = fmul float %3, 3.0 @@ -25,6 +25,7 @@ define void @test() { ret void } -declare float @llvm.R600.load.input(i32) readnone declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="1" } \ No newline at end of file -- 1.8.3.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev