https://github.com/mariusz-sikora-at-amd updated https://github.com/llvm/llvm-project/pull/130041
>From b62b5fb5137585872f7a10950bc08f28383e3eea Mon Sep 17 00:00:00 2001 From: Ivan Kosarev <ivan.kosa...@amd.com> Date: Mon, 3 Mar 2025 05:34:48 -0500 Subject: [PATCH 1/4] [AMDGPU] Support image_bvh8_intersect_ray instruction and intrinsic. --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 11 +++ .../AMDGPU/AMDGPUInstructionSelector.cpp | 1 + .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 21 +++-- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 3 +- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 18 ++-- llvm/lib/Target/AMDGPU/MIMGInstructions.td | 32 ++++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 16 ++-- llvm/lib/Target/AMDGPU/SIInstructions.td | 8 ++ .../AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll | 87 +++++++++++++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vimage.s | 3 + llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s | 3 + .../Disassembler/AMDGPU/gfx12_dasm_vimage.txt | 3 + 12 files changed, 171 insertions(+), 35 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index a1cfcfa8e67d6..33aca93735074 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2812,6 +2812,17 @@ def int_amdgcn_image_bvh_dual_intersect_ray : llvm_v3f32_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadMem, IntrWillReturn, IntrNoCallback, IntrNoFree]>; +// <vdata>, <ray_origin>, <ray_dir> +// llvm.amdgcn.image.bvh8.intersect.ray <node_ptr>, <ray_extent>, +// <instance_mask>, <ray_origin>, +// <ray_dir>, <offset>, +// <texture_descr> +def int_amdgcn_image_bvh8_intersect_ray : + Intrinsic<[llvm_v10i32_ty, llvm_v3f32_ty, llvm_v3f32_ty], + [llvm_i64_ty, llvm_float_ty, llvm_i8_ty, llvm_v3f32_ty, + llvm_v3f32_ty, llvm_i32_ty, llvm_v4i32_ty], + [IntrReadMem, IntrWillReturn]>; + // llvm.amdgcn.permlane16.var <old> <src0> <src1> <fi> <bound_control> def int_amdgcn_permlane16_var : ClangBuiltin<"__builtin_amdgcn_permlane16_var">, Intrinsic<[llvm_i32_ty], diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 0f9096d4cec5b..20abe4bc77dd1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -4101,6 +4101,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { } case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY: + case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY: return selectBVHIntersectRayIntrinsic(I); case AMDGPU::G_SBFX: case AMDGPU::G_UBFX: diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 03d75d006c982..761770ebe1c20 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -7183,8 +7183,8 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic( return true; } -bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI, - MachineIRBuilder &B) const { +bool AMDGPULegalizerInfo::legalizeBVHDualOrBVH8IntersectRayIntrinsic( + MachineInstr &MI, MachineIRBuilder &B) const { const LLT S32 = LLT::scalar(32); const LLT V2S32 = LLT::fixed_vector(2, 32); @@ -7207,17 +7207,21 @@ bool AMDGPULegalizerInfo::legalizeBVHDualIntrinsic(MachineInstr &MI, return false; } + bool IsBVH8 = cast<GIntrinsic>(MI).getIntrinsicID() == + Intrinsic::amdgcn_image_bvh8_intersect_ray; const unsigned NumVDataDwords = 10; - const unsigned NumVAddrDwords = 12; - int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY, - AMDGPU::MIMGEncGfx12, NumVDataDwords, - NumVAddrDwords); + const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12; + int Opcode = AMDGPU::getMIMGOpcode( + IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY + : AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY, + AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords); assert(Opcode != -1); auto RayExtentInstanceMaskVec = B.buildMergeLikeInstr( V2S32, {RayExtent, B.buildAnyExt(S32, InstanceMask)}); - B.buildInstr(AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY) + B.buildInstr(IsBVH8 ? AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY + : AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY) .addDef(DstReg) .addDef(DstOrigin) .addDef(DstDir) @@ -7583,7 +7587,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, case Intrinsic::amdgcn_image_bvh_intersect_ray: return legalizeBVHIntersectRayIntrinsic(MI, B); case Intrinsic::amdgcn_image_bvh_dual_intersect_ray: - return legalizeBVHDualIntrinsic(MI, B); + case Intrinsic::amdgcn_image_bvh8_intersect_ray: + return legalizeBVHDualOrBVH8IntersectRayIntrinsic(MI, B); case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16: case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16: case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16: diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index aba1f55330913..1f4e02b0d600a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -208,7 +208,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { bool legalizeBVHIntersectRayIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const; - bool legalizeBVHDualIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const; + bool legalizeBVHDualOrBVH8IntersectRayIntrinsic(MachineInstr &MI, + MachineIRBuilder &B) const; bool legalizeLaneOp(LegalizerHelper &Helper, MachineInstr &MI, Intrinsic::ID IID) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index ca74e45338744..cde55494ec3d8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3218,9 +3218,12 @@ void AMDGPURegisterBankInfo::applyMappingImpl( return; } case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY: + case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY: case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: { - bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY; - unsigned NumMods = IsDual ? 0 : 1; // Has A16 modifier + bool IsDualOrBVH8 = + MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY || + MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY; + unsigned NumMods = IsDualOrBVH8 ? 0 : 1; // Has A16 modifier unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods; applyDefaultMapping(OpdMapper); executeInWaterfallLoop(B, MI, {LastRegOpIdx}); @@ -5014,13 +5017,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg); } case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY: + case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY: case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY: { - bool IsDual = MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY; - unsigned NumMods = IsDual ? 0 : 1; // Has A16 modifier + bool IsDualOrBVH8 = + MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY || + MI.getOpcode() == AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY; + unsigned NumMods = IsDualOrBVH8 ? 0 : 1; // Has A16 modifier unsigned LastRegOpIdx = MI.getNumExplicitOperands() - 1 - NumMods; unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize); - if (IsDual) { + if (IsDualOrBVH8) { OpdsMapping[1] = AMDGPU::getValueMapping( AMDGPU::VGPRRegBankID, MRI.getType(MI.getOperand(1).getReg()).getSizeInBits()); @@ -5038,7 +5044,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); } else { // NSA form - unsigned FirstSrcOpIdx = IsDual ? 4 : 2; + unsigned FirstSrcOpIdx = IsDualOrBVH8 ? 4 : 2; for (unsigned I = FirstSrcOpIdx; I < LastRegOpIdx; ++I) { unsigned Size = MRI.getType(MI.getOperand(I).getReg()).getSizeInBits(); OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 63af4b2e351fb..2b9bc2b89a825 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -1509,18 +1509,19 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0, multiclass MIMG_Gather_WQM <mimgopc op, AMDGPUSampleVariant sample> : MIMG_Gather<op, sample, 1>; -class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual> { - int num_addrs = !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11)); +class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> { + int num_addrs = !if(isBVH8, 11, !if(Is64, !if(IsA16, 9, 12), !if(IsA16, 8, 11))); RegisterClass RegClass = MIMGAddrSize<num_addrs, 0>.RegClass; int VAddrDwords = !srl(RegClass.Size, 5); int GFX11PlusNSAAddrs = !if(IsA16, 4, 5); RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32); list<RegisterClass> GFX11PlusAddrTypes = - !if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64], + !if(isBVH8, [VReg_64, VReg_64, VReg_96, VReg_96, VGPR_32], + !if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64], !if(IsA16, [node_ptr_type, VGPR_32, VReg_96, VReg_96], - [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96])); + [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]))); } class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC> @@ -1554,26 +1555,26 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs, } class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs, - bit isDual, + bit isDual, bit isBVH8, list<RegisterClass> addr_types> - : VIMAGE_gfx12<op.GFX12, !if(isDual, + : VIMAGE_gfx12<op.GFX12, !if(!or(isDual, isBVH8), (outs VReg_320:$vdata, VReg_96:$ray_origin_out, VReg_96:$ray_dir_out), (outs VReg_128:$vdata)), num_addrs, "GFX12", addr_types> { - let Constraints = !if(isDual, + let Constraints = !if(!or(isDual, isBVH8), "$ray_origin_out = $vaddr2, $ray_dir_out = $vaddr3", ""); let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$rsrc), - !if(isDual, (ins), (ins A16:$a16))); + !if(!or(isDual, isBVH8), (ins), (ins A16:$a16))); let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $rsrc"# - !if(isDual, "", "$a16"); - let SchedRW = !if(isDual, + !if(!or(isDual, isBVH8), "", "$a16"); + let SchedRW = !if(!or(isDual, isBVH8), [WriteVMEM, WriteVMEM, WriteVMEM], [WriteVMEM]); } multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16, - bit isDual> { - defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual>; + bit isDual, bit isBVH8 = 0> { + defvar info = MIMG_IntersectRay_Helper<Is64, IsA16, isDual, isBVH8>; def "" : MIMGBaseOpcode { let BVH = 1; let A16 = IsA16; @@ -1611,8 +1612,9 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16, } } def _gfx12 : VIMAGE_IntersectRay_gfx12<op, opcode, info.GFX11PlusNSAAddrs, - isDual, info.GFX11PlusAddrTypes> { - let VDataDwords = !if(isDual, 10, 4); + isDual, isBVH8, + info.GFX11PlusAddrTypes> { + let VDataDwords = !if(!or(isDual, isBVH8), 10, 4); let VAddrDwords = info.num_addrs; } } @@ -1791,11 +1793,13 @@ defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0x1a, 0x1a, 0xe7> } // End OtherPredicates = [HasImageInsts, HasGFX10_AEncoding] defm IMAGE_BVH_DUAL_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x80, MIMG.NOP, MIMG.NOP>, "image_bvh_dual_intersect_ray", 1, 0, 1>; +defm IMAGE_BVH8_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0x81, MIMG.NOP, MIMG.NOP>, "image_bvh8_intersect_ray", 1, 0, 0, 1>; let SubtargetPredicate = isGFX12Plus in { def : AMDGPUMnemonicAlias<"bvh_intersect_ray", "image_bvh_intersect_ray">; def : AMDGPUMnemonicAlias<"bvh64_intersect_ray", "image_bvh64_intersect_ray">; def : AMDGPUMnemonicAlias<"bvh_dual_intersect_ray", "image_bvh_dual_intersect_ray">; + def : AMDGPUMnemonicAlias<"bvh8_intersect_ray", "image_bvh8_intersect_ray">; } } // End let OtherPredicates = [HasImageInsts] diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 5a9887101bfa2..cc0ea2a522f7a 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1387,7 +1387,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return true; } case Intrinsic::amdgcn_image_bvh_dual_intersect_ray: - case Intrinsic::amdgcn_image_bvh_intersect_ray: { + case Intrinsic::amdgcn_image_bvh_intersect_ray: + case Intrinsic::amdgcn_image_bvh8_intersect_ray: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(IntrID == Intrinsic::amdgcn_image_bvh_intersect_ray @@ -9440,7 +9441,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, Op->getVTList(), Ops, VT, M->getMemOperand()); } - case Intrinsic::amdgcn_image_bvh_dual_intersect_ray: { + case Intrinsic::amdgcn_image_bvh_dual_intersect_ray: + case Intrinsic::amdgcn_image_bvh8_intersect_ray: { MemSDNode *M = cast<MemSDNode>(Op); SDValue NodePtr = M->getOperand(2); SDValue RayExtent = M->getOperand(3); @@ -9458,11 +9460,13 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, return SDValue(); } + bool IsBVH8 = IntrID == Intrinsic::amdgcn_image_bvh8_intersect_ray; const unsigned NumVDataDwords = 10; - const unsigned NumVAddrDwords = 12; - int Opcode = AMDGPU::getMIMGOpcode(AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY, - AMDGPU::MIMGEncGfx12, NumVDataDwords, - NumVAddrDwords); + const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12; + int Opcode = AMDGPU::getMIMGOpcode( + IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY + : AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY, + AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords); assert(Opcode != -1); SmallVector<SDValue, 7> Ops; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index b2315bc80f0a4..47a78bbcd7aee 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -4376,6 +4376,14 @@ def G_AMDGPU_BVH_DUAL_INTERSECT_RAY : AMDGPUGenericInstruction { let mayStore = 0; } +def G_AMDGPU_BVH8_INTERSECT_RAY : AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$dst, type1:$ray_origin, type1:$ray_dir); + let InOperandList = (ins unknown:$opcode, variable_ops); + let hasSideEffects = 0; + let mayLoad = 1; + let mayStore = 0; +} + // Generic instruction for SI_CALL, so we can select the register bank and insert a waterfall loop // if necessary. def G_SI_CALL : AMDGPUGenericInstruction { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll new file mode 100644 index 0000000000000..ff65d5d96cb2c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s + +declare {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64, float, i8, <3 x float>, <3 x float>, i32, <4 x i32>) + +define amdgpu_ps <10 x float> @image_bvh8_intersect_ray(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) { +; GFX12-SDAG-LABEL: image_bvh8_intersect_ray: +; GFX12-SDAG: ; %bb.0: ; %main_body +; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7 +; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5 +; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 0 +; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3] +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off +; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off +; GFX12-SDAG-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-LABEL: image_bvh8_intersect_ray: +; GFX12-GISEL: ; %bb.0: ; %main_body +; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4 +; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6 +; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 0 +; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3] +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off +; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off +; GFX12-GISEL-NEXT: ; return to shader part epilog +main_body: + %ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0 + %ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1 + %ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2 + %ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0 + %ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1 + %ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2 + %v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float %ray_extent, i8 0, <3 x float> %ray_origin, <3 x float> %ray_dir, i32 %offset, <4 x i32> %tdescr) + %a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0 + %r = bitcast <10 x i32> %a to <10 x float> + %o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1 + store <3 x float> %o, ptr addrspace(1) %origin + %d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2 + store <3 x float> %d, ptr addrspace(1) %dir + ret <10 x float> %r +} + +define amdgpu_ps <10 x float> @image_bvh8_intersect_ray_1(i64 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, i32 %offset, <4 x i32> inreg %tdescr, ptr addrspace(1) %origin, ptr addrspace(1) %dir) { +; GFX12-SDAG-LABEL: image_bvh8_intersect_ray_1: +; GFX12-SDAG: ; %bb.0: ; %main_body +; GFX12-SDAG-NEXT: v_dual_mov_b32 v21, v8 :: v_dual_mov_b32 v20, v7 +; GFX12-SDAG-NEXT: v_dual_mov_b32 v19, v6 :: v_dual_mov_b32 v18, v5 +; GFX12-SDAG-NEXT: v_dual_mov_b32 v17, v4 :: v_dual_mov_b32 v16, v3 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v3, 1 +; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3] +; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 +; GFX12-SDAG-NEXT: global_store_b96 v[10:11], v[16:18], off +; GFX12-SDAG-NEXT: global_store_b96 v[12:13], v[19:21], off +; GFX12-SDAG-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-LABEL: image_bvh8_intersect_ray_1: +; GFX12-GISEL: ; %bb.0: ; %main_body +; GFX12-GISEL-NEXT: v_dual_mov_b32 v14, v3 :: v_dual_mov_b32 v15, v4 +; GFX12-GISEL-NEXT: v_dual_mov_b32 v16, v5 :: v_dual_mov_b32 v17, v6 +; GFX12-GISEL-NEXT: v_dual_mov_b32 v18, v7 :: v_dual_mov_b32 v19, v8 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v3, 1 +; GFX12-GISEL-NEXT: image_bvh8_intersect_ray v[0:9], [v[0:1], v[2:3], v[14:16], v[17:19], v9], s[0:3] +; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 +; GFX12-GISEL-NEXT: global_store_b96 v[10:11], v[14:16], off +; GFX12-GISEL-NEXT: global_store_b96 v[12:13], v[17:19], off +; GFX12-GISEL-NEXT: ; return to shader part epilog +main_body: + %ray_origin0 = insertelement <3 x float> poison, float %ray_origin_x, i32 0 + %ray_origin1 = insertelement <3 x float> %ray_origin0, float %ray_origin_y, i32 1 + %ray_origin = insertelement <3 x float> %ray_origin1, float %ray_origin_z, i32 2 + %ray_dir0 = insertelement <3 x float> poison, float %ray_dir_x, i32 0 + %ray_dir1 = insertelement <3 x float> %ray_dir0, float %ray_dir_y, i32 1 + %ray_dir = insertelement <3 x float> %ray_dir1, float %ray_dir_z, i32 2 + %v = call {<10 x i32>, <3 x float>, <3 x float>} @llvm.amdgcn.image.bvh8.intersect.ray(i64 %node_ptr, float %ray_extent, i8 1, <3 x float> %ray_origin, <3 x float> %ray_dir, i32 %offset, <4 x i32> %tdescr) + %a = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 0 + %r = bitcast <10 x i32> %a to <10 x float> + %o = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 1 + store <3 x float> %o, ptr addrspace(1) %origin + %d = extractvalue {<10 x i32>, <3 x float>, <3 x float>} %v, 2 + store <3 x float> %d, ptr addrspace(1) %dir + ret <10 x float> %r +} diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s b/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s index 3ca8f4308a0ee..c99123bbe1ee0 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s @@ -1069,6 +1069,9 @@ image_bvh64_intersect_ray v[4:7], [v[9:10], v11, v[12:14], v[15:17]], s[4:7] a16 image_bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3] // GFX12: encoding: [0x10,0x00,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06] +image_bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3] +// GFX12: encoding: [0x10,0x40,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06] + image_get_resinfo v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D // GFX12: encoding: [0x00,0xc0,0x45,0xd0,0x04,0xc0,0x00,0x00,0x20,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s b/llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s index 0148ff6cabc93..f693fe3d22d26 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s @@ -44,3 +44,6 @@ bvh64_intersect_ray v[4:7], [v[9:10], v11, v[12:14], v[15:17], v[18:20]], s[4:7] bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3] // GFX12: image_bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3] ; encoding: [0x10,0x00,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06] + +bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3] +// GFX12: image_bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3] ; encoding: [0x10,0x40,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt index afb7c3c24db17..387bdf5a6018f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt @@ -1069,6 +1069,9 @@ # GFX12: image_bvh_dual_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v[9:10]], s[0:3] ; encoding: [0x10,0x00,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06] 0x10,0x00,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06 +# GFX12: image_bvh8_intersect_ray v[0:9], [v[0:1], v[11:12], v[3:5], v[6:8], v9], s[0:3] ; encoding: [0x10,0x40,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06] +0x10,0x40,0xe0,0xd3,0x00,0x00,0x00,0x09,0x00,0x0b,0x03,0x06 + # GFX12: image_get_resinfo v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0xc0,0x45,0xd0,0x04,0xc0,0x00,0x00,0x20,0x00,0x00,0x00] 0x00,0xc0,0x45,0xd0,0x04,0xc0,0x00,0x00,0x20,0x00,0x00,0x00 >From 980c2032d251263d3bbb33083393bb8be55b707f Mon Sep 17 00:00:00 2001 From: Mariusz Sikora <mariusz.sik...@amd.com> Date: Sun, 16 Mar 2025 15:59:16 -0400 Subject: [PATCH 2/4] Update Intrinsic properties --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 33aca93735074..643a28e63cadc 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2821,7 +2821,7 @@ def int_amdgcn_image_bvh8_intersect_ray : Intrinsic<[llvm_v10i32_ty, llvm_v3f32_ty, llvm_v3f32_ty], [llvm_i64_ty, llvm_float_ty, llvm_i8_ty, llvm_v3f32_ty, llvm_v3f32_ty, llvm_i32_ty, llvm_v4i32_ty], - [IntrReadMem, IntrWillReturn]>; + [IntrReadMem, IntrWillReturn, IntrNoCallback, IntrNoFree]>; // llvm.amdgcn.permlane16.var <old> <src0> <src1> <fi> <bound_control> def int_amdgcn_permlane16_var : ClangBuiltin<"__builtin_amdgcn_permlane16_var">, >From c88a7c2b902d2bbc654a2835a01b3c1cc7778e78 Mon Sep 17 00:00:00 2001 From: Mariusz Sikora <mariusz.sik...@amd.com> Date: Sun, 16 Mar 2025 16:00:27 -0400 Subject: [PATCH 3/4] Use !cond for addr types --- llvm/lib/Target/AMDGPU/MIMGInstructions.td | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 2b9bc2b89a825..9064af6807682 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -1517,11 +1517,10 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> { int GFX11PlusNSAAddrs = !if(IsA16, 4, 5); RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32); list<RegisterClass> GFX11PlusAddrTypes = - !if(isBVH8, [VReg_64, VReg_64, VReg_96, VReg_96, VGPR_32], - !if(isDual, [VReg_64, VReg_64, VReg_96, VReg_96, VReg_64], - !if(IsA16, - [node_ptr_type, VGPR_32, VReg_96, VReg_96], - [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]))); + !cond(!eq(isBVH8, 1) : [node_ptr_type, VReg_64, VReg_96, VReg_96, VGPR_32], + !eq(isDual, 1) : [node_ptr_type, VReg_64, VReg_96, VReg_96, VReg_64], + !eq(IsA16, 0) : [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96], + !eq(IsA16, 1) : [node_ptr_type, VGPR_32, VReg_96, VReg_96]); } class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC> >From 2c08a9c182c84d8308003b37a80b5d4c54549ed5 Mon Sep 17 00:00:00 2001 From: Mariusz Sikora <mariusz.sik...@amd.com> Date: Tue, 18 Mar 2025 06:52:40 -0400 Subject: [PATCH 4/4] Update AddrType cond --- llvm/lib/Target/AMDGPU/MIMGInstructions.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 9064af6807682..fd19ebf8d069f 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -1517,10 +1517,10 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16, bit isDual, bit isBVH8> { int GFX11PlusNSAAddrs = !if(IsA16, 4, 5); RegisterClass node_ptr_type = !if(Is64, VReg_64, VGPR_32); list<RegisterClass> GFX11PlusAddrTypes = - !cond(!eq(isBVH8, 1) : [node_ptr_type, VReg_64, VReg_96, VReg_96, VGPR_32], - !eq(isDual, 1) : [node_ptr_type, VReg_64, VReg_96, VReg_96, VReg_64], - !eq(IsA16, 0) : [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96], - !eq(IsA16, 1) : [node_ptr_type, VGPR_32, VReg_96, VReg_96]); + !cond(isBVH8 : [node_ptr_type, VReg_64, VReg_96, VReg_96, VGPR_32], + isDual : [node_ptr_type, VReg_64, VReg_96, VReg_96, VReg_64], + IsA16 : [node_ptr_type, VGPR_32, VReg_96, VReg_96], + true : [node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]); } class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC> _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits