llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-clang Author: None (dyung) <details> <summary>Changes</summary> This reverts commits 677cc15e0ff2e0e6aa30538eb187990a6a8f53c0 and 78bc1b64a6dc3fb6191355a5e1b502be8b3668e7. The test CodeGenHIP/default-attributes.hip is failing on multiple bots, even after the attempted fix, including the following: - https://lab.llvm.org/buildbot/#/builders/3/builds/1473 - https://lab.llvm.org/buildbot/#/builders/65/builds/1380 - https://lab.llvm.org/buildbot/#/builders/161/builds/595 - https://lab.llvm.org/buildbot/#/builders/154/builds/1372 - https://lab.llvm.org/buildbot/#/builders/133/builds/1547 - https://lab.llvm.org/buildbot/#/builders/81/builds/755 - https://lab.llvm.org/buildbot/#/builders/40/builds/570 - https://lab.llvm.org/buildbot/#/builders/13/builds/748 - https://lab.llvm.org/buildbot/#/builders/12/builds/1845 - https://lab.llvm.org/buildbot/#/builders/11/builds/1695 - https://lab.llvm.org/buildbot/#/builders/190/builds/1829 - https://lab.llvm.org/buildbot/#/builders/193/builds/962 - https://lab.llvm.org/buildbot/#/builders/23/builds/991 - https://lab.llvm.org/buildbot/#/builders/144/builds/2256 - https://lab.llvm.org/buildbot/#/builders/46/builds/1614 These bots have been broken for a day, so this change reverts both commits to get everything back to green. 
--- Patch is 16.75 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/98851.diff 562 Files Affected: - (modified) clang/test/CodeGenHIP/default-attributes.hip (+16-35) - (modified) llvm/docs/ReleaseNotes.rst (-4) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+5-8) - (modified) llvm/lib/Target/AMDGPU/SIFrameLowering.cpp (-6) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll (+8-8) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll (+36-116) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll (+36-116) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll (+259-278) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll (+278-297) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll (+10-10) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll (+38-38) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll (+15-15) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll (+138-138) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll (+3-5) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll (+88-104) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll (+14-66) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll (+362-483) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll (+56-56) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll (+20-20) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll (-3) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll (+3-3) - (modified) 
llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll (+3-5) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll (+236-236) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fence.ll (-120) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll (+148-121) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll (+19-19) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll (+233-295) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll (+12-12) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll (+9-9) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll (+55-58) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll (+11-12) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll (+11-12) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll (+3-4) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll (+17-17) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll (+12-12) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll (+2-13) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll (+49-49) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll (+43-43) - (modified) 
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll (+16-16) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll (+63-63) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll (+38-45) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll (+4-5) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll (+9-11) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll (+40-42) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll (+48-66) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll (+12-12) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll (+23-23) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll (+369-369) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll (+17-18) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll (+86-86) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll (+86-86) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll (+243-243) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll (+98-98) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll (+30-30) - (modified) llvm/test/CodeGen/AMDGPU/add.ll (+159-168) - (modified) llvm/test/CodeGen/AMDGPU/add.v2i16.ll (+102-122) - (modified) llvm/test/CodeGen/AMDGPU/addrspacecast.ll (+2-4) - (modified) llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll (+64-65) - (modified) llvm/test/CodeGen/AMDGPU/agpr-register-count.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/always-uniform.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/amd.endpgm.ll (+17-17) - (modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll (+1435-1442) - (modified) 
llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll (+24-24) - (modified) llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll (+18-18) - (modified) llvm/test/CodeGen/AMDGPU/amdpal-elf.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/anyext.ll (+19-19) - (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll (+702-808) - (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll (+592-701) - (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll (+1059-1160) - (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll (+554-652) - (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll (+626-722) - (modified) llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll (+24-24) - (modified) llvm/test/CodeGen/AMDGPU/attr-amdgpu-waves-per-eu.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/attributor-noopt.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/bf16.ll (+288-288) - (modified) llvm/test/CodeGen/AMDGPU/bfe-combine.ll (+18-18) - (modified) llvm/test/CodeGen/AMDGPU/bfe-patterns.ll (+28-28) - (modified) llvm/test/CodeGen/AMDGPU/bfi_int.ll (+121-115) - (modified) llvm/test/CodeGen/AMDGPU/bfi_nested.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/bfm.ll (+8-8) - (modified) llvm/test/CodeGen/AMDGPU/bitreverse.ll (+73-89) - (modified) llvm/test/CodeGen/AMDGPU/br_cc.f16.ll (+32-32) - (modified) llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/branch-relaxation.ll (+46-46) - (modified) llvm/test/CodeGen/AMDGPU/bswap.ll (+21-21) - (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll (+9434-5415) - (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll (+2787-2371) - (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll (+2787-2371) - (modified) 
llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll (+14-14) - (modified) llvm/test/CodeGen/AMDGPU/build_vector.ll (+37-37) - (modified) llvm/test/CodeGen/AMDGPU/call-constexpr.ll (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/call-reqd-group-size.ll (+18-18) - (modified) llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll (+2-4) - (modified) llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll (+2-4) - (modified) llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll (+1-3) - (modified) llvm/test/CodeGen/AMDGPU/calling-conventions.ll (+77-135) - (modified) llvm/test/CodeGen/AMDGPU/carryout-selection.ll (+382-394) - (modified) llvm/test/CodeGen/AMDGPU/cc-update.ll (+9-9) - (modified) llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll (+7-7) - (modified) llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll (+13-13) - (modified) llvm/test/CodeGen/AMDGPU/clamp-modifier.ll (+101-133) - (modified) llvm/test/CodeGen/AMDGPU/clamp.ll (+272-455) - (modified) llvm/test/CodeGen/AMDGPU/cluster_stores.ll (+6-6) - (modified) llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll (+2-4) - (modified) llvm/test/CodeGen/AMDGPU/code-object-v3.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll (+24-3) - (modified) llvm/test/CodeGen/AMDGPU/collapse-endcf.ll (+7-7) - (modified) llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll (+113-113) - (modified) llvm/test/CodeGen/AMDGPU/combine-reg-or-const.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/combine-vload-extract.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll (+76-74) - (modified) llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll (+16-16) - (modified) llvm/test/CodeGen/AMDGPU/copy_to_scc.ll 
(+3-3) - (modified) llvm/test/CodeGen/AMDGPU/ctlz.ll (+126-144) - (modified) llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll (+136-136) - (modified) llvm/test/CodeGen/AMDGPU/ctpop16.ll (+44-44) - (modified) llvm/test/CodeGen/AMDGPU/ctpop64.ll (+62-62) - (modified) llvm/test/CodeGen/AMDGPU/cttz.ll (+84-84) - (modified) llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll (+83-83) - (modified) llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll (+110-141) - (modified) llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll (+99-102) - (modified) llvm/test/CodeGen/AMDGPU/dagcomb-extract-vec-elt-different-sizes.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/dagcombine-setcc-select.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll (+76-76) - (modified) llvm/test/CodeGen/AMDGPU/divergence-driven-sext-inreg.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll (+6-6) - (modified) llvm/test/CodeGen/AMDGPU/ds-alignment.ll (+45-45) - (modified) llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll (+13-13) - (modified) llvm/test/CodeGen/AMDGPU/ds-combine-with-dependence.ll (+4-6) - (modified) llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll (+34-46) - (modified) llvm/test/CodeGen/AMDGPU/ds_read2.ll (+126-117) - (modified) llvm/test/CodeGen/AMDGPU/ds_write2.ll (+75-75) - (modified) llvm/test/CodeGen/AMDGPU/early-inline.ll (-1) - (modified) llvm/test/CodeGen/AMDGPU/elf-notes.ll (+1-3) - (modified) llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll (+101-112) - (modified) llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/extract_vector_elt-i8.ll (+6-6) - (modified) llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll (+10-10) - (modified) 
llvm/test/CodeGen/AMDGPU/fabs.f16.ll (+79-88) - (modified) llvm/test/CodeGen/AMDGPU/fabs.ll (+56-56) - (modified) llvm/test/CodeGen/AMDGPU/fadd.f16.ll (+54-78) - (modified) llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll (+18-18) - (modified) llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll (+245-224) - (modified) llvm/test/CodeGen/AMDGPU/fcanonicalize.ll (+238-274) - (modified) llvm/test/CodeGen/AMDGPU/fcmp.f16.ll (+466-466) - (modified) llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll (+290-311) - (modified) llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll (+168-169) - (modified) llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll (+219-218) - (modified) llvm/test/CodeGen/AMDGPU/fdiv.f16.ll (+127-155) - (modified) llvm/test/CodeGen/AMDGPU/fdiv.ll (+142-148) - (modified) llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll (+46-46) - (modified) llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll (+26-26) - (modified) llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll (+182-242) - (modified) llvm/test/CodeGen/AMDGPU/flat-scratch.ll (+343-371) - (modified) llvm/test/CodeGen/AMDGPU/flat_atomics.ll (+1653-1653) - (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll (+129-129) - (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll (+552-552) - (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system.ll (+49-49) - (modified) llvm/test/CodeGen/AMDGPU/fma-combine.ll (+377-411) - (modified) llvm/test/CodeGen/AMDGPU/fma.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/fmax3.ll (+16-16) - (modified) llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll (+8-8) - (modified) llvm/test/CodeGen/AMDGPU/fmaximum.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/fmed3.ll (+348-470) - (modified) llvm/test/CodeGen/AMDGPU/fmin3.ll (+24-24) - (modified) llvm/test/CodeGen/AMDGPU/fmin_legacy.f64.ll (+16-16) - (modified) llvm/test/CodeGen/AMDGPU/fminimum.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll (+162-162) - (modified) llvm/test/CodeGen/AMDGPU/fmul.f16.ll (+122-122) - (modified) 
llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll (+136-220) - (modified) llvm/test/CodeGen/AMDGPU/fnearbyint.ll (+61-62) - (modified) llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll (+36-36) - (modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll (+89-89) - (modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll (+29-29) - (modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.ll (+24-24) - (modified) llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll (+14-14) - (modified) llvm/test/CodeGen/AMDGPU/fneg.f16.ll (+62-66) - (modified) llvm/test/CodeGen/AMDGPU/fneg.ll (+108-110) - (modified) llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll (+6-116) - (modified) llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll (+318-24) - (modified) llvm/test/CodeGen/AMDGPU/fp-classify.ll (+161-161) - (modified) llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll (+127-120) - (modified) llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-ptr-atomics.ll (+120-113) - (modified) llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll (+630-372) - (modified) llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-atomics.ll (+64-64) - (modified) llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-ptr-atomics.ll (+64-64) - (modified) llvm/test/CodeGen/AMDGPU/fp_to_sint.ll (+59-59) - (modified) llvm/test/CodeGen/AMDGPU/fp_to_uint.ll (+52-52) - (modified) llvm/test/CodeGen/AMDGPU/fpext.f16.ll (+68-60) - (modified) llvm/test/CodeGen/AMDGPU/fptosi.f16.ll (+25-25) - (modified) llvm/test/CodeGen/AMDGPU/fptoui.f16.ll (+27-28) - (modified) llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll (+80-80) - (modified) llvm/test/CodeGen/AMDGPU/fptrunc.ll (+94-96) - (modified) llvm/test/CodeGen/AMDGPU/frem.ll (+224-224) - (modified) llvm/test/CodeGen/AMDGPU/fshl.ll (+173-171) - (modified) llvm/test/CodeGen/AMDGPU/fshr.ll (+105-103) - (modified) llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll (+89-92) - 
(modified) llvm/test/CodeGen/AMDGPU/fsub.f16.ll (+78-78) - (modified) llvm/test/CodeGen/AMDGPU/function-args-inreg.ll (+664-685) - (modified) llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll (+12-12) - (modified) llvm/test/CodeGen/AMDGPU/gds-allocation.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/gep-const-address-space.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll (+14-17) - (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd-wrong-subtarget.ll (+18-18) - (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd.ll (+14212-7373) - (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmax.ll (+2624-1958) - (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmin.ll (+2624-1958) - (modified) llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll (+8-8) - (modified) llvm/test/CodeGen/AMDGPU/global-constant.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/global-i16-load-store.ll (+12-12) - (modified) llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics.ll (+1351-1351) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll (+122-122) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll (+812-812) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll (+53-53) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll (+2239-3826) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll (+1266-2974) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll (+1266-2974) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll (+2039-3626) - (modified) llvm/test/CodeGen/AMDGPU/global_smrd.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/half.ll (+183-183) - (modified) llvm/test/CodeGen/AMDGPU/hsa-metadata-agpr-register-count.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/hsa-metadata-heap-v5.ll (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll (+2-3) - (modified) 
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll (+24-34) - (modified) llvm/test/CodeGen/AMDGPU/hsa-metadata-multigrid-sync-arg-v5.ll (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll (+7-9) - (modified) llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/hsa.ll (+2-4) - (modified) llvm/test/CodeGen/AMDGPU/idiv-licm.ll (+214-220) - (modified) llvm/test/CodeGen/AMDGPU/idot2.ll (+328-347) - (modified) llvm/test/CodeGen/AMDGPU/idot4s.ll (+340-394) - (modified) llvm/test/CodeGen/AMDGPU/idot4u.ll (+638-736) - (modified) llvm/test/CodeGen/AMDGPU/idot8s.ll (+355-367) - (modified) llvm/test/CodeGen/AMDGPU/idot8u.ll (+459-466) - (modified) llvm/test/CodeGen/AMDGPU/imm.ll (+234-266) - (modified) llvm/test/CodeGen/AMDGPU/imm16.ll (+258-272) - (modified) llvm/test/CodeGen/AMDGPU/immv216.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll (+20-20) - (modified) llvm/test/CodeGen/AMDGPU/implicitarg-attributes.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll (+39-43) - (modified) llvm/test/CodeGen/AMDGPU/infinite-loop.ll (+10-6) - (modified) llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll (+12-12) - (modified) llvm/test/CodeGen/AMDGPU/inline-attr.ll (+7-10) - (modified) llvm/test/CodeGen/AMDGPU/inlineasm-packed.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll (+366-368) - (modified) llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll (+426-426) - (modified) llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2bf16.ll (+285-282) - (modified) llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll (+283-344) - (modified) llvm/test/CodeGen/AMDGPU/insert_waitcnt_for_precise_memory.ll (+130-130) - (modified) llvm/test/CodeGen/AMDGPU/ipra.ll (+4-4) - (modified) 
llvm/test/CodeGen/AMDGPU/kernarg-size.ll (+2-4) - (modified) llvm/test/CodeGen/AMDGPU/kernel-args.ll (+465-468) - (modified) llvm/test/CodeGen/AMDGPU/kernel-argument-dag-lowering.ll (+48-50) - (modified) llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll (+3-3) - (modified) llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll (+7-7) - (modified) llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll (+192-240) - (modified) llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll (+14-14) - (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline.ll (+12) ``````````diff diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip index ee16ecd134bfe..63572bfd242b9 100644 --- a/clang/test/CodeGenHIP/default-attributes.hip +++ b/clang/test/CodeGenHIP/default-attributes.hip @@ -8,68 +8,49 @@ #define __device__ __attribute__((device)) #define __global__ __attribute__((global)) -//. -// OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0 -// OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata" -// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 -//. -// OPT: @__hip_cuid_ = addrspace(1) global i8 0 -// OPT: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500 -// OPT: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata" -//. 
-__device__ void extern_func(); - // OPTNONE: Function Attrs: convergent mustprogress noinline nounwind optnone // OPTNONE-LABEL: define {{[^@]+}}@_Z4funcv // OPTNONE-SAME: () #[[ATTR0:[0-9]+]] { // OPTNONE-NEXT: entry: -// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]] // OPTNONE-NEXT: ret void // -// OPT: Function Attrs: convergent mustprogress nounwind +// OPT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) // OPT-LABEL: define {{[^@]+}}@_Z4funcv // OPT-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] { // OPT-NEXT: entry: -// OPT-NEXT: tail call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]] // OPT-NEXT: ret void // __device__ void func() { - extern_func(); + } // OPTNONE: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone // OPTNONE-LABEL: define {{[^@]+}}@_Z6kernelv -// OPTNONE-SAME: () #[[ATTR2:[0-9]+]] { +// OPTNONE-SAME: () #[[ATTR1:[0-9]+]] { // OPTNONE-NEXT: entry: -// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3]] // OPTNONE-NEXT: ret void // -// OPT: Function Attrs: convergent mustprogress norecurse nounwind +// OPT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) // OPT-LABEL: define {{[^@]+}}@_Z6kernelv -// OPT-SAME: () local_unnamed_addr #[[ATTR2:[0-9]+]] { +// OPT-SAME: () local_unnamed_addr #[[ATTR1:[0-9]+]] { // OPT-NEXT: entry: -// OPT-NEXT: tail call void @_Z11extern_funcv() #[[ATTR3]] // OPT-NEXT: ret void // __global__ void kernel() { - extern_func(); + } //. 
-// OPTNONE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// OPTNONE: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } -// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind } +// OPTNONE: attributes #0 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// OPTNONE: attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } //. -// OPT: attributes #[[ATTR0]] = { convergent mustprogress nounwind "amdgpu-waves-per-eu"="4,10" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } -// OPT: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "amdgpu-waves-per-eu"="4,10" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" } -// OPT: attributes #[[ATTR2]] = { convergent mustprogress norecurse nounwind "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } -// OPT: attributes #[[ATTR3]] = { convergent nounwind } +// OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } //. 
-// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} -// OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} -// OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// OPTNONE: !0 = !{i32 1, !"amdhsa_code_object_version", i32 500} +// OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} +// OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4} //. -// OPT: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500} -// OPT: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} -// OPT: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// OPT: !0 = !{i32 1, !"amdhsa_code_object_version", i32 500} +// OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"} +// OPT: !2 = !{i32 1, !"wchar_size", i32 4} //. diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 55b3b486d705d..1dd7fce2334c9 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -139,10 +139,6 @@ Changes to the AMDGPU Backend :ref:`atomicrmw <i_atomicrmw>` instruction with `fadd`, `fmin` and `fmax` with addrspace(3) instead. -* AMDGPUAttributor is no longer run as part of the codegen pass - pipeline. It is expected to run as part of the middle end - optimizations. - Changes to the ARM Backend -------------------------- diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 9ddf0a310ed06..f50a18ccc2188 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -731,14 +731,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); }); - // FIXME: Why is AMDGPUAttributor not in CGSCC? 
- PB.registerOptimizerLastEPCallback( - [this](ModulePassManager &MPM, OptimizationLevel Level) { - if (Level != OptimizationLevel::O0) { - MPM.addPass(AMDGPUAttributorPass(*this)); - } - }); - PB.registerFullLinkTimeOptimizationLastEPCallback( [this](ModulePassManager &PM, OptimizationLevel Level) { // We want to support the -lto-partitions=N option as "best effort". @@ -1045,6 +1037,11 @@ void AMDGPUPassConfig::addIRPasses() { addPass(createAMDGPULowerModuleLDSLegacyPass(&TM)); } + // AMDGPUAttributor infers lack of llvm.amdgcn.lds.kernel.id calls, so run + // after their introduction + if (TM.getOptLevel() > CodeGenOptLevel::None) + addPass(createAMDGPUAttributorLegacyPass()); + if (TM.getOptLevel() > CodeGenOptLevel::None) addPass(createInferAddressSpacesPass()); diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 8c951105101d9..97a8ff4486609 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -679,12 +679,6 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, break; } } - - // FIXME: We can spill incoming arguments and restore at the end of the - // prolog. 
- if (!ScratchWaveOffsetReg) - report_fatal_error( - "could not find temporary scratch offset register in prolog"); } else { ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll index 359c1e53de99e..a38b6e3263882 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll @@ -6,8 +6,8 @@ define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) { ; GFX11-LABEL: s_add_u64: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24 -; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34 +; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_add_u32 s0, s6, s0 @@ -22,8 +22,8 @@ define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) { ; GFX12-LABEL: s_add_u64: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_clause 0x1 -; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x24 -; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x34 +; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x24 +; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x34 ; GFX12-NEXT: v_mov_b32_e32 v2, 0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_add_nc_u64 s[0:1], s[6:7], s[0:1] @@ -58,8 +58,8 @@ define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) { ; GFX11-LABEL: s_sub_u64: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24 -; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34 +; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24 +; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34 ; GFX11-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_sub_u32 s0, s6, s0 @@ -74,8 +74,8 @@ define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) { ; GFX12-LABEL: s_sub_u64: ; GFX12: ; 
%bb.0: ; %entry ; GFX12-NEXT: s_clause 0x1 -; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x24 -; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x34 +; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x24 +; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x34 ; GFX12-NEXT: v_mov_b32_e32 v2, 0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[6:7], s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll index 0a8e805027c77..9be8620b024eb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll @@ -2026,7 +2026,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, s6 +; GFX12-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, s4 ; GFX12-NEXT: s_mov_b32 s4, 0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_max_num_f32_e32 v3, v1, v1 @@ -2056,7 +2056,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NEXT: v_mov_b32_e32 v1, v0 ; GFX940-NEXT: buffer_load_dword v0, v2, s[0:3], 0 offen ; GFX940-NEXT: s_mov_b64 s[4:5], 0 @@ -2083,7 +2083,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, s6 +; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, s4 ; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_1) ; GFX11-NEXT: v_max_f32_e32 v3, v1, v1 @@ -2114,14 +2114,10 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v2, s18 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s16 -; GFX10-NEXT: s_mov_b32 s7, s17 +; GFX10-NEXT: v_mov_b32_e32 v2, s8 ; GFX10-NEXT: v_mov_b32_e32 v1, v0 -; GFX10-NEXT: buffer_load_dword v0, v2, s[4:7], 0 offen ; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: buffer_load_dword v0, v2, s[4:7], 0 offen ; GFX10-NEXT: v_max_f32_e32 v3, v1, v1 ; GFX10-NEXT: .LBB12_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -2147,11 +2143,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: s_mov_b32 s4, s6 -; GFX90A-NEXT: s_mov_b32 s5, s7 -; GFX90A-NEXT: s_mov_b32 s6, s16 -; GFX90A-NEXT: s_mov_b32 s7, s17 -; GFX90A-NEXT: v_mov_b32_e32 v2, s18 +; GFX90A-NEXT: v_mov_b32_e32 v2, s8 ; GFX90A-NEXT: v_mov_b32_e32 v1, v0 ; GFX90A-NEXT: buffer_load_dword v0, v2, s[4:7], 0 offen ; GFX90A-NEXT: s_mov_b64 s[8:9], 0 @@ -2177,11 +2169,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m ; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX908-NEXT: s_mov_b32 s4, s6 -; GFX908-NEXT: s_mov_b32 s5, s7 -; GFX908-NEXT: s_mov_b32 s6, s16 -; GFX908-NEXT: s_mov_b32 s7, s17 -; GFX908-NEXT: v_mov_b32_e32 v2, s18 +; GFX908-NEXT: v_mov_b32_e32 v2, s8 ; GFX908-NEXT: v_mov_b32_e32 v1, v0 ; GFX908-NEXT: buffer_load_dword v0, v2, 
s[4:7], 0 offen ; GFX908-NEXT: s_mov_b64 s[8:9], 0 @@ -2208,11 +2196,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m ; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_mov_b32 s4, s6 -; GFX8-NEXT: s_mov_b32 s5, s7 -; GFX8-NEXT: s_mov_b32 s6, s16 -; GFX8-NEXT: s_mov_b32 s7, s17 -; GFX8-NEXT: v_mov_b32_e32 v2, s18 +; GFX8-NEXT: v_mov_b32_e32 v2, s8 ; GFX8-NEXT: v_mov_b32_e32 v1, v0 ; GFX8-NEXT: buffer_load_dword v0, v2, s[4:7], 0 offen ; GFX8-NEXT: s_mov_b64 s[8:9], 0 @@ -2239,11 +2223,7 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_m ; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__amdgpu_no_fine_grained_memory: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_mov_b32 s4, s6 -; GFX7-NEXT: s_mov_b32 s5, s7 -; GFX7-NEXT: s_mov_b32 s6, s16 -; GFX7-NEXT: s_mov_b32 s7, s17 -; GFX7-NEXT: v_mov_b32_e32 v2, s18 +; GFX7-NEXT: v_mov_b32_e32 v2, s8 ; GFX7-NEXT: v_mov_b32_e32 v1, v0 ; GFX7-NEXT: buffer_load_dword v0, v2, s[4:7], 0 offen ; GFX7-NEXT: s_mov_b64 s[8:9], 0 @@ -2278,7 +2258,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_ ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_max_num_f32 v3, v0, v0 +; GFX12-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_max_num_f32 v3, v0, v0 ; GFX12-NEXT: s_mov_b32 s4, 0 ; GFX12-NEXT: buffer_load_b32 v1, v2, s[0:3], null offen ; GFX12-NEXT: .LBB13_1: ; %atomicrmw.start @@ -2305,7 +2285,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_ ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX940-NEXT: v_mov_b32_e32 v2, s6 +; 
GFX940-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen ; GFX940-NEXT: s_mov_b64 s[4:5], 0 ; GFX940-NEXT: v_max_f32_e32 v3, v0, v0 @@ -2331,7 +2311,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_ ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_max_f32 v3, v0, v0 +; GFX11-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_max_f32 v3, v0, v0 ; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: buffer_load_b32 v1, v2, s[0:3], 0 offen ; GFX11-NEXT: .LBB13_1: ; %atomicrmw.start @@ -2359,14 +2339,10 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_ ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v2, s18 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s16 -; GFX10-NEXT: s_mov_b32 s7, s17 +; GFX10-NEXT: v_mov_b32_e32 v2, s8 ; GFX10-NEXT: v_max_f32_e32 v3, v0, v0 -; GFX10-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen ; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen ; GFX10-NEXT: .LBB13_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -2391,11 +2367,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_ ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: s_mov_b32 s4, s6 -; GFX90A-NEXT: s_mov_b32 s5, s7 -; GFX90A-NEXT: s_mov_b32 s6, s16 -; GFX90A-NEXT: s_mov_b32 s7, s17 -; GFX90A-NEXT: v_mov_b32_e32 v2, s18 +; GFX90A-NEXT: v_mov_b32_e32 v2, s8 ; GFX90A-NEXT: buffer_load_dword v1, v2, s[4:7], 0 
offen ; GFX90A-NEXT: s_mov_b64 s[8:9], 0 ; GFX90A-NEXT: v_max_f32_e32 v3, v0, v0 @@ -2420,11 +2392,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_ ; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX908-NEXT: s_mov_b32 s4, s6 -; GFX908-NEXT: s_mov_b32 s5, s7 -; GFX908-NEXT: s_mov_b32 s6, s16 -; GFX908-NEXT: s_mov_b32 s7, s17 -; GFX908-NEXT: v_mov_b32_e32 v2, s18 +; GFX908-NEXT: v_mov_b32_e32 v2, s8 ; GFX908-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen ; GFX908-NEXT: s_mov_b64 s[8:9], 0 ; GFX908-NEXT: v_max_f32_e32 v3, v0, v0 @@ -2450,11 +2418,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_ ; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_mov_b32 s4, s6 -; GFX8-NEXT: s_mov_b32 s5, s7 -; GFX8-NEXT: s_mov_b32 s6, s16 -; GFX8-NEXT: s_mov_b32 s7, s17 -; GFX8-NEXT: v_mov_b32_e32 v2, s18 +; GFX8-NEXT: v_mov_b32_e32 v2, s8 ; GFX8-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen ; GFX8-NEXT: s_mov_b64 s[8:9], 0 ; GFX8-NEXT: v_mul_f32_e32 v3, 1.0, v0 @@ -2480,11 +2444,7 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_ ; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f32__amdgpu_no_fine_grained_memory: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_mov_b32 s4, s6 -; GFX7-NEXT: s_mov_b32 s5, s7 -; GFX7-NEXT: s_mov_b32 s6, s16 -; GFX7-NEXT: s_mov_b32 s7, s17 -; GFX7-NEXT: v_mov_b32_e32 v2, s18 +; GFX7-NEXT: v_mov_b32_e32 v2, s8 ; GFX7-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen ; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v0 @@ -2518,7 +2478,7 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__amdgpu_no_fine_grained_ ; GFX12-NEXT: s_wait_samplecnt 
0x0 ; GFX12-NEXT: s_wait_... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/98851 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits