| Issue |
56042
|
| Summary |
LLVM ERROR: Cannot select: 0x7fd5e40700e8: f32 = bitcast 0x7fd5e406fb38
|
| Labels |
new issue
|
| Assignees |
|
| Reporter |
gerddie
|
Compiling a vertex shader with fp16 support results in a crash with the following error message:
```
LLVM ERROR: Cannot select: 0x7fd5e40700e8: f32 = bitcast 0x7fd5e406fb38
0x7fd5e406fb38: f32 = splat_vector 0x7fd5e406d398
0x7fd5e406d398: f16 = select nsz arcp 0x7fd5e4076030, ConstantFP:f16<APFloat(15360)>, ConstantFP:f16<APFloat(0)>
0x7fd5e4076030: i1 = setcc nsz arcp 0x7fd5e4133920, ConstantFP:f16<APFloat(15360)>, setoge:ch
0x7fd5e4133920: f16 = bitcast 0x7fd5e4133578
0x7fd5e4133578: i16 = truncate 0x7fd5e40769f0
0x7fd5e40769f0: i32 = bitcast 0x7fd5e406fc08
0x7fd5e406fc08: v2f16 = CVT_PKRTZ_F16_F32 0x7fd5e4076c60, undef:f32
0x7fd5e4076c60: f32,ch = CopyFromReg 0x560db4761a28, Register:f32 %7
0x7fd5e4076d30: f32 = Register %7
0x7fd5e4076920: f32 = undef
0x7fd5e406da80: f16 = ConstantFP<APFloat(15360)>
0x7fd5e406da80: f16 = ConstantFP<APFloat(15360)>
0x7fd5e406f790: f16 = ConstantFP<APFloat(0)>
In function: main
```
The LLVM version is 14.0.4; Mesa was at commit 51bdac48465186.
The shader leading to the crash follows:
```
radeonsi: Compiling shader 7
Vertex Shader as ESGS LLVM IR:
; ModuleID = 'mesa-shader'
source_filename = "mesa-shader"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn--"
@esgs_ring = external addrspace(3) global [0 x i32], align 65536
define amdgpu_gs void @main(float addrspace(6)* inreg noalias align 4 dereferenceable(18446744073709551615) %0, <8 x i32> addrspace(6)* inreg noalias align 4 dereferenceable(18446744073709551615) %1, i32 inreg %2, i32 inreg %3, i32 inreg %4, i32 inreg %5, i32 inreg %6, i32 inreg %7, <4 x i32> addrspace(6)* inreg noalias align 4 dereferenceable(18446744073709551615) %8, <8 x i32> addrspace(6)* inreg noalias align 4 dereferenceable(18446744073709551615) %9, float addrspace(6)* inreg noalias align 4 dereferenceable(18446744073709551615) %10, <8 x i32> addrspace(6)* inreg noalias align 4 dereferenceable(18446744073709551615) %11, i32 inreg %12, i32 inreg %13, i32 inreg %14, i32 inreg %15, <4 x i32> addrspace(6)* inreg noalias align 4 dereferenceable(18446744073709551615) %16, i32 inreg %17, <4 x i32> addrspace(6)* inreg noalias align 4 dereferenceable(18446744073709551615) %18, i32 inreg %19, <4 x i32> inreg %20, <4 x i32> inreg %21, i32 %22, i32 %23, i32 %24, i32 %25, i32 %26, i32 %27, i32 %28, i32 %29, i32 %30, i32 %31, i32 %32) #0 {
main_body:
%33 = and i32 %3, 251658240
%34 = icmp eq i32 %33, 0
br i1 %34, label %if5020, label %endif5020
if5020: ; preds = %main_body
%35 = lshr i32 %2, 12
%36 = and i32 %35, 511
%37 = lshr i32 %2, 10
%38 = and i32 %37, 2093056
%39 = or i32 %38, %36
call void @llvm.amdgcn.s.sendmsg(i32 9, i32 %39) #1
br label %endif5020
endif5020: ; preds = %if5020, %main_body
%40 = lshr i32 %3, 8
%41 = and i32 %40, 255
%42 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #7, !range !0
%43 = icmp ult i32 %42, %41
br i1 %43, label %if6001, label %endif6001
if6001: ; preds = %endif5020
%44 = and i32 %25, 1792
%45 = mul nuw nsw i32 %44, 525314
%46 = and i32 %45, 537395712
%47 = and i32 %22, 65535
%48 = or i32 %46, %47
%49 = lshr i32 %22, 6
%50 = and i32 %49, 67107840
%51 = or i32 %48, %50
%52 = shl i32 %23, 20
%53 = or i32 %51, %52
%54 = bitcast i32 %53 to float
call void @llvm.amdgcn.exp.f32(i32 20, i32 1, float %54, float undef, float undef, float undef, i1 true, i1 false) #1
br label %endif6001
endif6001: ; preds = %if6001, %endif5020
%55 = and i32 %3, 255
%56 = icmp ult i32 %42, %55
br i1 %56, label %if11500, label %endif6002
if11500: ; preds = %endif6001
%57 = call nsz arcp <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %20, i32 %31, i32 0, i32 0, i32 0) #8
%58 = extractelement <4 x float> %57, i64 0
%59 = extractelement <4 x float> %57, i64 1
%60 = extractelement <4 x float> %57, i64 2
%61 = extractelement <4 x float> %57, i64 3
%62 = call nsz arcp float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %21, i32 %32, i32 0, i32 0, i32 0) #8
%63 = call nsz arcp <2 x half> @llvm.amdgcn.cvt.pkrtz(float %62, float undef) #7
%64 = extractelement <2 x half> %63, i64 0
%65 = fcmp nsz arcp oge half %64, 0xH3C00
%66 = ptrtoint float addrspace(6)* %10 to i32
%67 = insertelement <4 x i32> <i32 poison, i32 32768, i32 144, i32 822177708>, i32 %66, i64 0
%68 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 4, i32 0) #7
%69 = bitcast float %68 to i32
%.not = icmp eq i32 %69, 0
br i1 %.not, label %endif1, label %if1
if1: ; preds = %if11500
%70 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 8, i32 0) #7
%71 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 12, i32 0) #7
%72 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 16, i32 0) #7
%73 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 20, i32 0) #7
%74 = fmul nsz arcp float %61, %73
%75 = call nsz arcp float @llvm.fma.f32(float %60, float %72, float %74) #7
%76 = call nsz arcp float @llvm.fma.f32(float %59, float %71, float %75) #7
%77 = call nsz arcp float @llvm.fma.f32(float %58, float %70, float %76) #7
%78 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 24, i32 0) #7
%79 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 28, i32 0) #7
%80 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 32, i32 0) #7
%81 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 36, i32 0) #7
%82 = fmul nsz arcp float %61, %81
%83 = call nsz arcp float @llvm.fma.f32(float %60, float %80, float %82) #7
%84 = call nsz arcp float @llvm.fma.f32(float %59, float %79, float %83) #7
%85 = call nsz arcp float @llvm.fma.f32(float %58, float %78, float %84) #7
%86 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 40, i32 0) #7
%87 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 44, i32 0) #7
%88 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 48, i32 0) #7
%89 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 52, i32 0) #7
%90 = fmul nsz arcp float %61, %89
%91 = call nsz arcp float @llvm.fma.f32(float %60, float %88, float %90) #7
%92 = call nsz arcp float @llvm.fma.f32(float %59, float %87, float %91) #7
%93 = call nsz arcp float @llvm.fma.f32(float %58, float %86, float %92) #7
%94 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 56, i32 0) #7
%95 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 60, i32 0) #7
%96 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 64, i32 0) #7
%97 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 68, i32 0) #7
%98 = fmul nsz arcp float %61, %97
%99 = call nsz arcp float @llvm.fma.f32(float %60, float %96, float %98) #7
%100 = call nsz arcp float @llvm.fma.f32(float %59, float %95, float %99) #7
%101 = call nsz arcp float @llvm.fma.f32(float %58, float %94, float %100) #7
%102 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 72, i32 0) #7
%103 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 76, i32 0) #7
%104 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 80, i32 0) #7
%105 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 84, i32 0) #7
%106 = fmul nsz arcp float %61, %105
%107 = call nsz arcp float @llvm.fma.f32(float %60, float %104, float %106) #7
%108 = call nsz arcp float @llvm.fma.f32(float %59, float %103, float %107) #7
%109 = call nsz arcp float @llvm.fma.f32(float %58, float %102, float %108) #7
%110 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 88, i32 0) #7
%111 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 92, i32 0) #7
%112 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 96, i32 0) #7
%113 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 100, i32 0) #7
%114 = fmul nsz arcp float %61, %113
%115 = call nsz arcp float @llvm.fma.f32(float %60, float %112, float %114) #7
%116 = call nsz arcp float @llvm.fma.f32(float %59, float %111, float %115) #7
%117 = call nsz arcp float @llvm.fma.f32(float %58, float %110, float %116) #7
%118 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 104, i32 0) #7
%119 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 108, i32 0) #7
%120 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 112, i32 0) #7
%121 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 116, i32 0) #7
%122 = fmul nsz arcp float %61, %121
%123 = call nsz arcp float @llvm.fma.f32(float %60, float %120, float %122) #7
%124 = call nsz arcp float @llvm.fma.f32(float %59, float %119, float %123) #7
%125 = call nsz arcp float @llvm.fma.f32(float %58, float %118, float %124) #7
%126 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 120, i32 0) #7
%127 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 124, i32 0) #7
%128 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 128, i32 0) #7
%129 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 132, i32 0) #7
%130 = fmul nsz arcp float %61, %129
%131 = call nsz arcp float @llvm.fma.f32(float %60, float %128, float %130) #7
%132 = call nsz arcp float @llvm.fma.f32(float %59, float %127, float %131) #7
%133 = call nsz arcp float @llvm.fma.f32(float %58, float %126, float %132) #7
br label %endif1
endif1: ; preds = %if11500, %if1
%134 = phi float [ %109, %if1 ], [ 0.000000e+00, %if11500 ]
%135 = phi float [ %117, %if1 ], [ 0.000000e+00, %if11500 ]
%136 = phi float [ %125, %if1 ], [ 0.000000e+00, %if11500 ]
%137 = phi float [ %133, %if1 ], [ 0.000000e+00, %if11500 ]
%138 = phi float [ %77, %if1 ], [ 0.000000e+00, %if11500 ]
%139 = phi float [ %85, %if1 ], [ 0.000000e+00, %if11500 ]
%140 = phi float [ %93, %if1 ], [ 0.000000e+00, %if11500 ]
%141 = phi float [ %101, %if1 ], [ 0.000000e+00, %if11500 ]
%142 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 0, i32 0) #7
%143 = fmul nsz arcp float %59, %142
%144 = select nsz arcp i1 %65, half 0xH3C00, half 0xH0000
%145 = insertelement float undef, half %144, i64 0
%146 = bitcast <2 x half> %145 to float
%147 = select i1 %65, float 0.000000e+00, float 1.000000e+00
%148 = call nsz arcp <2 x half> @llvm.amdgcn.cvt.pkrtz(float %147, float undef) #7
%149 = shufflevector <2 x half> %148, <2 x half> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
%150 = bitcast <3 x half> %149 to float
call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %58, float %143, float %60, float %61, i1 false, i1 false) #1
call void @llvm.amdgcn.exp.f32(i32 13, i32 15, float %138, float %139, float %140, float %141, i1 false, i1 false) #1
call void @llvm.amdgcn.exp.f32(i32 14, i32 15, float %134, float %135, float %136, float %137, i1 true, i1 false) #1
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %138, float %139, float %140, float %141, i1 false, i1 false) #1
call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %134, float %135, float %136, float %137, i1 false, i1 false) #1
call void @llvm.amdgcn.exp.f32(i32 34, i32 15, float %146, float %150, float undef, float undef, i1 false, i1 false) #1
br label %endif6002
endif6002: ; preds = %endif1, %endif6001
ret void
}
; Function Attrs: nounwind
declare void @llvm.amdgcn.s.sendmsg(i32 immarg, i32) #1
; Function Attrs: nounwind readnone willreturn
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #2
; Function Attrs: inaccessiblememonly nounwind willreturn writeonly
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3
; Function Attrs: nounwind readonly willreturn
declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32 immarg) #4
; Function Attrs: nounwind readonly willreturn
declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #4
; Function Attrs: nounwind readnone speculatable willreturn
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #5
; Function Attrs: nounwind readnone willreturn
declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg) #2
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare float @llvm.fma.f32(float, float, float) #6
attributes #0 = { "amdgpu-32bit-address-high-bits"="0xffff8000" "amdgpu-flat-work-group-size"="128,128" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="+DumpCode" }
attributes #1 = { nounwind }
attributes #2 = { nounwind readnone willreturn }
attributes #3 = { inaccessiblememonly nounwind willreturn writeonly }
attributes #4 = { nounwind readonly willreturn }
attributes #5 = { nounwind readnone speculatable willreturn }
attributes #6 = { nofree nosync nounwind readnone speculatable willreturn }
attributes #7 = { nounwind readnone }
attributes #8 = { nounwind readonly }
!0 = !{i32 0, i32 32}
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs