Issue 56042
Summary LLVM ERROR: Cannot select: 0x7fd5e40700e8: f32 = bitcast 0x7fd5e406fb38
Labels new issue
Assignees
Reporter gerddie
    Compiling a vertex shader with fp16 support results in a crash with the error message
```
LLVM ERROR: Cannot select: 0x7fd5e40700e8: f32 = bitcast 0x7fd5e406fb38
  0x7fd5e406fb38: f32 = splat_vector 0x7fd5e406d398
    0x7fd5e406d398: f16 = select nsz arcp 0x7fd5e4076030, ConstantFP:f16<APFloat(15360)>, ConstantFP:f16<APFloat(0)>
      0x7fd5e4076030: i1 = setcc nsz arcp 0x7fd5e4133920, ConstantFP:f16<APFloat(15360)>, setoge:ch
        0x7fd5e4133920: f16 = bitcast 0x7fd5e4133578
          0x7fd5e4133578: i16 = truncate 0x7fd5e40769f0
            0x7fd5e40769f0: i32 = bitcast 0x7fd5e406fc08
              0x7fd5e406fc08: v2f16 = CVT_PKRTZ_F16_F32 0x7fd5e4076c60, undef:f32
                0x7fd5e4076c60: f32,ch = CopyFromReg 0x560db4761a28, Register:f32 %7
                  0x7fd5e4076d30: f32 = Register %7
                0x7fd5e4076920: f32 = undef
        0x7fd5e406da80: f16 = ConstantFP<APFloat(15360)>
      0x7fd5e406da80: f16 = ConstantFP<APFloat(15360)>
      0x7fd5e406f790: f16 = ConstantFP<APFloat(0)>
In function: main
```
LLVM version is 14.0.4, mesa was at 51bdac48465186

The shader leading to the crash follows: 

```
radeonsi: Compiling shader 7
Vertex Shader as ESGS LLVM IR:

; ModuleID = 'mesa-shader'
source_filename = "mesa-shader"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
target triple = "amdgcn--"

@esgs_ring = external addrspace(3) global [0 x i32], align 65536

define amdgpu_gs void @main(float addrspace(6)* inreg noalias align 4 dereferenceable(18446744073709551615) %0, <8 x i32> addrspace(6)* inreg noalias align 4 dereferenceable(18446744073709551615) %1, i32 inreg %2, i32 inreg %3, i32 inreg %4, i32 inreg %5, i32 inreg %6, i32 inreg %7, <4 x i32> addrspace(6)* inreg noalias align 4 dereferenceable(18446744073709551615) %8, <8 x i32> addrspace(6)* inreg noalias align 4 dereferenceable(18446744073709551615) %9, float addrspace(6)* inreg noalias align 4 dereferenceable(18446744073709551615) %10, <8 x i32> addrspace(6)* inreg noalias align 4 dereferenceable(18446744073709551615) %11, i32 inreg %12, i32 inreg %13, i32 inreg %14, i32 inreg %15, <4 x i32> addrspace(6)* inreg noalias align 4 dereferenceable(18446744073709551615) %16, i32 inreg %17, <4 x i32> addrspace(6)* inreg noalias align 4 dereferenceable(18446744073709551615) %18, i32 inreg %19, <4 x i32> inreg %20, <4 x i32> inreg %21, i32 %22, i32 %23, i32 %24, i32 %25, i32 %26, i32 %27, i32 %28, i32 %29, i32 %30, i32 %31, i32 %32) #0 {
main_body:
  %33 = and i32 %3, 251658240
  %34 = icmp eq i32 %33, 0
  br i1 %34, label %if5020, label %endif5020

if5020:                                           ; preds = %main_body
  %35 = lshr i32 %2, 12
  %36 = and i32 %35, 511
  %37 = lshr i32 %2, 10
  %38 = and i32 %37, 2093056
  %39 = or i32 %38, %36
  call void @llvm.amdgcn.s.sendmsg(i32 9, i32 %39) #1
  br label %endif5020

endif5020:                                        ; preds = %if5020, %main_body
  %40 = lshr i32 %3, 8
  %41 = and i32 %40, 255
  %42 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #7, !range !0
  %43 = icmp ult i32 %42, %41
  br i1 %43, label %if6001, label %endif6001

if6001:                                           ; preds = %endif5020
  %44 = and i32 %25, 1792
  %45 = mul nuw nsw i32 %44, 525314
  %46 = and i32 %45, 537395712
  %47 = and i32 %22, 65535
  %48 = or i32 %46, %47
  %49 = lshr i32 %22, 6
  %50 = and i32 %49, 67107840
  %51 = or i32 %48, %50
  %52 = shl i32 %23, 20
  %53 = or i32 %51, %52
  %54 = bitcast i32 %53 to float
  call void @llvm.amdgcn.exp.f32(i32 20, i32 1, float %54, float undef, float undef, float undef, i1 true, i1 false) #1
  br label %endif6001

endif6001:                                        ; preds = %if6001, %endif5020
  %55 = and i32 %3, 255
  %56 = icmp ult i32 %42, %55
  br i1 %56, label %if11500, label %endif6002

if11500:                                          ; preds = %endif6001
  %57 = call nsz arcp <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %20, i32 %31, i32 0, i32 0, i32 0) #8
  %58 = extractelement <4 x float> %57, i64 0
  %59 = extractelement <4 x float> %57, i64 1
  %60 = extractelement <4 x float> %57, i64 2
  %61 = extractelement <4 x float> %57, i64 3
  %62 = call nsz arcp float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %21, i32 %32, i32 0, i32 0, i32 0) #8
  %63 = call nsz arcp <2 x half> @llvm.amdgcn.cvt.pkrtz(float %62, float undef) #7
  %64 = extractelement <2 x half> %63, i64 0
  %65 = fcmp nsz arcp oge half %64, 0xH3C00
  %66 = ptrtoint float addrspace(6)* %10 to i32
  %67 = insertelement <4 x i32> <i32 poison, i32 32768, i32 144, i32 822177708>, i32 %66, i64 0
  %68 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 4, i32 0) #7
  %69 = bitcast float %68 to i32
  %.not = icmp eq i32 %69, 0
  br i1 %.not, label %endif1, label %if1

if1:                                              ; preds = %if11500
  %70 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 8, i32 0) #7
  %71 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 12, i32 0) #7
  %72 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 16, i32 0) #7
  %73 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 20, i32 0) #7
  %74 = fmul nsz arcp float %61, %73
  %75 = call nsz arcp float @llvm.fma.f32(float %60, float %72, float %74) #7
  %76 = call nsz arcp float @llvm.fma.f32(float %59, float %71, float %75) #7
  %77 = call nsz arcp float @llvm.fma.f32(float %58, float %70, float %76) #7
  %78 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 24, i32 0) #7
  %79 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 28, i32 0) #7
  %80 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 32, i32 0) #7
  %81 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 36, i32 0) #7
  %82 = fmul nsz arcp float %61, %81
  %83 = call nsz arcp float @llvm.fma.f32(float %60, float %80, float %82) #7
  %84 = call nsz arcp float @llvm.fma.f32(float %59, float %79, float %83) #7
  %85 = call nsz arcp float @llvm.fma.f32(float %58, float %78, float %84) #7
  %86 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 40, i32 0) #7
  %87 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 44, i32 0) #7
  %88 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 48, i32 0) #7
  %89 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 52, i32 0) #7
  %90 = fmul nsz arcp float %61, %89
  %91 = call nsz arcp float @llvm.fma.f32(float %60, float %88, float %90) #7
  %92 = call nsz arcp float @llvm.fma.f32(float %59, float %87, float %91) #7
  %93 = call nsz arcp float @llvm.fma.f32(float %58, float %86, float %92) #7
  %94 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 56, i32 0) #7
  %95 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 60, i32 0) #7
  %96 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 64, i32 0) #7
  %97 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 68, i32 0) #7
  %98 = fmul nsz arcp float %61, %97
  %99 = call nsz arcp float @llvm.fma.f32(float %60, float %96, float %98) #7
  %100 = call nsz arcp float @llvm.fma.f32(float %59, float %95, float %99) #7
  %101 = call nsz arcp float @llvm.fma.f32(float %58, float %94, float %100) #7
  %102 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 72, i32 0) #7
  %103 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 76, i32 0) #7
  %104 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 80, i32 0) #7
  %105 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 84, i32 0) #7
  %106 = fmul nsz arcp float %61, %105
  %107 = call nsz arcp float @llvm.fma.f32(float %60, float %104, float %106) #7
  %108 = call nsz arcp float @llvm.fma.f32(float %59, float %103, float %107) #7
  %109 = call nsz arcp float @llvm.fma.f32(float %58, float %102, float %108) #7
  %110 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 88, i32 0) #7
  %111 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 92, i32 0) #7
  %112 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 96, i32 0) #7
  %113 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 100, i32 0) #7
  %114 = fmul nsz arcp float %61, %113
  %115 = call nsz arcp float @llvm.fma.f32(float %60, float %112, float %114) #7
  %116 = call nsz arcp float @llvm.fma.f32(float %59, float %111, float %115) #7
  %117 = call nsz arcp float @llvm.fma.f32(float %58, float %110, float %116) #7
  %118 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 104, i32 0) #7
  %119 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 108, i32 0) #7
  %120 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 112, i32 0) #7
  %121 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 116, i32 0) #7
  %122 = fmul nsz arcp float %61, %121
  %123 = call nsz arcp float @llvm.fma.f32(float %60, float %120, float %122) #7
  %124 = call nsz arcp float @llvm.fma.f32(float %59, float %119, float %123) #7
  %125 = call nsz arcp float @llvm.fma.f32(float %58, float %118, float %124) #7
  %126 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 120, i32 0) #7
  %127 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 124, i32 0) #7
  %128 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 128, i32 0) #7
  %129 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 132, i32 0) #7
  %130 = fmul nsz arcp float %61, %129
  %131 = call nsz arcp float @llvm.fma.f32(float %60, float %128, float %130) #7
  %132 = call nsz arcp float @llvm.fma.f32(float %59, float %127, float %131) #7
  %133 = call nsz arcp float @llvm.fma.f32(float %58, float %126, float %132) #7
  br label %endif1

endif1:                                           ; preds = %if11500, %if1
  %134 = phi float [ %109, %if1 ], [ 0.000000e+00, %if11500 ]
  %135 = phi float [ %117, %if1 ], [ 0.000000e+00, %if11500 ]
  %136 = phi float [ %125, %if1 ], [ 0.000000e+00, %if11500 ]
  %137 = phi float [ %133, %if1 ], [ 0.000000e+00, %if11500 ]
  %138 = phi float [ %77, %if1 ], [ 0.000000e+00, %if11500 ]
  %139 = phi float [ %85, %if1 ], [ 0.000000e+00, %if11500 ]
  %140 = phi float [ %93, %if1 ], [ 0.000000e+00, %if11500 ]
  %141 = phi float [ %101, %if1 ], [ 0.000000e+00, %if11500 ]
  %142 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %67, i32 0, i32 0) #7
  %143 = fmul nsz arcp float %59, %142
  %144 = select nsz arcp i1 %65, half 0xH3C00, half 0xH0000
  %145 = insertelement float undef, half %144, i64 0
  %146 = bitcast <2 x half> %145 to float
  %147 = select i1 %65, float 0.000000e+00, float 1.000000e+00
  %148 = call nsz arcp <2 x half> @llvm.amdgcn.cvt.pkrtz(float %147, float undef) #7
  %149 = shufflevector <2 x half> %148, <2 x half> undef, <3 x i32> <i32 0, i32 undef, i32 undef>
  %150 = bitcast <3 x half> %149 to float
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %58, float %143, float %60, float %61, i1 false, i1 false) #1
  call void @llvm.amdgcn.exp.f32(i32 13, i32 15, float %138, float %139, float %140, float %141, i1 false, i1 false) #1
  call void @llvm.amdgcn.exp.f32(i32 14, i32 15, float %134, float %135, float %136, float %137, i1 true, i1 false) #1
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %138, float %139, float %140, float %141, i1 false, i1 false) #1
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %134, float %135, float %136, float %137, i1 false, i1 false) #1
  call void @llvm.amdgcn.exp.f32(i32 34, i32 15, float %146, float %150, float undef, float undef, i1 false, i1 false) #1
  br label %endif6002

endif6002:                                        ; preds = %endif1, %endif6001
  ret void
}

; Function Attrs: nounwind
declare void @llvm.amdgcn.s.sendmsg(i32 immarg, i32) #1

; Function Attrs: nounwind readnone willreturn
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #2

; Function Attrs: inaccessiblememonly nounwind willreturn writeonly
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3

; Function Attrs: nounwind readonly willreturn
declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32 immarg) #4

; Function Attrs: nounwind readonly willreturn
declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #4

; Function Attrs: nounwind readnone speculatable willreturn
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #5

; Function Attrs: nounwind readnone willreturn
declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg) #2

; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare float @llvm.fma.f32(float, float, float) #6

attributes #0 = { "amdgpu-32bit-address-high-bits"="0xffff8000" "amdgpu-flat-work-group-size"="128,128" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="+DumpCode" }
attributes #1 = { nounwind }
attributes #2 = { nounwind readnone willreturn }
attributes #3 = { inaccessiblememonly nounwind willreturn writeonly }
attributes #4 = { nounwind readonly willreturn }
attributes #5 = { nounwind readnone speculatable willreturn }
attributes #6 = { nofree nosync nounwind readnone speculatable willreturn }
attributes #7 = { nounwind readnone }
attributes #8 = { nounwind readonly }

!0 = !{i32 0, i32 32}
```

_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to