Issue |
143386
|
Summary |
[AArch64] Expanding reductions for scalable vectors is undefined.
|
Labels |
backend:AArch64
|
Assignees |
|
Reporter |
banach-space
|
**To reproduce:**
```bash
bin/llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 bug.ll
LLVM ERROR: Expanding reductions for scalable vectors is undefined.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
(...)
```
**Input IR**
```llvm
; ModuleID = 'bug.ll'
source_filename = "LLVMDialectModule"
; Reproducer kernel. For every outer index i in [load(%1), load(%1 + 8)) it
; accumulates the bfloat elements of %21 over the index range
; [load(%11 + i), load(%11 + i + 1)) into a predicated <vscale x 2 x bfloat>
; accumulator, then collapses that vector to a scalar with
; llvm.vector.reduce.fadd. The final scalar is stored back through %27 and
; the function returns the aggregate { %26, %27, %28 }.
; NOTE(review): the reduce.fadd call on <vscale x 2 x bfloat> is the only
; reduction in this function, so it is presumably what triggers the reported
; "Expanding reductions for scalable vectors is undefined" error -- the report
; does not include a backtrace to confirm.
; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite)
define { ptr, ptr, i64 } @kernel_sum_reduce(ptr readnone captures(none) %0, ptr readonly captures(none) %1, i64 %2, i64 %3, i64 %4, ptr readnone captures(none) %5, ptr readnone captures(none) %6, i64 %7, i64 %8, i64 %9, ptr readnone captures(none) %10, ptr readonly captures(none) %11, i64 %12, i64 %13, i64 %14, ptr readnone captures(none) %15, ptr readnone captures(none) %16, i64 %17, i64 %18, i64 %19, ptr readnone captures(none) %20, ptr readonly captures(none) %21, i64 %22, i64 %23, i64 %24, { [2 x i64], [5 x i64] } %25, ptr %26, ptr %27, i64 %28) local_unnamed_addr #2 {
; Entry block (implicitly %29): read the initial scalar accumulator from %27
; and the outer loop bounds from the first two i64 slots of %1.
%30 = load bfloat, ptr %27, align 2
%31 = load i64, ptr %1, align 4
%32 = getelementptr inbounds nuw i8, ptr %1, i64 8
%33 = load i64, ptr %32, align 4
; Skip both loops entirely when the outer range is empty.
%34 = icmp slt i64 %31, %33
br i1 %34, label %.lr.ph5, label %._crit_edge6
.lr.ph5: ; preds = %29
; %36 = 2 * vscale: used below as the inner-loop step and as the cap on the
; number of active lanes per iteration.
%35 = tail call i64 @llvm.vscale.i64()
%36 = shl i64 %35, 1
; Preload the first inner-range start so the outer loop can carry it in a phi.
%.phi.trans.insert = getelementptr inbounds nuw i64, ptr %11, i64 %31
%.pre = load i64, ptr %.phi.trans.insert, align 4
br label %37
; Outer loop header: %38 = inner-range start, %39 = running scalar sum,
; %40 = outer index.
37: ; preds = %.lr.ph5, %._crit_edge
%38 = phi i64 [ %.pre, %.lr.ph5 ], [ %43, %._crit_edge ]
%39 = phi bfloat [ %30, %.lr.ph5 ], [ %57, %._crit_edge ]
%40 = phi i64 [ %31, %.lr.ph5 ], [ %41, %._crit_edge ]
%41 = add nsw i64 %40, 1
; %43 = inner-range end; it also becomes the next iteration's start via %38.
%42 = getelementptr inbounds nuw i64, ptr %11, i64 %41
%43 = load i64, ptr %42, align 4
; Seed the vector accumulator with the running scalar sum in lane 0 and zeros
; elsewhere, so the later reduction folds the carried-in value back in.
%44 = insertelement <vscale x 2 x bfloat> zeroinitializer, bfloat %39, i64 0
%45 = icmp slt i64 %38, %43
br i1 %45, label %.lr.ph, label %._crit_edge
; Inner loop: %46 = vector accumulator, %47 = current element index.
.lr.ph: ; preds = %37, %.lr.ph
%46 = phi <vscale x 2 x bfloat> [ %54, %.lr.ph ], [ %44, %37 ]
%47 = phi i64 [ %55, %.lr.ph ], [ %38, %37 ]
; Build the lane predicate from min(step, elements remaining); it guards both
; the masked load and the select below.
%48 = sub i64 %43, %47
%49 = tail call i64 @llvm.smin.i64(i64 %36, i64 %48)
%50 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 0, i64 %49)
%51 = getelementptr bfloat, ptr %21, i64 %47
; Lanes where the predicate is false take the zero passthru value.
%52 = tail call <vscale x 2 x bfloat> @llvm.masked.load.nxv2bf16.p0(ptr %51, i32 2, <vscale x 2 x i1> %50, <vscale x 2 x bfloat> zeroinitializer)
%53 = fadd <vscale x 2 x bfloat> %46, %52
; Keep the previous accumulator value in lanes where the predicate is false.
%54 = select <vscale x 2 x i1> %50, <vscale x 2 x bfloat> %53, <vscale x 2 x bfloat> %46
%55 = add i64 %47, %36
%56 = icmp slt i64 %55, %43
br i1 %56, label %.lr.ph, label %._crit_edge
; Inner loop exit / outer loop latch: reduce the vector accumulator to a scalar.
._crit_edge: ; preds = %.lr.ph, %37
%.lcssa = phi <vscale x 2 x bfloat> [ %44, %37 ], [ %54, %.lr.ph ]
; Scalable-vector bfloat fadd reduction with the 'reassoc' flag and a 0xR0000
; start value; see the NOTE(review) at the top of the function.
%57 = tail call reassoc bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> %.lcssa)
%58 = icmp slt i64 %41, %33
br i1 %58, label %37, label %._crit_edge6
; Function exit: %.lcssa3 is the initial value when the outer loop never ran,
; otherwise the last reduced sum.
._crit_edge6: ; preds = %._crit_edge, %29
%.lcssa3 = phi bfloat [ %30, %29 ], [ %57, %._crit_edge ]
%59 = insertvalue { ptr, ptr, i64 } poison, ptr %26, 0
%60 = insertvalue { ptr, ptr, i64 } %59, ptr %27, 1
%61 = insertvalue { ptr, ptr, i64 } %60, i64 %28, 2
; Write the final sum back to the same location the initial value came from.
store bfloat %.lcssa3, ptr %27, align 2
ret { ptr, ptr, i64 } %61
}
; Declarations of the intrinsics called by @kernel_sum_reduce, followed by the
; one attribute group this excerpt defines.
; NOTE(review): the declarations below reference attribute groups #4, #5, #6
; and #8, but only #2 is defined in this excerpt, and @llvm.vscale.i64 is
; called by the function without a matching declaration here. The reproducer
; looks like it was trimmed from a larger module -- confirm that it still
; parses as-is with the llc build being tested.
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat, <vscale x 2 x bfloat>) #5
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64, i64) #4
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: read)
declare <vscale x 2 x bfloat> @llvm.masked.load.nxv2bf16.p0(ptr captures(none), i32 immarg, <vscale x 2 x i1>, <vscale x 2 x bfloat>) #6
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.smin.i64(i64, i64) #8
; Attribute group used by @kernel_sum_reduce.
attributes #2 = { nofree norecurse nosync nounwind memory(argmem: readwrite) }
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs