Issue 143386
Summary [AArch64] Expanding reductions for scalable vectors is undefined.
Labels backend:AArch64
Assignees
Reporter banach-space
    **To reproduce:**
```bash
bin/llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 bug.ll
LLVM ERROR: Expanding reductions for scalable vectors is undefined.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
(...)
```

**Input IR**
```llvm
; ModuleID = 'bug.ll'
source_filename = "LLVMDialectModule"

; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite)
; Reproducer kernel: a two-level loop nest that accumulates bfloat values
; loaded from %21 into a scalable <vscale x 2 x bfloat> vector and then
; collapses that vector with @llvm.vector.reduce.fadd.nxv2bf16.
;   - Outer loop bounds are the two i64 values loaded from %1 (offsets 0 and 8).
;   - Inner loop bounds for outer index i are the i64 values %11[i] and %11[i+1].
;   - The running scalar accumulator starts as the bfloat loaded from %27 and
;     the final value is stored back to %27.
; Returns the struct { %26, %27, %28 }. Most of the remaining arguments are
; not used in the body.
define { ptr, ptr, i64 } @kernel_sum_reduce(ptr readnone captures(none) %0, ptr readonly captures(none) %1, i64 %2, i64 %3, i64 %4, ptr readnone captures(none) %5, ptr readnone captures(none) %6, i64 %7, i64 %8, i64 %9, ptr readnone captures(none) %10, ptr readonly captures(none) %11, i64 %12, i64 %13, i64 %14, ptr readnone captures(none) %15, ptr readnone captures(none) %16, i64 %17, i64 %18, i64 %19, ptr readnone captures(none) %20, ptr readonly captures(none) %21, i64 %22, i64 %23, i64 %24, { [2 x i64], [5 x i64] } %25, ptr %26, ptr %27, i64 %28) local_unnamed_addr #2 {
  ; Entry block (implicitly %29): load the initial accumulator and the outer
  ; loop bounds [%31, %33); skip straight to the exit if the range is empty.
  %30 = load bfloat, ptr %27, align 2
  %31 = load i64, ptr %1, align 4
  %32 = getelementptr inbounds nuw i8, ptr %1, i64 8
  %33 = load i64, ptr %32, align 4
  %34 = icmp slt i64 %31, %33
  br i1 %34, label %.lr.ph5, label %._crit_edge6

.lr.ph5:                                          ; preds = %29
  ; Outer-loop preheader: %36 = 2 * vscale is the per-iteration step of the
  ; inner loop; %.pre preloads %11[%31] as the first inner-loop start index.
  %35 = tail call i64 @llvm.vscale.i64()
  %36 = shl i64 %35, 1
 %.phi.trans.insert = getelementptr inbounds nuw i64, ptr %11, i64 %31
 %.pre = load i64, ptr %.phi.trans.insert, align 4
  br label %37

37: ; preds = %.lr.ph5, %._crit_edge
  ; Outer-loop header. %38 = inner start index, %39 = running scalar
  ; accumulator, %40 = outer index; %43 = %11[%40 + 1] is the inner end index.
 %38 = phi i64 [ %.pre, %.lr.ph5 ], [ %43, %._crit_edge ]
  %39 = phi bfloat [ %30, %.lr.ph5 ], [ %57, %._crit_edge ]
  %40 = phi i64 [ %31, %.lr.ph5 ], [ %41, %._crit_edge ]
  %41 = add nsw i64 %40, 1
  %42 = getelementptr inbounds nuw i64, ptr %11, i64 %41
  %43 = load i64, ptr %42, align 4
  ; Seed the vector accumulator: scalar accumulator in lane 0, zeros elsewhere.
  %44 = insertelement <vscale x 2 x bfloat> zeroinitializer, bfloat %39, i64 0
 %45 = icmp slt i64 %38, %43
  br i1 %45, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %37, %.lr.ph
  ; Inner loop: %46 = vector accumulator, %47 = current element index.
  %46 = phi <vscale x 2 x bfloat> [ %54, %.lr.ph ], [ %44, %37 ]
  %47 = phi i64 [ %55, %.lr.ph ], [ %38, %37 ]
  ; %49 = min(step, remaining elements); %50 is the whilelt(0, %49) predicate
  ; used both as the masked-load mask and as the select condition below.
  %48 = sub i64 %43, %47
 %49 = tail call i64 @llvm.smin.i64(i64 %36, i64 %48)
  %50 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 0, i64 %49)
  %51 = getelementptr bfloat, ptr %21, i64 %47
  %52 = tail call <vscale x 2 x bfloat> @llvm.masked.load.nxv2bf16.p0(ptr %51, i32 2, <vscale x 2 x i1> %50, <vscale x 2 x bfloat> zeroinitializer)
  ; Accumulate, keeping the previous accumulator value in lanes where %50 is false.
  %53 = fadd <vscale x 2 x bfloat> %46, %52
  %54 = select <vscale x 2 x i1> %50, <vscale x 2 x bfloat> %53, <vscale x 2 x bfloat> %46
  %55 = add i64 %47, %36
  %56 = icmp slt i64 %55, %43
  br i1 %56, label %.lr.ph, label %._crit_edge

._crit_edge: ; preds = %.lr.ph, %37
  ; Inner-loop exit: reduce the vector accumulator to a scalar (start value
  ; 0xR0000, reassoc allowed).
  ; NOTE(review): this scalable-vector bfloat fadd reduction is presumably the
  ; call behind "Expanding reductions for scalable vectors is undefined" --
  ; confirm against the backtrace.
  %.lcssa = phi <vscale x 2 x bfloat> [ %44, %37 ], [ %54, %.lr.ph ]
  %57 = tail call reassoc bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> %.lcssa)
  %58 = icmp slt i64 %41, %33
  br i1 %58, label %37, label %._crit_edge6

._crit_edge6:                                     ; preds = %._crit_edge, %29
  ; Function exit: %.lcssa3 is the final accumulator (the initial load if the
  ; outer loop never ran). Build the return struct and store the result to %27.
  %.lcssa3 = phi bfloat [ %30, %29 ], [ %57, %._crit_edge ]
  %59 = insertvalue { ptr, ptr, i64 } poison, ptr %26, 0
 %60 = insertvalue { ptr, ptr, i64 } %59, ptr %27, 1
  %61 = insertvalue { ptr, ptr, i64 } %60, i64 %28, 2
  store bfloat %.lcssa3, ptr %27, align 2
 ret { ptr, ptr, i64 } %61
}


; Declarations of the intrinsics called by @kernel_sum_reduce.
; NOTE(review): @llvm.vscale.i64 is called in the function body but has no
; declaration in this snippet -- confirm the reproducer parses as posted.
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat, <vscale x 2 x bfloat>) #5

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64, i64) #4

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: read)
declare <vscale x 2 x bfloat> @llvm.masked.load.nxv2bf16.p0(ptr captures(none), i32 immarg, <vscale x 2 x i1>, <vscale x 2 x bfloat>) #6

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i64 @llvm.smin.i64(i64, i64) #8

; Only attribute group #2 (used by @kernel_sum_reduce) is defined in this
; snippet; groups #4, #5, #6 and #8 referenced by the declarations above have
; no definition here.
attributes #2 = { nofree norecurse nosync nounwind memory(argmem: readwrite) }
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to