Issue 168992
Summary [SystemZ] LLVM hangs trying to legalize types in a function using `@llvm.vector.reduce.and.v4i1(<4 x i1>)`
Labels backend:SystemZ, llvm:hang
Assignees
Reporter alexrp
```
❯ llc --version | head -n2
LLVM (http://llvm.org/):
  LLVM version 21.1.0
❯ timeout 60 llc test.ll; echo $status
124
```

```llvm
; ModuleID = 'BitcodeBuffer'
source_filename = "test"
; Target: s390x, matching the SystemZ backend named in the issue labels.
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
target triple = "s390x-unknown-unknown-unknown"

; Only used as the type of @Target.DynamicLinker.none below.
%Target.DynamicLinker = type { [255 x i8], i8 }

; Zero-initialized one-element half array; @repro copies it into a stack buffer with memcpy.
@0 = private unnamed_addr constant [1 x half] zeroinitializer, align 2
; NUL-terminated panic message; the integerOverflow helper passes it with length 16 to @debug.defaultPanic.
@__anon_628 = internal unnamed_addr constant [17 x i8] c"integer overflow\00", align 1
; The remaining globals are not referenced by any function shown in this reproducer.
@builtin.zig_backend = internal unnamed_addr constant i64 2, align 8
@start.simplified_logic = internal unnamed_addr constant i1 false, align 1
@builtin.output_mode = internal unnamed_addr constant i2 -2, align 1
@debug.use_trap_panic = internal unnamed_addr constant i1 false, align 1
@builtin.os = internal unnamed_addr constant { { [176 x i8], i3, [7 x i8] }, i6, [7 x i8] } { { [176 x i8], i3, [7 x i8] } { [176 x i8] undef, i3 0, [7 x i8] undef }, i6 0, [7 x i8] undef }, align 8
@Target.DynamicLinker.none = internal unnamed_addr constant %Target.DynamicLinker { [255 x i8] undef, i8 0 }, align 1

; Function Attrs: noredzone nounwind uwtable
; Reproducer for the hang: two nested counted loops (each bounded by 1) that
; splat a half value into two <4 x half> stack slots, compare them lane-wise
; with fcmp oeq, and AND-reduce the resulting <4 x i1> mask. The reduced bit is
; compared against a scalar predicate and stored to a stack slot; the function
; returns void.
define dso_local void @repro() #0 {
  ; Stack slots: %1 receives the final i1 result; %2 and %5 hold the two
  ; <4 x half> compare operands; %4 and %7 are the inner and outer loop
  ; counters; %3, %6 and %8 are [1 x half] buffers filled by memcpy from @0
  ; (directly for %8, via %8 for %3 and %6).
  %1 = alloca i1, align 1
  %2 = alloca <4 x half>, align 8
  %3 = alloca [1 x half], align 2
  %4 = alloca i64, align 8
  %5 = alloca <4 x half>, align 8
  %6 = alloca [1 x half], align 2
  %7 = alloca i64, align 8
  %8 = alloca [1 x half], align 2
  call void @llvm.memcpy.p0.p0.i64(ptr align 2 %8, ptr align 2 @0, i64 2, i1 false)
  store i64 0, ptr %7, align 8
  call void @llvm.memcpy.p0.p0.i64(ptr align 2 %6, ptr align 2 %8, i64 2, i1 false)
  br label %10

; Function exit, reached once the outer loop condition fails.
9:                                                ; preds = %22
  ret void

; Outer loop header: continue while the counter stored in %7 is below 1.
10:                                               ; preds = %13, %0
  %11 = load i64, ptr %7, align 8
  %12 = icmp ult i64 %11, 1
  br i1 %12, label %15, label %22

; Outer loop latch: increment the counter in %7.
13:                                               ; preds = %23
  %14 = add nuw i64 %11, 1
  store i64 %14, ptr %7, align 8
  br label %10

; Outer loop body: splat the half loaded from %6 into all four lanes of %5,
; then reset the inner counter %4 and refill %3 from %8.
15:                                               ; preds = %10
  %16 = getelementptr inbounds [1 x half], ptr %6, i64 0, i64 %11
  %17 = load half, ptr %16, align 2
  %18 = getelementptr inbounds <4 x half>, ptr %5, i64 0, i64 0
  store half %17, ptr %18, align 2
  %19 = getelementptr inbounds <4 x half>, ptr %5, i64 0, i64 1
  store half %17, ptr %19, align 2
  %20 = getelementptr inbounds <4 x half>, ptr %5, i64 0, i64 2
  store half %17, ptr %20, align 2
  %21 = getelementptr inbounds <4 x half>, ptr %5, i64 0, i64 3
  store half %17, ptr %21, align 2
  store i64 0, ptr %4, align 8
  call void @llvm.memcpy.p0.p0.i64(ptr align 2 %3, ptr align 2 %8, i64 2, i1 false)
  br label %24

22:                                               ; preds = %10
  br label %9

23:                                               ; preds = %33
  br label %13

; Inner loop header: continue while the counter stored in %4 is below 1.
24:                                               ; preds = %27, %15
  %25 = load i64, ptr %4, align 8
  %26 = icmp ult i64 %25, 1
  br i1 %26, label %29, label %33

; Inner loop latch: increment the counter in %4.
27:                                               ; preds = %67
  %28 = add nuw i64 %25, 1
  store i64 %28, ptr %4, align 8
  br label %24

; Inner loop body: load the half element %31 from %3 at the inner index.
29:                                               ; preds = %24
  %30 = getelementptr inbounds [1 x half], ptr %3, i64 0, i64 %25
  %31 = load half, ptr %30, align 2
  %32 = icmp ne i64 %25, 5
  br i1 %32, label %37, label %39

33:                                               ; preds = %24
  br label %23

; %35 is true only when both %25 and %11 differ from 5.
34:                                               ; preds = %39, %37
  %35 = phi i1 [ %38, %37 ], [ false, %39 ]
  %36 = icmp ult i64 %25, 5
  br i1 %36, label %43, label %44

37:                                               ; preds = %29
  %38 = icmp ne i64 %11, 5
  br label %34

39:                                               ; preds = %29
  br label %34

; %41 is %25 when %25 < 5, otherwise the checked result of %25 - 2.
40:                                               ; preds = %48, %43
  %41 = phi i64 [ %25, %43 ], [ %49, %48 ]
  %42 = icmp ult i64 %11, 5
  br i1 %42, label %60, label %61

43:                                               ; preds = %34
  br label %40

; Checked subtraction %25 - 2; the overflow bit selects the panic block.
44:                                               ; preds = %34
  %45 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %25, i64 2)
  %46 = extractvalue { i64, i1 } %45, 1
  br i1 %46, label %47, label %48

; Overflow path: call the integer-overflow panic helper; control does not continue.
47:                                               ; preds = %44
  call void @llvm.assume(i1 true) [ "cold"() ]
  call fastcc void @"debug.FullPanic((function 'defaultPanic')).integerOverflow"()
  unreachable

48:                                               ; preds = %44
  %49 = extractvalue { i64, i1 } %45, 0
  br label %40

; %51 is %11 when %11 < 5, otherwise the checked result of %11 - 2.
; Splat %31 into all four lanes of %2, then compare %2 against %5 lane-wise.
50:                                               ; preds = %65, %60
  %51 = phi i64 [ %11, %60 ], [ %66, %65 ]
  %52 = getelementptr inbounds <4 x half>, ptr %2, i64 0, i64 0
  store half %31, ptr %52, align 2
  %53 = getelementptr inbounds <4 x half>, ptr %2, i64 0, i64 1
  store half %31, ptr %53, align 2
  %54 = getelementptr inbounds <4 x half>, ptr %2, i64 0, i64 2
  store half %31, ptr %54, align 2
  %55 = getelementptr inbounds <4 x half>, ptr %2, i64 0, i64 3
  store half %31, ptr %55, align 2
  %56 = load <4 x half>, ptr %2, align 8
  %57 = load <4 x half>, ptr %5, align 8
  %58 = fcmp oeq <4 x half> %56, %57
  ; AND-reduction of the <4 x i1> compare mask: this is the intrinsic call
  ; named in the issue summary.
  %59 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %58)
  br i1 %35, label %70, label %72

60:                                               ; preds = %40
  br label %50

; Checked subtraction %11 - 2; the overflow bit selects the panic block.
61:                                               ; preds = %40
  %62 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %11, i64 2)
  %63 = extractvalue { i64, i1 } %62, 1
  br i1 %63, label %64, label %65

; Overflow path: call the integer-overflow panic helper; control does not continue.
64:                                               ; preds = %61
  call void @llvm.assume(i1 true) [ "cold"() ]
  call fastcc void @"debug.FullPanic((function 'defaultPanic')).integerOverflow"()
  unreachable

65:                                               ; preds = %61
  %66 = extractvalue { i64, i1 } %62, 0
  br label %50

; %68 is true only when %35 is true and %41 equals %51. The stored result is
; whether the vector reduction %59 agrees with that scalar predicate.
67:                                               ; preds = %72, %70
  %68 = phi i1 [ %71, %70 ], [ false, %72 ]
  %69 = icmp eq i1 %59, %68
  store i1 %69, ptr %1, align 1
  br label %27

70:                                               ; preds = %50
  %71 = icmp eq i64 %41, %51
  br label %67

72:                                               ; preds = %50
  br label %67
}

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) #1

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) #2

; Function Attrs: cold noredzone noreturn nounwind uwtable
; Integer-overflow panic helper called from @repro's overflow paths. It fills a
; stack struct with the caller's return address and a byte value of 1, then
; calls @debug.defaultPanic with the "integer overflow" message pointer, the
; length 16, and a pointer to that struct. It ends in `unreachable`.
define internal fastcc void @"debug.FullPanic((function 'defaultPanic')).integerOverflow"() unnamed_addr #3 {
  %1 = alloca { i64, i8, [7 x i8] }, align 8
  ; Field 0 of the struct: this function's return address as an integer.
  %2 = call ptr @llvm.returnaddress(i32 0)
  %3 = ptrtoint ptr %2 to i64
  %4 = getelementptr inbounds { i64, i8, [7 x i8] }, ptr %1, i32 0, i32 0
  store i64 %3, ptr %4, align 8
  ; Field 1 of the struct: constant byte 1.
  %5 = getelementptr inbounds { i64, i8, [7 x i8] }, ptr %1, i32 0, i32 1
  store i8 1, ptr %5, align 1
  ; Unpack the constant { pointer, length } pair describing the message.
  %6 = extractvalue { ptr, i64 } { ptr @__anon_628, i64 16 }, 0
  %7 = extractvalue { ptr, i64 } { ptr @__anon_628, i64 16 }, 1
  call fastcc void @debug.defaultPanic(ptr nonnull readonly align 1 %6, i64 %7, ptr nonnull readonly align 8 %1)
  unreachable
}

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
declare void @llvm.assume(i1 noundef) #4

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>) #2

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare ptr @llvm.returnaddress(i32 immarg) #5

; Function Attrs: cold noredzone noreturn nounwind uwtable
; Panic sink: packs the first two arguments (%0 pointer, %1 i64) into a
; { ptr, i64 } aggregate, stores it to a stack slot, and calls @llvm.trap.
; The third argument (%2) is not used in the body.
define internal fastcc void @debug.defaultPanic(ptr nonnull readonly align 1 %0, i64 %1, ptr nonnull readonly align 8 %2) unnamed_addr #3 {
  %4 = alloca { ptr, i64 }, align 8
  %5 = insertvalue { ptr, i64 } poison, ptr %0, 0
  %6 = insertvalue { ptr, i64 } %5, i64 %1, 1
  store { ptr, i64 } %6, ptr %4, align 8
  call void @llvm.trap()
  unreachable
}

; Function Attrs: cold noreturn nounwind memory(inaccessiblemem: write)
declare void @llvm.trap() #6

attributes #0 = { noredzone nounwind uwtable "frame-pointer"="all" "target-cpu"="z15" "target-features"="+backchain,+deflate-conversion,+dfp-packed-conversion,+dfp-zoned-conversion,+distinct-ops,+enhanced-dat-2,+enhanced-sort,+execution-hint,+fast-serialization,+fp-extension,+guarded-storage,+high-word,+insert-reference-bits-multiple,+interlocked-access1,+load-and-trap,+load-and-zero-rightmost-byte,+load-store-on-cond,+load-store-on-cond-2,+message-security-assist-extension3,+message-security-assist-extension4,+message-security-assist-extension5,+message-security-assist-extension7,+message-security-assist-extension8,+message-security-assist-extension9,+miscellaneous-extensions,+miscellaneous-extensions-2,+miscellaneous-extensions-3,+population-count,+processor-assist,+reset-reference-bits-multiple,+test-pending-external-interruption,+transactional-execution,+vector,+vector-enhancements-1,+vector-enhancements-2,+vector-packed-decimal,+vector-packed-decimal-enhancement,-bear-enhancement,-concurrent-functions,-message-security-assist-extension12,-miscellaneous-extensions-4,-nnp-assist,-processor-activity-instrumentation,-reset-dat-protection,-soft-float,-unaligned-symbols,-vector-enhancements-3,-vector-packed-decimal-enhancement-2,-vector-packed-decimal-enhancement-3" }
attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #3 = { cold noredzone noreturn nounwind uwtable "frame-pointer"="all" "target-cpu"="z15" "target-features"="+backchain,+deflate-conversion,+dfp-packed-conversion,+dfp-zoned-conversion,+distinct-ops,+enhanced-dat-2,+enhanced-sort,+execution-hint,+fast-serialization,+fp-extension,+guarded-storage,+high-word,+insert-reference-bits-multiple,+interlocked-access1,+load-and-trap,+load-and-zero-rightmost-byte,+load-store-on-cond,+load-store-on-cond-2,+message-security-assist-extension3,+message-security-assist-extension4,+message-security-assist-extension5,+message-security-assist-extension7,+message-security-assist-extension8,+message-security-assist-extension9,+miscellaneous-extensions,+miscellaneous-extensions-2,+miscellaneous-extensions-3,+population-count,+processor-assist,+reset-reference-bits-multiple,+test-pending-external-interruption,+transactional-execution,+vector,+vector-enhancements-1,+vector-enhancements-2,+vector-packed-decimal,+vector-packed-decimal-enhancement,-bear-enhancement,-concurrent-functions,-message-security-assist-extension12,-miscellaneous-extensions-4,-nnp-assist,-processor-activity-instrumentation,-reset-dat-protection,-soft-float,-unaligned-symbols,-vector-enhancements-3,-vector-packed-decimal-enhancement-2,-vector-packed-decimal-enhancement-3" }
attributes #4 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
attributes #5 = { nocallback nofree nosync nounwind willreturn memory(none) }
attributes #6 = { cold noreturn nounwind memory(inaccessiblemem: write) }

!llvm.module.flags = !{}
```

The hang does not occur when `%59` (the `@llvm.vector.reduce.and.v4i1` call) is removed and its single use is replaced with `i1 1`.

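`llc test.ll --debug` shows the type legalizer promoting the operand of the same node (`t124`) over and over without making progress: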

```
... snip ...
Legalizing node: t109: i16 = extract_vector_elt t105, Constant:i32<0>
Analyzing result type: i16
Promote integer result: t109: i16 = extract_vector_elt t105, Constant:i32<0>
Creating new node: t122: i32 = extract_vector_elt t105, Constant:i32<0>

Legalizing node: t122: i32 = extract_vector_elt t105, Constant:i32<0>
Analyzing result type: i32
Legal result type
Analyzing operand: t105: v2i16 = setcc t103, t104, setoeq:ch
Widen node operand 0: t122: i32 = extract_vector_elt t105, Constant:i32<0>
Creating new node: t123: i32 = extract_vector_elt t85, Constant:i32<0>

Legalizing node: t123: i32 = extract_vector_elt t85, Constant:i32<0>
Analyzing result type: i32
Legal result type
Analyzing operand: t85: v1i16 = setcc t52, t82, setoeq:ch
Widen node operand 0: t123: i32 = extract_vector_elt t85, Constant:i32<0>
Creating new node: t124: i32 = any_extend t109

Legalizing node: t124: i32 = any_extend t109
Analyzing result type: i32
Legal result type
Analyzing operand: t109: i16 = extract_vector_elt t105, Constant:i32<0>
Promote integer operand: t124: i32 = any_extend t109

Legalizing node: t124: i32 = any_extend t109
Analyzing result type: i32
Legal result type
Analyzing operand: t109: i16 = extract_vector_elt t105, Constant:i32<0>
Promote integer operand: t124: i32 = any_extend t109

Legalizing node: t124: i32 = any_extend t109
Analyzing result type: i32
Legal result type
Analyzing operand: t109: i16 = extract_vector_elt t105, Constant:i32<0>
Promote integer operand: t124: i32 = any_extend t109

Legalizing node: t124: i32 = any_extend t109
Analyzing result type: i32
Legal result type
Analyzing operand: t109: i16 = extract_vector_elt t105, Constant:i32<0>
Promote integer operand: t124: i32 = any_extend t109
... snip ...
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to