| Issue |
168992
|
| Summary |
[SystemZ] LLVM hangs trying to legalize types in a function using `@llvm.vector.reduce.and.v4i1(<4 x i1>)`
|
| Labels |
backend:SystemZ,
llvm:hang
|
| Assignees |
|
| Reporter |
alexrp
|
```
❯ llc --version | head -n2
LLVM (http://llvm.org/):
LLVM version 21.1.0
❯ timeout 60 llc test.ll; echo $status
124
```
```llvm
; ModuleID = 'BitcodeBuffer'
source_filename = "test"
; s390x triple: this module is compiled by the SystemZ backend named in the report.
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
target triple = "s390x-unknown-unknown-unknown"
; Only referenced by @Target.DynamicLinker.none below.
%Target.DynamicLinker = type { [255 x i8], i8 }
; One-element half array; @repro copies it into its stack buffers via memcpy.
@0 = private unnamed_addr constant [1 x half] zeroinitializer, align 2
; Panic message handed to @debug.defaultPanic by the integerOverflow helper.
@__anon_628 = internal unnamed_addr constant [17 x i8] c"integer overflow\00", align 1
; None of the remaining globals are referenced by any function in this module.
@builtin.zig_backend = internal unnamed_addr constant i64 2, align 8
@start.simplified_logic = internal unnamed_addr constant i1 false, align 1
@builtin.output_mode = internal unnamed_addr constant i2 -2, align 1
@debug.use_trap_panic = internal unnamed_addr constant i1 false, align 1
@builtin.os = internal unnamed_addr constant { { [176 x i8], i3, [7 x i8] }, i6, [7 x i8] } { { [176 x i8], i3, [7 x i8] } { [176 x i8] undef, i3 0, [7 x i8] undef }, i6 0, [7 x i8] undef }, align 8
@Target.DynamicLinker.none = internal unnamed_addr constant %Target.DynamicLinker { [255 x i8] undef, i8 0 }, align 1
; Function Attrs: noredzone nounwind uwtable
; Reproducer entry point. Two nested loops (each bounded by `counter < 1`) walk
; copies of @0, splat one half from each into a <4 x half> stack slot, compare
; the two vectors with `fcmp oeq`, and reduce the <4 x i1> result with
; @llvm.vector.reduce.and.v4i1 (%59). The report states that removing %59 makes
; the llc hang go away.
define dso_local void @repro() #0 {
%1 = alloca i1, align 1
%2 = alloca <4 x half>, align 8
%3 = alloca [1 x half], align 2
%4 = alloca i64, align 8
%5 = alloca <4 x half>, align 8
%6 = alloca [1 x half], align 2
%7 = alloca i64, align 8
%8 = alloca [1 x half], align 2
; Seed %8 from @0, zero the outer counter %7, and copy %8 into %6.
call void @llvm.memcpy.p0.p0.i64(ptr align 2 %8, ptr align 2 @0, i64 2, i1 false)
store i64 0, ptr %7, align 8
call void @llvm.memcpy.p0.p0.i64(ptr align 2 %6, ptr align 2 %8, i64 2, i1 false)
br label %10
; Function exit.
9: ; preds = %22
ret void
; Outer loop header: continue while the counter in %7 is < 1.
10: ; preds = %13, %0
%11 = load i64, ptr %7, align 8
%12 = icmp ult i64 %11, 1
br i1 %12, label %15, label %22
; Outer loop latch: increment the counter in %7.
13: ; preds = %23
%14 = add nuw i64 %11, 1
store i64 %14, ptr %7, align 8
br label %10
; Outer loop body: splat %6[%11] into all four lanes of %5 with scalar stores,
; then set up the inner loop (counter %4 = 0, %3 = copy of %8).
15: ; preds = %10
%16 = getelementptr inbounds [1 x half], ptr %6, i64 0, i64 %11
%17 = load half, ptr %16, align 2
%18 = getelementptr inbounds <4 x half>, ptr %5, i64 0, i64 0
store half %17, ptr %18, align 2
%19 = getelementptr inbounds <4 x half>, ptr %5, i64 0, i64 1
store half %17, ptr %19, align 2
%20 = getelementptr inbounds <4 x half>, ptr %5, i64 0, i64 2
store half %17, ptr %20, align 2
%21 = getelementptr inbounds <4 x half>, ptr %5, i64 0, i64 3
store half %17, ptr %21, align 2
store i64 0, ptr %4, align 8
call void @llvm.memcpy.p0.p0.i64(ptr align 2 %3, ptr align 2 %8, i64 2, i1 false)
br label %24
22: ; preds = %10
br label %9
23: ; preds = %33
br label %13
; Inner loop header: continue while the counter in %4 is < 1.
24: ; preds = %27, %15
%25 = load i64, ptr %4, align 8
%26 = icmp ult i64 %25, 1
br i1 %26, label %29, label %33
; Inner loop latch: increment the counter in %4.
27: ; preds = %67
%28 = add nuw i64 %25, 1
store i64 %28, ptr %4, align 8
br label %24
; Inner loop body: load %3[%25] and start evaluating (%25 != 5) && (%11 != 5).
29: ; preds = %24
%30 = getelementptr inbounds [1 x half], ptr %3, i64 0, i64 %25
%31 = load half, ptr %30, align 2
%32 = icmp ne i64 %25, 5
br i1 %32, label %37, label %39
33: ; preds = %24
br label %23
; %35 = (%25 != 5) && (%11 != 5), short-circuited through blocks 37/39.
34: ; preds = %39, %37
%35 = phi i1 [ %38, %37 ], [ false, %39 ]
%36 = icmp ult i64 %25, 5
br i1 %36, label %43, label %44
37: ; preds = %29
%38 = icmp ne i64 %11, 5
br label %34
39: ; preds = %29
br label %34
; %41 = %25 if %25 < 5, otherwise the overflow-checked %25 - 2.
40: ; preds = %48, %43
%41 = phi i64 [ %25, %43 ], [ %49, %48 ]
%42 = icmp ult i64 %11, 5
br i1 %42, label %60, label %61
43: ; preds = %34
br label %40
; Checked %25 - 2; on unsigned overflow, take the panic path in block 47.
44: ; preds = %34
%45 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %25, i64 2)
%46 = extractvalue { i64, i1 } %45, 1
br i1 %46, label %47, label %48
47: ; preds = %44
call void @llvm.assume(i1 true) [ "cold"() ]
call fastcc void @"debug.FullPanic((function 'defaultPanic')).integerOverflow"()
unreachable
48: ; preds = %44
%49 = extractvalue { i64, i1 } %45, 0
br label %40
; %51 = %11 if %11 < 5, otherwise the overflow-checked %11 - 2.
; Splat %31 into all four lanes of %2, then compare %2 against %5.
50: ; preds = %65, %60
%51 = phi i64 [ %11, %60 ], [ %66, %65 ]
%52 = getelementptr inbounds <4 x half>, ptr %2, i64 0, i64 0
store half %31, ptr %52, align 2
%53 = getelementptr inbounds <4 x half>, ptr %2, i64 0, i64 1
store half %31, ptr %53, align 2
%54 = getelementptr inbounds <4 x half>, ptr %2, i64 0, i64 2
store half %31, ptr %54, align 2
%55 = getelementptr inbounds <4 x half>, ptr %2, i64 0, i64 3
store half %31, ptr %55, align 2
%56 = load <4 x half>, ptr %2, align 8
%57 = load <4 x half>, ptr %5, align 8
; The <4 x half> compare feeding the i1 and-reduction below is the construct the
; report ties to the hang: dropping %59 lets llc finish.
%58 = fcmp oeq <4 x half> %56, %57
%59 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %58)
br i1 %35, label %70, label %72
60: ; preds = %40
br label %50
; Checked %11 - 2; on unsigned overflow, take the panic path in block 64.
61: ; preds = %40
%62 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %11, i64 2)
%63 = extractvalue { i64, i1 } %62, 1
br i1 %63, label %64, label %65
64: ; preds = %61
call void @llvm.assume(i1 true) [ "cold"() ]
call fastcc void @"debug.FullPanic((function 'defaultPanic')).integerOverflow"()
unreachable
65: ; preds = %61
%66 = extractvalue { i64, i1 } %62, 0
br label %50
; %68 = %35 && (%41 == %51); store whether the reduction result %59 equals it.
; This icmp is the only use of %59.
67: ; preds = %72, %70
%68 = phi i1 [ %71, %70 ], [ false, %72 ]
%69 = icmp eq i1 %59, %68
store i1 %69, ptr %1, align 1
br label %27
70: ; preds = %50
%71 = icmp eq i64 %41, %51
br label %67
72: ; preds = %50
br label %67
}
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) #1
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) #2
; Function Attrs: cold noredzone noreturn nounwind uwtable
; Integer-overflow panic path called from @repro's checked subtractions.
; Stores the caller's return address and the byte value 1 into a stack struct,
; then calls @debug.defaultPanic with @__anon_628 ("integer overflow") and the
; length 16. Never returns.
define internal fastcc void @"debug.FullPanic((function 'defaultPanic')).integerOverflow"() unnamed_addr #3 {
%1 = alloca { i64, i8, [7 x i8] }, align 8
%2 = call ptr @llvm.returnaddress(i32 0)
%3 = ptrtoint ptr %2 to i64
; Field 0 holds the return address as an integer.
%4 = getelementptr inbounds { i64, i8, [7 x i8] }, ptr %1, i32 0, i32 0
store i64 %3, ptr %4, align 8
; Field 1 is set to 1.
%5 = getelementptr inbounds { i64, i8, [7 x i8] }, ptr %1, i32 0, i32 1
store i8 1, ptr %5, align 1
; Unpack the constant { pointer, length } pair describing the message.
%6 = extractvalue { ptr, i64 } { ptr @__anon_628, i64 16 }, 0
%7 = extractvalue { ptr, i64 } { ptr @__anon_628, i64 16 }, 1
call fastcc void @debug.defaultPanic(ptr nonnull readonly align 1 %6, i64 %7, ptr nonnull readonly align 8 %1)
unreachable
}
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
declare void @llvm.assume(i1 noundef) #4
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>) #2
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
declare ptr @llvm.returnaddress(i32 immarg) #5
; Function Attrs: cold noredzone noreturn nounwind uwtable
; Panic sink: packs the pointer %0 and the i64 %1 into a { ptr, i64 } stack
; slot and then executes @llvm.trap. The third argument (%2) is not used.
; Never returns.
define internal fastcc void @debug.defaultPanic(ptr nonnull readonly align 1 %0, i64 %1, ptr nonnull readonly align 8 %2) unnamed_addr #3 {
%4 = alloca { ptr, i64 }, align 8
%5 = insertvalue { ptr, i64 } poison, ptr %0, 0
%6 = insertvalue { ptr, i64 } %5, i64 %1, 1
store { ptr, i64 } %6, ptr %4, align 8
call void @llvm.trap()
unreachable
}
; Function Attrs: cold noreturn nounwind memory(inaccessiblemem: write)
declare void @llvm.trap() #6
; Attribute groups. #0 is used by @repro and #3 by the two panic helpers; both
; set "target-cpu"="z15" and enable the +vector feature. #1, #2, #4, #5 and #6
; are attached to the intrinsic declarations.
attributes #0 = { noredzone nounwind uwtable "frame-pointer"="all" "target-cpu"="z15" "target-features"="+backchain,+deflate-conversion,+dfp-packed-conversion,+dfp-zoned-conversion,+distinct-ops,+enhanced-dat-2,+enhanced-sort,+execution-hint,+fast-serialization,+fp-extension,+guarded-storage,+high-word,+insert-reference-bits-multiple,+interlocked-access1,+load-and-trap,+load-and-zero-rightmost-byte,+load-store-on-cond,+load-store-on-cond-2,+message-security-assist-extension3,+message-security-assist-extension4,+message-security-assist-extension5,+message-security-assist-extension7,+message-security-assist-extension8,+message-security-assist-extension9,+miscellaneous-extensions,+miscellaneous-extensions-2,+miscellaneous-extensions-3,+population-count,+processor-assist,+reset-reference-bits-multiple,+test-pending-external-interruption,+transactional-execution,+vector,+vector-enhancements-1,+vector-enhancements-2,+vector-packed-decimal,+vector-packed-decimal-enhancement,-bear-enhancement,-concurrent-functions,-message-security-assist-extension12,-miscellaneous-extensions-4,-nnp-assist,-processor-activity-instrumentation,-reset-dat-protection,-soft-float,-unaligned-symbols,-vector-enhancements-3,-vector-packed-decimal-enhancement-2,-vector-packed-decimal-enhancement-3" }
attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #3 = { cold noredzone noreturn nounwind uwtable "frame-pointer"="all" "target-cpu"="z15" "target-features"="+backchain,+deflate-conversion,+dfp-packed-conversion,+dfp-zoned-conversion,+distinct-ops,+enhanced-dat-2,+enhanced-sort,+execution-hint,+fast-serialization,+fp-extension,+guarded-storage,+high-word,+insert-reference-bits-multiple,+interlocked-access1,+load-and-trap,+load-and-zero-rightmost-byte,+load-store-on-cond,+load-store-on-cond-2,+message-security-assist-extension3,+message-security-assist-extension4,+message-security-assist-extension5,+message-security-assist-extension7,+message-security-assist-extension8,+message-security-assist-extension9,+miscellaneous-extensions,+miscellaneous-extensions-2,+miscellaneous-extensions-3,+population-count,+processor-assist,+reset-reference-bits-multiple,+test-pending-external-interruption,+transactional-execution,+vector,+vector-enhancements-1,+vector-enhancements-2,+vector-packed-decimal,+vector-packed-decimal-enhancement,-bear-enhancement,-concurrent-functions,-message-security-assist-extension12,-miscellaneous-extensions-4,-nnp-assist,-processor-activity-instrumentation,-reset-dat-protection,-soft-float,-unaligned-symbols,-vector-enhancements-3,-vector-packed-decimal-enhancement-2,-vector-packed-decimal-enhancement-3" }
attributes #4 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
attributes #5 = { nocallback nofree nosync nounwind willreturn memory(none) }
attributes #6 = { cold noreturn nounwind memory(inaccessiblemem: write) }
!llvm.module.flags = !{}
```
The hang disappears when the `@llvm.vector.reduce.and.v4i1` call (`%59`) is removed and `i1 1` is substituted for it in its only use (the `icmp eq` that defines `%69`).
`llc test.ll --debug` shows the type legalizer re-legalizing the same `any_extend` node (`t124`) over and over:
```
... snip ...
Legalizing node: t109: i16 = extract_vector_elt t105, Constant:i32<0>
Analyzing result type: i16
Promote integer result: t109: i16 = extract_vector_elt t105, Constant:i32<0>
Creating new node: t122: i32 = extract_vector_elt t105, Constant:i32<0>
Legalizing node: t122: i32 = extract_vector_elt t105, Constant:i32<0>
Analyzing result type: i32
Legal result type
Analyzing operand: t105: v2i16 = setcc t103, t104, setoeq:ch
Widen node operand 0: t122: i32 = extract_vector_elt t105, Constant:i32<0>
Creating new node: t123: i32 = extract_vector_elt t85, Constant:i32<0>
Legalizing node: t123: i32 = extract_vector_elt t85, Constant:i32<0>
Analyzing result type: i32
Legal result type
Analyzing operand: t85: v1i16 = setcc t52, t82, setoeq:ch
Widen node operand 0: t123: i32 = extract_vector_elt t85, Constant:i32<0>
Creating new node: t124: i32 = any_extend t109
Legalizing node: t124: i32 = any_extend t109
Analyzing result type: i32
Legal result type
Analyzing operand: t109: i16 = extract_vector_elt t105, Constant:i32<0>
Promote integer operand: t124: i32 = any_extend t109
Legalizing node: t124: i32 = any_extend t109
Analyzing result type: i32
Legal result type
Analyzing operand: t109: i16 = extract_vector_elt t105, Constant:i32<0>
Promote integer operand: t124: i32 = any_extend t109
Legalizing node: t124: i32 = any_extend t109
Analyzing result type: i32
Legal result type
Analyzing operand: t109: i16 = extract_vector_elt t105, Constant:i32<0>
Promote integer operand: t124: i32 = any_extend t109
Legalizing node: t124: i32 = any_extend t109
Analyzing result type: i32
Legal result type
Analyzing operand: t109: i16 = extract_vector_elt t105, Constant:i32<0>
Promote integer operand: t124: i32 = any_extend t109
... snip ...
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs