================
@@ -0,0 +1,170 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --version 6
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone
-Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s
--check-prefixes=ALL,CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve
-disable-O0-optnone -Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s
--check-prefixes=ALL,CIR
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone
-Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s
--check-prefixes=ALL,LLVM_OGCG_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve
-disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s
--check-prefixes=ALL,LLVM_OGCG_CIR
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone
-Werror -Wall -emit-llvm -o - %s | FileCheck %s
--check-prefixes=ALL,LLVM_OGCG_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s
--check-prefixes=ALL,LLVM_OGCG_CIR
+
+#include <arm_sve.h>
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// ALL-LABEL: @test_svlen_u8
+uint64_t test_svlen_u8(svuint8_t op) MODE_ATTR
+{
+// CIR: %[[VSCALE:.*]] = cir.call_llvm_intrinsic "vscale" : () -> !u64i
+// CIR: %[[C16:.*]] = cir.const #cir.int<16> : !u64i
+// CIR: %[[BINOP:.*]] = cir.binop(mul, %[[VSCALE]], %[[C16]]) nuw : !u64i
+
+// LLVM_OGCG_CIR: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
----------------
banach-space wrote:
I should clarify that the output is identical modulo SROA (i.e., apart from
differences that SROA would clean up). In particular, I only check the IR
generated directly by `emitAArch64SVEBuiltinExpr`, and that portion is indeed
identical in both cases.
**Default Clang path**
```llvm
define dso_local i64 @test_svlen_u8(<vscale x 16 x i8> %op) #0 {
entry:
%op.addr = alloca <vscale x 16 x i8>, align 16
store <vscale x 16 x i8> %op, ptr %op.addr, align 16
%0 = load <vscale x 16 x i8>, ptr %op.addr, align 16
; Tests added here only check these lines
%1 = call i64 @llvm.vscale.i64()
%2 = mul nuw i64 %1, 16
ret i64 %2
}
```
**Path via ClangIR**
```llvm
define dso_local i64 @test_svlen_u8(<vscale x 16 x i8> %0) #0 {
%2 = alloca <vscale x 16 x i8>, i64 1, align 16
%3 = alloca i64, i64 1, align 8
store <vscale x 16 x i8> %0, ptr %2, align 16
; Tests added here only check these lines
%4 = call i64 @llvm.vscale.i64()
%5 = mul nuw i64 %4, 16
store i64 %5, ptr %3, align 8
%6 = load i64, ptr %3, align 8
ret i64 %6
}
```
Do you think we should also be checking the surrounding IR? Alternatively,
should we align the two lowering paths so that they generate identical IR?
https://github.com/llvm/llvm-project/pull/172346
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits