================
@@ -0,0 +1,170 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone 
-Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s 
--check-prefixes=ALL,CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-disable-O0-optnone -Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s 
--check-prefixes=ALL,CIR
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone 
-Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s 
--check-prefixes=ALL,LLVM_OGCG_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s 
--check-prefixes=ALL,LLVM_OGCG_CIR
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone 
-Werror -Wall -emit-llvm -o - %s | FileCheck %s 
--check-prefixes=ALL,LLVM_OGCG_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s 
--check-prefixes=ALL,LLVM_OGCG_CIR
+
+#include <arm_sve.h>
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// ALL-LABEL: @test_svlen_u8
+uint64_t test_svlen_u8(svuint8_t op) MODE_ATTR
+{
+// CIR:     %[[VSCALE:.*]] = cir.call_llvm_intrinsic "vscale"  : () -> !u64i
+// CIR:     %[[C16:.*]] = cir.const #cir.int<16> : !u64i
+// CIR:     %[[BINOP:.*]] = cir.binop(mul, %[[VSCALE]], %[[C16]]) nuw : !u64i
+
+// LLVM_OGCG_CIR:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
----------------
banach-space wrote:

I should clarify that the output is identical modulo SROA (i.e., identical up to scalar replacement of aggregates). In particular, I only check the IR generated directly by `emitAArch64SVEBuiltinExpr`, and that portion is indeed identical in both cases.

**Default Clang path**
```llvm
define dso_local i64 @test_svlen_u8(<vscale x 16 x i8> %op) #0 {
entry:
  %op.addr = alloca <vscale x 16 x i8>, align 16
  store <vscale x 16 x i8> %op, ptr %op.addr, align 16
  %0 = load <vscale x 16 x i8>, ptr %op.addr, align 16
  
  ; Tests added here only check these lines
  %1 = call i64 @llvm.vscale.i64()
  %2 = mul nuw i64 %1, 16
  
  ret i64 %2
}
```

**Path via ClangIR**
```llvm
define dso_local i64 @test_svlen_u8(<vscale x 16 x i8> %0) #0 {
  %2 = alloca <vscale x 16 x i8>, i64 1, align 16
  %3 = alloca i64, i64 1, align 8
  store <vscale x 16 x i8> %0, ptr %2, align 16
  
  ; Tests added here only check these lines
  %4 = call i64 @llvm.vscale.i64()
  %5 = mul nuw i64 %4, 16

  store i64 %5, ptr %3, align 8
  %6 = load i64, ptr %3, align 8
  ret i64 %6
}
```

Do you think we should also be checking the surrounding IR? Alternatively, 
should we align the two lowering paths so that they generate identical IR?

https://github.com/llvm/llvm-project/pull/172346
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to