https://github.com/banach-space updated 
https://github.com/llvm/llvm-project/pull/183595

From 82678f2c006b7f6ff107ecf977372ed3110561d7 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <[email protected]>
Date: Thu, 26 Feb 2026 18:44:15 +0000
Subject: [PATCH] [CIR][AArch64] Add lowering for vaba_* and vabd_* builtins
 (1/N)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add CIR lowering for the following AdvSIMD (NEON) intrinsic families:

* vabd_*  – Absolute difference
  
https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#absolute-difference

* vaba_*  – Absolute difference and accumulate
  
https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#absolute-difference-and-accumulate

Tests for these intrinsics were split out from:
  test/CodeGen/AArch64/neon-intrinsics.c

and moved to:
  test/CodeGen/AArch64/neon/intrinsics.c

The following helper hooks were adapted from the ClangIR project:
  * getNeonType, emitNeonCall, emitNeonCallToOp.

NOTE: Quad-word variants (e.g. vabaq_*) are not included in this change
and will be added in a follow-up patch.

Credit to the ClangIR contributors for the original implementation.
---
 .../CIR/Dialect/Builder/CIRBaseBuilder.h      |   3 +
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  | 136 +++++++++++
 clang/test/CodeGen/AArch64/neon-intrinsics.c  | 174 --------------
 clang/test/CodeGen/AArch64/neon/intrinsics.c  | 213 ++++++++++++++++++
 4 files changed, 352 insertions(+), 174 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h 
b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index efae3d9d894ed..fb96050964fcc 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -349,6 +349,9 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
                            mlir::IntegerAttr align = {},
                            cir::SyncScopeKindAttr scope = {},
                            cir::MemOrderAttr order = {}) {
+    if (mlir::cast<cir::PointerType>(dst.getType()).getPointee() !=
+        val.getType())
+      dst = createPtrBitcast(dst, val.getType());
     return cir::StoreOp::create(*this, loc, val, dst, isVolatile, align, scope,
                                 order);
   }
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 5129aa75f8f8d..af36be32b2cdd 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -290,6 +290,118 @@ static bool hasExtraNeonArgument(unsigned builtinID) {
   return mask != 0;
 }
 
+// TODO: Remove `loc` from the list of arguments once all NYIs are gone.
+static cir::VectorType getNeonType(CIRGenFunction *cgf, NeonTypeFlags 
typeFlags,
+                                   mlir::Location loc,
+                                   bool hasLegalHalfType = true,
+                                   bool v1Ty = false,
+                                   bool allowBFloatArgsAndRet = true) {
+  int isQuad = typeFlags.isQuad();
+  switch (typeFlags.getEltType()) {
+  case NeonTypeFlags::Int8:
+  case NeonTypeFlags::Poly8:
+    return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt8Ty
+                                                       : cgf->sInt8Ty,
+                                v1Ty ? 1 : (8 << isQuad));
+  case NeonTypeFlags::MFloat8:
+    cgf->getCIRGenModule().errorNYI(
+        loc, std::string("unimplemented NEON type: MFloat8"));
+    [[fallthrough]];
+  case NeonTypeFlags::Int16:
+  case NeonTypeFlags::Poly16:
+    return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt16Ty
+                                                       : cgf->sInt16Ty,
+                                v1Ty ? 1 : (4 << isQuad));
+  case NeonTypeFlags::BFloat16:
+    if (allowBFloatArgsAndRet)
+      cgf->getCIRGenModule().errorNYI(
+          loc, std::string("unimplemented NEON type: BFloat16"));
+    else
+      cgf->getCIRGenModule().errorNYI(
+          loc, std::string("unimplemented NEON type: BFloat16"));
+  case NeonTypeFlags::Float16:
+    if (hasLegalHalfType)
+      cgf->getCIRGenModule().errorNYI(
+          loc, std::string("unimplemented NEON type: Float16"));
+    else
+      cgf->getCIRGenModule().errorNYI(
+          loc, std::string("unimplemented NEON type: Float16"));
+    [[fallthrough]];
+  case NeonTypeFlags::Int32:
+    return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt32Ty
+                                                       : cgf->sInt32Ty,
+                                v1Ty ? 1 : (2 << isQuad));
+  case NeonTypeFlags::Int64:
+  case NeonTypeFlags::Poly64:
+    return cir::VectorType::get(typeFlags.isUnsigned() ? cgf->uInt64Ty
+                                                       : cgf->sInt64Ty,
+                                v1Ty ? 1 : (1 << isQuad));
+  case NeonTypeFlags::Poly128:
+    // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
+    // a lot of the i128 and f128 API is missing,
+    // so we use v16i8 to represent poly128 and get pattern matched.
+    cgf->getCIRGenModule().errorNYI(
+        loc, std::string("unimplemented NEON type: Poly128"));
+    [[fallthrough]];
+  case NeonTypeFlags::Float32:
+    return cir::VectorType::get(cgf->getCIRGenModule().floatTy,
+                                v1Ty ? 1 : (2 << isQuad));
+  case NeonTypeFlags::Float64:
+    return cir::VectorType::get(cgf->getCIRGenModule().doubleTy,
+                                v1Ty ? 1 : (1 << isQuad));
+  }
+  llvm_unreachable("Unknown vector element type!");
+}
+
+template <typename Operation>
+static mlir::Value emitNeonCallToOp(
+    CIRGenBuilderTy &builder, llvm::SmallVector<mlir::Type> argTypes,
+    llvm::SmallVectorImpl<mlir::Value> &args,
+    std::optional<llvm::StringRef> intrinsicName, mlir::Type funcResTy,
+    mlir::Location loc, bool isConstrainedFPIntrinsic = false,
+    unsigned shift = 0, bool rightshift = false) {
+  // TODO: Consider removing the following unreachable when we have
+  // emitConstrainedFPCall feature implemented
+  assert(!cir::MissingFeatures::emitConstrainedFPCall());
+  if (isConstrainedFPIntrinsic)
+    llvm_unreachable("isConstrainedFPIntrinsic NYI");
+
+  for (unsigned j = 0; j < argTypes.size(); ++j) {
+    if (isConstrainedFPIntrinsic) {
+      assert(!cir::MissingFeatures::emitConstrainedFPCall());
+    }
+    if (shift > 0 && shift == j) {
+      llvm_unreachable("shift NYI");
+    } else {
+      args[j] = builder.createBitcast(args[j], argTypes[j]);
+    }
+  }
+  if (isConstrainedFPIntrinsic) {
+    assert(!cir::MissingFeatures::emitConstrainedFPCall());
+    return nullptr;
+  }
+  if constexpr (std::is_same_v<Operation, cir::LLVMIntrinsicCallOp>) {
+    return Operation::create(builder, loc,
+                             builder.getStringAttr(intrinsicName.value()),
+                             funcResTy, args)
+        .getResult();
+  } else {
+    return Operation::create(builder, loc, funcResTy, args).getResult();
+  }
+}
+
+static mlir::Value emitNeonCall(CIRGenBuilderTy &builder,
+                                llvm::SmallVector<mlir::Type> argTypes,
+                                llvm::SmallVectorImpl<mlir::Value> &args,
+                                llvm::StringRef intrinsicName,
+                                mlir::Type funcResTy, mlir::Location loc,
+                                bool isConstrainedFPIntrinsic = false,
+                                unsigned shift = 0, bool rightshift = false) {
+  return emitNeonCallToOp<cir::LLVMIntrinsicCallOp>(
+      builder, std::move(argTypes), args, intrinsicName, funcResTy, loc,
+      isConstrainedFPIntrinsic, shift, rightshift);
+}
+
 std::optional<mlir::Value>
 CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
                                           const CallExpr *expr) {
@@ -1454,6 +1566,16 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned 
builtinID, const CallExpr *expr,
 
   assert(!cir::MissingFeatures::aarch64TblBuiltinExpr());
 
+  const Expr *arg = expr->getArg(expr->getNumArgs() - 1);
+  NeonTypeFlags type(0);
+  // A trailing constant integer is used for discriminating overloaded builtin
+  // calls. Use it to determine the type of this overloaded NEON intrinsic.
+  if (std::optional<llvm::APSInt> result =
+          arg->getIntegerConstantExpr(getContext()))
+    type = NeonTypeFlags(result->getZExtValue());
+
+  bool usgn = type.isUnsigned();
+
   mlir::Location loc = getLoc(expr->getExprLoc());
 
   // Handle non-overloaded intrinsics first.
@@ -1678,6 +1800,12 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned 
builtinID, const CallExpr *expr,
     return mlir::Value{};
   }
 
+  cir::VectorType ty = getNeonType(this, type, loc);
+  if (!ty)
+    return nullptr;
+
+  llvm::StringRef intrName;
+
   switch (builtinID) {
   default:
     return std::nullopt;
@@ -1700,7 +1828,15 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned 
builtinID, const CallExpr *expr,
   case NEON::BI__builtin_neon_vmin_v:
   case NEON::BI__builtin_neon_vminq_v:
   case NEON::BI__builtin_neon_vminh_f16:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
   case NEON::BI__builtin_neon_vabd_v:
+    intrName = usgn ? "aarch64.neon.uabd" : "aarch64.neon.sabd";
+    if (cir::isFPOrVectorOfFPType(ty))
+      intrName = "aarch64.neon.fabd";
+    return emitNeonCall(builder, {ty, ty}, ops, intrName, ty, loc);
   case NEON::BI__builtin_neon_vabdq_v:
   case NEON::BI__builtin_neon_vpadal_v:
   case NEON::BI__builtin_neon_vpadalq_v:
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c 
b/clang/test/CodeGen/AArch64/neon-intrinsics.c
index 42799d27bba89..909d00630b069 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -1038,88 +1038,6 @@ float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t 
v2) {
   return vdiv_f32(v1, v2);
 }
 
-// CHECK-LABEL: define dso_local <8 x i8> @test_vaba_s8(
-// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x 
i8> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VABD_I:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]])
-// CHECK-NEXT:    [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]]
-// CHECK-NEXT:    ret <8 x i8> [[ADD_I]]
-//
-int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
-  return vaba_s8(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vaba_s16(
-// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 
x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
-// CHECK-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT:    [[VABD2_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
-// CHECK-NEXT:    [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[VABD2_I]]
-// CHECK-NEXT:    ret <4 x i16> [[ADD_I]]
-//
-int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
-  return vaba_s16(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vaba_s32(
-// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 
x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8>
-// CHECK-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK-NEXT:    [[VABD2_I:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
-// CHECK-NEXT:    [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[VABD2_I]]
-// CHECK-NEXT:    ret <2 x i32> [[ADD_I]]
-//
-int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
-  return vaba_s32(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <8 x i8> @test_vaba_u8(
-// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x 
i8> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VABD_I:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]])
-// CHECK-NEXT:    [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]]
-// CHECK-NEXT:    ret <8 x i8> [[ADD_I]]
-//
-uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
-  return vaba_u8(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vaba_u16(
-// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 
x i16> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
-// CHECK-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT:    [[VABD2_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
-// CHECK-NEXT:    [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[VABD2_I]]
-// CHECK-NEXT:    ret <4 x i16> [[ADD_I]]
-//
-uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
-  return vaba_u16(v1, v2, v3);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vaba_u32(
-// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 
x i32> noundef [[V3:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8>
-// CHECK-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK-NEXT:    [[VABD2_I:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
-// CHECK-NEXT:    [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[VABD2_I]]
-// CHECK-NEXT:    ret <2 x i32> [[ADD_I]]
-//
-uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
-  return vaba_u32(v1, v2, v3);
-}
-
 // CHECK-LABEL: define dso_local <16 x i8> @test_vabaq_s8(
 // CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]], <16 
x i8> noundef [[V3:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -1202,98 +1120,6 @@ uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, 
uint32x4_t v3) {
   return vabaq_u32(v1, v2, v3);
 }
 
-// CHECK-LABEL: define dso_local <8 x i8> @test_vabd_s8(
-// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VABD_I:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
-// CHECK-NEXT:    ret <8 x i8> [[VABD_I]]
-//
-int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
-  return vabd_s8(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vabd_s16(
-// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
-// CHECK-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT:    [[VABD2_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
-// CHECK-NEXT:    ret <4 x i16> [[VABD2_I]]
-//
-int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
-  return vabd_s16(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vabd_s32(
-// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
-// CHECK-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK-NEXT:    [[VABD2_I:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
-// CHECK-NEXT:    ret <2 x i32> [[VABD2_I]]
-//
-int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
-  return vabd_s32(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <8 x i8> @test_vabd_u8(
-// CHECK-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VABD_I:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
-// CHECK-NEXT:    ret <8 x i8> [[VABD_I]]
-//
-uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
-  return vabd_u8(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vabd_u16(
-// CHECK-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
-// CHECK-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT:    [[VABD2_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
-// CHECK-NEXT:    ret <4 x i16> [[VABD2_I]]
-//
-uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
-  return vabd_u16(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vabd_u32(
-// CHECK-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
-// CHECK-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK-NEXT:    [[VABD2_I:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
-// CHECK-NEXT:    ret <2 x i32> [[VABD2_I]]
-//
-uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
-  return vabd_u32(v1, v2);
-}
-
-// CHECK-LABEL: define dso_local <2 x float> @test_vabd_f32(
-// CHECK-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
-// CHECK-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
-// CHECK-NEXT:    [[VABD2_I:%.*]] = call <2 x float> 
@llvm.aarch64.neon.fabd.v2f32(<2 x float> [[VABD_I]], <2 x float> [[VABD1_I]])
-// CHECK-NEXT:    ret <2 x float> [[VABD2_I]]
-//
-float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
-  return vabd_f32(v1, v2);
-}
-
 // CHECK-LABEL: define dso_local <16 x i8> @test_vabdq_s8(
 // CHECK-SAME: <16 x i8> noundef [[V1:%.*]], <16 x i8> noundef [[V2:%.*]]) 
#[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c 
b/clang/test/CodeGen/AArch64/neon/intrinsics.c
index 039a08c23852e..227b23f532fe1 100644
--- a/clang/test/CodeGen/AArch64/neon/intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -42,3 +42,216 @@ int64_t test_vnegd_s64(int64_t a) {
 // LLVM-NEXT:     ret i64 [[VNEGD_I]]
   return (int64_t)vnegd_s64(a);
 }
+
+//===------------------------------------------------------===//
+// 2.1.1.6.1. Absolute difference
+//===------------------------------------------------------===//
+// LLVM-LABEL: @test_vabd_s8(
+// CIR-LABEL: @vabd_s8(
+int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.sabd" %{{.*}}, %{{.*}} : 
(!cir.vector<8 x !s8i>, !cir.vector<8 x !s8i>) -> !cir.vector<8 x !s8i>
+
+// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) 
+// LLVM:         [[VABD_I:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
+// LLVM-NEXT:    ret <8 x i8> [[VABD_I]]
+  return vabd_s8(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabd_s16(
+// CIR-LABEL: @vabd_s16(
+int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
+// CIR:   [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<4 x !s16i>
+// CIR:   [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<4 x !s16i>
+// CIR:   cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) 
+// LLVM:         [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// LLVM-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// LLVM-NEXT:    [[VABD2_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
+// LLVM-NEXT:    ret <4 x i16> [[VABD2_I]]
+  return vabd_s16(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabd_s32(
+// CIR-LABEL: @vabd_s32(
+int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
+// CIR:   [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<2 x !s32i>
+// CIR:   [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<2 x !s32i>
+// CIR:   cir.call_llvm_intrinsic "aarch64.neon.sabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) 
+// LLVM:         [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// LLVM-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// LLVM-NEXT:    [[VABD2_I:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
+// LLVM-NEXT:    ret <2 x i32> [[VABD2_I]]
+  return vabd_s32(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabd_u8(
+// CIR-LABEL: @vabd_u8(
+uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.uabd" %{{.*}}, %{{.*}} : 
(!cir.vector<8 x !u8i>, !cir.vector<8 x !u8i>) -> !cir.vector<8 x !u8i>
+
+// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]]) 
+// LLVM:         [[VABD_I:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
+// LLVM-NEXT:    ret <8 x i8> [[VABD_I]]
+  return vabd_u8(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabd_u16(
+// CIR-LABEL: @vabd_u16(
+uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
+// CIR:   [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<4 x !u16i>
+// CIR:   [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<4 x !u16i>
+// CIR:   cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]]) 
+// LLVM:         [[TMP0:%.*]] = bitcast <4 x i16> [[V1]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// LLVM-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// LLVM-NEXT:    [[VABD2_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
+// LLVM-NEXT:    ret <4 x i16> [[VABD2_I]]
+  return vabd_u16(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabd_u32(
+// CIR-LABEL: @vabd_u32(
+uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
+// CIR:   [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<2 x !u32i>
+// CIR:   [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<2 x !u32i>
+// CIR:   cir.call_llvm_intrinsic "aarch64.neon.uabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]]) 
+// LLVM:         [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// LLVM-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// LLVM-NEXT:    [[VABD2_I:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
+// LLVM-NEXT:    ret <2 x i32> [[VABD2_I]]
+  return vabd_u32(v1, v2);
+}
+
+// LLVM-LABEL: @test_vabd_f32(
+// CIR-LABEL: @vabd_f32(
+float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
+// CIR:   [[V1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<2 x !cir.float>
+// CIR:   [[V2:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<2 x !cir.float>
+// CIR:   cir.call_llvm_intrinsic "aarch64.neon.fabd" [[V1]], [[V2]]
+
+// LLVM-SAME: <2 x float> noundef [[V1:%.*]], <2 x float> noundef [[V2:%.*]]) 
+// LLVM:         [[TMP0:%.*]] = bitcast <2 x float> [[V1]] to <2 x i32>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[V2]] to <2 x i32>
+// LLVM-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// LLVM-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// LLVM-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// LLVM-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// LLVM-NEXT:    [[VABD2_I:%.*]] = call <2 x float> 
@llvm.aarch64.neon.fabd.v2f32(<2 x float> [[VABD_I]], <2 x float> [[VABD1_I]])
+// LLVM-NEXT:    ret <2 x float> [[VABD2_I]]
+  return vabd_f32(v1, v2);
+}
+
+//===------------------------------------------------------===//
+// 2.1.1.6.3. Absolute difference and accumulate
+//
+// The following builtins expand to calls to the vabd_* builtins,
+// which is reflected in the CIR output.
+//===------------------------------------------------------===//
+
+// LLVM-LABEL: @test_vaba_u8(
+// CIR-LABEL: @vaba_u8(
+uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabd_u8
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x 
i8> noundef [[V3:%.*]])
+// LLVM:         [[VABD_I:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]])
+// LLVM-NEXT:    [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]]
+// LLVM-NEXT:    ret <8 x i8> [[ADD_I]]
+  return vaba_u8(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vaba_u16(
+// CIR-LABEL: @vaba_u16(
+uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabd_u16
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x 
i16> noundef [[V3:%.*]])
+// LLVM:         [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
+// LLVM-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// LLVM-NEXT:    [[VABD2_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
+// LLVM-NEXT:    [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[VABD2_I]]
+// LLVM-NEXT:    ret <4 x i16> [[ADD_I]]
+  return vaba_u16(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vaba_u32(
+// CIR-LABEL: @vaba_u32(
+uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabd_u32
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x 
i32> noundef [[V3:%.*]]) 
+// LLVM:         [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8>
+// LLVM-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// LLVM-NEXT:    [[VABD2_I:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
+// LLVM-NEXT:    [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[VABD2_I]]
+// LLVM-NEXT:    ret <2 x i32> [[ADD_I]]
+  return vaba_u32(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vaba_s8(
+// CIR-LABEL: @vaba_s8(
+int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabd_s8
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <8 x i8> noundef [[V1:%.*]], <8 x i8> noundef [[V2:%.*]], <8 x 
i8> noundef [[V3:%.*]]) 
+// LLVM:         [[VABD_I:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V2]], <8 x i8> [[V3]])
+// LLVM-NEXT:    [[ADD_I:%.*]] = add <8 x i8> [[V1]], [[VABD_I]]
+// LLVM-NEXT:    ret <8 x i8> [[ADD_I]]
+  return vaba_s8(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vaba_s16(
+// CIR-LABEL: @vaba_s16(
+int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabd_s16
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <4 x i16> noundef [[V1:%.*]], <4 x i16> noundef [[V2:%.*]], <4 x 
i16> noundef [[V3:%.*]]) 
+// LLVM:         [[TMP0:%.*]] = bitcast <4 x i16> [[V2]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[V3]] to <8 x i8>
+// LLVM-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// LLVM-NEXT:    [[VABD2_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[VABD_I]], <4 x i16> [[VABD1_I]])
+// LLVM-NEXT:    [[ADD_I:%.*]] = add <4 x i16> [[V1]], [[VABD2_I]]
+// LLVM-NEXT:    ret <4 x i16> [[ADD_I]]
+  return vaba_s16(v1, v2, v3);
+}
+
+// LLVM-LABEL: @test_vaba_s32(
+// CIR-LABEL: @vaba_s32(
+int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
+// CIR: [[ABD:%.*]] = cir.call @vabd_s32
+// CIR: [[RES:%.*]] = cir.binop(add, {{.*}}, [[ABD]])
+
+// LLVM-SAME: <2 x i32> noundef [[V1:%.*]], <2 x i32> noundef [[V2:%.*]], <2 x 
i32> noundef [[V3:%.*]])
+// LLVM:         [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[V3]] to <8 x i8>
+// LLVM-NEXT:    [[VABD_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT:    [[VABD1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// LLVM-NEXT:    [[VABD2_I:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[VABD_I]], <2 x i32> [[VABD1_I]])
+// LLVM-NEXT:    [[ADD_I:%.*]] = add <2 x i32> [[V1]], [[VABD2_I]]
+// LLVM-NEXT:    ret <2 x i32> [[ADD_I]]
+  return vaba_s32(v1, v2, v3);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to