sdesmalen created this revision.
sdesmalen added reviewers: SjoerdMeijer, efriedma.
Herald added a subscriber: tschuett.
Herald added a project: clang.
sdesmalen added a parent revision: D78674: [SveEmitter] Add builtins for contiguous prefetches.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D78677

Files:
  clang/include/clang/Basic/TargetBuiltins.h
  clang/include/clang/Basic/arm_sve.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c
  clang/utils/TableGen/SveEmitter.cpp

Index: clang/utils/TableGen/SveEmitter.cpp
===================================================================
--- clang/utils/TableGen/SveEmitter.cpp
+++ clang/utils/TableGen/SveEmitter.cpp
@@ -599,6 +599,12 @@
     Float = true;
     ElementBitwidth = 64;
     break;
+  case 'Q':
+    Constant = true;
+    Pointer = true;
+    Void = true;
+    NumVectors = 0;
+    break;
   case 'S':
     Constant = true;
     Pointer = true;
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c
@@ -116,3 +116,151 @@
   // CHECK: @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> %[[PG]], i8* %[[I8_BASE]], i32 0)
   return svprfw_vnum(pg, base, vnum, SV_PLDL1KEEP);
 }
+
+void test_svprfw_gather_u32base(svbool_t pg, svuint32_t bases)
+{
+  // CHECK-LABEL: test_svprfw_gather_u32base
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0)
+  // CHECK: ret void
+  return svprfw_gather_u32base(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather(svbool_t pg, svuint32_t bases)
+{
+  // CHECK-LABEL: test_svprfw_gather
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0)
+  // CHECK: ret void
+  return svprfw_gather(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_u64base(svbool_t pg, svuint64_t bases)
+{
+  // CHECK-LABEL: test_svprfw_gather_u64base
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0)
+  // CHECK: ret void
+  return svprfw_gather_u64base(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_1(svbool_t pg, svuint64_t bases)
+{
+  // CHECK-LABEL: test_svprfw_gather_1
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0)
+  // CHECK: ret void
+  return svprfw_gather(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_s32index(svbool_t pg, const void *base, svint32_t indices)
+{
+  // CHECK-LABEL: test_svprfw_gather_s32index
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfw.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+  // CHECK: ret void
+  return svprfw_gather_s32index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_index(svbool_t pg, const void *base, svint32_t indices)
+{
+  // CHECK-LABEL: test_svprfw_gather_index
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfw.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+  // CHECK: ret void
+  return svprfw_gather_index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_s64index(svbool_t pg, const void *base, svint64_t indices)
+{
+  // CHECK-LABEL: test_svprfw_gather_s64index
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfw.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+  // CHECK: ret void
+  return svprfw_gather_s64index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_index_1(svbool_t pg, const void *base, svint64_t indices)
+{
+  // CHECK-LABEL: test_svprfw_gather_index_1
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfw.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+  // CHECK: ret void
+  return svprfw_gather_index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_u32index(svbool_t pg, const void *base, svuint32_t indices)
+{
+  // CHECK-LABEL: test_svprfw_gather_u32index
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfw.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+  // CHECK: ret void
+  return svprfw_gather_u32index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_index_2(svbool_t pg, const void *base, svuint32_t indices)
+{
+  // CHECK-LABEL: test_svprfw_gather_index_2
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfw.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+  // CHECK: ret void
+  return svprfw_gather_index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_u64index(svbool_t pg, const void *base, svuint64_t indices)
+{
+  // CHECK-LABEL: test_svprfw_gather_u64index
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfw.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+  // CHECK: ret void
+  return svprfw_gather_u64index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_index_3(svbool_t pg, const void *base, svuint64_t indices)
+{
+  // CHECK-LABEL: test_svprfw_gather_index_3
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfw.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+  // CHECK: ret void
+  return svprfw_gather_index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_u32base_index(svbool_t pg, svuint32_t bases, int64_t index)
+{
+  // CHECK-LABEL: test_svprfw_gather_u32base_index
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2
+  // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]], i32 0)
+  // CHECK: ret void
+  return svprfw_gather_u32base_index(pg, bases, index, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_index_4(svbool_t pg, svuint32_t bases, int64_t index)
+{
+  // CHECK-LABEL: test_svprfw_gather_index_4
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2
+  // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]], i32 0)
+  // CHECK: ret void
+  return svprfw_gather_index(pg, bases, index, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t index)
+{
+  // CHECK-LABEL: test_svprfw_gather_u64base_index
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2
+  // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]], i32 0)
+  // CHECK: ret void
+  return svprfw_gather_u64base_index(pg, bases, index, SV_PLDL1KEEP);
+}
+
+void test_svprfw_gather_index_5(svbool_t pg, svuint64_t bases, int64_t index)
+{
+  // CHECK-LABEL: test_svprfw_gather_index_5
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 2
+  // CHECK: call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]], i32 0)
+  // CHECK: ret void
+  return svprfw_gather_index(pg, bases, index, SV_PLDL1KEEP);
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c
@@ -116,3 +116,151 @@
   // CHECK: @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> %[[PG]], i8* %[[I8_BASE]], i32 0)
   return svprfh_vnum(pg, base, vnum, SV_PLDL1KEEP);
 }
+
+void test_svprfh_gather_u32base(svbool_t pg, svuint32_t bases)
+{
+  // CHECK-LABEL: test_svprfh_gather_u32base
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0)
+  // CHECK: ret void
+  return svprfh_gather_u32base(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather(svbool_t pg, svuint32_t bases)
+{
+  // CHECK-LABEL: test_svprfh_gather
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0)
+  // CHECK: ret void
+  return svprfh_gather(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_u64base(svbool_t pg, svuint64_t bases)
+{
+  // CHECK-LABEL: test_svprfh_gather_u64base
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0)
+  // CHECK: ret void
+  return svprfh_gather_u64base(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_1(svbool_t pg, svuint64_t bases)
+{
+  // CHECK-LABEL: test_svprfh_gather_1
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0)
+  // CHECK: ret void
+  return svprfh_gather(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_s32index(svbool_t pg, const void *base, svint32_t indices)
+{
+  // CHECK-LABEL: test_svprfh_gather_s32index
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfh.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+  // CHECK: ret void
+  return svprfh_gather_s32index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_index(svbool_t pg, const void *base, svint32_t indices)
+{
+  // CHECK-LABEL: test_svprfh_gather_index
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfh.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+  // CHECK: ret void
+  return svprfh_gather_index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_s64index(svbool_t pg, const void *base, svint64_t indices)
+{
+  // CHECK-LABEL: test_svprfh_gather_s64index
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfh.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+  // CHECK: ret void
+  return svprfh_gather_s64index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_index_1(svbool_t pg, const void *base, svint64_t indices)
+{
+  // CHECK-LABEL: test_svprfh_gather_index_1
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfh.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+  // CHECK: ret void
+  return svprfh_gather_index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_u32index(svbool_t pg, const void *base, svuint32_t indices)
+{
+  // CHECK-LABEL: test_svprfh_gather_u32index
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfh.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+  // CHECK: ret void
+  return svprfh_gather_u32index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_index_2(svbool_t pg, const void *base, svuint32_t indices)
+{
+  // CHECK-LABEL: test_svprfh_gather_index_2
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfh.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+  // CHECK: ret void
+  return svprfh_gather_index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_u64index(svbool_t pg, const void *base, svuint64_t indices)
+{
+  // CHECK-LABEL: test_svprfh_gather_u64index
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfh.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+  // CHECK: ret void
+  return svprfh_gather_u64index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_index_3(svbool_t pg, const void *base, svuint64_t indices)
+{
+  // CHECK-LABEL: test_svprfh_gather_index_3
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfh.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+  // CHECK: ret void
+  return svprfh_gather_index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_u32base_index(svbool_t pg, svuint32_t bases, int64_t index)
+{
+  // CHECK-LABEL: test_svprfh_gather_u32base_index
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1
+  // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]], i32 0)
+  // CHECK: ret void
+  return svprfh_gather_u32base_index(pg, bases, index, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_index_4(svbool_t pg, svuint32_t bases, int64_t index)
+{
+  // CHECK-LABEL: test_svprfh_gather_index_4
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1
+  // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]], i32 0)
+  // CHECK: ret void
+  return svprfh_gather_index(pg, bases, index, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t index)
+{
+  // CHECK-LABEL: test_svprfh_gather_u64base_index
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1
+  // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]], i32 0)
+  // CHECK: ret void
+  return svprfh_gather_u64base_index(pg, bases, index, SV_PLDL1KEEP);
+}
+
+void test_svprfh_gather_index_5(svbool_t pg, svuint64_t bases, int64_t index)
+{
+  // CHECK-LABEL: test_svprfh_gather_index_5
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 1
+  // CHECK: call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]], i32 0)
+  // CHECK: ret void
+  return svprfh_gather_index(pg, bases, index, SV_PLDL1KEEP);
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c
@@ -116,3 +116,151 @@
   // CHECK: @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> %[[PG]], i8* %[[I8_BASE]], i32 0)
   return svprfd_vnum(pg, base, vnum, SV_PLDL1KEEP);
 }
+
+void test_svprfd_gather_u32base(svbool_t pg, svuint32_t bases)
+{
+  // CHECK-LABEL: test_svprfd_gather_u32base
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0)
+  // CHECK: ret void
+  return svprfd_gather_u32base(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather(svbool_t pg, svuint32_t bases)
+{
+  // CHECK-LABEL: test_svprfd_gather
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0)
+  // CHECK: ret void
+  return svprfd_gather(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_u64base(svbool_t pg, svuint64_t bases)
+{
+  // CHECK-LABEL: test_svprfd_gather_u64base
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0)
+  // CHECK: ret void
+  return svprfd_gather_u64base(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_1(svbool_t pg, svuint64_t bases)
+{
+  // CHECK-LABEL: test_svprfd_gather_1
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0)
+  // CHECK: ret void
+  return svprfd_gather(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_s32index(svbool_t pg, const void *base, svint32_t indices)
+{
+  // CHECK-LABEL: test_svprfd_gather_s32index
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfd.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+  // CHECK: ret void
+  return svprfd_gather_s32index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_index(svbool_t pg, const void *base, svint32_t indices)
+{
+  // CHECK-LABEL: test_svprfd_gather_index
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfd.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+  // CHECK: ret void
+  return svprfd_gather_index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_s64index(svbool_t pg, const void *base, svint64_t indices)
+{
+  // CHECK-LABEL: test_svprfd_gather_s64index
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfd.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+  // CHECK: ret void
+  return svprfd_gather_s64index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_index_1(svbool_t pg, const void *base, svint64_t indices)
+{
+  // CHECK-LABEL: test_svprfd_gather_index_1
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfd.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+  // CHECK: ret void
+  return svprfd_gather_index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_u32index(svbool_t pg, const void *base, svuint32_t indices)
+{
+  // CHECK-LABEL: test_svprfd_gather_u32index
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfd.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+  // CHECK: ret void
+  return svprfd_gather_u32index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_index_2(svbool_t pg, const void *base, svuint32_t indices)
+{
+  // CHECK-LABEL: test_svprfd_gather_index_2
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfd.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %indices, i32 0)
+  // CHECK: ret void
+  return svprfd_gather_index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_u64index(svbool_t pg, const void *base, svuint64_t indices)
+{
+  // CHECK-LABEL: test_svprfd_gather_u64index
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfd.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+  // CHECK: ret void
+  return svprfd_gather_u64index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_index_3(svbool_t pg, const void *base, svuint64_t indices)
+{
+  // CHECK-LABEL: test_svprfd_gather_index_3
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfd.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %indices, i32 0)
+  // CHECK: ret void
+  return svprfd_gather_index(pg, base, indices, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_u32base_index(svbool_t pg, svuint32_t bases, int64_t index)
+{
+  // CHECK-LABEL: test_svprfd_gather_u32base_index
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 3
+  // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]], i32 0)
+  // CHECK: ret void
+  return svprfd_gather_u32base_index(pg, bases, index, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_index_4(svbool_t pg, svuint32_t bases, int64_t index)
+{
+  // CHECK-LABEL: test_svprfd_gather_index_4
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 3
+  // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %[[SHL]], i32 0)
+  // CHECK: ret void
+  return svprfd_gather_index(pg, bases, index, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t index)
+{
+  // CHECK-LABEL: test_svprfd_gather_u64base_index
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 3
+  // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]], i32 0)
+  // CHECK: ret void
+  return svprfd_gather_u64base_index(pg, bases, index, SV_PLDL1KEEP);
+}
+
+void test_svprfd_gather_index_5(svbool_t pg, svuint64_t bases, int64_t index)
+{
+  // CHECK-LABEL: test_svprfd_gather_index_5
+  // CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK-DAG: %[[SHL:.*]] = shl i64 %index, 3
+  // CHECK: call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %[[SHL]], i32 0)
+  // CHECK: ret void
+  return svprfd_gather_index(pg, bases, index, SV_PLDL1KEEP);
+}
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c
@@ -102,3 +102,147 @@
   // CHECK: @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> %pg, i8* %[[GEP]], i32 0)
   return svprfb_vnum(pg, base, vnum, SV_PLDL1KEEP);
 }
+
+void test_svprfb_gather_u32base(svbool_t pg, svuint32_t bases)
+{
+  // CHECK-LABEL: test_svprfb_gather_u32base
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0)
+  // CHECK: ret void
+  return svprfb_gather_u32base(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather(svbool_t pg, svuint32_t bases)
+{
+  // CHECK-LABEL: test_svprfb_gather
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 0, i32 0)
+  // CHECK: ret void
+  return svprfb_gather(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_u64base(svbool_t pg, svuint64_t bases)
+{
+  // CHECK-LABEL: test_svprfb_gather_u64base
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0)
+  // CHECK: ret void
+  return svprfb_gather_u64base(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_1(svbool_t pg, svuint64_t bases)
+{
+  // CHECK-LABEL: test_svprfb_gather_1
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 0, i32 0)
+  // CHECK: ret void
+  return svprfb_gather(pg, bases, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_s32offset(svbool_t pg, const void *base, svint32_t offsets)
+{
+  // CHECK-LABEL: test_svprfb_gather_s32offset
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfb.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets, i32 0)
+  // CHECK: ret void
+  return svprfb_gather_s32offset(pg, base, offsets, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_offset(svbool_t pg, const void *base, svint32_t offsets)
+{
+  // CHECK-LABEL: test_svprfb_gather_offset
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfb.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets, i32 0)
+  // CHECK: ret void
+  return svprfb_gather_offset(pg, base, offsets, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_s64offset(svbool_t pg, const void *base, svint64_t offsets)
+{
+  // CHECK-LABEL: test_svprfb_gather_s64offset
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets, i32 0)
+  // CHECK: ret void
+  return svprfb_gather_s64offset(pg, base, offsets, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_offset_1(svbool_t pg, const void *base, svint64_t offsets)
+{
+  // CHECK-LABEL: test_svprfb_gather_offset_1
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets, i32 0)
+  // CHECK: ret void
+  return svprfb_gather_offset(pg, base, offsets, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_u32offset(svbool_t pg, const void *base, svuint32_t offsets)
+{
+  // CHECK-LABEL: test_svprfb_gather_u32offset
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfb.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets, i32 0)
+  // CHECK: ret void
+  return svprfb_gather_u32offset(pg, base, offsets, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_offset_2(svbool_t pg, const void *base, svuint32_t offsets)
+{
+  // CHECK-LABEL: test_svprfb_gather_offset_2
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfb.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %[[PG]], i8* %base, <vscale x 4 x i32> %offsets, i32 0)
+  // CHECK: ret void
+  return svprfb_gather_offset(pg, base, offsets, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_u64offset(svbool_t pg, const void *base, svuint64_t offsets)
+{
+  // CHECK-LABEL: test_svprfb_gather_u64offset
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets, i32 0)
+  // CHECK: ret void
+  return svprfb_gather_u64offset(pg, base, offsets, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_offset_3(svbool_t pg, const void *base, svuint64_t offsets)
+{
+  // CHECK-LABEL: test_svprfb_gather_offset_3
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfb.gather.index.nxv2i64(<vscale x 2 x i1> %[[PG]], i8* %base, <vscale x 2 x i64> %offsets, i32 0)
+  // CHECK: ret void
+  return svprfb_gather_offset(pg, base, offsets, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_u32base_offset(svbool_t pg, svuint32_t bases, int64_t offset)
+{
+  // CHECK-LABEL: test_svprfb_gather_u32base_offset
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %offset, i32 0)
+  // CHECK: ret void
+  return svprfb_gather_u32base_offset(pg, bases, offset, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_offset_4(svbool_t pg, svuint32_t bases, int64_t offset)
+{
+  // CHECK-LABEL: test_svprfb_gather_offset_4
+  // CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32(<vscale x 4 x i1> %[[PG]], <vscale x 4 x i32> %bases, i64 %offset, i32 0)
+  // CHECK: ret void
+  return svprfb_gather_offset(pg, bases, offset, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_u64base_offset(svbool_t pg, svuint64_t bases, int64_t offset)
+{
+  // CHECK-LABEL: test_svprfb_gather_u64base_offset
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset, i32 0)
+  // CHECK: ret void
+  return svprfb_gather_u64base_offset(pg, bases, offset, SV_PLDL1KEEP);
+}
+
+void test_svprfb_gather_offset_5(svbool_t pg, svuint64_t bases, int64_t offset)
+{
+  // CHECK-LABEL: test_svprfb_gather_offset_5
+  // CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+  // CHECK: call void @llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64(<vscale x 2 x i1> %[[PG]], <vscale x 2 x i64> %bases, i64 %offset, i32 0)
+  // CHECK: ret void
+  return svprfb_gather_offset(pg, bases, offset, SV_PLDL1KEEP);
+}
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -3930,6 +3930,9 @@
   llvm::Value *EmitSVEPrefetchLoad(SVETypeFlags TypeFlags,
                                    SmallVectorImpl<llvm::Value *> &Ops,
                                    unsigned BuiltinID);
+  llvm::Value *EmitSVEGatherPrefetch(SVETypeFlags TypeFlags,
+                                     SmallVectorImpl<llvm::Value *> &Ops,
+                                     unsigned IntID);
   llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
   llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -7714,6 +7714,39 @@
   return Builder.CreateCall(F, Ops);
 }
 
+Value *CodeGenFunction::EmitSVEGatherPrefetch(SVETypeFlags TypeFlags,
+                                              SmallVectorImpl<Value *> &Ops,
+                                              unsigned IntID) {
+  // Emit a gather-prefetch intrinsic call. It is overloaded on the vector
+  // operand: the vector of bases (Ops[1]) or, failing that, offsets (Ops[2]).
+  llvm::VectorType *OverloadedTy = dyn_cast<llvm::VectorType>(Ops[1]->getType());
+  if (!OverloadedTy)
+    OverloadedTy = cast<llvm::VectorType>(Ops[2]->getType());
+
+  // Cast the predicate from svbool_t to the right number of elements.
+  Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);
+
+  // Vector-of-bases forms ('vector+imm' addressing), with an optional index.
+  if (Ops[1]->getType()->isVectorTy()) {
+    if (Ops.size() == 3) {
+      // Pass 0 for 'vector+imm' when the index is omitted.
+      Ops.push_back(ConstantInt::get(Int64Ty, 0));
+
+      // sv_prfop must stay last, so swap it with the zero index just appended.
+      std::swap(Ops[2], Ops[3]);
+    }
+
+    // The index is passed as a scaled offset: multiply by the element size.
+    llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);
+    unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;
+    Value *Scale = ConstantInt::get(Int64Ty, BytesPerElt);
+    Ops[2] = Builder.CreateMul(Ops[2], Scale);
+  }
+
+  Function *F = CGM.getIntrinsic(IntID, OverloadedTy);
+  return Builder.CreateCall(F, Ops);
+}
+
 Value *CodeGenFunction::EmitSVEPrefetchLoad(SVETypeFlags TypeFlags,
                                             SmallVectorImpl<Value *> &Ops,
                                             unsigned BuiltinID) {
@@ -7869,6 +7902,8 @@
     return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (TypeFlags.isPrefetch())
     return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
+  else if (TypeFlags.isGatherPrefetch())
+    return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
   else if (Builtin->LLVMIntrinsic != 0) {
     if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
       InsertExplicitZeroOperand(Builder, Ty, Ops);
Index: clang/include/clang/Basic/arm_sve.td
===================================================================
--- clang/include/clang/Basic/arm_sve.td
+++ clang/include/clang/Basic/arm_sve.td
@@ -95,6 +95,8 @@
 // G: pointer to uint32_t
 // H: pointer to uint64_t
 
+// Q: const pointer to void
+
 // S: const pointer to int8_t
 // T: const pointer to int16_t
 // U: const pointer to int32_t
@@ -182,6 +184,7 @@
 def IsAppendSVALL             : FlagType<0x02000000>; // Appends SV_ALL as the last operand.
 def IsInsertOp1SVALL          : FlagType<0x04000000>; // Inserts SV_ALL as the second operand.
 def IsPrefetch                : FlagType<0x08000000>; // Contiguous prefetches.
+def IsGatherPrefetch          : FlagType<0x10000000>; // Gather prefetches.
 
 // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
 class ImmCheckType<int val> {
@@ -494,6 +497,39 @@
 def SVPRFW_VNUM : MInst<"svprfw_vnum", "vPclJ", "i", [IsPrefetch], MemEltTyInt32, "aarch64_sve_prf">;
 def SVPRFD_VNUM : MInst<"svprfd_vnum", "vPclJ", "l", [IsPrefetch], MemEltTyInt64, "aarch64_sve_prf">;
 
+// Prefetch (Vector bases)
+def SVPRFB_GATHER_BASES : MInst<"svprfb_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt8,  "aarch64_sve_prfb_gather_scalar_offset">;
+def SVPRFH_GATHER_BASES : MInst<"svprfh_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_scalar_offset">;
+def SVPRFW_GATHER_BASES : MInst<"svprfw_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_scalar_offset">;
+def SVPRFD_GATHER_BASES : MInst<"svprfd_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_scalar_offset">;
+
+// Prefetch (Scalar base, Vector offsets)
+def SVPRFB_GATHER_32B_OFFSETS_S : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "i",  [IsGatherPrefetch], MemEltTyInt8,  "aarch64_sve_prfb_gather_sxtw_index">;
+def SVPRFH_GATHER_32B_OFFSETS_S : MInst<"svprfh_gather_[{3}]index",  "vPQdJ", "i",  [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_sxtw_index">;
+def SVPRFW_GATHER_32B_OFFSETS_S : MInst<"svprfw_gather_[{3}]index",  "vPQdJ", "i",  [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_sxtw_index">;
+def SVPRFD_GATHER_32B_OFFSETS_S : MInst<"svprfd_gather_[{3}]index",  "vPQdJ", "i",  [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_sxtw_index">;
+
+def SVPRFB_GATHER_64B_OFFSETS_S : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "l",  [IsGatherPrefetch], MemEltTyInt8,  "aarch64_sve_prfb_gather_index">;
+def SVPRFH_GATHER_64B_OFFSETS_S : MInst<"svprfh_gather_[{3}]index",  "vPQdJ", "l",  [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_index">;
+def SVPRFW_GATHER_64B_OFFSETS_S : MInst<"svprfw_gather_[{3}]index",  "vPQdJ", "l",  [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_index">;
+def SVPRFD_GATHER_64B_OFFSETS_S : MInst<"svprfd_gather_[{3}]index",  "vPQdJ", "l",  [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_index">;
+
+def SVPRFB_GATHER_32B_OFFSETS_U : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "Ui", [IsGatherPrefetch], MemEltTyInt8,  "aarch64_sve_prfb_gather_uxtw_index">;
+def SVPRFH_GATHER_32B_OFFSETS_U : MInst<"svprfh_gather_[{3}]index",  "vPQdJ", "Ui", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_uxtw_index">;
+def SVPRFW_GATHER_32B_OFFSETS_U : MInst<"svprfw_gather_[{3}]index",  "vPQdJ", "Ui", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_uxtw_index">;
+def SVPRFD_GATHER_32B_OFFSETS_U : MInst<"svprfd_gather_[{3}]index",  "vPQdJ", "Ui", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_uxtw_index">;
+
+def SVPRFB_GATHER_64B_OFFSETS_U : MInst<"svprfb_gather_[{3}]offset", "vPQdJ", "Ul", [IsGatherPrefetch], MemEltTyInt8,  "aarch64_sve_prfb_gather_index">;
+def SVPRFH_GATHER_64B_OFFSETS_U : MInst<"svprfh_gather_[{3}]index",  "vPQdJ", "Ul", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_index">;
+def SVPRFW_GATHER_64B_OFFSETS_U : MInst<"svprfw_gather_[{3}]index",  "vPQdJ", "Ul", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_index">;
+def SVPRFD_GATHER_64B_OFFSETS_U : MInst<"svprfd_gather_[{3}]index",  "vPQdJ", "Ul", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_index">;
+
+// Prefetch (Vector bases, scalar offset)
+def SVPRFB_GATHER_BASES_OFFSET : MInst<"svprfb_gather[_{2}base]_offset", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt8,  "aarch64_sve_prfb_gather_scalar_offset">;
+def SVPRFH_GATHER_BASES_OFFSET : MInst<"svprfh_gather[_{2}base]_index",  "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_scalar_offset">;
+def SVPRFW_GATHER_BASES_OFFSET : MInst<"svprfw_gather[_{2}base]_index",  "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_scalar_offset">;
+def SVPRFD_GATHER_BASES_OFFSET : MInst<"svprfd_gather[_{2}base]_index",  "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_scalar_offset">;
+
 ////////////////////////////////////////////////////////////////////////////////
 // Integer arithmetic
 
Index: clang/include/clang/Basic/TargetBuiltins.h
===================================================================
--- clang/include/clang/Basic/TargetBuiltins.h
+++ clang/include/clang/Basic/TargetBuiltins.h
@@ -241,6 +241,7 @@
     bool isAppendSVALL() const { return Flags & IsAppendSVALL; }
     bool isInsertOp1SVALL() const { return Flags & IsInsertOp1SVALL; }
     bool isPrefetch() const { return Flags & IsPrefetch; }
+    bool isGatherPrefetch() const { return Flags & IsGatherPrefetch; }
 
     uint64_t getBits() const { return Flags; }
     bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to