craig.topper updated this revision to Diff 380982.
craig.topper added a comment.
Rebase
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D112102/new/
https://reviews.llvm.org/D112102
Files:
clang/include/clang/Basic/riscv_vector.td
clang/lib/Sema/SemaChecking.cpp
clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c
clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c
clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c
clang/utils/TableGen/RISCVVEmitter.cpp
llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll
llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll
Index: llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll
+++ llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
; RUN: < %s | FileCheck %s
declare <vscale x 1 x i8> @llvm.riscv.vmerge.nxv1i8.nxv1i8(
<vscale x 1 x i8>,
@@ -1320,3 +1320,333 @@
ret <vscale x 8 x i64> %a
}
+
+declare <vscale x 1 x half> @llvm.riscv.vmerge.nxv1f16.nxv1f16(
+ <vscale x 1 x half>,
+ <vscale x 1 x half>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x half> @intrinsic_vmerge_vvm_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1f16_nxv1f16_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x half> @llvm.riscv.vmerge.nxv1f16.nxv1f16(
+ <vscale x 1 x half> %0,
+ <vscale x 1 x half> %1,
+ <vscale x 1 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vmerge.nxv2f16.nxv2f16(
+ <vscale x 2 x half>,
+ <vscale x 2 x half>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x half> @intrinsic_vmerge_vvm_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2f16_nxv2f16_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x half> @llvm.riscv.vmerge.nxv2f16.nxv2f16(
+ <vscale x 2 x half> %0,
+ <vscale x 2 x half> %1,
+ <vscale x 2 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vmerge.nxv4f16.nxv4f16(
+ <vscale x 4 x half>,
+ <vscale x 4 x half>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x half> @intrinsic_vmerge_vvm_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4f16_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x half> @llvm.riscv.vmerge.nxv4f16.nxv4f16(
+ <vscale x 4 x half> %0,
+ <vscale x 4 x half> %1,
+ <vscale x 4 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vmerge.nxv8f16.nxv8f16(
+ <vscale x 8 x half>,
+ <vscale x 8 x half>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x half> @intrinsic_vmerge_vvm_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8f16_nxv8f16_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x half> @llvm.riscv.vmerge.nxv8f16.nxv8f16(
+ <vscale x 8 x half> %0,
+ <vscale x 8 x half> %1,
+ <vscale x 8 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vmerge.nxv16f16.nxv16f16(
+ <vscale x 16 x half>,
+ <vscale x 16 x half>,
+ <vscale x 16 x i1>,
+ i64);
+
+define <vscale x 16 x half> @intrinsic_vmerge_vvm_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv16f16_nxv16f16_nxv16f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x half> @llvm.riscv.vmerge.nxv16f16.nxv16f16(
+ <vscale x 16 x half> %0,
+ <vscale x 16 x half> %1,
+ <vscale x 16 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vmerge.nxv32f16.nxv32f16(
+ <vscale x 32 x half>,
+ <vscale x 32 x half>,
+ <vscale x 32 x i1>,
+ i64);
+
+define <vscale x 32 x half> @intrinsic_vmerge_vvm_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv32f16_nxv32f16_nxv32f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x half> @llvm.riscv.vmerge.nxv32f16.nxv32f16(
+ <vscale x 32 x half> %0,
+ <vscale x 32 x half> %1,
+ <vscale x 32 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vmerge.nxv1f32.nxv1f32(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x float> @intrinsic_vmerge_vvm_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1f32_nxv1f32_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vmerge.nxv1f32.nxv1f32(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x float> @intrinsic_vmerge_vvm_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2f32_nxv2f32_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vmerge.nxv4f32.nxv4f32(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x float> @intrinsic_vmerge_vvm_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4f32_nxv4f32_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vmerge.nxv4f32.nxv4f32(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vmerge.nxv8f32.nxv8f32(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x float> @intrinsic_vmerge_vvm_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8f32_nxv8f32_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vmerge.nxv8f32.nxv8f32(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vmerge.nxv16f32.nxv16f32(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ <vscale x 16 x i1>,
+ i64);
+
+define <vscale x 16 x float> @intrinsic_vmerge_vvm_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv16f32_nxv16f32_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vmerge.nxv16f32.nxv16f32(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vmerge.nxv1f64.nxv1f64(
+ <vscale x 1 x double>,
+ <vscale x 1 x double>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x double> @intrinsic_vmerge_vvm_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1f64_nxv1f64_nxv1f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x double> @llvm.riscv.vmerge.nxv1f64.nxv1f64(
+ <vscale x 1 x double> %0,
+ <vscale x 1 x double> %1,
+ <vscale x 1 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vmerge.nxv2f64.nxv2f64(
+ <vscale x 2 x double>,
+ <vscale x 2 x double>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x double> @intrinsic_vmerge_vvm_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2f64_nxv2f64_nxv2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x double> @llvm.riscv.vmerge.nxv2f64.nxv2f64(
+ <vscale x 2 x double> %0,
+ <vscale x 2 x double> %1,
+ <vscale x 2 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vmerge.nxv4f64.nxv4f64(
+ <vscale x 4 x double>,
+ <vscale x 4 x double>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x double> @intrinsic_vmerge_vvm_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4f64_nxv4f64_nxv4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x double> @llvm.riscv.vmerge.nxv4f64.nxv4f64(
+ <vscale x 4 x double> %0,
+ <vscale x 4 x double> %1,
+ <vscale x 4 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vmerge.nxv8f64.nxv8f64(
+ <vscale x 8 x double>,
+ <vscale x 8 x double>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x double> @intrinsic_vmerge_vvm_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8f64_nxv8f64_nxv8f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x double> @llvm.riscv.vmerge.nxv8f64.nxv8f64(
+ <vscale x 8 x double> %0,
+ <vscale x 8 x double> %1,
+ <vscale x 8 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 8 x double> %a
+}
Index: llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll
+++ llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
; RUN: < %s | FileCheck %s
declare <vscale x 1 x i8> @llvm.riscv.vmerge.nxv1i8.nxv1i8(
<vscale x 1 x i8>,
@@ -1344,3 +1344,333 @@
ret <vscale x 8 x i64> %a
}
+
+declare <vscale x 1 x half> @llvm.riscv.vmerge.nxv1f16.nxv1f16(
+ <vscale x 1 x half>,
+ <vscale x 1 x half>,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x half> @intrinsic_vmerge_vvm_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1f16_nxv1f16_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x half> @llvm.riscv.vmerge.nxv1f16.nxv1f16(
+ <vscale x 1 x half> %0,
+ <vscale x 1 x half> %1,
+ <vscale x 1 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vmerge.nxv2f16.nxv2f16(
+ <vscale x 2 x half>,
+ <vscale x 2 x half>,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x half> @intrinsic_vmerge_vvm_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2f16_nxv2f16_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x half> @llvm.riscv.vmerge.nxv2f16.nxv2f16(
+ <vscale x 2 x half> %0,
+ <vscale x 2 x half> %1,
+ <vscale x 2 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vmerge.nxv4f16.nxv4f16(
+ <vscale x 4 x half>,
+ <vscale x 4 x half>,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x half> @intrinsic_vmerge_vvm_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4f16_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x half> @llvm.riscv.vmerge.nxv4f16.nxv4f16(
+ <vscale x 4 x half> %0,
+ <vscale x 4 x half> %1,
+ <vscale x 4 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vmerge.nxv8f16.nxv8f16(
+ <vscale x 8 x half>,
+ <vscale x 8 x half>,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x half> @intrinsic_vmerge_vvm_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8f16_nxv8f16_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x half> @llvm.riscv.vmerge.nxv8f16.nxv8f16(
+ <vscale x 8 x half> %0,
+ <vscale x 8 x half> %1,
+ <vscale x 8 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vmerge.nxv16f16.nxv16f16(
+ <vscale x 16 x half>,
+ <vscale x 16 x half>,
+ <vscale x 16 x i1>,
+ i32);
+
+define <vscale x 16 x half> @intrinsic_vmerge_vvm_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv16f16_nxv16f16_nxv16f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x half> @llvm.riscv.vmerge.nxv16f16.nxv16f16(
+ <vscale x 16 x half> %0,
+ <vscale x 16 x half> %1,
+ <vscale x 16 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vmerge.nxv32f16.nxv32f16(
+ <vscale x 32 x half>,
+ <vscale x 32 x half>,
+ <vscale x 32 x i1>,
+ i32);
+
+define <vscale x 32 x half> @intrinsic_vmerge_vvm_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv32f16_nxv32f16_nxv32f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x half> @llvm.riscv.vmerge.nxv32f16.nxv32f16(
+ <vscale x 32 x half> %0,
+ <vscale x 32 x half> %1,
+ <vscale x 32 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vmerge.nxv1f32.nxv1f32(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x float> @intrinsic_vmerge_vvm_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1f32_nxv1f32_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vmerge.nxv1f32.nxv1f32(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x float> @intrinsic_vmerge_vvm_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2f32_nxv2f32_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vmerge.nxv4f32.nxv4f32(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x float> @intrinsic_vmerge_vvm_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4f32_nxv4f32_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vmerge.nxv4f32.nxv4f32(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vmerge.nxv8f32.nxv8f32(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x float> @intrinsic_vmerge_vvm_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8f32_nxv8f32_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vmerge.nxv8f32.nxv8f32(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vmerge.nxv16f32.nxv16f32(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ <vscale x 16 x i1>,
+ i32);
+
+define <vscale x 16 x float> @intrinsic_vmerge_vvm_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv16f32_nxv16f32_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vmerge.nxv16f32.nxv16f32(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x double> @llvm.riscv.vmerge.nxv1f64.nxv1f64(
+ <vscale x 1 x double>,
+ <vscale x 1 x double>,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x double> @intrinsic_vmerge_vvm_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv1f64_nxv1f64_nxv1f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x double> @llvm.riscv.vmerge.nxv1f64.nxv1f64(
+ <vscale x 1 x double> %0,
+ <vscale x 1 x double> %1,
+ <vscale x 1 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 1 x double> %a
+}
+
+declare <vscale x 2 x double> @llvm.riscv.vmerge.nxv2f64.nxv2f64(
+ <vscale x 2 x double>,
+ <vscale x 2 x double>,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x double> @intrinsic_vmerge_vvm_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv2f64_nxv2f64_nxv2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x double> @llvm.riscv.vmerge.nxv2f64.nxv2f64(
+ <vscale x 2 x double> %0,
+ <vscale x 2 x double> %1,
+ <vscale x 2 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 2 x double> %a
+}
+
+declare <vscale x 4 x double> @llvm.riscv.vmerge.nxv4f64.nxv4f64(
+ <vscale x 4 x double>,
+ <vscale x 4 x double>,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x double> @intrinsic_vmerge_vvm_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv4f64_nxv4f64_nxv4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x double> @llvm.riscv.vmerge.nxv4f64.nxv4f64(
+ <vscale x 4 x double> %0,
+ <vscale x 4 x double> %1,
+ <vscale x 4 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 4 x double> %a
+}
+
+declare <vscale x 8 x double> @llvm.riscv.vmerge.nxv8f64.nxv8f64(
+ <vscale x 8 x double>,
+ <vscale x 8 x double>,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x double> @intrinsic_vmerge_vvm_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vmerge_vvm_nxv8f64_nxv8f64_nxv8f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x double> @llvm.riscv.vmerge.nxv8f64.nxv8f64(
+ <vscale x 8 x double> %0,
+ <vscale x 8 x double> %1,
+ <vscale x 8 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 8 x double> %a
+}
Index: llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
===================================================================
--- llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -4574,6 +4574,10 @@
// 14.15. Vector Floating-Point Merge Instruction
//===----------------------------------------------------------------------===//
// We can use vmerge.vvm to support vector-vector vfmerge.
+// NOTE: Clang previously used int_riscv_vfmerge for the vector-vector form, but
+// now uses int_riscv_vmerge. Support both for compatibility.
+defm : VPatBinaryV_VM<"int_riscv_vmerge", "PseudoVMERGE",
+ /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
defm : VPatBinaryV_VM<"int_riscv_vfmerge", "PseudoVMERGE",
/*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
defm : VPatBinaryV_XM<"int_riscv_vfmerge", "PseudoVFMERGE",
Index: clang/utils/TableGen/RISCVVEmitter.cpp
===================================================================
--- clang/utils/TableGen/RISCVVEmitter.cpp
+++ clang/utils/TableGen/RISCVVEmitter.cpp
@@ -150,7 +150,8 @@
class RVVIntrinsic {
private:
- std::string Name; // Builtin name
+ std::string BuiltinName; // Builtin name
+ std::string Name; // C intrinsic name.
std::string MangledName;
std::string IRName;
bool IsMask;
@@ -177,6 +178,7 @@
StringRef RequiredExtension, unsigned NF);
~RVVIntrinsic() = default;
+ StringRef getBuiltinName() const { return BuiltinName; }
StringRef getName() const { return Name; }
StringRef getMangledName() const { return MangledName; }
bool hasVL() const { return HasVL; }
@@ -189,6 +191,9 @@
StringRef getManualCodegen() const { return ManualCodegen; }
uint8_t getRISCVExtensions() const { return RISCVExtensions; }
unsigned getNF() const { return NF; }
+ const std::vector<int64_t> &getIntrinsicTypes() const {
+ return IntrinsicTypes;
+ }
// Return the type string for a BUILTIN() macro in Builtins.def.
std::string getBuiltinTypeStr() const;
@@ -765,8 +770,9 @@
HasNoMaskedOverloaded(HasNoMaskedOverloaded), HasAutoDef(HasAutoDef),
ManualCodegen(ManualCodegen.str()), NF(NF) {
- // Init Name and MangledName
- Name = NewName.str();
+ // Init BuiltinName, Name, and MangledName
+ BuiltinName = NewName.str();
+ Name = BuiltinName;
if (NewMangledName.empty())
MangledName = NewName.split("_").first.str();
else
@@ -776,8 +782,10 @@
if (!MangledSuffix.empty())
MangledName += "_" + MangledSuffix.str();
if (IsMask) {
+ BuiltinName += "_m";
Name += "_m";
}
+
// Init RISC-V extensions
for (const auto &T : OutInTypes) {
if (T->isFloatVector(16) || T->isFloat(16))
@@ -857,7 +865,7 @@
void RVVIntrinsic::emitIntrinsicFuncDef(raw_ostream &OS) const {
OS << "__attribute__((__clang_builtin_alias__(";
- OS << "__builtin_rvv_" << getName() << ")))\n";
+ OS << "__builtin_rvv_" << getBuiltinName() << ")))\n";
OS << OutputType->getTypeStr() << " " << getName() << "(";
// Emit function arguments
if (!InputTypes.empty()) {
@@ -870,7 +878,7 @@
void RVVIntrinsic::emitMangledFuncDef(raw_ostream &OS) const {
OS << "__attribute__((__clang_builtin_alias__(";
- OS << "__builtin_rvv_" << getName() << ")))\n";
+ OS << "__builtin_rvv_" << getBuiltinName() << ")))\n";
OS << OutputType->getTypeStr() << " " << getMangledName() << "(";
// Emit function arguments
if (!InputTypes.empty()) {
@@ -1012,13 +1020,31 @@
std::vector<std::unique_ptr<RVVIntrinsic>> Defs;
createRVVIntrinsics(Defs);
+ // Map to keep track of which builtin names have already been emitted.
+ StringMap<RVVIntrinsic *> BuiltinMap;
+
OS << "#if defined(TARGET_BUILTIN) && !defined(RISCVV_BUILTIN)\n";
OS << "#define RISCVV_BUILTIN(ID, TYPE, ATTRS) TARGET_BUILTIN(ID, TYPE, "
"ATTRS, \"experimental-v\")\n";
OS << "#endif\n";
for (auto &Def : Defs) {
- OS << "RISCVV_BUILTIN(__builtin_rvv_" << Def->getName() << ",\""
- << Def->getBuiltinTypeStr() << "\", \"n\")\n";
+ auto P =
+ BuiltinMap.insert(std::make_pair(Def->getBuiltinName(), Def.get()));
+ if (!P.second) {
+ // Verify that this would have produced the same builtin definition.
+ if (P.first->second->hasAutoDef() != Def->hasAutoDef()) {
+ PrintFatalError("Builtin with same name has different hasAutoDef");
+ } else if (!Def->hasAutoDef() && P.first->second->getBuiltinTypeStr() !=
+ Def->getBuiltinTypeStr()) {
+ PrintFatalError("Builtin with same name has different type string");
+ }
+ continue;
+ }
+
+ OS << "RISCVV_BUILTIN(__builtin_rvv_" << Def->getBuiltinName() << ",\"";
+ if (!Def->hasAutoDef())
+ OS << Def->getBuiltinTypeStr();
+ OS << "\", \"n\")\n";
}
OS << "#undef RISCVV_BUILTIN\n";
}
@@ -1031,6 +1057,10 @@
const std::unique_ptr<RVVIntrinsic> &B) {
return A->getIRName() < B->getIRName();
});
+
+ // Map to keep track of which builtin names have already been emitted.
+ StringMap<RVVIntrinsic *> BuiltinMap;
+
// Print switch body when the ir name or ManualCodegen changes from previous
// iteration.
RVVIntrinsic *PrevDef = Defs.begin()->get();
@@ -1041,7 +1071,29 @@
PrevDef->emitCodeGenSwitchBody(OS);
}
PrevDef = Def.get();
- OS << "case RISCVVector::BI__builtin_rvv_" << Def->getName() << ":\n";
+
+ auto P =
+ BuiltinMap.insert(std::make_pair(Def->getBuiltinName(), Def.get()));
+ if (P.second) {
+ OS << "case RISCVVector::BI__builtin_rvv_" << Def->getBuiltinName()
+ << ":\n";
+ continue;
+ }
+
+ if (P.first->second->getIRName() != Def->getIRName())
+ PrintFatalError("Builtin with same name has different IRName");
+ else if (P.first->second->getManualCodegen() != Def->getManualCodegen())
+ PrintFatalError("Builtin with same name has different ManualCodegen");
+ else if (P.first->second->getNF() != Def->getNF())
+ PrintFatalError("Builtin with same name has different NF");
+ else if (P.first->second->isMask() != Def->isMask())
+ PrintFatalError("Builtin with same name has different isMask");
+ else if (P.first->second->hasVL() != Def->hasVL())
+ PrintFatalError("Builtin with same name has different HasPolicy");
+ else if (P.first->second->hasPolicy() != Def->hasPolicy())
+ PrintFatalError("Builtin with same name has different HasPolicy");
+ else if (P.first->second->getIntrinsicTypes() != Def->getIntrinsicTypes())
+ PrintFatalError("Builtin with same name has different IntrinsicTypes");
}
Defs.back()->emitCodeGenSwitchBody(OS);
OS << "\n";
Index: clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c
===================================================================
--- clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c
+++ clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c
@@ -888,7 +888,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f32mf2(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmerge.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[OP1:%.*]], <vscale x 1 x float> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vmerge.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[OP1:%.*]], <vscale x 1 x float> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 1 x float> [[TMP0]]
//
vfloat32mf2_t test_vmerge_vvm_f32mf2(vbool64_t mask, vfloat32mf2_t op1,
@@ -898,7 +898,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f32m1(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmerge.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[OP1:%.*]], <vscale x 2 x float> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[OP1:%.*]], <vscale x 2 x float> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 2 x float> [[TMP0]]
//
vfloat32m1_t test_vmerge_vvm_f32m1(vbool32_t mask, vfloat32m1_t op1,
@@ -908,7 +908,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f32m2(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmerge.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vmerge.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
vfloat32m2_t test_vmerge_vvm_f32m2(vbool16_t mask, vfloat32m2_t op1,
@@ -918,7 +918,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f32m4(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmerge.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[OP1:%.*]], <vscale x 8 x float> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vmerge.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[OP1:%.*]], <vscale x 8 x float> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 8 x float> [[TMP0]]
//
vfloat32m4_t test_vmerge_vvm_f32m4(vbool8_t mask, vfloat32m4_t op1,
@@ -928,7 +928,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f32m8(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmerge.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[OP1:%.*]], <vscale x 16 x float> [[OP2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vmerge.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[OP1:%.*]], <vscale x 16 x float> [[OP2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 16 x float> [[TMP0]]
//
vfloat32m8_t test_vmerge_vvm_f32m8(vbool4_t mask, vfloat32m8_t op1,
@@ -938,7 +938,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f64m1(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmerge.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[OP1:%.*]], <vscale x 1 x double> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vmerge.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[OP1:%.*]], <vscale x 1 x double> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 1 x double> [[TMP0]]
//
vfloat64m1_t test_vmerge_vvm_f64m1(vbool64_t mask, vfloat64m1_t op1,
@@ -948,7 +948,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f64m2(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmerge.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vmerge.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
vfloat64m2_t test_vmerge_vvm_f64m2(vbool32_t mask, vfloat64m2_t op1,
@@ -958,7 +958,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f64m4(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmerge.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[OP1:%.*]], <vscale x 4 x double> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vmerge.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[OP1:%.*]], <vscale x 4 x double> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 4 x double> [[TMP0]]
//
vfloat64m4_t test_vmerge_vvm_f64m4(vbool16_t mask, vfloat64m4_t op1,
@@ -968,7 +968,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f64m8(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmerge.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[OP1:%.*]], <vscale x 8 x double> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vmerge.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[OP1:%.*]], <vscale x 8 x double> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 8 x double> [[TMP0]]
//
vfloat64m8_t test_vmerge_vvm_f64m8(vbool8_t mask, vfloat64m8_t op1,
@@ -978,7 +978,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f16mf4(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[OP1:%.*]], <vscale x 1 x half> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x half> @llvm.riscv.vmerge.nxv1f16.nxv1f16.i64(<vscale x 1 x half> [[OP1:%.*]], <vscale x 1 x half> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 1 x half> [[TMP0]]
//
vfloat16mf4_t test_vmerge_vvm_f16mf4 (vbool64_t mask, vfloat16mf4_t op1, vfloat16mf4_t op2, size_t vl) {
@@ -987,7 +987,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f16mf2(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vfmerge.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[OP1:%.*]], <vscale x 2 x half> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x half> @llvm.riscv.vmerge.nxv2f16.nxv2f16.i64(<vscale x 2 x half> [[OP1:%.*]], <vscale x 2 x half> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 2 x half> [[TMP0]]
//
vfloat16mf2_t test_vmerge_vvm_f16mf2 (vbool32_t mask, vfloat16mf2_t op1, vfloat16mf2_t op2, size_t vl) {
@@ -996,7 +996,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f16m1(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vfmerge.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[OP1:%.*]], <vscale x 4 x half> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x half> @llvm.riscv.vmerge.nxv4f16.nxv4f16.i64(<vscale x 4 x half> [[OP1:%.*]], <vscale x 4 x half> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 4 x half> [[TMP0]]
//
vfloat16m1_t test_vmerge_vvm_f16m1 (vbool16_t mask, vfloat16m1_t op1, vfloat16m1_t op2, size_t vl) {
@@ -1005,7 +1005,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f16m2(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vfmerge.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.riscv.vmerge.nxv8f16.nxv8f16.i64(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
vfloat16m2_t test_vmerge_vvm_f16m2 (vbool8_t mask, vfloat16m2_t op1, vfloat16m2_t op2, size_t vl) {
@@ -1014,7 +1014,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f16m4(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vfmerge.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[OP1:%.*]], <vscale x 16 x half> [[OP2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x half> @llvm.riscv.vmerge.nxv16f16.nxv16f16.i64(<vscale x 16 x half> [[OP1:%.*]], <vscale x 16 x half> [[OP2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 16 x half> [[TMP0]]
//
vfloat16m4_t test_vmerge_vvm_f16m4 (vbool4_t mask, vfloat16m4_t op1, vfloat16m4_t op2, size_t vl) {
@@ -1023,7 +1023,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f16m8(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vfmerge.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[OP1:%.*]], <vscale x 32 x half> [[OP2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x half> @llvm.riscv.vmerge.nxv32f16.nxv32f16.i64(<vscale x 32 x half> [[OP1:%.*]], <vscale x 32 x half> [[OP2:%.*]], <vscale x 32 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 32 x half> [[TMP0]]
//
vfloat16m8_t test_vmerge_vvm_f16m8 (vbool2_t mask, vfloat16m8_t op1, vfloat16m8_t op2, size_t vl) {
Index: clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c
===================================================================
--- clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c
+++ clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c
@@ -887,7 +887,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f32mf2(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vfmerge.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[OP1:%.*]], <vscale x 1 x float> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.vmerge.nxv1f32.nxv1f32.i64(<vscale x 1 x float> [[OP1:%.*]], <vscale x 1 x float> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 1 x float> [[TMP0]]
//
vfloat32mf2_t test_vmerge_vvm_f32mf2(vbool64_t mask, vfloat32mf2_t op1,
@@ -897,7 +897,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f32m1(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vfmerge.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[OP1:%.*]], <vscale x 2 x float> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32.i64(<vscale x 2 x float> [[OP1:%.*]], <vscale x 2 x float> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 2 x float> [[TMP0]]
//
vfloat32m1_t test_vmerge_vvm_f32m1(vbool32_t mask, vfloat32m1_t op1,
@@ -907,7 +907,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f32m2(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vfmerge.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.vmerge.nxv4f32.nxv4f32.i64(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
vfloat32m2_t test_vmerge_vvm_f32m2(vbool16_t mask, vfloat32m2_t op1,
@@ -917,7 +917,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f32m4(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vfmerge.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[OP1:%.*]], <vscale x 8 x float> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.vmerge.nxv8f32.nxv8f32.i64(<vscale x 8 x float> [[OP1:%.*]], <vscale x 8 x float> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 8 x float> [[TMP0]]
//
vfloat32m4_t test_vmerge_vvm_f32m4(vbool8_t mask, vfloat32m4_t op1,
@@ -927,7 +927,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f32m8(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vfmerge.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[OP1:%.*]], <vscale x 16 x float> [[OP2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.vmerge.nxv16f32.nxv16f32.i64(<vscale x 16 x float> [[OP1:%.*]], <vscale x 16 x float> [[OP2:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 16 x float> [[TMP0]]
//
vfloat32m8_t test_vmerge_vvm_f32m8(vbool4_t mask, vfloat32m8_t op1,
@@ -937,7 +937,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f64m1(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vfmerge.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[OP1:%.*]], <vscale x 1 x double> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x double> @llvm.riscv.vmerge.nxv1f64.nxv1f64.i64(<vscale x 1 x double> [[OP1:%.*]], <vscale x 1 x double> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 1 x double> [[TMP0]]
//
vfloat64m1_t test_vmerge_vvm_f64m1(vbool64_t mask, vfloat64m1_t op1,
@@ -947,7 +947,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f64m2(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vfmerge.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.riscv.vmerge.nxv2f64.nxv2f64.i64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 2 x double> [[TMP0]]
//
vfloat64m2_t test_vmerge_vvm_f64m2(vbool32_t mask, vfloat64m2_t op1,
@@ -957,7 +957,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f64m4(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vfmerge.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[OP1:%.*]], <vscale x 4 x double> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x double> @llvm.riscv.vmerge.nxv4f64.nxv4f64.i64(<vscale x 4 x double> [[OP1:%.*]], <vscale x 4 x double> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 4 x double> [[TMP0]]
//
vfloat64m4_t test_vmerge_vvm_f64m4(vbool16_t mask, vfloat64m4_t op1,
@@ -967,7 +967,7 @@
// CHECK-RV64-LABEL: @test_vmerge_vvm_f64m8(
// CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vfmerge.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[OP1:%.*]], <vscale x 8 x double> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x double> @llvm.riscv.vmerge.nxv8f64.nxv8f64.i64(<vscale x 8 x double> [[OP1:%.*]], <vscale x 8 x double> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
// CHECK-RV64-NEXT: ret <vscale x 8 x double> [[TMP0]]
//
vfloat64m8_t test_vmerge_vvm_f64m8(vbool8_t mask, vfloat64m8_t op1,
Index: clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c
===================================================================
--- clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c
+++ clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c
@@ -10,7 +10,7 @@
static inline __attribute__((__always_inline__, __nodebug__))
__rvv_generic
-__attribute__((clang_builtin_alias(__builtin_rvv_vadd_vv_i8m1)))
+__attribute__((clang_builtin_alias(__builtin_rvv_vadd_vv)))
vint8m1_t vadd_generic (vint8m1_t op0, vint8m1_t op1, size_t op2);
// CHECK-LABEL: @test(
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -3667,132 +3667,6 @@
case RISCVVector::BI__builtin_rvv_vsetvlimax:
return SemaBuiltinConstantArgRange(TheCall, 0, 0, 3) ||
CheckRISCVLMUL(TheCall, 1);
- case RISCVVector::BI__builtin_rvv_vget_v_i8m2_i8m1:
- case RISCVVector::BI__builtin_rvv_vget_v_i16m2_i16m1:
- case RISCVVector::BI__builtin_rvv_vget_v_i32m2_i32m1:
- case RISCVVector::BI__builtin_rvv_vget_v_i64m2_i64m1:
- case RISCVVector::BI__builtin_rvv_vget_v_f32m2_f32m1:
- case RISCVVector::BI__builtin_rvv_vget_v_f64m2_f64m1:
- case RISCVVector::BI__builtin_rvv_vget_v_u8m2_u8m1:
- case RISCVVector::BI__builtin_rvv_vget_v_u16m2_u16m1:
- case RISCVVector::BI__builtin_rvv_vget_v_u32m2_u32m1:
- case RISCVVector::BI__builtin_rvv_vget_v_u64m2_u64m1:
- case RISCVVector::BI__builtin_rvv_vget_v_i8m4_i8m2:
- case RISCVVector::BI__builtin_rvv_vget_v_i16m4_i16m2:
- case RISCVVector::BI__builtin_rvv_vget_v_i32m4_i32m2:
- case RISCVVector::BI__builtin_rvv_vget_v_i64m4_i64m2:
- case RISCVVector::BI__builtin_rvv_vget_v_f32m4_f32m2:
- case RISCVVector::BI__builtin_rvv_vget_v_f64m4_f64m2:
- case RISCVVector::BI__builtin_rvv_vget_v_u8m4_u8m2:
- case RISCVVector::BI__builtin_rvv_vget_v_u16m4_u16m2:
- case RISCVVector::BI__builtin_rvv_vget_v_u32m4_u32m2:
- case RISCVVector::BI__builtin_rvv_vget_v_u64m4_u64m2:
- case RISCVVector::BI__builtin_rvv_vget_v_i8m8_i8m4:
- case RISCVVector::BI__builtin_rvv_vget_v_i16m8_i16m4:
- case RISCVVector::BI__builtin_rvv_vget_v_i32m8_i32m4:
- case RISCVVector::BI__builtin_rvv_vget_v_i64m8_i64m4:
- case RISCVVector::BI__builtin_rvv_vget_v_f32m8_f32m4:
- case RISCVVector::BI__builtin_rvv_vget_v_f64m8_f64m4:
- case RISCVVector::BI__builtin_rvv_vget_v_u8m8_u8m4:
- case RISCVVector::BI__builtin_rvv_vget_v_u16m8_u16m4:
- case RISCVVector::BI__builtin_rvv_vget_v_u32m8_u32m4:
- case RISCVVector::BI__builtin_rvv_vget_v_u64m8_u64m4:
- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1);
- case RISCVVector::BI__builtin_rvv_vget_v_i8m4_i8m1:
- case RISCVVector::BI__builtin_rvv_vget_v_i16m4_i16m1:
- case RISCVVector::BI__builtin_rvv_vget_v_i32m4_i32m1:
- case RISCVVector::BI__builtin_rvv_vget_v_i64m4_i64m1:
- case RISCVVector::BI__builtin_rvv_vget_v_f32m4_f32m1:
- case RISCVVector::BI__builtin_rvv_vget_v_f64m4_f64m1:
- case RISCVVector::BI__builtin_rvv_vget_v_u8m4_u8m1:
- case RISCVVector::BI__builtin_rvv_vget_v_u16m4_u16m1:
- case RISCVVector::BI__builtin_rvv_vget_v_u32m4_u32m1:
- case RISCVVector::BI__builtin_rvv_vget_v_u64m4_u64m1:
- case RISCVVector::BI__builtin_rvv_vget_v_i8m8_i8m2:
- case RISCVVector::BI__builtin_rvv_vget_v_i16m8_i16m2:
- case RISCVVector::BI__builtin_rvv_vget_v_i32m8_i32m2:
- case RISCVVector::BI__builtin_rvv_vget_v_i64m8_i64m2:
- case RISCVVector::BI__builtin_rvv_vget_v_f32m8_f32m2:
- case RISCVVector::BI__builtin_rvv_vget_v_f64m8_f64m2:
- case RISCVVector::BI__builtin_rvv_vget_v_u8m8_u8m2:
- case RISCVVector::BI__builtin_rvv_vget_v_u16m8_u16m2:
- case RISCVVector::BI__builtin_rvv_vget_v_u32m8_u32m2:
- case RISCVVector::BI__builtin_rvv_vget_v_u64m8_u64m2:
- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3);
- case RISCVVector::BI__builtin_rvv_vget_v_i8m8_i8m1:
- case RISCVVector::BI__builtin_rvv_vget_v_i16m8_i16m1:
- case RISCVVector::BI__builtin_rvv_vget_v_i32m8_i32m1:
- case RISCVVector::BI__builtin_rvv_vget_v_i64m8_i64m1:
- case RISCVVector::BI__builtin_rvv_vget_v_f32m8_f32m1:
- case RISCVVector::BI__builtin_rvv_vget_v_f64m8_f64m1:
- case RISCVVector::BI__builtin_rvv_vget_v_u8m8_u8m1:
- case RISCVVector::BI__builtin_rvv_vget_v_u16m8_u16m1:
- case RISCVVector::BI__builtin_rvv_vget_v_u32m8_u32m1:
- case RISCVVector::BI__builtin_rvv_vget_v_u64m8_u64m1:
- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7);
- case RISCVVector::BI__builtin_rvv_vset_v_i8m1_i8m2:
- case RISCVVector::BI__builtin_rvv_vset_v_i16m1_i16m2:
- case RISCVVector::BI__builtin_rvv_vset_v_i32m1_i32m2:
- case RISCVVector::BI__builtin_rvv_vset_v_i64m1_i64m2:
- case RISCVVector::BI__builtin_rvv_vset_v_f32m1_f32m2:
- case RISCVVector::BI__builtin_rvv_vset_v_f64m1_f64m2:
- case RISCVVector::BI__builtin_rvv_vset_v_u8m1_u8m2:
- case RISCVVector::BI__builtin_rvv_vset_v_u16m1_u16m2:
- case RISCVVector::BI__builtin_rvv_vset_v_u32m1_u32m2:
- case RISCVVector::BI__builtin_rvv_vset_v_u64m1_u64m2:
- case RISCVVector::BI__builtin_rvv_vset_v_i8m2_i8m4:
- case RISCVVector::BI__builtin_rvv_vset_v_i16m2_i16m4:
- case RISCVVector::BI__builtin_rvv_vset_v_i32m2_i32m4:
- case RISCVVector::BI__builtin_rvv_vset_v_i64m2_i64m4:
- case RISCVVector::BI__builtin_rvv_vset_v_f32m2_f32m4:
- case RISCVVector::BI__builtin_rvv_vset_v_f64m2_f64m4:
- case RISCVVector::BI__builtin_rvv_vset_v_u8m2_u8m4:
- case RISCVVector::BI__builtin_rvv_vset_v_u16m2_u16m4:
- case RISCVVector::BI__builtin_rvv_vset_v_u32m2_u32m4:
- case RISCVVector::BI__builtin_rvv_vset_v_u64m2_u64m4:
- case RISCVVector::BI__builtin_rvv_vset_v_i8m4_i8m8:
- case RISCVVector::BI__builtin_rvv_vset_v_i16m4_i16m8:
- case RISCVVector::BI__builtin_rvv_vset_v_i32m4_i32m8:
- case RISCVVector::BI__builtin_rvv_vset_v_i64m4_i64m8:
- case RISCVVector::BI__builtin_rvv_vset_v_f32m4_f32m8:
- case RISCVVector::BI__builtin_rvv_vset_v_f64m4_f64m8:
- case RISCVVector::BI__builtin_rvv_vset_v_u8m4_u8m8:
- case RISCVVector::BI__builtin_rvv_vset_v_u16m4_u16m8:
- case RISCVVector::BI__builtin_rvv_vset_v_u32m4_u32m8:
- case RISCVVector::BI__builtin_rvv_vset_v_u64m4_u64m8:
- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1);
- case RISCVVector::BI__builtin_rvv_vset_v_i8m1_i8m4:
- case RISCVVector::BI__builtin_rvv_vset_v_i16m1_i16m4:
- case RISCVVector::BI__builtin_rvv_vset_v_i32m1_i32m4:
- case RISCVVector::BI__builtin_rvv_vset_v_i64m1_i64m4:
- case RISCVVector::BI__builtin_rvv_vset_v_f32m1_f32m4:
- case RISCVVector::BI__builtin_rvv_vset_v_f64m1_f64m4:
- case RISCVVector::BI__builtin_rvv_vset_v_u8m1_u8m4:
- case RISCVVector::BI__builtin_rvv_vset_v_u16m1_u16m4:
- case RISCVVector::BI__builtin_rvv_vset_v_u32m1_u32m4:
- case RISCVVector::BI__builtin_rvv_vset_v_u64m1_u64m4:
- case RISCVVector::BI__builtin_rvv_vset_v_i8m2_i8m8:
- case RISCVVector::BI__builtin_rvv_vset_v_i16m2_i16m8:
- case RISCVVector::BI__builtin_rvv_vset_v_i32m2_i32m8:
- case RISCVVector::BI__builtin_rvv_vset_v_i64m2_i64m8:
- case RISCVVector::BI__builtin_rvv_vset_v_f32m2_f32m8:
- case RISCVVector::BI__builtin_rvv_vset_v_f64m2_f64m8:
- case RISCVVector::BI__builtin_rvv_vset_v_u8m2_u8m8:
- case RISCVVector::BI__builtin_rvv_vset_v_u16m2_u16m8:
- case RISCVVector::BI__builtin_rvv_vset_v_u32m2_u32m8:
- case RISCVVector::BI__builtin_rvv_vset_v_u64m2_u64m8:
- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3);
- case RISCVVector::BI__builtin_rvv_vset_v_i8m1_i8m8:
- case RISCVVector::BI__builtin_rvv_vset_v_i16m1_i16m8:
- case RISCVVector::BI__builtin_rvv_vset_v_i32m1_i32m8:
- case RISCVVector::BI__builtin_rvv_vset_v_i64m1_i64m8:
- case RISCVVector::BI__builtin_rvv_vset_v_f32m1_f32m8:
- case RISCVVector::BI__builtin_rvv_vset_v_f64m1_f64m8:
- case RISCVVector::BI__builtin_rvv_vset_v_u8m1_u8m8:
- case RISCVVector::BI__builtin_rvv_vset_v_u16m1_u16m8:
- case RISCVVector::BI__builtin_rvv_vset_v_u32m1_u32m8:
- case RISCVVector::BI__builtin_rvv_vset_v_u64m1_u64m8:
- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7);
}
return false;
Index: clang/include/clang/Basic/riscv_vector.td
===================================================================
--- clang/include/clang/Basic/riscv_vector.td
+++ clang/include/clang/Basic/riscv_vector.td
@@ -1881,7 +1881,7 @@
std::rotate(Ops.begin(), Ops.begin() + 1, Ops.begin() + 3);
IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[3]->getType()};
}] in {
- defm vmerge : RVVOutOp1BuiltinSet<"vfmerge", "xfd",
+ defm vmerge : RVVOutOp1BuiltinSet<"vmerge", "xfd",
[["vvm", "v", "vmvv"]]>;
defm vfmerge : RVVOutOp1BuiltinSet<"vfmerge", "xfd",
[["vfm", "v", "vmve"]]>;
@@ -2141,7 +2141,12 @@
ManualCodegen = [{
{
ID = Intrinsic::experimental_vector_extract;
- ScalableVectorType *VecTy = cast<ScalableVectorType>(ResultType);
+ auto *VecTy = cast<ScalableVectorType>(ResultType);
+ auto *OpVecTy = cast<ScalableVectorType>(Ops[0]->getType());
+ // Mask the index to the valid range.
+ unsigned MaxIndex = OpVecTy->getMinNumElements() / VecTy->getMinNumElements();
+ assert(isPowerOf2_32(MaxIndex));
+ Ops[1] = Builder.CreateAnd(Ops[1], MaxIndex - 1);
Ops[1] = Builder.CreateMul(Ops[1],
ConstantInt::get(Ops[1]->getType(),
VecTy->getMinNumElements()));
@@ -2160,7 +2165,12 @@
{
ID = Intrinsic::experimental_vector_insert;
IntrinsicTypes = {ResultType, Ops[2]->getType()};
- ScalableVectorType *VecTy = cast<ScalableVectorType>(Ops[2]->getType());
+ auto *ResVecTy = cast<ScalableVectorType>(ResultType);
+ auto *VecTy = cast<ScalableVectorType>(Ops[2]->getType());
+ // Mask the index to the valid range.
+ unsigned MaxIndex = ResVecTy->getMinNumElements() / VecTy->getMinNumElements();
+ assert(isPowerOf2_32(MaxIndex));
+ Ops[1] = Builder.CreateAnd(Ops[1], MaxIndex - 1);
Ops[1] = Builder.CreateMul(Ops[1],
ConstantInt::get(Ops[1]->getType(),
VecTy->getMinNumElements()));