FreddyYe updated this revision to Diff 469529.
FreddyYe marked 3 inline comments as done.
FreddyYe added a comment.

Addressed the review comments. Thanks for the review!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D135932/new/

https://reviews.llvm.org/D135932

Files:
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Basic/BuiltinsX86.def
  clang/include/clang/Driver/Options.td
  clang/lib/Basic/Targets/X86.cpp
  clang/lib/Basic/Targets/X86.h
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/avxifmaintrin.h
  clang/lib/Headers/cpuid.h
  clang/lib/Headers/immintrin.h
  clang/test/CodeGen/attr-target-x86.c
  clang/test/CodeGen/avxifma-builtins.c
  clang/test/Driver/x86-target-features.c
  clang/test/Preprocessor/x86_target_features.c
  llvm/docs/ReleaseNotes.rst
  llvm/include/llvm/IR/IntrinsicsX86.td
  llvm/include/llvm/Support/X86TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86InstrAVX512.td
  llvm/lib/Target/X86/X86InstrFoldTables.cpp
  llvm/lib/Target/X86/X86InstrInfo.cpp
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86InstrSSE.td
  llvm/lib/Target/X86/X86IntrinsicsInfo.h
  llvm/test/CodeGen/X86/avx-ifma-intrinsics.ll
  llvm/test/MC/Disassembler/X86/avx-ifma-32.txt
  llvm/test/MC/Disassembler/X86/avx-ifma-64.txt
  llvm/test/MC/X86/avx-ifma-att-32.s
  llvm/test/MC/X86/avx-ifma-att-64.s
  llvm/test/MC/X86/avx-ifma-intel-32.s
  llvm/test/MC/X86/avx-ifma-intel-64.s

Index: llvm/test/MC/X86/avx-ifma-intel-64.s
===================================================================
--- /dev/null
+++ llvm/test/MC/X86/avx-ifma-intel-64.s
@@ -0,0 +1,114 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+avxifma -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymm14
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xe6]
+     {vex} vpmadd52huq ymm12, ymm13, ymm14
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmm14
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xe6]
+     {vex} vpmadd52huq xmm12, xmm13, xmm14
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x95,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+     {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+     {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x25,0x00,0x00,0x00,0x00]
+     {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x24,0x6d,0x00,0xfc,0xff,0xff]
+     {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa1,0xe0,0x0f,0x00,0x00]
+     {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
+
+// CHECK: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa2,0x00,0xf0,0xff,0xff]
+     {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x91,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+     {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+     {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x25,0x00,0x00,0x00,0x00]
+     {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x24,0x6d,0x00,0xfe,0xff,0xff]
+     {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa1,0xf0,0x07,0x00,0x00]
+     {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
+
+// CHECK: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa2,0x00,0xf8,0xff,0xff]
+     {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymm14
+// CHECK: encoding: [0xc4,0x42,0x95,0xb4,0xe6]
+     {vex} vpmadd52luq ymm12, ymm13, ymm14
+
+// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmm14
+// CHECK: encoding: [0xc4,0x42,0x91,0xb4,0xe6]
+     {vex} vpmadd52luq xmm12, xmm13, xmm14
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x95,0xb4,0xa4,0xf5,0x00,0x00,0x00,0x10]
+     {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0x42,0x95,0xb4,0xa4,0x80,0x23,0x01,0x00,0x00]
+     {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rip]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb4,0x25,0x00,0x00,0x00,0x00]
+     {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rip]
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb4,0x24,0x6d,0x00,0xfc,0xff,0xff]
+     {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb4,0xa1,0xe0,0x0f,0x00,0x00]
+     {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rcx + 4064]
+
+// CHECK: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0xc4,0x62,0x95,0xb4,0xa2,0x00,0xf0,0xff,0xff]
+     {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rdx - 4096]
+
+// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0xc4,0x22,0x91,0xb4,0xa4,0xf5,0x00,0x00,0x00,0x10]
+     {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0xc4,0x42,0x91,0xb4,0xa4,0x80,0x23,0x01,0x00,0x00]
+     {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rip]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb4,0x25,0x00,0x00,0x00,0x00]
+     {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rip]
+
+// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb4,0x24,0x6d,0x00,0xfe,0xff,0xff]
+     {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+
+// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb4,0xa1,0xf0,0x07,0x00,0x00]
+     {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rcx + 2032]
+
+// CHECK: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0xc4,0x62,0x91,0xb4,0xa2,0x00,0xf8,0xff,0xff]
+     {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rdx - 2048]
+
Index: llvm/test/MC/X86/avx-ifma-intel-32.s
===================================================================
--- /dev/null
+++ llvm/test/MC/X86/avx-ifma-intel-32.s
@@ -0,0 +1,114 @@
+// RUN: llvm-mc -triple i686-unknown-unknown -mattr=+avxifma -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: {vex} vpmadd52huq ymm2, ymm3, ymm4
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0xd4]
+     {vex} vpmadd52huq ymm2, ymm3, ymm4
+
+// CHECK: {vex} vpmadd52huq xmm2, xmm3, xmm4
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0xd4]
+     {vex} vpmadd52huq xmm2, xmm3, xmm4
+
+// CHECK: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x94,0xf4,0x00,0x00,0x00,0x10]
+     {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x94,0x87,0x23,0x01,0x00,0x00]
+     {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x10]
+     {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [eax]
+
+// CHECK: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x14,0x6d,0x00,0xfc,0xff,0xff]
+     {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x91,0xe0,0x0f,0x00,0x00]
+     {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x92,0x00,0xf0,0xff,0xff]
+     {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [edx - 4096]
+
+// CHECK: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x94,0xf4,0x00,0x00,0x00,0x10]
+     {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x94,0x87,0x23,0x01,0x00,0x00]
+     {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x10]
+     {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [eax]
+
+// CHECK: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x14,0x6d,0x00,0xfe,0xff,0xff]
+     {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x91,0xf0,0x07,0x00,0x00]
+     {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x92,0x00,0xf8,0xff,0xff]
+     {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [edx - 2048]
+
+// CHECK: {vex} vpmadd52luq ymm2, ymm3, ymm4
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0xd4]
+     {vex} vpmadd52luq ymm2, ymm3, ymm4
+
+// CHECK: {vex} vpmadd52luq xmm2, xmm3, xmm4
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0xd4]
+     {vex} vpmadd52luq xmm2, xmm3, xmm4
+
+// CHECK: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x94,0xf4,0x00,0x00,0x00,0x10]
+     {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x94,0x87,0x23,0x01,0x00,0x00]
+     {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x10]
+     {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [eax]
+
+// CHECK: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x14,0x6d,0x00,0xfc,0xff,0xff]
+     {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x91,0xe0,0x0f,0x00,0x00]
+     {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x92,0x00,0xf0,0xff,0xff]
+     {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [edx - 4096]
+
+// CHECK: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x94,0xf4,0x00,0x00,0x00,0x10]
+     {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x94,0x87,0x23,0x01,0x00,0x00]
+     {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [eax]
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x10]
+     {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [eax]
+
+// CHECK: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x14,0x6d,0x00,0xfe,0xff,0xff]
+     {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x91,0xf0,0x07,0x00,0x00]
+     {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x92,0x00,0xf8,0xff,0xff]
+     {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [edx - 2048]
+
Index: llvm/test/MC/X86/avx-ifma-att-64.s
===================================================================
--- /dev/null
+++ llvm/test/MC/X86/avx-ifma-att-64.s
@@ -0,0 +1,114 @@
+// RUN: llvm-mc -triple=x86_64-unknown-unknown -mattr=+avxifma --show-encoding < %s  | FileCheck %s
+
+// CHECK: {vex} vpmadd52huq %ymm14, %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xe6]
+     {vex} vpmadd52huq %ymm14, %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52huq %xmm14, %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xe6]
+     {vex} vpmadd52huq %xmm14, %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52huq  268435456(%rbp,%r14,8), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x22,0x95,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+     {vex} vpmadd52huq  268435456(%rbp,%r14,8), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52huq  291(%r8,%rax,4), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x42,0x95,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+     {vex} vpmadd52huq  291(%r8,%rax,4), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52huq  (%rip), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x25,0x00,0x00,0x00,0x00]
+     {vex} vpmadd52huq  (%rip), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52huq  -1024(,%rbp,2), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0x24,0x6d,0x00,0xfc,0xff,0xff]
+     {vex} vpmadd52huq  -1024(,%rbp,2), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52huq  4064(%rcx), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa1,0xe0,0x0f,0x00,0x00]
+     {vex} vpmadd52huq  4064(%rcx), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52huq  -4096(%rdx), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x62,0x95,0xb5,0xa2,0x00,0xf0,0xff,0xff]
+     {vex} vpmadd52huq  -4096(%rdx), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52huq  268435456(%rbp,%r14,8), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x22,0x91,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10]
+     {vex} vpmadd52huq  268435456(%rbp,%r14,8), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52huq  291(%r8,%rax,4), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x42,0x91,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00]
+     {vex} vpmadd52huq  291(%r8,%rax,4), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52huq  (%rip), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x25,0x00,0x00,0x00,0x00]
+     {vex} vpmadd52huq  (%rip), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52huq  -512(,%rbp,2), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0x24,0x6d,0x00,0xfe,0xff,0xff]
+     {vex} vpmadd52huq  -512(,%rbp,2), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52huq  2032(%rcx), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa1,0xf0,0x07,0x00,0x00]
+     {vex} vpmadd52huq  2032(%rcx), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52huq  -2048(%rdx), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x62,0x91,0xb5,0xa2,0x00,0xf8,0xff,0xff]
+     {vex} vpmadd52huq  -2048(%rdx), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52luq %ymm14, %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x42,0x95,0xb4,0xe6]
+     {vex} vpmadd52luq %ymm14, %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52luq %xmm14, %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x42,0x91,0xb4,0xe6]
+     {vex} vpmadd52luq %xmm14, %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52luq  268435456(%rbp,%r14,8), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x22,0x95,0xb4,0xa4,0xf5,0x00,0x00,0x00,0x10]
+     {vex} vpmadd52luq  268435456(%rbp,%r14,8), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52luq  291(%r8,%rax,4), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x42,0x95,0xb4,0xa4,0x80,0x23,0x01,0x00,0x00]
+     {vex} vpmadd52luq  291(%r8,%rax,4), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52luq  (%rip), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x62,0x95,0xb4,0x25,0x00,0x00,0x00,0x00]
+     {vex} vpmadd52luq  (%rip), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52luq  -1024(,%rbp,2), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x62,0x95,0xb4,0x24,0x6d,0x00,0xfc,0xff,0xff]
+     {vex} vpmadd52luq  -1024(,%rbp,2), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52luq  4064(%rcx), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x62,0x95,0xb4,0xa1,0xe0,0x0f,0x00,0x00]
+     {vex} vpmadd52luq  4064(%rcx), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52luq  -4096(%rdx), %ymm13, %ymm12
+// CHECK: encoding: [0xc4,0x62,0x95,0xb4,0xa2,0x00,0xf0,0xff,0xff]
+     {vex} vpmadd52luq  -4096(%rdx), %ymm13, %ymm12
+
+// CHECK: {vex} vpmadd52luq  268435456(%rbp,%r14,8), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x22,0x91,0xb4,0xa4,0xf5,0x00,0x00,0x00,0x10]
+     {vex} vpmadd52luq  268435456(%rbp,%r14,8), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52luq  291(%r8,%rax,4), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x42,0x91,0xb4,0xa4,0x80,0x23,0x01,0x00,0x00]
+     {vex} vpmadd52luq  291(%r8,%rax,4), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52luq  (%rip), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x62,0x91,0xb4,0x25,0x00,0x00,0x00,0x00]
+     {vex} vpmadd52luq  (%rip), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52luq  -512(,%rbp,2), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x62,0x91,0xb4,0x24,0x6d,0x00,0xfe,0xff,0xff]
+     {vex} vpmadd52luq  -512(,%rbp,2), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52luq  2032(%rcx), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x62,0x91,0xb4,0xa1,0xf0,0x07,0x00,0x00]
+     {vex} vpmadd52luq  2032(%rcx), %xmm13, %xmm12
+
+// CHECK: {vex} vpmadd52luq  -2048(%rdx), %xmm13, %xmm12
+// CHECK: encoding: [0xc4,0x62,0x91,0xb4,0xa2,0x00,0xf8,0xff,0xff]
+     {vex} vpmadd52luq  -2048(%rdx), %xmm13, %xmm12
+
Index: llvm/test/MC/X86/avx-ifma-att-32.s
===================================================================
--- /dev/null
+++ llvm/test/MC/X86/avx-ifma-att-32.s
@@ -0,0 +1,114 @@
+// RUN: llvm-mc -triple i686-unknown-unknown -mattr=+avxifma --show-encoding %s | FileCheck %s
+
+// CHECK: {vex} vpmadd52huq %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0xd4]
+     {vex} vpmadd52huq %ymm4, %ymm3, %ymm2
+
+// CHECK: {vex} vpmadd52huq %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0xd4]
+     {vex} vpmadd52huq %xmm4, %xmm3, %xmm2
+
+// CHECK: {vex} vpmadd52huq  268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x94,0xf4,0x00,0x00,0x00,0x10]
+     {vex} vpmadd52huq  268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: {vex} vpmadd52huq  291(%edi,%eax,4), %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x94,0x87,0x23,0x01,0x00,0x00]
+     {vex} vpmadd52huq  291(%edi,%eax,4), %ymm3, %ymm2
+
+// CHECK: {vex} vpmadd52huq  (%eax), %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x10]
+     {vex} vpmadd52huq  (%eax), %ymm3, %ymm2
+
+// CHECK: {vex} vpmadd52huq  -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x14,0x6d,0x00,0xfc,0xff,0xff]
+     {vex} vpmadd52huq  -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: {vex} vpmadd52huq  4064(%ecx), %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x91,0xe0,0x0f,0x00,0x00]
+     {vex} vpmadd52huq  4064(%ecx), %ymm3, %ymm2
+
+// CHECK: {vex} vpmadd52huq  -4096(%edx), %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb5,0x92,0x00,0xf0,0xff,0xff]
+     {vex} vpmadd52huq  -4096(%edx), %ymm3, %ymm2
+
+// CHECK: {vex} vpmadd52huq  268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x94,0xf4,0x00,0x00,0x00,0x10]
+     {vex} vpmadd52huq  268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: {vex} vpmadd52huq  291(%edi,%eax,4), %xmm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x94,0x87,0x23,0x01,0x00,0x00]
+     {vex} vpmadd52huq  291(%edi,%eax,4), %xmm3, %xmm2
+
+// CHECK: {vex} vpmadd52huq  (%eax), %xmm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x10]
+     {vex} vpmadd52huq  (%eax), %xmm3, %xmm2
+
+// CHECK: {vex} vpmadd52huq  -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x14,0x6d,0x00,0xfe,0xff,0xff]
+     {vex} vpmadd52huq  -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: {vex} vpmadd52huq  2032(%ecx), %xmm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x91,0xf0,0x07,0x00,0x00]
+     {vex} vpmadd52huq  2032(%ecx), %xmm3, %xmm2
+
+// CHECK: {vex} vpmadd52huq  -2048(%edx), %xmm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb5,0x92,0x00,0xf8,0xff,0xff]
+     {vex} vpmadd52huq  -2048(%edx), %xmm3, %xmm2
+
+// CHECK: {vex} vpmadd52luq %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0xd4]
+     {vex} vpmadd52luq %ymm4, %ymm3, %ymm2
+
+// CHECK: {vex} vpmadd52luq %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0xd4]
+     {vex} vpmadd52luq %xmm4, %xmm3, %xmm2
+
+// CHECK: {vex} vpmadd52luq  268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x94,0xf4,0x00,0x00,0x00,0x10]
+     {vex} vpmadd52luq  268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: {vex} vpmadd52luq  291(%edi,%eax,4), %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x94,0x87,0x23,0x01,0x00,0x00]
+     {vex} vpmadd52luq  291(%edi,%eax,4), %ymm3, %ymm2
+
+// CHECK: {vex} vpmadd52luq  (%eax), %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x10]
+     {vex} vpmadd52luq  (%eax), %ymm3, %ymm2
+
+// CHECK: {vex} vpmadd52luq  -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x14,0x6d,0x00,0xfc,0xff,0xff]
+     {vex} vpmadd52luq  -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: {vex} vpmadd52luq  4064(%ecx), %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x91,0xe0,0x0f,0x00,0x00]
+     {vex} vpmadd52luq  4064(%ecx), %ymm3, %ymm2
+
+// CHECK: {vex} vpmadd52luq  -4096(%edx), %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0xe5,0xb4,0x92,0x00,0xf0,0xff,0xff]
+     {vex} vpmadd52luq  -4096(%edx), %ymm3, %ymm2
+
+// CHECK: {vex} vpmadd52luq  268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x94,0xf4,0x00,0x00,0x00,0x10]
+     {vex} vpmadd52luq  268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: {vex} vpmadd52luq  291(%edi,%eax,4), %xmm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x94,0x87,0x23,0x01,0x00,0x00]
+     {vex} vpmadd52luq  291(%edi,%eax,4), %xmm3, %xmm2
+
+// CHECK: {vex} vpmadd52luq  (%eax), %xmm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x10]
+     {vex} vpmadd52luq  (%eax), %xmm3, %xmm2
+
+// CHECK: {vex} vpmadd52luq  -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x14,0x6d,0x00,0xfe,0xff,0xff]
+     {vex} vpmadd52luq  -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: {vex} vpmadd52luq  2032(%ecx), %xmm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x91,0xf0,0x07,0x00,0x00]
+     {vex} vpmadd52luq  2032(%ecx), %xmm3, %xmm2
+
+// CHECK: {vex} vpmadd52luq  -2048(%edx), %xmm3, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xe1,0xb4,0x92,0x00,0xf8,0xff,0xff]
+     {vex} vpmadd52luq  -2048(%edx), %xmm3, %xmm2
+
Index: llvm/test/MC/Disassembler/X86/avx-ifma-64.txt
===================================================================
--- /dev/null
+++ llvm/test/MC/Disassembler/X86/avx-ifma-64.txt
@@ -0,0 +1,115 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:   {vex} vpmadd52huq %ymm14, %ymm13, %ymm12
+# INTEL: {vex} vpmadd52huq ymm12, ymm13, ymm14
+0xc4,0x42,0x95,0xb5,0xe6
+
+# ATT:   {vex} vpmadd52huq %xmm14, %xmm13, %xmm12
+# INTEL: {vex} vpmadd52huq xmm12, xmm13, xmm14
+0xc4,0x42,0x91,0xb5,0xe6
+
+# ATT:   {vex} vpmadd52huq  268435456(%rbp,%r14,8), %ymm13, %ymm12
+# INTEL: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+0xc4,0x22,0x95,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   {vex} vpmadd52huq  291(%r8,%rax,4), %ymm13, %ymm12
+# INTEL: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+0xc4,0x42,0x95,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {vex} vpmadd52huq  (%rip), %ymm13, %ymm12
+# INTEL: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rip]
+0xc4,0x62,0x95,0xb5,0x25,0x00,0x00,0x00,0x00
+
+# ATT:   {vex} vpmadd52huq  -1024(,%rbp,2), %ymm13, %ymm12
+# INTEL: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+0xc4,0x62,0x95,0xb5,0x24,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:   {vex} vpmadd52huq  4064(%rcx), %ymm13, %ymm12
+# INTEL: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rcx + 4064]
+0xc4,0x62,0x95,0xb5,0xa1,0xe0,0x0f,0x00,0x00
+
+# ATT:   {vex} vpmadd52huq  -4096(%rdx), %ymm13, %ymm12
+# INTEL: {vex} vpmadd52huq ymm12, ymm13, ymmword ptr [rdx - 4096]
+0xc4,0x62,0x95,0xb5,0xa2,0x00,0xf0,0xff,0xff
+
+# ATT:   {vex} vpmadd52huq  268435456(%rbp,%r14,8), %xmm13, %xmm12
+# INTEL: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+0xc4,0x22,0x91,0xb5,0xa4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   {vex} vpmadd52huq  291(%r8,%rax,4), %xmm13, %xmm12
+# INTEL: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+0xc4,0x42,0x91,0xb5,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {vex} vpmadd52huq  (%rip), %xmm13, %xmm12
+# INTEL: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rip]
+0xc4,0x62,0x91,0xb5,0x25,0x00,0x00,0x00,0x00
+
+# ATT:   {vex} vpmadd52huq  -512(,%rbp,2), %xmm13, %xmm12
+# INTEL: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+0xc4,0x62,0x91,0xb5,0x24,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:   {vex} vpmadd52huq  2032(%rcx), %xmm13, %xmm12
+# INTEL: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rcx + 2032]
+0xc4,0x62,0x91,0xb5,0xa1,0xf0,0x07,0x00,0x00
+
+# ATT:   {vex} vpmadd52huq  -2048(%rdx), %xmm13, %xmm12
+# INTEL: {vex} vpmadd52huq xmm12, xmm13, xmmword ptr [rdx - 2048]
+0xc4,0x62,0x91,0xb5,0xa2,0x00,0xf8,0xff,0xff
+
+# ATT:   {vex} vpmadd52luq %ymm14, %ymm13, %ymm12
+# INTEL: {vex} vpmadd52luq ymm12, ymm13, ymm14
+0xc4,0x42,0x95,0xb4,0xe6
+
+# ATT:   {vex} vpmadd52luq %xmm14, %xmm13, %xmm12
+# INTEL: {vex} vpmadd52luq xmm12, xmm13, xmm14
+0xc4,0x42,0x91,0xb4,0xe6
+
+# ATT:   {vex} vpmadd52luq  268435456(%rbp,%r14,8), %ymm13, %ymm12
+# INTEL: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rbp + 8*r14 + 268435456]
+0xc4,0x22,0x95,0xb4,0xa4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   {vex} vpmadd52luq  291(%r8,%rax,4), %ymm13, %ymm12
+# INTEL: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [r8 + 4*rax + 291]
+0xc4,0x42,0x95,0xb4,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {vex} vpmadd52luq  (%rip), %ymm13, %ymm12
+# INTEL: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rip]
+0xc4,0x62,0x95,0xb4,0x25,0x00,0x00,0x00,0x00
+
+# ATT:   {vex} vpmadd52luq  -1024(,%rbp,2), %ymm13, %ymm12
+# INTEL: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [2*rbp - 1024]
+0xc4,0x62,0x95,0xb4,0x24,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:   {vex} vpmadd52luq  4064(%rcx), %ymm13, %ymm12
+# INTEL: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rcx + 4064]
+0xc4,0x62,0x95,0xb4,0xa1,0xe0,0x0f,0x00,0x00
+
+# ATT:   {vex} vpmadd52luq  -4096(%rdx), %ymm13, %ymm12
+# INTEL: {vex} vpmadd52luq ymm12, ymm13, ymmword ptr [rdx - 4096]
+0xc4,0x62,0x95,0xb4,0xa2,0x00,0xf0,0xff,0xff
+
+# ATT:   {vex} vpmadd52luq  268435456(%rbp,%r14,8), %xmm13, %xmm12
+# INTEL: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rbp + 8*r14 + 268435456]
+0xc4,0x22,0x91,0xb4,0xa4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   {vex} vpmadd52luq  291(%r8,%rax,4), %xmm13, %xmm12
+# INTEL: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [r8 + 4*rax + 291]
+0xc4,0x42,0x91,0xb4,0xa4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   {vex} vpmadd52luq  (%rip), %xmm13, %xmm12
+# INTEL: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rip]
+0xc4,0x62,0x91,0xb4,0x25,0x00,0x00,0x00,0x00
+
+# ATT:   {vex} vpmadd52luq  -512(,%rbp,2), %xmm13, %xmm12
+# INTEL: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [2*rbp - 512]
+0xc4,0x62,0x91,0xb4,0x24,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:   {vex} vpmadd52luq  2032(%rcx), %xmm13, %xmm12
+# INTEL: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rcx + 2032]
+0xc4,0x62,0x91,0xb4,0xa1,0xf0,0x07,0x00,0x00
+
+# ATT:   {vex} vpmadd52luq  -2048(%rdx), %xmm13, %xmm12
+# INTEL: {vex} vpmadd52luq xmm12, xmm13, xmmword ptr [rdx - 2048]
+0xc4,0x62,0x91,0xb4,0xa2,0x00,0xf8,0xff,0xff
+
Index: llvm/test/MC/Disassembler/X86/avx-ifma-32.txt
===================================================================
--- /dev/null
+++ llvm/test/MC/Disassembler/X86/avx-ifma-32.txt
@@ -0,0 +1,115 @@
+# RUN: llvm-mc --disassemble %s -triple=i686 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i686 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:   {vex} vpmadd52huq %ymm4, %ymm3, %ymm2
+# INTEL: {vex} vpmadd52huq ymm2, ymm3, ymm4
+0xc4,0xe2,0xe5,0xb5,0xd4
+
+# ATT:   {vex} vpmadd52huq %xmm4, %xmm3, %xmm2
+# INTEL: {vex} vpmadd52huq xmm2, xmm3, xmm4
+0xc4,0xe2,0xe1,0xb5,0xd4
+
+# ATT:   {vex} vpmadd52huq  268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0xe5,0xb5,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   {vex} vpmadd52huq  291(%edi,%eax,4), %ymm3, %ymm2
+# INTEL: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+0xc4,0xe2,0xe5,0xb5,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:   {vex} vpmadd52huq  (%eax), %ymm3, %ymm2
+# INTEL: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [eax]
+0xc4,0xe2,0xe5,0xb5,0x10
+
+# ATT:   {vex} vpmadd52huq  -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0xc4,0xe2,0xe5,0xb5,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:   {vex} vpmadd52huq  4064(%ecx), %ymm3, %ymm2
+# INTEL: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [ecx + 4064]
+0xc4,0xe2,0xe5,0xb5,0x91,0xe0,0x0f,0x00,0x00
+
+# ATT:   {vex} vpmadd52huq  -4096(%edx), %ymm3, %ymm2
+# INTEL: {vex} vpmadd52huq ymm2, ymm3, ymmword ptr [edx - 4096]
+0xc4,0xe2,0xe5,0xb5,0x92,0x00,0xf0,0xff,0xff
+
+# ATT:   {vex} vpmadd52huq  268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0xe1,0xb5,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   {vex} vpmadd52huq  291(%edi,%eax,4), %xmm3, %xmm2
+# INTEL: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+0xc4,0xe2,0xe1,0xb5,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:   {vex} vpmadd52huq  (%eax), %xmm3, %xmm2
+# INTEL: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [eax]
+0xc4,0xe2,0xe1,0xb5,0x10
+
+# ATT:   {vex} vpmadd52huq  -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0xc4,0xe2,0xe1,0xb5,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:   {vex} vpmadd52huq  2032(%ecx), %xmm3, %xmm2
+# INTEL: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [ecx + 2032]
+0xc4,0xe2,0xe1,0xb5,0x91,0xf0,0x07,0x00,0x00
+
+# ATT:   {vex} vpmadd52huq  -2048(%edx), %xmm3, %xmm2
+# INTEL: {vex} vpmadd52huq xmm2, xmm3, xmmword ptr [edx - 2048]
+0xc4,0xe2,0xe1,0xb5,0x92,0x00,0xf8,0xff,0xff
+
+# ATT:   {vex} vpmadd52luq %ymm4, %ymm3, %ymm2
+# INTEL: {vex} vpmadd52luq ymm2, ymm3, ymm4
+0xc4,0xe2,0xe5,0xb4,0xd4
+
+# ATT:   {vex} vpmadd52luq %xmm4, %xmm3, %xmm2
+# INTEL: {vex} vpmadd52luq xmm2, xmm3, xmm4
+0xc4,0xe2,0xe1,0xb4,0xd4
+
+# ATT:   {vex} vpmadd52luq  268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0xe5,0xb4,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   {vex} vpmadd52luq  291(%edi,%eax,4), %ymm3, %ymm2
+# INTEL: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+0xc4,0xe2,0xe5,0xb4,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:   {vex} vpmadd52luq  (%eax), %ymm3, %ymm2
+# INTEL: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [eax]
+0xc4,0xe2,0xe5,0xb4,0x10
+
+# ATT:   {vex} vpmadd52luq  -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0xc4,0xe2,0xe5,0xb4,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:   {vex} vpmadd52luq  4064(%ecx), %ymm3, %ymm2
+# INTEL: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [ecx + 4064]
+0xc4,0xe2,0xe5,0xb4,0x91,0xe0,0x0f,0x00,0x00
+
+# ATT:   {vex} vpmadd52luq  -4096(%edx), %ymm3, %ymm2
+# INTEL: {vex} vpmadd52luq ymm2, ymm3, ymmword ptr [edx - 4096]
+0xc4,0xe2,0xe5,0xb4,0x92,0x00,0xf0,0xff,0xff
+
+# ATT:   {vex} vpmadd52luq  268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0xc4,0xe2,0xe1,0xb4,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:   {vex} vpmadd52luq  291(%edi,%eax,4), %xmm3, %xmm2
+# INTEL: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+0xc4,0xe2,0xe1,0xb4,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:   {vex} vpmadd52luq  (%eax), %xmm3, %xmm2
+# INTEL: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [eax]
+0xc4,0xe2,0xe1,0xb4,0x10
+
+# ATT:   {vex} vpmadd52luq  -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0xc4,0xe2,0xe1,0xb4,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:   {vex} vpmadd52luq  2032(%ecx), %xmm3, %xmm2
+# INTEL: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [ecx + 2032]
+0xc4,0xe2,0xe1,0xb4,0x91,0xf0,0x07,0x00,0x00
+
+# ATT:   {vex} vpmadd52luq  -2048(%edx), %xmm3, %xmm2
+# INTEL: {vex} vpmadd52luq xmm2, xmm3, xmmword ptr [edx - 2048]
+0xc4,0xe2,0xe1,0xb4,0x92,0x00,0xf8,0xff,0xff
+
Index: llvm/test/CodeGen/X86/avx-ifma-intrinsics.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/avx-ifma-intrinsics.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avxifma --show-mc-encoding | FileCheck %s --check-prefixes=AVX-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma --show-mc-encoding | FileCheck %s --check-prefixes=AVX-X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avxifma,+avx512ifma,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=AVX512-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avxifma,+avx512ifma,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=AVX512-X64
+
+declare <2 x i64> @llvm.x86.avx.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
+
+define <2 x i64>@test_int_x86_avx_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX-X86-LABEL: test_int_x86_avx_vpmadd52h_uq_128:
+; AVX-X86:       # %bb.0:
+; AVX-X86-NEXT:    {vex} vpmadd52huq %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xb5,0xc2]
+; AVX-X86-NEXT:    retl # encoding: [0xc3]
+;
+; AVX-X64-LABEL: test_int_x86_avx_vpmadd52h_uq_128:
+; AVX-X64:       # %bb.0:
+; AVX-X64-NEXT:    {vex} vpmadd52huq %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xb5,0xc2]
+; AVX-X64-NEXT:    retq # encoding: [0xc3]
+;
+; AVX512-X86-LABEL: test_int_x86_avx_vpmadd52h_uq_128:
+; AVX512-X86:       # %bb.0:
+; AVX512-X86-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0xb5,0xc2]
+; AVX512-X86-NEXT:    retl # encoding: [0xc3]
+;
+; AVX512-X64-LABEL: test_int_x86_avx_vpmadd52h_uq_128:
+; AVX512-X64:       # %bb.0:
+; AVX512-X64-NEXT:    vpmadd52huq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0xb5,0xc2]
+; AVX512-X64-NEXT:    retq # encoding: [0xc3]
+  %res = call <2 x i64> @llvm.x86.avx.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
+  ret <2 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
+
+define <4 x i64>@test_int_x86_avx_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
+; AVX-X86-LABEL: test_int_x86_avx_vpmadd52h_uq_256:
+; AVX-X86:       # %bb.0:
+; AVX-X86-NEXT:    {vex} vpmadd52huq %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xb5,0xc2]
+; AVX-X86-NEXT:    retl # encoding: [0xc3]
+;
+; AVX-X64-LABEL: test_int_x86_avx_vpmadd52h_uq_256:
+; AVX-X64:       # %bb.0:
+; AVX-X64-NEXT:    {vex} vpmadd52huq %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xb5,0xc2]
+; AVX-X64-NEXT:    retq # encoding: [0xc3]
+;
+; AVX512-X86-LABEL: test_int_x86_avx_vpmadd52h_uq_256:
+; AVX512-X86:       # %bb.0:
+; AVX512-X86-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0xb5,0xc2]
+; AVX512-X86-NEXT:    retl # encoding: [0xc3]
+;
+; AVX512-X64-LABEL: test_int_x86_avx_vpmadd52h_uq_256:
+; AVX512-X64:       # %bb.0:
+; AVX512-X64-NEXT:    vpmadd52huq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0xb5,0xc2]
+; AVX512-X64-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x i64> @llvm.x86.avx.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
+  ret <4 x i64> %res
+}
+
+declare <2 x i64> @llvm.x86.avx.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>)
+
+define <2 x i64>@test_int_x86_avx_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
+; AVX-X86-LABEL: test_int_x86_avx_vpmadd52l_uq_128:
+; AVX-X86:       # %bb.0:
+; AVX-X86-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xb4,0xc2]
+; AVX-X86-NEXT:    retl # encoding: [0xc3]
+;
+; AVX-X64-LABEL: test_int_x86_avx_vpmadd52l_uq_128:
+; AVX-X64:       # %bb.0:
+; AVX-X64-NEXT:    {vex} vpmadd52luq %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xb4,0xc2]
+; AVX-X64-NEXT:    retq # encoding: [0xc3]
+;
+; AVX512-X86-LABEL: test_int_x86_avx_vpmadd52l_uq_128:
+; AVX512-X86:       # %bb.0:
+; AVX512-X86-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0xb4,0xc2]
+; AVX512-X86-NEXT:    retl # encoding: [0xc3]
+;
+; AVX512-X64-LABEL: test_int_x86_avx_vpmadd52l_uq_128:
+; AVX512-X64:       # %bb.0:
+; AVX512-X64-NEXT:    vpmadd52luq %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0xb4,0xc2]
+; AVX512-X64-NEXT:    retq # encoding: [0xc3]
+  %res = call <2 x i64> @llvm.x86.avx.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2)
+  ret <2 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>)
+
+define <4 x i64>@test_int_x86_avx_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
+; AVX-X86-LABEL: test_int_x86_avx_vpmadd52l_uq_256:
+; AVX-X86:       # %bb.0:
+; AVX-X86-NEXT:    {vex} vpmadd52luq %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xb4,0xc2]
+; AVX-X86-NEXT:    retl # encoding: [0xc3]
+;
+; AVX-X64-LABEL: test_int_x86_avx_vpmadd52l_uq_256:
+; AVX-X64:       # %bb.0:
+; AVX-X64-NEXT:    {vex} vpmadd52luq %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xb4,0xc2]
+; AVX-X64-NEXT:    retq # encoding: [0xc3]
+;
+; AVX512-X86-LABEL: test_int_x86_avx_vpmadd52l_uq_256:
+; AVX512-X86:       # %bb.0:
+; AVX512-X86-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0xb4,0xc2]
+; AVX512-X86-NEXT:    retl # encoding: [0xc3]
+;
+; AVX512-X64-LABEL: test_int_x86_avx_vpmadd52l_uq_256:
+; AVX512-X64:       # %bb.0:
+; AVX512-X64-NEXT:    vpmadd52luq %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0xb4,0xc2]
+; AVX512-X64-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x i64> @llvm.x86.avx.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2)
+  ret <4 x i64> %res
+}
Index: llvm/lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -368,6 +368,10 @@
   X86_INTRINSIC_DATA(avx_vpermilvar_pd_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
   X86_INTRINSIC_DATA(avx_vpermilvar_ps,     INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
   X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
+  X86_INTRINSIC_DATA(avx_vpmadd52h_uq_128, IFMA_OP, X86ISD::VPMADD52H, 0),
+  X86_INTRINSIC_DATA(avx_vpmadd52h_uq_256, IFMA_OP, X86ISD::VPMADD52H, 0),
+  X86_INTRINSIC_DATA(avx_vpmadd52l_uq_128, IFMA_OP, X86ISD::VPMADD52L, 0),
+  X86_INTRINSIC_DATA(avx_vpmadd52l_uq_256, IFMA_OP, X86ISD::VPMADD52L, 0),
   X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
Index: llvm/lib/Target/X86/X86InstrSSE.td
===================================================================
--- llvm/lib/Target/X86/X86InstrSSE.td
+++ llvm/lib/Target/X86/X86InstrSSE.td
@@ -8123,3 +8123,40 @@
                                              X86GF2P8affineqb>, TAPD;
 }
 
+let Predicates = [HasAVXIFMA, NoVLX_Or_NoIFMA], Constraints = "$src1 = $dst",
+    checkVEXPredicate = 1 in
+multiclass avx_ifma_rm<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+  // NOTE: The SDNode has the multiply operands first and the add last so that
+  // tablegen can autogenerate commuted load patterns. rm forms fold the load.
+  let isCommutable = 1 in
+    def rr  : AVX8I<opc, MRMSrcReg, (outs VR128:$dst),
+               (ins VR128:$src1, VR128:$src2, VR128:$src3),
+               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+               [(set VR128:$dst, (v2i64 (OpNode VR128:$src2,
+                                         VR128:$src3, VR128:$src1)))]>,
+               VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
+    def rm  : AVX8I<opc, MRMSrcMem, (outs VR128:$dst),
+               (ins VR128:$src1, VR128:$src2, i128mem:$src3),
+               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+               [(set VR128:$dst, (v2i64 (OpNode VR128:$src2,
+                                        (loadv2i64 addr:$src3), VR128:$src1)))]>,
+               VEX_4V, Sched<[SchedWriteVecIMul.XMM.Folded,
+                              SchedWriteVecIMul.XMM.ReadAfterFold]>;
+  let isCommutable = 1 in
+    def Yrr  : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
+               (ins VR256:$src1, VR256:$src2, VR256:$src3),
+               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+               [(set VR256:$dst, (v4i64 (OpNode VR256:$src2,
+                                         VR256:$src3, VR256:$src1)))]>,
+               VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
+    def Yrm  : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
+               (ins VR256:$src1, VR256:$src2, i256mem:$src3),
+               !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+               [(set VR256:$dst, (v4i64 (OpNode VR256:$src2,
+                                        (loadv4i64 addr:$src3), VR256:$src1)))]>,
+               VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM.Folded,
+                                     SchedWriteVecIMul.YMM.ReadAfterFold]>;
+}
+
+defm VPMADD52HUQ : avx_ifma_rm<0xb5, "vpmadd52huq", x86vpmadd52h>, VEX_W, ExplicitVEXPrefix;
+defm VPMADD52LUQ : avx_ifma_rm<0xb4, "vpmadd52luq", x86vpmadd52l>, VEX_W, ExplicitVEXPrefix;
Index: llvm/lib/Target/X86/X86InstrInfo.td
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.td
+++ llvm/lib/Target/X86/X86InstrInfo.td
@@ -949,6 +949,8 @@
 def HasVBMI      : Predicate<"Subtarget->hasVBMI()">;
 def HasVBMI2     : Predicate<"Subtarget->hasVBMI2()">;
 def HasIFMA      : Predicate<"Subtarget->hasIFMA()">;
+def HasAVXIFMA   : Predicate<"Subtarget->hasAVXIFMA()">;
+def NoVLX_Or_NoIFMA : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasIFMA()">;
 def HasRTM       : Predicate<"Subtarget->hasRTM()">;
 def HasADX       : Predicate<"Subtarget->hasADX()">;
 def HasSHA       : Predicate<"Subtarget->hasSHA()">;
Index: llvm/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -2568,6 +2568,8 @@
   case X86::VPDPWSSDSZr:
   case X86::VPDPWSSDSZrk:
   case X86::VPDPWSSDSZrkz:
+  case X86::VPMADD52HUQrr:
+  case X86::VPMADD52HUQYrr:
   case X86::VPMADD52HUQZ128r:
   case X86::VPMADD52HUQZ128rk:
   case X86::VPMADD52HUQZ128rkz:
@@ -2577,6 +2579,8 @@
   case X86::VPMADD52HUQZr:
   case X86::VPMADD52HUQZrk:
   case X86::VPMADD52HUQZrkz:
+  case X86::VPMADD52LUQrr:
+  case X86::VPMADD52LUQYrr:
   case X86::VPMADD52LUQZ128r:
   case X86::VPMADD52LUQZ128rk:
   case X86::VPMADD52LUQZ128rkz:
Index: llvm/lib/Target/X86/X86InstrFoldTables.cpp
===================================================================
--- llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -4103,12 +4103,16 @@
   { X86::VPLZCNTQZ128rrk,            X86::VPLZCNTQZ128rmk,            0 },
   { X86::VPLZCNTQZ256rrk,            X86::VPLZCNTQZ256rmk,            0 },
   { X86::VPLZCNTQZrrk,               X86::VPLZCNTQZrmk,               0 },
+  { X86::VPMADD52HUQYrr,             X86::VPMADD52HUQYrm,             0 },
   { X86::VPMADD52HUQZ128r,           X86::VPMADD52HUQZ128m,           0 },
   { X86::VPMADD52HUQZ256r,           X86::VPMADD52HUQZ256m,           0 },
   { X86::VPMADD52HUQZr,              X86::VPMADD52HUQZm,              0 },
+  { X86::VPMADD52HUQrr,              X86::VPMADD52HUQrm,              0 },
+  { X86::VPMADD52LUQYrr,             X86::VPMADD52LUQYrm,             0 },
   { X86::VPMADD52LUQZ128r,           X86::VPMADD52LUQZ128m,           0 },
   { X86::VPMADD52LUQZ256r,           X86::VPMADD52LUQZ256m,           0 },
   { X86::VPMADD52LUQZr,              X86::VPMADD52LUQZm,              0 },
+  { X86::VPMADD52LUQrr,              X86::VPMADD52LUQrm,              0 },
   { X86::VPMADDUBSWZ128rrkz,         X86::VPMADDUBSWZ128rmkz,         0 },
   { X86::VPMADDUBSWZ256rrkz,         X86::VPMADDUBSWZ256rmkz,         0 },
   { X86::VPMADDUBSWZrrkz,            X86::VPMADDUBSWZrmkz,            0 },
Index: llvm/lib/Target/X86/X86InstrAVX512.td
===================================================================
--- llvm/lib/Target/X86/X86InstrAVX512.td
+++ llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7464,10 +7464,10 @@
 
 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                          SchedWriteVecIMul, avx512vl_i64_info>,
-                                         VEX_W;
+                                         VEX_W, NotEVEX2VEXConvertible;
 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                          SchedWriteVecIMul, avx512vl_i64_info>,
-                                         VEX_W;
+                                         VEX_W, NotEVEX2VEXConvertible;
 
 //===----------------------------------------------------------------------===//
 // AVX-512  Scalar convert from sign integer to float/double
Index: llvm/lib/Target/X86/X86.td
===================================================================
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -155,6 +155,9 @@
 def FeatureVBMI2    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
                       "Enable AVX-512 further Vector Byte Manipulation Instructions",
                                       [FeatureBWI]>;
+def FeatureAVXIFMA    : SubtargetFeature<"avxifma", "HasAVXIFMA", "true",
+                           "Enable AVX-IFMA",
+                           [FeatureAVX2]>;
 def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
                       "Enable AVX-512 Integer Fused Multiple-Add",
                                       [FeatureAVX512]>;
Index: llvm/lib/Support/X86TargetParser.cpp
===================================================================
--- llvm/lib/Support/X86TargetParser.cpp
+++ llvm/lib/Support/X86TargetParser.cpp
@@ -582,6 +582,7 @@
 constexpr FeatureBitset ImpliedFeaturesHRESET = {};
 
 constexpr FeatureBitset ImpliedFeaturesPREFETCHI = {};
+constexpr FeatureBitset ImpliedFeaturesAVXIFMA = FeatureAVX2;
 constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
     FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
 // Key Locker Features
Index: llvm/lib/Support/Host.cpp
===================================================================
--- llvm/lib/Support/Host.cpp
+++ llvm/lib/Support/Host.cpp
@@ -1808,6 +1808,7 @@
   Features["avxvnni"]    = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
   Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
   Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
+  Features["avxifma"]    = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
   Features["prefetchi"]  = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
 
   bool HasLeafD = MaxLevel >= 0xd &&
Index: llvm/include/llvm/Support/X86TargetParser.def
===================================================================
--- llvm/include/llvm/Support/X86TargetParser.def
+++ llvm/include/llvm/Support/X86TargetParser.def
@@ -202,6 +202,7 @@
 X86_FEATURE       (XSAVES,          "xsaves")
 X86_FEATURE       (HRESET,          "hreset")
 X86_FEATURE       (AVX512FP16,      "avx512fp16")
+X86_FEATURE       (AVXIFMA,         "avxifma")
 X86_FEATURE       (AVXVNNI,         "avxvnni")
 // These features aren't really CPU features, but the frontend can set them.
 X86_FEATURE       (RETPOLINE_EXTERNAL_THUNK,    "retpoline-external-thunk")
Index: llvm/include/llvm/IR/IntrinsicsX86.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsX86.td
+++ llvm/include/llvm/IR/IntrinsicsX86.td
@@ -1874,6 +1874,22 @@
               ClangBuiltin<"__builtin_ia32_vpmadd52luq512">,
               Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
                          llvm_v8i64_ty], [IntrNoMem]>;
+  def int_x86_avx_vpmadd52h_uq_128 :
+              ClangBuiltin<"__builtin_ia32_vpmadd52huqvex128">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+                         llvm_v2i64_ty], [IntrNoMem]>;
+  def int_x86_avx_vpmadd52l_uq_128 :
+              ClangBuiltin<"__builtin_ia32_vpmadd52luqvex128">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+                         llvm_v2i64_ty], [IntrNoMem]>;
+  def int_x86_avx_vpmadd52h_uq_256 :
+              ClangBuiltin<"__builtin_ia32_vpmadd52huqvex256">,
+              Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
+                         llvm_v4i64_ty], [IntrNoMem]>;
+  def int_x86_avx_vpmadd52l_uq_256 :
+              ClangBuiltin<"__builtin_ia32_vpmadd52luqvex256">,
+              Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
+                         llvm_v4i64_ty], [IntrNoMem]>;
 }
 
 // VNNI
Index: llvm/docs/ReleaseNotes.rst
===================================================================
--- llvm/docs/ReleaseNotes.rst
+++ llvm/docs/ReleaseNotes.rst
@@ -135,6 +135,7 @@
 
 Changes to the X86 Backend
 --------------------------
+* Add support for the ``AVX-IFMA`` ISA.
 
 * Add support for the ``RDMSRLIST and WRMSRLIST`` instructions.
 * Add support for the ``WRMSRNS`` instruction.
Index: clang/test/Preprocessor/x86_target_features.c
===================================================================
--- clang/test/Preprocessor/x86_target_features.c
+++ clang/test/Preprocessor/x86_target_features.c
@@ -581,6 +581,16 @@
 // AVX512FP16NOAVX512DQ-NOT: #define __AVX512DQ__ 1
 // AVX512FP16NOAVX512DQ-NOT: #define __AVX512FP16__ 1
 
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavxifma -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXIFMA %s
+
+// AVXIFMA: #define __AVX2__ 1
+// AVXIFMA: #define __AVXIFMA__ 1
+
+// RUN: %clang -target i386-unknown-unknown -march=atom -mavxifma -mno-avx2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXIFMANOAVX2 %s
+
+// AVXIFMANOAVX2-NOT: #define __AVX2__ 1
+// AVXIFMANOAVX2-NOT: #define __AVXIFMA__ 1
+
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s
 
 // CRC32: #define __CRC32__ 1
Index: clang/test/Driver/x86-target-features.c
===================================================================
--- clang/test/Driver/x86-target-features.c
+++ clang/test/Driver/x86-target-features.c
@@ -310,6 +310,11 @@
 // AVX512FP16: "-target-feature" "+avx512fp16"
 // NO-AVX512FP16: "-target-feature" "-avx512fp16"
 
+// RUN: %clang -target i386-linux-gnu -mavxifma %s -### -o %t.o 2>&1 | FileCheck -check-prefix=AVXIFMA %s
+// RUN: %clang -target i386-linux-gnu -mno-avxifma %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AVXIFMA %s
+// AVXIFMA: "-target-feature" "+avxifma"
+// NO-AVXIFMA: "-target-feature" "-avxifma"
+
 // RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s
 // RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s
 // CRC32: "-target-feature" "+crc32"
Index: clang/test/CodeGen/avxifma-builtins.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/avxifma-builtins.c
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxifma -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m128i test_mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
+// CHECK-LABEL: @test_mm_madd52hi_avx_epu64
+// CHECK:    call <2 x i64> @llvm.x86.avx.vpmadd52h.uq.128
+  return _mm_madd52hi_avx_epu64(__X, __Y, __Z);
+}
+
+__m256i test_mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
+// CHECK-LABEL: @test_mm256_madd52hi_avx_epu64
+// CHECK:    call <4 x i64> @llvm.x86.avx.vpmadd52h.uq.256
+  return _mm256_madd52hi_avx_epu64(__X, __Y, __Z);
+}
+
+__m128i test_mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
+// CHECK-LABEL: @test_mm_madd52lo_avx_epu64
+// CHECK:    call <2 x i64> @llvm.x86.avx.vpmadd52l.uq.128
+  return _mm_madd52lo_avx_epu64(__X, __Y, __Z);
+}
+
+__m256i test_mm256_madd52lo_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
+// CHECK-LABEL: @test_mm256_madd52lo_avx_epu64
+// CHECK:    call <4 x i64> @llvm.x86.avx.vpmadd52l.uq.256
+  return _mm256_madd52lo_avx_epu64(__X, __Y, __Z);
+}
Index: clang/test/CodeGen/attr-target-x86.c
===================================================================
--- clang/test/CodeGen/attr-target-x86.c
+++ clang/test/CodeGen/attr-target-x86.c
@@ -54,9 +54,9 @@
 // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="i686"
 // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
 // CHECK-NOT: tune-cpu
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
 // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxvnni,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
 // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
 // CHECK-NOT: tune-cpu
 // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87,-3dnow,-3dnowa,-mmx"
Index: clang/lib/Headers/immintrin.h
===================================================================
--- clang/lib/Headers/immintrin.h
+++ clang/lib/Headers/immintrin.h
@@ -189,6 +189,11 @@
 #include <avx512ifmavlintrin.h>
 #endif
 
+#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
+    defined(__AVXIFMA__)
+#include <avxifmaintrin.h>
+#endif
+
 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
     defined(__AVX512VBMI__)
 #include <avx512vbmiintrin.h>
Index: clang/lib/Headers/cpuid.h
===================================================================
--- clang/lib/Headers/cpuid.h
+++ clang/lib/Headers/cpuid.h
@@ -203,6 +203,7 @@
 #define bit_AVXVNNI       0x00000010
 #define bit_AVX512BF16    0x00000020
 #define bit_HRESET        0x00400000
+#define bit_AVXIFMA       0x00800000
 
 /* Features in %edx for leaf 7 sub-leaf 1 */
 #define bit_PREFETCHI     0x00004000
Index: clang/lib/Headers/avxifmaintrin.h
===================================================================
--- /dev/null
+++ clang/lib/Headers/avxifmaintrin.h
@@ -0,0 +1,177 @@
+/*===----------------- avxifmaintrin.h - IFMA intrinsics -------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <avxifmaintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVXIFMAINTRIN_H
+#define __AVXIFMAINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avxifma"),        \
+                 __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avxifma"),        \
+                 __min_vector_width__(256)))
+
+// These intrinsics target the VEX-encoded (AVX-IFMA) forms of VPMADD52{H,L}UQ.
+
+/// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
+/// and \a __Z to form a 104-bit intermediate result. Add the high 52 bits of
+/// the intermediate result to the corresponding unsigned 64-bit integer in
+/// \a __X, and return the sums.
+///
+/// \headerfile <immintrin.h>
+///
+/// \code
+/// __m128i
+/// _mm_madd52hi_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VPMADD52HUQ instruction.
+///
+/// \return
+/// 	A 128-bit vector of [2 x i64] holding the sums (dst in the pseudocode).
+/// \param __X
+/// 	A 128-bit vector of [2 x i64]
+/// \param __Y
+/// 	A 128-bit vector of [2 x i64]
+/// \param __Z
+/// 	A 128-bit vector of [2 x i64]
+///
+/// \code{.operation}
+/// FOR j := 0 to 1
+/// 	i := j*64
+/// 	tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
+/// 	dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52])
+/// ENDFOR
+/// dst[MAX:128] := 0
+/// \endcode
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_madd52hi_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
+  return (__m128i)__builtin_ia32_vpmadd52huqvex128((__v2di)__X, (__v2di)__Y,
+                                                   (__v2di)__Z);
+}
+
+/// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
+/// and \a __Z to form a 104-bit intermediate result. Add the high 52 bits of
+/// the intermediate result to the corresponding unsigned 64-bit integer in
+/// \a __X, and return the sums.
+///
+/// \headerfile <immintrin.h>
+///
+/// \code
+/// __m256i
+/// _mm256_madd52hi_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VPMADD52HUQ instruction.
+///
+/// \return
+/// 	A 256-bit vector of [4 x i64] holding the sums (dst in the pseudocode).
+/// \param __X
+/// 	A 256-bit vector of [4 x i64]
+/// \param __Y
+/// 	A 256-bit vector of [4 x i64]
+/// \param __Z
+/// 	A 256-bit vector of [4 x i64]
+///
+/// \code{.operation}
+/// FOR j := 0 to 3
+/// 	i := j*64
+/// 	tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
+/// 	dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[103:52])
+/// ENDFOR
+/// dst[MAX:256] := 0
+/// \endcode
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_madd52hi_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
+  return (__m256i)__builtin_ia32_vpmadd52huqvex256((__v4di)__X, (__v4di)__Y,
+                                                   (__v4di)__Z);
+}
+
+/// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
+/// and \a __Z to form a 104-bit intermediate result. Add the low 52 bits of
+/// the intermediate result to the corresponding unsigned 64-bit integer in
+/// \a __X, and return the sums.
+///
+/// \headerfile <immintrin.h>
+///
+/// \code
+/// __m128i
+/// _mm_madd52lo_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VPMADD52LUQ instruction.
+///
+/// \return
+/// 	A 128-bit vector of [2 x i64] holding the sums (dst in the pseudocode).
+/// \param __X
+/// 	A 128-bit vector of [2 x i64]
+/// \param __Y
+/// 	A 128-bit vector of [2 x i64]
+/// \param __Z
+/// 	A 128-bit vector of [2 x i64]
+///
+/// \code{.operation}
+/// FOR j := 0 to 1
+/// 	i := j*64
+/// 	tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
+/// 	dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0])
+/// ENDFOR
+/// dst[MAX:128] := 0
+/// \endcode
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_madd52lo_avx_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
+  return (__m128i)__builtin_ia32_vpmadd52luqvex128((__v2di)__X, (__v2di)__Y,
+                                                   (__v2di)__Z);
+}
+
+/// Multiply packed unsigned 52-bit integers in each 64-bit element of \a __Y
+/// and \a __Z to form a 104-bit intermediate result. Add the low 52 bits of
+/// the intermediate result to the corresponding unsigned 64-bit integer in
+/// \a __X, and return the sums.
+///
+/// \headerfile <immintrin.h>
+///
+/// \code
+/// __m256i
+/// _mm256_madd52lo_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VPMADD52LUQ instruction.
+///
+/// \return
+/// 	A 256-bit vector of [4 x i64] holding the sums (dst in the pseudocode).
+/// \param __X
+/// 	A 256-bit vector of [4 x i64]
+/// \param __Y
+/// 	A 256-bit vector of [4 x i64]
+/// \param __Z
+/// 	A 256-bit vector of [4 x i64]
+///
+/// \code{.operation}
+/// FOR j := 0 to 3
+/// 	i := j*64
+/// 	tmp[127:0] := ZeroExtend64(__Y[i+51:i]) * ZeroExtend64(__Z[i+51:i])
+/// 	dst[i+63:i] := __X[i+63:i] + ZeroExtend64(tmp[51:0])
+/// ENDFOR
+/// dst[MAX:256] := 0
+/// \endcode
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_madd52lo_avx_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
+  return (__m256i)__builtin_ia32_vpmadd52luqvex256((__v4di)__X, (__v4di)__Y,
+                                                   (__v4di)__Z);
+}
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
+
+#endif // __AVXIFMAINTRIN_H
Index: clang/lib/Headers/CMakeLists.txt
===================================================================
--- clang/lib/Headers/CMakeLists.txt
+++ clang/lib/Headers/CMakeLists.txt
@@ -141,6 +141,7 @@
   avx512vp2intersectintrin.h
   avx512vpopcntdqintrin.h
   avx512vpopcntdqvlintrin.h
+  avxifmaintrin.h
   avxintrin.h
   avxvnniintrin.h
   bmi2intrin.h
Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -104,6 +104,7 @@
   bool HasAVX512VL = false;
   bool HasAVX512VBMI = false;
   bool HasAVX512VBMI2 = false;
+  bool HasAVXIFMA = false;
   bool HasAVX512IFMA = false;
   bool HasAVX512VP2INTERSECT = false;
   bool HasSHA = false;
Index: clang/lib/Basic/Targets/X86.cpp
===================================================================
--- clang/lib/Basic/Targets/X86.cpp
+++ clang/lib/Basic/Targets/X86.cpp
@@ -332,6 +332,8 @@
       HasAMXINT8 = true;
     } else if (Feature == "+amx-tile") {
       HasAMXTILE = true;
+    } else if (Feature == "+avxifma") {
+      HasAVXIFMA = true;
     } else if (Feature == "+avxvnni") {
       HasAVXVNNI = true;
     } else if (Feature == "+serialize") {
@@ -778,6 +780,8 @@
     Builder.defineMacro("__AMXINT8__");
   if (HasAMXBF16)
     Builder.defineMacro("__AMXBF16__");
+  if (HasAVXIFMA)
+    Builder.defineMacro("__AVXIFMA__");
   if (HasAVXVNNI)
     Builder.defineMacro("__AVXVNNI__");
   if (HasSERIALIZE)
@@ -901,6 +905,7 @@
       .Case("avx512vbmi2", true)
       .Case("avx512ifma", true)
       .Case("avx512vp2intersect", true)
+      .Case("avxifma", true)
       .Case("avxvnni", true)
       .Case("bmi", true)
       .Case("bmi2", true)
@@ -978,7 +983,6 @@
       .Case("amx-bf16", HasAMXBF16)
       .Case("amx-int8", HasAMXINT8)
       .Case("amx-tile", HasAMXTILE)
-      .Case("avxvnni", HasAVXVNNI)
       .Case("avx", SSELevel >= AVX)
       .Case("avx2", SSELevel >= AVX2)
       .Case("avx512f", SSELevel >= AVX512F)
@@ -997,6 +1001,8 @@
       .Case("avx512vbmi2", HasAVX512VBMI2)
       .Case("avx512ifma", HasAVX512IFMA)
       .Case("avx512vp2intersect", HasAVX512VP2INTERSECT)
+      .Case("avxifma", HasAVXIFMA)
+      .Case("avxvnni", HasAVXVNNI)
       .Case("bmi", HasBMI)
       .Case("bmi2", HasBMI2)
       .Case("cldemote", HasCLDEMOTE)
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -4581,6 +4581,8 @@
 def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group<m_x86_Features_Group>;
 def mavx512vp2intersect : Flag<["-"], "mavx512vp2intersect">, Group<m_x86_Features_Group>;
 def mno_avx512vp2intersect : Flag<["-"], "mno-avx512vp2intersect">, Group<m_x86_Features_Group>;
+def mavxifma : Flag<["-"], "mavxifma">, Group<m_x86_Features_Group>;
+def mno_avxifma : Flag<["-"], "mno-avxifma">, Group<m_x86_Features_Group>;
 def mavxvnni : Flag<["-"], "mavxvnni">, Group<m_x86_Features_Group>;
 def mno_avxvnni : Flag<["-"], "mno-avxvnni">, Group<m_x86_Features_Group>;
 def madx : Flag<["-"], "madx">, Group<m_x86_Features_Group>;
Index: clang/include/clang/Basic/BuiltinsX86.def
===================================================================
--- clang/include/clang/Basic/BuiltinsX86.def
+++ clang/include/clang/Basic/BuiltinsX86.def
@@ -1326,6 +1326,10 @@
 TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4OiV4OiC*V4OiUc", "nV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2Oi*V2OiUc", "nV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4Oi*V4OiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52huqvex128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avxifma")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52huqvex256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avxifma")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52luqvex128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avxifma")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52luqvex256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avxifma")
 TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512ifma")
 TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512, "V8OiV8OiV8OiV8Oi", "ncV:512:", "avx512ifma")
 TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "avx512ifma,avx512vl")
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -592,6 +592,9 @@
 - Fix 32-bit ``__fastcall`` and ``__vectorcall`` ABI mismatch with MSVC.
 - Switch ``AVX512-BF16`` intrinsics types from ``short`` to ``__bf16``.
 - Add support for ``PREFETCHI`` instructions.
+- Add support for the ``AVX-IFMA`` ISA.
+  * Add the ``_mm(256)_madd52hi_avx_epu64`` intrinsics.
+  * Add the ``_mm(256)_madd52lo_avx_epu64`` intrinsics.
 
 DWARF Support in Clang
 ----------------------
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to