from:"Luke Geeson via Phabricator via cfe\-commits"

[PATCH] D85009: [Sema][BFloat] Forbid arithmetic on vectors of bfloat.

2020-07-31 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

In D85009#2187621 , @jfb wrote:

> In D85009#2187603 , @simon_tatham 
> wrote:
>
>> In D85009#2187549 , @jfb wrote:
>>
>>> Is that true of all vector bfloat implementations? It seems like arithmetic 
>>> on these types is something implementations would likely support.
>>
>> As I understand it, Arm currently has the only implementation in clang so 
>> far. But if other targets disagree, we can make this conditional on 
>> `getVectorKind()`, so that `VectorType::NeonVector` gets this restriction 
>> and other vector types get whatever they need.
>
> You mean: only aarch64 backend supports lowering bfloat16 vectors at the 
> moment? Because the clang support isn't "ARM bfloat", it's just bfloat. The 
> tests are ARM bfloat and I think that's fine (i.e. Sema should be able to 
> check ISA-specific problems), but in general this property your checking for 
> seems like a target property.
>
> If I write C or C++ code using bfloat, I'd like to know what that type 
> actually means and what I can do with it. As a developer, it'll be super 
> frustrating once other targets support bfloat... should those target have 
> their own bfloat (because it won't be compatible with ARM's), or should 
> bfloat work differently on different targets?
>
> I actually don't know what the intended approach is here, which is why I'm 
> asking :)

Yes there is an Intel bfloat type too, however we are the only target for the 
bfloat c/ir type so far. The jury is also out as far as the standards are 
concerned too, the best we can do now is prevent behavior we know is not 
compatible, and like Simon says, add some predication later


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D85009/new/

https://reviews.llvm.org/D85009

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D85009: [Sema][BFloat] Forbid arithmetic on vectors of bfloat.

2020-07-31 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson accepted this revision.
LukeGeeson added a comment.
This revision is now accepted and ready to land.

This seems sensible and benign, LGTM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D85009/new/

https://reviews.llvm.org/D85009

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D83206: [PATCH] [ARM] Add Cortex-A78 and Cortex-X1 Support for Clang and LLVM

2020-07-10 Thread Luke Geeson via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rG954db63cd149: [ARM] Add Cortex-A78 and Cortex-X1 Support for 
Clang and LLVM (authored by LukeGeeson).

Changed prior to commit:
  https://reviews.llvm.org/D83206?vs=277074=277093#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83206/new/

https://reviews.llvm.org/D83206

Files:
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/arm-cortex-cpus.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64Subtarget.cpp
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/test/CodeGen/AArch64/cpus.ll
  llvm/test/CodeGen/AArch64/remat.ll
  llvm/test/MC/AArch64/armv8.2a-dotprod.s
  llvm/test/MC/ARM/armv8.2a-dotprod-a32.s
  llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
  llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -262,6 +262,18 @@
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 |
  ARM::AEK_RAS | ARM::AEK_DOTPROD,
  "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_DOTPROD | ARM::AEK_FP16 |
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_RAS | ARM::AEK_FP16 | ARM::AEK_DOTPROD |
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
   EXPECT_TRUE(testARMCPU("neoverse-n1", "armv8.2-a", "crypto-neon-fp-armv8",
 ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP |
 ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
@@ -310,7 +322,7 @@
  "7-S"));
 }
 
-static constexpr unsigned NumARMCPUArchs = 87;
+static constexpr unsigned NumARMCPUArchs = 89;
 
 TEST(TargetParserTest, testARMCPUArchList) {
   SmallVector List;
@@ -864,6 +876,20 @@
   AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
   AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
   AArch64::AEK_RCPC | AArch64::AEK_SSBS, "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
+  "cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO  | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
+  "cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
   EXPECT_TRUE(testAArch64CPU(
   "cyclone", "armv8-a", "crypto-neon-fp-armv8",
   AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, "8-A"));
@@ -1002,7 +1028,7 @@
   "8.2-A"));
 }
 
-static constexpr unsigned NumAArch64CPUArchs = 40;
+static constexpr unsigned NumAArch64CPUArchs = 42;
 
 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector List;
Index: llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
===
--- llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
+++ llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
@@ -5,6 +5,8 @@
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a65ae --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a75 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a77 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a78 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-x1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-e1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-n1

[PATCH] D83206: [PATCH] [ARM] Add Cortex-A78 and Cortex-X1 Support for Clang and LLVM

2020-07-10 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 277074.
LukeGeeson added a comment.

removed FP16FML feature


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83206/new/

https://reviews.llvm.org/D83206

Files:
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/arm-cortex-cpus.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64Subtarget.cpp
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/test/CodeGen/AArch64/cpus.ll
  llvm/test/CodeGen/AArch64/remat.ll
  llvm/test/MC/AArch64/armv8.2a-dotprod.s
  llvm/test/MC/ARM/armv8.2a-dotprod-a32.s
  llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
  llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -262,6 +262,18 @@
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 |
  ARM::AEK_RAS | ARM::AEK_DOTPROD,
  "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_DOTPROD | ARM::AEK_FP16 |
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_RAS | ARM::AEK_FP16 | ARM::AEK_DOTPROD |
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
   EXPECT_TRUE(testARMCPU("neoverse-n1", "armv8.2-a", "crypto-neon-fp-armv8",
 ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP |
 ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
@@ -310,7 +322,7 @@
  "7-S"));
 }
 
-static constexpr unsigned NumARMCPUArchs = 87;
+static constexpr unsigned NumARMCPUArchs = 89;
 
 TEST(TargetParserTest, testARMCPUArchList) {
   SmallVector List;
@@ -865,6 +877,20 @@
   AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
   AArch64::AEK_RCPC | AArch64::AEK_SSBS, "8.2-A"));
   EXPECT_TRUE(testAArch64CPU(
+  "cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO  | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
+  "cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
   "cyclone", "armv8-a", "crypto-neon-fp-armv8",
   AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, "8-A"));
   EXPECT_TRUE(testAArch64CPU(
@@ -1002,7 +1028,7 @@
   "8.2-A"));
 }
 
-static constexpr unsigned NumAArch64CPUArchs = 40;
+static constexpr unsigned NumAArch64CPUArchs = 42;
 
 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector List;
Index: llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
===
--- llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
+++ llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
@@ -5,6 +5,8 @@
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a65ae --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a75 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a77 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a78 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-x1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-e1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-n1 --disassemble < %s | FileCheck %s
 
Index: llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
===
--- llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
+++ llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
@@ -3,6 +3,8

[PATCH] D83206: [PATCH] [ARM] Add Cortex-A78 and Cortex-X1 Support for Clang and LLVM

2020-07-10 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 277036.
LukeGeeson added a comment.

- Added FP16 to ARM a78


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83206/new/

https://reviews.llvm.org/D83206

Files:
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/arm-cortex-cpus.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64Subtarget.cpp
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/test/CodeGen/AArch64/cpus.ll
  llvm/test/CodeGen/AArch64/remat.ll
  llvm/test/MC/AArch64/armv8.2a-dotprod.s
  llvm/test/MC/ARM/armv8.2a-dotprod-a32.s
  llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
  llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -262,6 +262,18 @@
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 |
  ARM::AEK_RAS | ARM::AEK_DOTPROD,
  "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_DOTPROD | ARM::AEK_FP16 |
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_RAS | ARM::AEK_FP16 | ARM::AEK_DOTPROD |
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
   EXPECT_TRUE(testARMCPU("neoverse-n1", "armv8.2-a", "crypto-neon-fp-armv8",
 ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP |
 ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
@@ -310,7 +322,7 @@
  "7-S"));
 }
 
-static constexpr unsigned NumARMCPUArchs = 87;
+static constexpr unsigned NumARMCPUArchs = 89;
 
 TEST(TargetParserTest, testARMCPUArchList) {
   SmallVector List;
@@ -865,6 +877,20 @@
   AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
   AArch64::AEK_RCPC | AArch64::AEK_SSBS, "8.2-A"));
   EXPECT_TRUE(testAArch64CPU(
+  "cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO  | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
+  "cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
   "cyclone", "armv8-a", "crypto-neon-fp-armv8",
   AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, "8-A"));
   EXPECT_TRUE(testAArch64CPU(
@@ -1002,7 +1028,7 @@
   "8.2-A"));
 }
 
-static constexpr unsigned NumAArch64CPUArchs = 40;
+static constexpr unsigned NumAArch64CPUArchs = 42;
 
 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector List;
Index: llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
===
--- llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
+++ llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
@@ -5,6 +5,8 @@
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a65ae --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a75 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a77 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a78 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-x1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-e1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-n1 --disassemble < %s | FileCheck %s
 
Index: llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
===
--- llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
+++ llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
@@ -3,6 +3,8

[PATCH] D83206: [PATCH] [ARM] Add Cortex-A78 and Cortex-X1 Support for Clang and LLVM

2020-07-10 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 277033.
LukeGeeson marked an inline comment as done.
LukeGeeson added a comment.

- removed missed RAS


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83206/new/

https://reviews.llvm.org/D83206

Files:
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/arm-cortex-cpus.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64Subtarget.cpp
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/test/CodeGen/AArch64/cpus.ll
  llvm/test/CodeGen/AArch64/remat.ll
  llvm/test/MC/AArch64/armv8.2a-dotprod.s
  llvm/test/MC/ARM/armv8.2a-dotprod-a32.s
  llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
  llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -262,6 +262,18 @@
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 |
  ARM::AEK_RAS | ARM::AEK_DOTPROD,
  "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_DOTPROD | 
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_RAS | ARM::AEK_FP16 | ARM::AEK_DOTPROD |
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
   EXPECT_TRUE(testARMCPU("neoverse-n1", "armv8.2-a", "crypto-neon-fp-armv8",
 ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP |
 ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
@@ -310,7 +322,7 @@
  "7-S"));
 }
 
-static constexpr unsigned NumARMCPUArchs = 87;
+static constexpr unsigned NumARMCPUArchs = 89;
 
 TEST(TargetParserTest, testARMCPUArchList) {
   SmallVector List;
@@ -865,6 +877,20 @@
   AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
   AArch64::AEK_RCPC | AArch64::AEK_SSBS, "8.2-A"));
   EXPECT_TRUE(testAArch64CPU(
+  "cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO  | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
+  "cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
   "cyclone", "armv8-a", "crypto-neon-fp-armv8",
   AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, "8-A"));
   EXPECT_TRUE(testAArch64CPU(
@@ -1002,7 +1028,7 @@
   "8.2-A"));
 }
 
-static constexpr unsigned NumAArch64CPUArchs = 40;
+static constexpr unsigned NumAArch64CPUArchs = 42;
 
 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector List;
Index: llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
===
--- llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
+++ llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
@@ -5,6 +5,8 @@
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a65ae --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a75 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a77 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a78 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-x1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-e1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-n1 --disassemble < %s | FileCheck %s
 
Index: llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
===
--- llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
+++

[PATCH] D83206: [PATCH] [ARM] Add Cortex-A78 and Cortex-X1 Support for Clang and LLVM

2020-07-10 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 277032.
LukeGeeson added a comment.

- removed RAS as it's in 8.2a


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83206/new/

https://reviews.llvm.org/D83206

Files:
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/arm-cortex-cpus.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64Subtarget.cpp
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/test/CodeGen/AArch64/cpus.ll
  llvm/test/CodeGen/AArch64/remat.ll
  llvm/test/MC/AArch64/armv8.2a-dotprod.s
  llvm/test/MC/ARM/armv8.2a-dotprod-a32.s
  llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
  llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -262,6 +262,18 @@
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 |
  ARM::AEK_RAS | ARM::AEK_DOTPROD,
  "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_DOTPROD | 
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_RAS | ARM::AEK_FP16 | ARM::AEK_DOTPROD |
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
   EXPECT_TRUE(testARMCPU("neoverse-n1", "armv8.2-a", "crypto-neon-fp-armv8",
 ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP |
 ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
@@ -310,7 +322,7 @@
  "7-S"));
 }
 
-static constexpr unsigned NumARMCPUArchs = 87;
+static constexpr unsigned NumARMCPUArchs = 89;
 
 TEST(TargetParserTest, testARMCPUArchList) {
   SmallVector List;
@@ -865,6 +877,20 @@
   AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
   AArch64::AEK_RCPC | AArch64::AEK_SSBS, "8.2-A"));
   EXPECT_TRUE(testAArch64CPU(
+  "cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO  | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
+  "cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
   "cyclone", "armv8-a", "crypto-neon-fp-armv8",
   AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, "8-A"));
   EXPECT_TRUE(testAArch64CPU(
@@ -1002,7 +1028,7 @@
   "8.2-A"));
 }
 
-static constexpr unsigned NumAArch64CPUArchs = 40;
+static constexpr unsigned NumAArch64CPUArchs = 42;
 
 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector List;
Index: llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
===
--- llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
+++ llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
@@ -5,6 +5,8 @@
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a65ae --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a75 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a77 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a78 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-x1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-e1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-n1 --disassemble < %s | FileCheck %s
 
Index: llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
===
--- llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
+++ llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
@@ -3,6 +3,8 @@
 //

[PATCH] D83206: [PATCH] [ARM] Add Cortex-A78 and Cortex-X1 Support for Clang and LLVM

2020-07-10 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 277019.
LukeGeeson added a comment.

- Added FP16 to Cortex-X1 set of features


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83206/new/

https://reviews.llvm.org/D83206

Files:
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/arm-cortex-cpus.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64Subtarget.cpp
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/test/CodeGen/AArch64/cpus.ll
  llvm/test/CodeGen/AArch64/remat.ll
  llvm/test/MC/AArch64/armv8.2a-dotprod.s
  llvm/test/MC/ARM/armv8.2a-dotprod-a32.s
  llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
  llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -262,6 +262,18 @@
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 |
  ARM::AEK_RAS | ARM::AEK_DOTPROD,
  "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_DOTPROD | 
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_RAS | ARM::AEK_FP16 | ARM::AEK_DOTPROD |
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
   EXPECT_TRUE(testARMCPU("neoverse-n1", "armv8.2-a", "crypto-neon-fp-armv8",
 ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP |
 ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
@@ -310,7 +322,7 @@
  "7-S"));
 }
 
-static constexpr unsigned NumARMCPUArchs = 87;
+static constexpr unsigned NumARMCPUArchs = 89;
 
 TEST(TargetParserTest, testARMCPUArchList) {
   SmallVector List;
@@ -865,6 +877,20 @@
   AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
   AArch64::AEK_RCPC | AArch64::AEK_SSBS, "8.2-A"));
   EXPECT_TRUE(testAArch64CPU(
+  "cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO  | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
+  "cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
   "cyclone", "armv8-a", "crypto-neon-fp-armv8",
   AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, "8-A"));
   EXPECT_TRUE(testAArch64CPU(
@@ -1002,7 +1028,7 @@
   "8.2-A"));
 }
 
-static constexpr unsigned NumAArch64CPUArchs = 40;
+static constexpr unsigned NumAArch64CPUArchs = 42;
 
 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector List;
Index: llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
===
--- llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
+++ llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
@@ -5,6 +5,8 @@
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a65ae --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a75 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a77 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a78 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-x1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-e1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-n1 --disassemble < %s | FileCheck %s
 
Index: llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
===
--- llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
+++ llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
@@ -3,6

[PATCH] D83206: [PATCH] [ARM] Add Cortex-A78 and Cortex-X1 Support for Clang and LLVM

2020-07-10 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 276977.
LukeGeeson marked 3 inline comments as done.
LukeGeeson added a comment.

- Addresses dmgreens comments
  - reordered CPUs in the right places
  - added code/tests in all files that exist in the a77 patch

made minor adjustments including:

- adding FeatureRCPC to the missing CPUs

Please let me know if there is anything else


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83206/new/

https://reviews.llvm.org/D83206

Files:
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/arm-cortex-cpus.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64Subtarget.cpp
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/test/CodeGen/AArch64/cpus.ll
  llvm/test/CodeGen/AArch64/remat.ll
  llvm/test/MC/AArch64/armv8.2a-dotprod.s
  llvm/test/MC/ARM/armv8.2a-dotprod-a32.s
  llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
  llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -262,6 +262,18 @@
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 |
  ARM::AEK_RAS | ARM::AEK_DOTPROD,
  "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_DOTPROD | 
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_RAS | ARM::AEK_DOTPROD |
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
   EXPECT_TRUE(testARMCPU("neoverse-n1", "armv8.2-a", "crypto-neon-fp-armv8",
 ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP |
 ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
@@ -310,7 +322,7 @@
  "7-S"));
 }
 
-static constexpr unsigned NumARMCPUArchs = 87;
+static constexpr unsigned NumARMCPUArchs = 89;
 
 TEST(TargetParserTest, testARMCPUArchList) {
   SmallVector List;
@@ -865,6 +877,20 @@
   AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
   AArch64::AEK_RCPC | AArch64::AEK_SSBS, "8.2-A"));
   EXPECT_TRUE(testAArch64CPU(
+  "cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO  | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
+  "cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
   "cyclone", "armv8-a", "crypto-neon-fp-armv8",
   AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, "8-A"));
   EXPECT_TRUE(testAArch64CPU(
@@ -1002,7 +1028,7 @@
   "8.2-A"));
 }
 
-static constexpr unsigned NumAArch64CPUArchs = 40;
+static constexpr unsigned NumAArch64CPUArchs = 42;
 
 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector List;
Index: llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
===
--- llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
+++ llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
@@ -5,6 +5,8 @@
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a65ae --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a75 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a77 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a78 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-x1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-e1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-n1 --disassemble < %s |

[PATCH] D83206: [PATCH] [ARM] Add Cortex-A78 and Cortex-X1 Support for Clang and LLVM

2020-07-08 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 276383.
LukeGeeson marked 2 inline comments as done.
LukeGeeson added a comment.

Addressed Mikhail's feedback: Sorted CPU lists accordingly


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83206/new/

https://reviews.llvm.org/D83206

Files:
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/arm-cortex-cpus.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64Subtarget.cpp
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -262,6 +262,18 @@
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 |
  ARM::AEK_RAS | ARM::AEK_DOTPROD,
  "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_RAS | ARM::AEK_DOTPROD |
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_RAS | ARM::AEK_DOTPROD |
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | 
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
   EXPECT_TRUE(testARMCPU("neoverse-n1", "armv8.2-a", "crypto-neon-fp-armv8",
 ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP |
 ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
@@ -310,7 +322,7 @@
  "7-S"));
 }
 
-static constexpr unsigned NumARMCPUArchs = 87;
+static constexpr unsigned NumARMCPUArchs = 89;
 
 TEST(TargetParserTest, testARMCPUArchList) {
   SmallVector List;
@@ -865,6 +877,20 @@
   AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
   AArch64::AEK_RCPC | AArch64::AEK_SSBS, "8.2-A"));
   EXPECT_TRUE(testAArch64CPU(
+  "cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO  | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
+  "cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
   "cyclone", "armv8-a", "crypto-neon-fp-armv8",
   AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, "8-A"));
   EXPECT_TRUE(testAArch64CPU(
@@ -1002,7 +1028,7 @@
   "8.2-A"));
 }
 
-static constexpr unsigned NumAArch64CPUArchs = 40;
+static constexpr unsigned NumAArch64CPUArchs = 42;
 
 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector List;
Index: llvm/lib/Target/ARM/ARMSubtarget.h
===
--- llvm/lib/Target/ARM/ARMSubtarget.h
+++ llvm/lib/Target/ARM/ARMSubtarget.h
@@ -62,6 +62,7 @@
 CortexA75,
 CortexA76,
 CortexA77,
+CortexA78,
 CortexA8,
 CortexA9,
 CortexM3,
@@ -70,6 +71,7 @@
 CortexR5,
 CortexR52,
 CortexR7,
+CortexX1,
 Exynos,
 Krait,
 Kryo,
Index: llvm/lib/Target/ARM/ARMSubtarget.cpp
===
--- llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -319,6 +319,9 @@
 PreISelOperandLatencyAdjustment = 1;
 PartialUpdateClearance = 12;
 break;
+  case CortexX1:
+  case CortexA78:
+break;
   }
 }
 
Index: llvm/lib/Target/ARM/ARM.td
===
--- llvm/lib/Target/ARM/ARM.td
+++ llvm/lib/Target/ARM/ARM.td
@@ -596,6 +596,10 @@
"Cortex-A76 ARM processors", []>;
 def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
"Cortex-A77 ARM processors", []>;
+def ProcX1  : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
+   "Cortex-X1 ARM processors", []>;
+def ProcA78 : SubtargetFeature<"cortex-a78",

[PATCH] D83206: [PATCH] [ARM] Add Cortex-A78 and Cortex-X1 Support for Clang and LLVM

2020-07-06 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson created this revision.
LukeGeeson added a reviewer: t.p.northover.
Herald added subscribers: llvm-commits, cfe-commits, danielkiss, hiraditya, 
kristof.beyls.
Herald added projects: clang, LLVM.

This patch adds support for the Arm-v8 Cortex-A78 and Cortex-X1
processors for AArch64 and ARM.

In detail:

- Adding cortex-a78 and cortex-x1 as cpu options for aarch64 and arm targets in 
clang
- Adding Cortex-A78 and Cortex-X1 CPU names and ProcessorModels in llvm

details of the CPU can be found here:
https://www.arm.com/products/cortex-x

https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a78

The following people contributed to this patch:

- Luke Geeson
- Mikhail Maltsev


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D83206

Files:
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/arm-cortex-cpus.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64Subtarget.cpp
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -262,6 +262,18 @@
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 |
  ARM::AEK_RAS | ARM::AEK_DOTPROD,
  "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_RAS | ARM::AEK_DOTPROD |
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_RAS | ARM::AEK_DOTPROD |
+ ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT |
+ ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | 
+ ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS,
+ "8.2-A"));
   EXPECT_TRUE(testARMCPU("neoverse-n1", "armv8.2-a", "crypto-neon-fp-armv8",
 ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP |
 ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
@@ -310,7 +322,7 @@
  "7-S"));
 }
 
-static constexpr unsigned NumARMCPUArchs = 87;
+static constexpr unsigned NumARMCPUArchs = 89;
 
 TEST(TargetParserTest, testARMCPUArchList) {
   SmallVector List;
@@ -865,6 +877,20 @@
   AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
   AArch64::AEK_RCPC | AArch64::AEK_SSBS, "8.2-A"));
   EXPECT_TRUE(testAArch64CPU(
+  "cortex-a78", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO  | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
+  "cortex-x1", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS,
+  "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
   "cyclone", "armv8-a", "crypto-neon-fp-armv8",
   AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, "8-A"));
   EXPECT_TRUE(testAArch64CPU(
@@ -1002,7 +1028,7 @@
   "8.2-A"));
 }
 
-static constexpr unsigned NumAArch64CPUArchs = 40;
+static constexpr unsigned NumAArch64CPUArchs = 42;
 
 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector List;
Index: llvm/lib/Target/ARM/ARMSubtarget.h
===
--- llvm/lib/Target/ARM/ARMSubtarget.h
+++ llvm/lib/Target/ARM/ARMSubtarget.h
@@ -74,7 +74,9 @@
 Krait,
 Kryo,
 NeoverseN1,
-Swift
+Swift,
+CortexX1,
+CortexA78
   };
   enum ARMProcClassEnum {
 None,
Index: llvm/lib/Target/ARM/ARMSubtarget.cpp
===
--- llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -319,6 +319,9 @@
 PreISelOperandLatencyAdjustment = 1;
 PartialUpdateClearance = 12;
 break;
+  case CortexX1:
+  case CortexA78:
+break;
   }
 }
 
Index: llvm/lib/Target/ARM/ARM.td
===
--- llvm/lib/Target/ARM/ARM.td
+++ llvm/lib/Target/ARM/ARM.td
@@ -596,6 +596,10 @@

[PATCH] D82887: [ARM] Add Cortex-A77 Support for Clang and LLVM

2020-07-03 Thread Luke Geeson via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rG8bf99f1e6f0f: [ARM] Add Cortex-A77 Support for Clang and 
LLVM (authored by LukeGeeson).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82887/new/

https://reviews.llvm.org/D82887

Files:
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/arm-cortex-cpus.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64Subtarget.cpp
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/test/CodeGen/AArch64/cpus.ll
  llvm/test/CodeGen/AArch64/remat.ll
  llvm/test/MC/AArch64/armv8.2a-dotprod.s
  llvm/test/MC/ARM/armv8.2a-dotprod-a32.s
  llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
  llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -256,6 +256,12 @@
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 |
  ARM::AEK_RAS | ARM::AEK_DOTPROD,
  "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-a77", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP |
+ ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 |
+ ARM::AEK_RAS | ARM::AEK_DOTPROD,
+ "8.2-A"));
   EXPECT_TRUE(testARMCPU("neoverse-n1", "armv8.2-a", "crypto-neon-fp-armv8",
 ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP |
 ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
@@ -304,7 +310,7 @@
  "7-S"));
 }
 
-static constexpr unsigned NumARMCPUArchs = 86;
+static constexpr unsigned NumARMCPUArchs = 87;
 
 TEST(TargetParserTest, testARMCPUArchList) {
   SmallVector List;
@@ -853,6 +859,12 @@
   AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
   AArch64::AEK_RCPC| AArch64::AEK_SSBS, "8.2-A"));
   EXPECT_TRUE(testAArch64CPU(
+  "cortex-a77", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS, "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
   "cyclone", "armv8-a", "crypto-neon-fp-armv8",
   AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, "8-A"));
   EXPECT_TRUE(testAArch64CPU(
@@ -990,7 +1002,7 @@
   "8.2-A"));
 }
 
-static constexpr unsigned NumAArch64CPUArchs = 39;
+static constexpr unsigned NumAArch64CPUArchs = 40;
 
 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector List;
Index: llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
===
--- llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
+++ llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
@@ -4,6 +4,7 @@
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a65 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a65ae --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a75 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a77 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-e1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-n1 --disassemble < %s | FileCheck %s
 
Index: llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
===
--- llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
+++ llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
@@ -2,6 +2,7 @@
 // RUN: llvm-mc -triple thumb -mcpu=cortex-a55 -show-encoding < %s | FileCheck %s  --check-prefix=CHECK
 // RUN: llvm-mc -triple thumb -mcpu=cortex-a75 -show-encoding < %s | FileCheck %s  --check-prefix=CHECK
 // RUN: llvm-mc -triple thumb -mcpu=cortex-a76 -show-encoding < %s | FileCheck %s  --check-prefix=CHECK
+// RUN: llvm-mc -triple thumb -mcpu=cortex-a77 -show-encoding < %s | FileCheck %s --check-prefix=CHECK
 // RUN: llvm-mc -triple thumb -mcpu=neoverse-n1 -show-encoding < %s | FileCheck %s --check-prefix=CHECK
 
 // RUN: not llvm-mc -triple thumb -mattr=-dotprod -show-encoding < %s 2> %t
Index: llvm/test/MC/ARM/armv8.2a-dotprod-a32.s
===
---

[PATCH] D82887: [ARM] Add Cortex-A77 Support for Clang and LLVM

2020-07-01 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 274791.
LukeGeeson marked 5 inline comments as done.
LukeGeeson added a comment.

- Added A77 CPU Part Number to Host.cpp
- added case to AArch64Subtarget::initializeProperties
- moved  a77 test up in AArch64-cpu
- formatted line for AArch64 Target Parser
- removed whitespace in ARM Target Parser
- refactored ProcessorModel  + sorted next to A76
- Added ProcA77

This should address everything David, please let me know if there is anything 
else


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82887/new/

https://reviews.llvm.org/D82887

Files:
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/arm-cortex-cpus.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64Subtarget.cpp
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMSubtarget.cpp
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/test/CodeGen/AArch64/cpus.ll
  llvm/test/CodeGen/AArch64/remat.ll
  llvm/test/MC/AArch64/armv8.2a-dotprod.s
  llvm/test/MC/ARM/armv8.2a-dotprod-a32.s
  llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
  llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -256,6 +256,12 @@
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 |
  ARM::AEK_RAS | ARM::AEK_DOTPROD,
  "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-a77", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP |
+ ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 |
+ ARM::AEK_RAS | ARM::AEK_DOTPROD,
+ "8.2-A"));
   EXPECT_TRUE(testARMCPU("neoverse-n1", "armv8.2-a", "crypto-neon-fp-armv8",
 ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP |
 ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
@@ -304,7 +310,7 @@
  "7-S"));
 }
 
-static constexpr unsigned NumARMCPUArchs = 86;
+static constexpr unsigned NumARMCPUArchs = 87;
 
 TEST(TargetParserTest, testARMCPUArchList) {
   SmallVector List;
@@ -853,6 +859,12 @@
   AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
   AArch64::AEK_RCPC| AArch64::AEK_SSBS, "8.2-A"));
   EXPECT_TRUE(testAArch64CPU(
+  "cortex-a77", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS, "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
   "cyclone", "armv8-a", "crypto-neon-fp-armv8",
   AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, "8-A"));
   EXPECT_TRUE(testAArch64CPU(
@@ -990,7 +1002,7 @@
   "8.2-A"));
 }
 
-static constexpr unsigned NumAArch64CPUArchs = 39;
+static constexpr unsigned NumAArch64CPUArchs = 40;
 
 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector List;
Index: llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
===
--- llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
+++ llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
@@ -4,6 +4,7 @@
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a65 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a65ae --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a75 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a77 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-e1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-n1 --disassemble < %s | FileCheck %s
 
Index: llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
===
--- llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
+++ llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
@@ -2,6 +2,7 @@
 // RUN: llvm-mc -triple thumb -mcpu=cortex-a55 -show-encoding < %s | FileCheck %s  --check-prefix=CHECK
 // RUN: llvm-mc -triple thumb -mcpu=cortex-a75 -show-encoding < %s | FileCheck %s  --check-prefix=CHECK
 // RUN: llvm-mc -triple thumb -mcpu=cortex-a76 -show-encoding < %s | FileCheck %s  --check-prefix=CHECK
+// RUN: llvm-mc -triple thumb -mcpu=cortex-a77 -show-encoding < %s | FileCheck %s --check-prefix=CHECK
 // RUN: llvm-mc -triple thumb

[PATCH] D82887: [ARM] Add Cortex-A77 Support for Clang and LLVM

2020-06-30 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

In D82887#2123364 , @dmgreen wrote:

> Please make sure the switch in AArch64Subtarget::initializeProperties has an 
> entry too.
>
> Do you happen to know the cpu id for host.cpp?

do you mean the CPU part number used in e.g Host.cpp? then chasing the GCC link 
above points to 0xd0d which is also what is also in the TRM 
https://developer.arm.com/documentation/10/0101

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82887/new/

https://reviews.llvm.org/D82887

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D82887: [ARM] Add Cortex-A77 Support for Clang and LLVM

2020-06-30 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson created this revision.
LukeGeeson added a reviewer: t.p.northover.
Herald added subscribers: llvm-commits, cfe-commits, danielkiss, hiraditya, 
kristof.beyls.
Herald added projects: clang, LLVM.

This patch upstreams support for the Arm-v8 Cortex-A77
processor for AArch64 and ARM.

In detail:

- Adding cortex-a77 as a cpu option for aarch64 and arm targets in clang
- Cortex-A77 CPU name and ProcessorModel in llvm

details of the CPU can be found here:
https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a77

and a similar submission to GCC can be found here:
https://github.com/gcc-mirror/gcc/commit/e0664b7a63ed8305e9f8539309df7fb3eb13babe

The following people contributed to this patch:

- Luke Geeson
- Mikhail Maltsev


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D82887

Files:
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/arm-cortex-cpus.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/ARM/ARM.td
  llvm/test/CodeGen/AArch64/cpus.ll
  llvm/test/CodeGen/AArch64/remat.ll
  llvm/test/MC/AArch64/armv8.2a-dotprod.s
  llvm/test/MC/ARM/armv8.2a-dotprod-a32.s
  llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
  llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -256,6 +256,12 @@
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 |
  ARM::AEK_RAS | ARM::AEK_DOTPROD,
  "8.2-A"));
+  EXPECT_TRUE(testARMCPU("cortex-a77", "armv8.2-a", "crypto-neon-fp-armv8",
+ ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP |
+ ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_FP16 |
+ ARM::AEK_RAS | ARM::AEK_DOTPROD,
+ "8.2-A"));
   EXPECT_TRUE(testARMCPU("neoverse-n1", "armv8.2-a", "crypto-neon-fp-armv8",
 ARM::AEK_CRC | ARM::AEK_SEC | ARM::AEK_MP |
 ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
@@ -304,7 +310,7 @@
  "7-S"));
 }
 
-static constexpr unsigned NumARMCPUArchs = 86;
+static constexpr unsigned NumARMCPUArchs = 87;
 
 TEST(TargetParserTest, testARMCPUArchList) {
   SmallVector List;
@@ -853,6 +859,12 @@
   AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
   AArch64::AEK_RCPC| AArch64::AEK_SSBS, "8.2-A"));
   EXPECT_TRUE(testAArch64CPU(
+  "cortex-a77", "armv8.2-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
+  AArch64::AEK_RDM | AArch64::AEK_SIMD | AArch64::AEK_RAS |
+  AArch64::AEK_LSE | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
+  AArch64::AEK_RCPC | AArch64::AEK_SSBS, "8.2-A"));
+  EXPECT_TRUE(testAArch64CPU(
   "cyclone", "armv8-a", "crypto-neon-fp-armv8",
   AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, "8-A"));
   EXPECT_TRUE(testAArch64CPU(
@@ -990,7 +1002,7 @@
   "8.2-A"));
 }
 
-static constexpr unsigned NumAArch64CPUArchs = 39;
+static constexpr unsigned NumAArch64CPUArchs = 40;
 
 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector List;
Index: llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
===
--- llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
+++ llvm/test/MC/Disassembler/AArch64/armv8.3a-rcpc.txt
@@ -4,6 +4,7 @@
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a65 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a65ae --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a75 --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=cortex-a77 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-e1 --disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple aarch64-none-linux-gnu -mcpu=neoverse-n1 --disassemble < %s | FileCheck %s
 
Index: llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
===
--- llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
+++ llvm/test/MC/ARM/armv8.2a-dotprod-t32.s
@@ -2,6 +2,7 @@
 // RUN: llvm-mc -triple thumb -mcpu=cortex-a55 -show-encoding < %s | FileCheck %s  --check-prefix=CHECK
 // RUN: llvm-mc -triple thumb -mcpu=cortex-a75 -show-encoding < %s | FileCheck %s  --check-prefix=CHECK
 // RUN: llvm-mc -triple thumb -mcpu=cortex-a76 -show-encoding < %s | FileCheck %s  --check-prefix=CHECK
+// RUN: llvm-mc -triple thumb -mcpu=cortex-a77 -show-encoding < %s | FileCheck %s --check-prefix=CHECK
 // RUN:

[PATCH] D81847: [ARM] Improve diagnostics message when Neon is unsupported

2020-06-16 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson accepted this revision.
LukeGeeson added a comment.
This revision is now accepted and ready to land.

Very simple change, looks good to me.

Please wait a week or so before submitting, so that non-arm folks have time to 
raise issues


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81847/new/

https://reviews.llvm.org/D81847



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80752: [AArch64]: BFloat MatMul Intrinsics

2020-06-16 Thread Luke Geeson via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rG10b6567f4977: [AArch64]: BFloat MatMul 
IntrinsicsCodeGen (authored by LukeGeeson).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80752/new/

https://reviews.llvm.org/D80752

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-bf16-dotprod-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll

Index: llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64-arm-none-eabi  -mattr=+bf16 %s -o - | FileCheck %s
+
+define <2 x float> @test_vbfdot_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdot_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.2s, v1.4h, v2.4h
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <4 x bfloat> %a to <8 x i8>
+  %1 = bitcast <4 x bfloat> %b to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %0, <8 x i8> %1)
+  ret <2 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfdotq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdotq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.4s, v1.8h, v2.8h
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1)
+  ret <4 x float> %vbfdot1.i
+}
+
+define <2 x float> @test_vbfdot_lane_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdot_lane_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK:bfdot v0.2s, v1.4h, v2.2h[0]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <2 x i32> zeroinitializer
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2)
+  ret <2 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfdotq_laneq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdotq_laneq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.4s, v1.8h, v2.2h[3]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> 
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2)
+  ret <4 x float> %vbfdot1.i
+}
+
+define <2 x float> @test_vbfdot_laneq_f32(<2 x float> %r, <4 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdot_laneq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.2s, v1.4h, v2.2h[3]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> 
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2)
+  ret <2 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfdotq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdotq_lane_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK:bfdot v0.4s, v1.8h, v2.2h[0]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> zeroinitializer
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2)
+  ret <4 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfmmlaq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfmmlaq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfmmla v0.4s, v1.8h, v2.8h
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfmmla1.i = tail call <4 x float> @llvm.aarch64.neon.bfmmla.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1)
+  ret <4 x float> %vbfmmla1.i
+}
+
+define <4 x float>

[PATCH] D81740: [AArch32]: BFloat MatMul Intrinsics

2020-06-15 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 270720.
LukeGeeson added a comment.

removed redundancy in patch


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81740/new/

https://reviews.llvm.org/D81740

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/arm-bf16-dotprod-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrNEON.td
  llvm/test/CodeGen/ARM/arm-bf16-dotprod-intrinsics.ll

Index: llvm/test/CodeGen/ARM/arm-bf16-dotprod-intrinsics.ll
===
--- /dev/null
+++ llvm/test/CodeGen/ARM/arm-bf16-dotprod-intrinsics.ll
@@ -0,0 +1,150 @@
+; RUN: llc -mtriple armv8.6a-arm-none-eabi  -mattr=+bf16 -float-abi=hard %s -o - | FileCheck %s --check-prefix=CHECK
+
+; CHECK-LABEL: test_vbfdot_f32
+; CHECK: vdot.bf16   d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+define arm_aapcs_vfpcc <2 x float> @test_vbfdot_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) local_unnamed_addr #0 {
+entry:
+  %0 = bitcast <4 x bfloat> %a to <8 x i8>
+  %1 = bitcast <4 x bfloat> %b to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.arm.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %0, <8 x i8> %1) #3
+  ret <2 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdotq_f32
+; CHECK: vdot.bf16   q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+define <4 x float> @test_vbfdotq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) local_unnamed_addr #1 {
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.arm.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1) #3
+  ret <4 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdot_lane_f32
+; CHECK: vdot.bf16   d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[0]
+define <2 x float> @test_vbfdot_lane_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) local_unnamed_addr #0 {
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <2 x i32> zeroinitializer
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.arm.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2) #3
+  ret <2 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdotq_laneq_f32
+; CHECK: vdup.32 q{{[0-9]+}}, d{{[0-9]+}}[1]
+; CHECK: vdot.bf16   q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+define <4 x float> @test_vbfdotq_laneq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) local_unnamed_addr #1 {
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> 
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.arm.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2) #3
+  ret <4 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdot_laneq_f32
+; CHECK: vdot.bf16   d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[1]
+define <2 x float> @test_vbfdot_laneq_f32(<2 x float> %r, <4 x bfloat> %a, <8 x bfloat> %b) local_unnamed_addr #1 {
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> 
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.arm.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2) #3
+  ret <2 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdotq_lane_f32
+; CHECK: vdot.bf16   q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[0]
+define <4 x float> @test_vbfdotq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) local_unnamed_addr #1 {
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> zeroinitializer
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.arm.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2) #3
+  ret <4 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfmmlaq_f32
+; CHECK: vmmla.bf16  q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+define <4 x float> @test_vbfmmlaq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) local_unnamed_addr #1 {
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfmmla1.i = tail call <4 x float> @llvm.arm.neon.bfmmla.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1) #3
+  ret <4 x float> %vbfmmla1.i
+}
+
+; CHECK-LABEL: test_vbfmlalbq_f32
+; CHECK: vfmab.bf16  q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+define <4 x float> @test_vbfmlalbq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) local_unnamed_addr #1 {
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfmlalb1.i = tail call <4 x float>

[PATCH] D81740: [AArch32]: BFloat MatMul Intrinsics

2020-06-12 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson created this revision.
LukeGeeson added reviewers: stuij, t.p.northover, SjoerdMeijer, sdesmalen, 
fpetrogalli.
Herald added subscribers: llvm-commits, cfe-commits, hiraditya, kristof.beyls.
Herald added projects: clang, LLVM.
LukeGeeson added a parent revision: D81486: [ARM][BFloat] Implement lowering of 
bf16 load/store intrinsics.

This patch upstreams support for BFloat Matrix Multiplication Intrinsics
and Code Generation from __bf16 to AArch32. This includes IR intrinsics. 
Unittests are
provided as needed.

This patch is part of a series implementing the Bfloat16 extension of
the
Armv8.6-a architecture, as detailed here:

https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a

The bfloat type, and its properties are specified in the Arm
Architecture
Reference Manual:

https://developer.arm.com/docs/ddi0487/latest/arm-architecture-reference-manual-armv8-for-armv8-a-architecture-profile

The following people contributed to this patch:

- Luke Geeson
- Momchil Velikov
- Mikhail Maltsev
- Luke Cheeseman


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D81740

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/arm-bf16-dotprod-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARMInstrNEON.td
  llvm/test/CodeGen/ARM/arm-bf16-dotprod-intrinsics.ll

Index: llvm/test/CodeGen/ARM/arm-bf16-dotprod-intrinsics.ll
===
--- /dev/null
+++ llvm/test/CodeGen/ARM/arm-bf16-dotprod-intrinsics.ll
@@ -0,0 +1,164 @@
+; RUN: llc -mtriple armv8.6a-arm-none-eabi  -mattr=+bf16 -float-abi=hard %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BF16
+; RUN: llc -mtriple armv8.6a-arm-none-eabi  -mattr=+bf16 -float-abi=hard %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NO-BF16
+
+; CHECK-LABEL: test_vbfdot_f32
+; CHECK-BF16: vdot.bf16   d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK-NO-BF16: vdot.bf16   d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+define arm_aapcs_vfpcc <2 x float> @test_vbfdot_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) local_unnamed_addr #0 {
+entry:
+  %0 = bitcast <4 x bfloat> %a to <8 x i8>
+  %1 = bitcast <4 x bfloat> %b to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.arm.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %0, <8 x i8> %1) #3
+  ret <2 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdotq_f32
+; CHECK-BF16: vdot.bf16   q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+; CHECK-NO-BF16: vdot.bf16   q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+define <4 x float> @test_vbfdotq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) local_unnamed_addr #1 {
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.arm.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1) #3
+  ret <4 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdot_lane_f32
+; CHECK-BF16: vdot.bf16   d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[0]
+; CHECK-NO-BF16: vdot.bf16   d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[0]
+define <2 x float> @test_vbfdot_lane_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) local_unnamed_addr #0 {
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <2 x i32> zeroinitializer
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.arm.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2) #3
+  ret <2 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdotq_laneq_f32
+; CHECK-BF16: vdup.32 q{{[0-9]+}}, d{{[0-9]+}}[1]
+; CHECK-BF16: vdot.bf16   q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+; CHECK-NO-BF16: vdot.bf16   q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+define <4 x float> @test_vbfdotq_laneq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) local_unnamed_addr #1 {
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> 
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.arm.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2) #3
+  ret <4 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdot_laneq_f32
+; CHECK-BF16: vdot.bf16   d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[1]
+; CHECK-NO-BF16: vdot.bf16   d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[1]
+define <2 x float> @test_vbfdot_laneq_f32(<2 x float> %r, <4 x bfloat> %a, <8 x bfloat> %b) local_unnamed_addr #1 {
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> 
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float>

[PATCH] D80716: [AArch64]: BFloat Load/Store Intrinsics

2020-06-12 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 270387.
LukeGeeson added a comment.

- removed unnecessary contents of test


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80716/new/

https://reviews.llvm.org/D80716

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-bf16-dotprod-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll

Index: llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64-arm-none-eabi  -mattr=+bf16 %s -o - | FileCheck %s
+
+define <2 x float> @test_vbfdot_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdot_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.2s, v1.4h, v2.4h
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <4 x bfloat> %a to <8 x i8>
+  %1 = bitcast <4 x bfloat> %b to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %0, <8 x i8> %1)
+  ret <2 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfdotq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdotq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.4s, v1.8h, v2.8h
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1)
+  ret <4 x float> %vbfdot1.i
+}
+
+define <2 x float> @test_vbfdot_lane_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdot_lane_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK:bfdot v0.2s, v1.4h, v2.2h[0]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <2 x i32> zeroinitializer
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2)
+  ret <2 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfdotq_laneq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdotq_laneq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.4s, v1.8h, v2.2h[3]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> 
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2)
+  ret <4 x float> %vbfdot1.i
+}
+
+define <2 x float> @test_vbfdot_laneq_f32(<2 x float> %r, <4 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdot_laneq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.2s, v1.4h, v2.2h[3]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> 
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2)
+  ret <2 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfdotq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdotq_lane_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK:bfdot v0.4s, v1.8h, v2.2h[0]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> zeroinitializer
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2)
+  ret <4 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfmmlaq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfmmlaq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfmmla v0.4s, v1.8h, v2.8h
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfmmla1.i = tail call <4 x float> @llvm.aarch64.neon.bfmmla.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1)
+  ret <4 x float> %vbfmmla1.i
+}
+
+define <4 x float> @test_vbfmlalbq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfmlalbq_f32:
+;

[PATCH] D80716: [AArch64]: BFloat Load/Store Intrinsics

2020-06-10 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson marked 2 inline comments as done.
LukeGeeson added inline comments.



Comment at: llvm/test/CodeGen/AArch64/aarch64-bf16-ldst-intrinsics.ll:264
+; Function Attrs: argmemonly nounwind readonly
+declare { <8 x bfloat>, <8 x bfloat> } 
@llvm.aarch64.neon.ld2lane.v8bf16.p0i8(<8 x bfloat>, <8 x bfloat>, i64, i8*) #3
+

SjoerdMeijer wrote:
> LukeGeeson wrote:
> > arsenm wrote:
> > > Why is the IR type name bfloat and not bfloat16?
> > The naming for the IR type was agreed upon here after quite a big 
> > discussion. 
> > https://reviews.llvm.org/D78190
> I regret very much that I didn't notice this earlier... I.e., I noticed this 
> in D76077 and wrote that I am relatively unhappy about this (I think I 
> mentioned this on another ticket too).
> Because like @arsenm , I would expect the IR type name to be bfloat16.
> 
> Correct me if I am wrong, but I don't see a big discussion about this in 
> D78190. I only see 1 or 2 comments about `BFloat` vs `Bfloat`.
I cannot see a discussion about the IR type name per-se but I can see you were 
both involved in the discussion more generally.

I am concerned that this patch is the wrong place to discuss such issues, and 
that we should bring this up in a more appropriate place as you mention so that 
this patch isn't held back.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80716/new/

https://reviews.llvm.org/D80716



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80716: [AArch64]: BFloat Load/Store Intrinsics

2020-06-10 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson marked 2 inline comments as done.
LukeGeeson added inline comments.



Comment at: llvm/test/CodeGen/AArch64/aarch64-bf16-ldst-intrinsics.ll:264
+; Function Attrs: argmemonly nounwind readonly
+declare { <8 x bfloat>, <8 x bfloat> } 
@llvm.aarch64.neon.ld2lane.v8bf16.p0i8(<8 x bfloat>, <8 x bfloat>, i64, i8*) #3
+

arsenm wrote:
> Why is the IR type name bfloat and not bfloat16?
The naming for the IR type was agreed upon here after quite a big discussion. 
https://reviews.llvm.org/D78190


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80716/new/

https://reviews.llvm.org/D80716



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80752: [AArch64]: BFloat MatMul Intrinsics

2020-06-10 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 269833.
LukeGeeson added a comment.

addressed review comments


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80752/new/

https://reviews.llvm.org/D80752

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-bf16-dotprod-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll

Index: llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64-arm-none-eabi  -mattr=+bf16 %s -o - | FileCheck %s
+
+define <2 x float> @test_vbfdot_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdot_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.2s, v1.4h, v2.4h
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <4 x bfloat> %a to <8 x i8>
+  %1 = bitcast <4 x bfloat> %b to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %0, <8 x i8> %1)
+  ret <2 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfdotq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdotq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.4s, v1.8h, v2.8h
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1)
+  ret <4 x float> %vbfdot1.i
+}
+
+define <2 x float> @test_vbfdot_lane_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdot_lane_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK:bfdot v0.2s, v1.4h, v2.2h[0]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <2 x i32> zeroinitializer
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2)
+  ret <2 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfdotq_laneq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdotq_laneq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.4s, v1.8h, v2.2h[3]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> 
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2)
+  ret <4 x float> %vbfdot1.i
+}
+
+define <2 x float> @test_vbfdot_laneq_f32(<2 x float> %r, <4 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdot_laneq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.2s, v1.4h, v2.2h[3]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> 
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2)
+  ret <2 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfdotq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdotq_lane_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK:bfdot v0.4s, v1.8h, v2.2h[0]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> zeroinitializer
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2)
+  ret <4 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfmmlaq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfmmlaq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfmmla v0.4s, v1.8h, v2.8h
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfmmla1.i = tail call <4 x float> @llvm.aarch64.neon.bfmmla.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1)
+  ret <4 x float> %vbfmmla1.i
+}
+
+define <4 x float> @test_vbfmlalbq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfmlalbq_f32:
+; CHECK:   //

[PATCH] D80716: [AArch64]: BFloat Load/Store Intrinsics

2020-06-09 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

In D80716#2082356 , @LukeGeeson wrote:

> In D80716#2074883 , @stuij wrote:
>
> > In D80716#2073251 , @LukeGeeson 
> > wrote:
> >
> > > Besides from rebasing to get @pratlucas changes upstream.
> > >
> > > @stuij  please could you confirm if you are happy with this, so I can 
> > > merge
> >
> >
> > Hi Luke,
> >
> > For the backend tests it would be good if you would use `CHECK-NEXT` from 
> > label to ret, like I believe you did in the other patch, using 
> > `-asm-verbose=0` to get rid of the cruft.
>
>
> Similar to my other comment in the [[ https://reviews.llvm.org/D80752 | other 
>  ]]patch:
>
> This isn't how to get rid of `kill` statements. In particular if you pass 
> `-asm-verbose=0` to `llc` in the `RUN` statement then no `CHECK`s are 
> generated, let alone `kill` statements.
>
> Instead to get this desired result you run `llc` without that argument, and 
> then manually remove these unnecessary `kill` lines. This is what I have done 
> and this should fix this. Patch incoming


Further, you cannot use `CHECK-NEXT` if your test function contains such a 
`kill` statement, unless you manually remove it, and use `CHECK` in place (it 
fails when running FileCheck as it sees such lines in the output and hence 
check-next fails if it doesn't expect it). This is just something we must 
balance if we want clear tests and direct 1-1 correspondence with the result. 
I've used `CHECK-NEXT` where I can, but `CHECK` where I must


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80716/new/

https://reviews.llvm.org/D80716



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80716: [AArch64]: BFloat Load/Store Intrinsics

2020-06-09 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

In D80716#2074883 , @stuij wrote:

> In D80716#2073251 , @LukeGeeson 
> wrote:
>
> > Besides from rebasing to get @pratlucas changes upstream.
> >
> > @stuij  please could you confirm if you are happy with this, so I can merge
>
>
> Hi Luke,
>
> For the backend tests it would be good if you would use `CHECK-NEXT` from 
> label to ret, like I believe you did in the other patch, using 
> `-asm-verbose=0` to get rid of the cruft.

Similar to my other comment in the [[ https://reviews.llvm.org/D80752 | other  
]]patch:

This isn't how to get rid of `kill` statements. In particular if you pass 
`-asm-verbose=0` to `llc` in the `RUN` statement then no `CHECK`s are 
generated, let alone `kill` statements.

Instead to get this desired result you run `llc` without that argument, and 
then manually remove these unnecessary `kill` lines. This is what I have done 
and this should fix this. Patch incoming

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80716/new/

https://reviews.llvm.org/D80716

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80752: [AArch64]: BFloat MatMul Intrinsics

2020-06-09 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

In D80752#2074882 , @stuij wrote:

> For the backend tests, I suggest using `-asm-verbose=0` with llc to only 
> print instructions and get rid of `// kill: ..` and friends. Use 
> `update_cc_test_checks.py` again to regenerate the testing.

This isn't how to get rid of `kill` statements. In particular if you pass 
`-asm-verbose=0` to `llc` in the `RUN` statement then no `CHECK`s are 
generated, let alone `kill` statements.

Instead to get this desired result you run `llc` without that argument, and 
then manually remove these unnecessary `kill` lines. This is what I have done 
and this should fix this. Patch incoming

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80752/new/

https://reviews.llvm.org/D80752

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80752: [AArch64]: BFloat MatMul Intrinsics

2020-06-04 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 268520.
LukeGeeson added a comment.

- used `update_cc_test_checks.py` to generate correct checks


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80752/new/

https://reviews.llvm.org/D80752

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-bf16-dotprod-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll

Index: llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
@@ -0,0 +1,180 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64-arm-none-eabi  -mattr=+bf16 %s -o - | FileCheck %s
+
+define <2 x float> @test_vbfdot_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdot_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.2s, v1.4h, v2.4h
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <4 x bfloat> %a to <8 x i8>
+  %1 = bitcast <4 x bfloat> %b to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %0, <8 x i8> %1)
+  ret <2 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfdotq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdotq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.4s, v1.8h, v2.8h
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1)
+  ret <4 x float> %vbfdot1.i
+}
+
+define <2 x float> @test_vbfdot_lane_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdot_lane_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:bfdot v0.2s, v1.4h, v2.2h[0]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <2 x i32> zeroinitializer
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2)
+  ret <2 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfdotq_laneq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdotq_laneq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.4s, v1.8h, v2.2h[3]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> 
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2)
+  ret <4 x float> %vbfdot1.i
+}
+
+define <2 x float> @test_vbfdot_laneq_f32(<2 x float> %r, <4 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdot_laneq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.2s, v1.4h, v2.2h[3]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> 
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2)
+  ret <2 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfdotq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdotq_lane_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:bfdot v0.4s, v1.8h, v2.2h[0]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> zeroinitializer
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2)
+  ret <4 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfmmlaq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfmmlaq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfmmla v0.4s, v1.8h, v2.8h
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfmmla1.i = tail call <4 x float> @llvm.aarch64.neon.bfmmla.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1)
+  ret <4 x float> %vbfmmla1.i

[PATCH] D80716: [AArch64]: BFloat Load/Store Intrinsics

2020-06-04 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

Besides from rebasing to get @pratlucas changes upstream.

@stuij  please could you confirm if you are happy with this, so I can merge


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80716/new/

https://reviews.llvm.org/D80716



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80752: [AArch64]: BFloat MatMul Intrinsics

2020-06-03 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 268165.
LukeGeeson added a comment.

ran `llvm/utils/update_llc_test_checks.py` on test to get proper `CHECK`s


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80752/new/

https://reviews.llvm.org/D80752

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-bf16-dotprod-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll

Index: llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
@@ -0,0 +1,180 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64-arm-none-eabi  -mattr=+bf16 %s -o - | FileCheck %s
+
+define <2 x float> @test_vbfdot_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdot_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.2s, v1.4h, v2.4h
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <4 x bfloat> %a to <8 x i8>
+  %1 = bitcast <4 x bfloat> %b to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %0, <8 x i8> %1)
+  ret <2 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfdotq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdotq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.4s, v1.8h, v2.8h
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1)
+  ret <4 x float> %vbfdot1.i
+}
+
+define <2 x float> @test_vbfdot_lane_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdot_lane_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:bfdot v0.2s, v1.4h, v2.2h[0]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <2 x i32> zeroinitializer
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2)
+  ret <2 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfdotq_laneq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdotq_laneq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.4s, v1.8h, v2.2h[3]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> 
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2)
+  ret <4 x float> %vbfdot1.i
+}
+
+define <2 x float> @test_vbfdot_laneq_f32(<2 x float> %r, <4 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdot_laneq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfdot v0.2s, v1.4h, v2.2h[3]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> 
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2)
+  ret <2 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfdotq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) {
+; CHECK-LABEL: test_vbfdotq_lane_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:// kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:bfdot v0.4s, v1.8h, v2.2h[0]
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> zeroinitializer
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2)
+  ret <4 x float> %vbfdot1.i
+}
+
+define <4 x float> @test_vbfmmlaq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CHECK-LABEL: test_vbfmmlaq_f32:
+; CHECK:   // %bb.0: // %entry
+; CHECK-NEXT:bfmmla v0.4s, v1.8h, v2.8h
+; CHECK-NEXT:ret
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfmmla1.i = tail call <4 x float> @llvm.aarch64.neon.bfmmla.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1)
+  ret <4 x float>

[PATCH] D80752: [AArch64]: BFloat MatMul Intrinsics

2020-06-03 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson marked an inline comment as done.
LukeGeeson added inline comments.



Comment at: llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll:4
+; CHECK-LABEL: test_vbfdot_f32
+; CHECK: bfdot   v0.2s, v1.4h, v2.4h
+define <2 x float> @test_vbfdot_f32(<2 x float> %r, <4 x bfloat> %a, <4 x 
bfloat> %b) {

miyuki wrote:
> LukeGeeson wrote:
> > LukeGeeson wrote:
> > > miyuki wrote:
> > > > Would it make sense to check the whole body of the compiled function?
> > > Oops sorry, having all kinds of issues with my commit history here, give 
> > > me a moment to address this
> > I would say it's not worth testing the whole function here, the only code 
> > emitted for each is the instruction mentioned in the `CHECK` and a `ret` 
> > surrounded by lot's of compiler labels and directives that we don't need to 
> > test here
> I meant, just the code from the first BB label to ret (inclusive), without 
> directives. I suggest using `llvm/utils/update_llc_test_checks.py` to 
> generate the checks.
Hopefully this is everything now, please let me know if there is anything else 
:) 


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80752/new/

https://reviews.llvm.org/D80752



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80752: [AArch64]: BFloat MatMul Intrinsics

2020-06-02 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson marked an inline comment as done.
LukeGeeson added inline comments.



Comment at: llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll:4
+; CHECK-LABEL: test_vbfdot_f32
+; CHECK: bfdot   v0.2s, v1.4h, v2.4h
+define <2 x float> @test_vbfdot_f32(<2 x float> %r, <4 x bfloat> %a, <4 x 
bfloat> %b) {

LukeGeeson wrote:
> miyuki wrote:
> > Would it make sense to check the whole body of the compiled function?
> Oops sorry, having all kinds of issues with my commit history here, give me a 
> moment to address this
I would say it's not worth testing the whole function here, the only code 
emitted for each is the instruction mentioned in the `CHECK` and a `ret` 
surrounded by lot's of compiler labels and directives that we don't need to 
test here


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80752/new/

https://reviews.llvm.org/D80752



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80752: [AArch64]: BFloat MatMul Intrinsics

2020-06-02 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson marked an inline comment as done.
LukeGeeson added inline comments.



Comment at: llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll:4
+; CHECK-LABEL: test_vbfdot_f32
+; CHECK: bfdot   v0.2s, v1.4h, v2.4h
+define <2 x float> @test_vbfdot_f32(<2 x float> %r, <4 x bfloat> %a, <4 x 
bfloat> %b) {

miyuki wrote:
> Would it make sense to check the whole body of the compiled function?
Oops sorry, having all kinds of issues with my commit history here, give me a 
moment to address this


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80752/new/

https://reviews.llvm.org/D80752



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80716: [AArch64]: BFloat Load/Store Intrinsics

2020-06-02 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson marked an inline comment as done.
LukeGeeson added inline comments.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:10366
+auto Alignment = CGM.getNaturalPointeeTypeAlignment(
+E->getArg(0)->IgnoreParenCasts()->getType());
 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));

pratlucas wrote:
> LukeGeeson wrote:
> > simon_tatham wrote:
> > > What effect is this change of strategy having on the alignment 
> > > computation, for the already-supported instances of this builtin?
> > > 
> > > It //looks// to me as if `__builtin_neon_vld1_v` with (say) a `uint8_t *` 
> > > pointer argument will now compute `Alignment=1` (the natural alignment of 
> > > the pointee type), whereas it would previously have computed 
> > > `Alignment=8` (the size of the whole vector being loaded or stored).
> > > 
> > > Is that intentional? Or accidental? Or have I completely misunderstood 
> > > what's going on?
> > > 
> > > (Whichever of the three, some discussion in the commit message would be a 
> > > good idea, explaining why this change does or does not make a difference, 
> > > as appropriate.)
> > Clang was incorrectly assuming that all the pointers from which loads were 
> > being generated for vld1 intrinsics were aligned according to according to 
> > the intrinsics result type. This causes alignment faults on the code 
> > generated by the backend.
> > This fixes the issue so that alignment is based on the type of the pointer 
> > provided to as input to the intrinsic.
> >  
> > @pratlucas has done some work on this in parallel 
> > https://reviews.llvm.org/D79721 which has been approved and may overrule 
> > this particular line of code. 
> > 
> > I shall add a note to the commit message, and tentatively mark this as 
> > fixed, given it's liable to adopt the work of Lucas.
> > 
> Hi @LukeGeeson ,
> Just as a heads up, some changes to this implementations were requested on 
> D79721.
> The usage of `CGM.getNaturalPointeeTypeAlignment` and `IgnoreParenCasts()` 
> was causing problems on certain argument types, so the alignment is now 
> captured from the expression itself when it is emitted.
Thanks @pratlucas yeah I plan to rebase my code onto upstream when you have 
made those changes (and fix whatever still breaks) before I push :) 


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80716/new/

https://reviews.llvm.org/D80716



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80752: [AArch64]: BFloat MatMul Intrinsics

2020-06-02 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 267909.
LukeGeeson added a comment.

Added CHECK-NEXT lines, tested whole functions


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80752/new/

https://reviews.llvm.org/D80752

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-bf16-dotprod-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll

Index: llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
@@ -0,0 +1,149 @@
+; RUN: llc -mtriple aarch64-arm-none-eabi  -mattr=+bf16 %s -o - | FileCheck %s
+
+; CHECK-LABEL: test_vbfdot_f32
+; CHECK: bfdot   v0.2s, v1.4h, v2.4h
+define <2 x float> @test_vbfdot_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
+entry:
+  %0 = bitcast <4 x bfloat> %a to <8 x i8>
+  %1 = bitcast <4 x bfloat> %b to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %0, <8 x i8> %1)
+  ret <2 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdotq_f32
+; CHECK: bfdot   v0.4s, v1.8h, v2.8h
+define <4 x float> @test_vbfdotq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1)
+  ret <4 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdot_lane_f32
+; CHECK: bfdot v0.2s, v1.4h, v2.2h[0]
+define <2 x float> @test_vbfdot_lane_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <2 x i32> zeroinitializer
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2)
+  ret <2 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdotq_laneq_f32
+; CHECK: bfdot v0.4s, v1.8h, v2.2h[3]
+define <4 x float> @test_vbfdotq_laneq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> 
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2)
+  ret <4 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdot_laneq_f32
+; CHECK: bfdot v0.2s, v1.4h, v2.2h[3]
+define <2 x float> @test_vbfdot_laneq_f32(<2 x float> %r, <4 x bfloat> %a, <8 x bfloat> %b) {
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> 
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2)
+  ret <2 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdotq_lane_f32
+; CHECK: bfdot  v0.4s, v1.8h, v2.2h[0]
+define <4 x float> @test_vbfdotq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) {
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> zeroinitializer
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2)
+  ret <4 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfmmlaq_f32
+; CHECK: bfmmla v0.4s, v1.8h, v2.8h
+define <4 x float> @test_vbfmmlaq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfmmla1.i = tail call <4 x float> @llvm.aarch64.neon.bfmmla.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1)
+  ret <4 x float> %vbfmmla1.i
+}
+
+; CHECK-LABEL: test_vbfmlalbq_f32
+; CHECK: bfmlalb v0.4s, v1.8h, v2.8h
+define <4 x float> @test_vbfmlalbq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfmlalb1.i = tail call <4 x float> @llvm.aarch64.neon.bfmlalb.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1)
+  ret <4 x float> %vbfmlalb1.i
+}
+
+; CHECK-LABEL: test_vbfmlaltq_f32
+; CHECK: bfmlalt v0.4s, v1.8h, v2.8h
+define <4 x float> @test_vbfmlaltq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x

[PATCH] D80716: [AArch64]: BFloat Load/Store Intrinsics

2020-06-02 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

In D80716#2059977 , @stuij wrote:

> We need testing for the backend code.


@stuij  I have added `aarch64-bf16-ldst-intrinsics.ll` to test the backend. 
Please let me know if this is ok :)


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80716/new/

https://reviews.llvm.org/D80716



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80716: [AArch64]: BFloat Load/Store Intrinsics

2020-06-02 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 267896.
LukeGeeson marked 4 inline comments as done.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80716/new/

https://reviews.llvm.org/D80716

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-bf16-dotprod-intrinsics.c
  clang/test/CodeGen/aarch64-bf16-ldst-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
  llvm/test/CodeGen/AArch64/aarch64-bf16-ldst-intrinsics.ll

Index: llvm/test/CodeGen/AArch64/aarch64-bf16-ldst-intrinsics.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-bf16-ldst-intrinsics.ll
@@ -0,0 +1,826 @@
+; RUN: llc -mtriple aarch64-arm-none-eabi  -mattr=+bf16 %s -o - | FileCheck %s
+
+%struct.bfloat16x4x2_t = type { [2 x <4 x bfloat>] }
+%struct.bfloat16x8x2_t = type { [2 x <8 x bfloat>] }
+%struct.bfloat16x4x3_t = type { [3 x <4 x bfloat>] }
+%struct.bfloat16x8x3_t = type { [3 x <8 x bfloat>] }
+%struct.bfloat16x4x4_t = type { [4 x <4 x bfloat>] }
+%struct.bfloat16x8x4_t = type { [4 x <8 x bfloat>] }
+
+; CHECK-LABEL: test_vld1_bf16
+; CHECK: ldr	d0, [x0]
+define <4 x bfloat> @test_vld1_bf16(bfloat* nocapture readonly %ptr) local_unnamed_addr #0 {
+entry:
+  %0 = bitcast bfloat* %ptr to <4 x bfloat>*
+  %1 = load <4 x bfloat>, <4 x bfloat>* %0, align 2
+  ret <4 x bfloat> %1
+}
+
+; CHECK-LABEL: test_vld1q_bf16
+; CHECK: ldr	q0, [x0]
+define <8 x bfloat> @test_vld1q_bf16(bfloat* nocapture readonly %ptr) local_unnamed_addr #1 {
+entry:
+  %0 = bitcast bfloat* %ptr to <8 x bfloat>*
+  %1 = load <8 x bfloat>, <8 x bfloat>* %0, align 2
+  ret <8 x bfloat> %1
+}
+
+; CHECK-LABEL: test_vld1_lane_bf16
+; CHECK: ld1	{ v0.h }[0], [x0]
+define <4 x bfloat> @test_vld1_lane_bf16(bfloat* nocapture readonly %ptr, <4 x bfloat> %src) local_unnamed_addr #0 {
+entry:
+  %0 = load bfloat, bfloat* %ptr, align 2
+  %vld1_lane = insertelement <4 x bfloat> %src, bfloat %0, i32 0
+  ret <4 x bfloat> %vld1_lane
+}
+
+; CHECK-LABEL: test_vld1q_lane_bf16
+; CHECK: ld1	{ v0.h }[7], [x0]
+define <8 x bfloat> @test_vld1q_lane_bf16(bfloat* nocapture readonly %ptr, <8 x bfloat> %src) local_unnamed_addr #1 {
+entry:
+  %0 = load bfloat, bfloat* %ptr, align 2
+  %vld1_lane = insertelement <8 x bfloat> %src, bfloat %0, i32 7
+  ret <8 x bfloat> %vld1_lane
+}
+
+; CHECK-LABEL: test_vld1_dup_bf16
+; CHECK: ld1r	{ v0.4h }, [x0]
+define <4 x bfloat> @test_vld1_dup_bf16(bfloat* nocapture readonly %ptr) local_unnamed_addr #0 {
+entry:
+  %0 = load bfloat, bfloat* %ptr, align 2
+  %1 = insertelement <4 x bfloat> undef, bfloat %0, i32 0
+  %lane = shufflevector <4 x bfloat> %1, <4 x bfloat> undef, <4 x i32> zeroinitializer
+  ret <4 x bfloat> %lane
+}
+
+; CHECK-LABEL: test_vld1_bf16_x2
+; CHECK: ld1	{ v0.4h, v1.4h }, [x0]
+define %struct.bfloat16x4x2_t @test_vld1_bf16_x2(bfloat* %ptr) local_unnamed_addr #2 {
+entry:
+  %vld1xN = tail call { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld1x2.v4bf16.p0bf16(bfloat* %ptr)
+  %vld1xN.fca.0.extract = extractvalue { <4 x bfloat>, <4 x bfloat> } %vld1xN, 0
+  %vld1xN.fca.1.extract = extractvalue { <4 x bfloat>, <4 x bfloat> } %vld1xN, 1
+  %.fca.0.0.insert = insertvalue %struct.bfloat16x4x2_t undef, <4 x bfloat> %vld1xN.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.bfloat16x4x2_t %.fca.0.0.insert, <4 x bfloat> %vld1xN.fca.1.extract, 0, 1
+  ret %struct.bfloat16x4x2_t %.fca.0.1.insert
+}
+
+declare { <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld1x2.v4bf16.p0bf16(bfloat*) #3
+
+; CHECK-LABEL: test_vld1q_bf16_x2
+; CHECK: ld1	{ v0.8h, v1.8h }, [x0]
+define %struct.bfloat16x8x2_t @test_vld1q_bf16_x2(bfloat* %ptr) local_unnamed_addr #2 {
+entry:
+  %vld1xN = tail call { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld1x2.v8bf16.p0bf16(bfloat* %ptr)
+  %vld1xN.fca.0.extract = extractvalue { <8 x bfloat>, <8 x bfloat> } %vld1xN, 0
+  %vld1xN.fca.1.extract = extractvalue { <8 x bfloat>, <8 x bfloat> } %vld1xN, 1
+  %.fca.0.0.insert = insertvalue %struct.bfloat16x8x2_t undef, <8 x bfloat> %vld1xN.fca.0.extract, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.bfloat16x8x2_t %.fca.0.0.insert, <8 x bfloat> %vld1xN.fca.1.extract, 0, 1
+  ret %struct.bfloat16x8x2_t %.fca.0.1.insert
+}
+
+; Function Attrs: argmemonly nounwind readonly
+declare { <8 x bfloat>, <8 x bfloat> } @llvm.aarch64.neon.ld1x2.v8bf16.p0bf16(bfloat*) #3
+
+; CHECK-LABEL: test_vld1_bf16_x3
+; CHECK: ld1	{ v0.4h, v1.4h, v2.4h }, [x0]
+define %struct.bfloat16x4x3_t @test_vld1_bf16_x3(bfloat* %ptr) local_unnamed_addr #2 {
+entry:
+  %vld1xN = tail call { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } @llvm.aarch64.neon.ld1x3.v4bf16.p0bf16(bfloat* %ptr)
+  %vld1xN.fca.0.extract = extractvalue { <4 x bfloat>, <4 x bfloat>, <4 x bfloat> } %vld1xN, 0
+  %vld1xN.fca.1.extract =

[PATCH] D80716: [AArch64]: BFloat Load/Store Intrinsics

2020-06-02 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a subscriber: pratlucas.
LukeGeeson added inline comments.



Comment at: clang/lib/CodeGen/CGBuiltin.cpp:10366
+auto Alignment = CGM.getNaturalPointeeTypeAlignment(
+E->getArg(0)->IgnoreParenCasts()->getType());
 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));

simon_tatham wrote:
> What effect is this change of strategy having on the alignment computation, 
> for the already-supported instances of this builtin?
> 
> It //looks// to me as if `__builtin_neon_vld1_v` with (say) a `uint8_t *` 
> pointer argument will now compute `Alignment=1` (the natural alignment of the 
> pointee type), whereas it would previously have computed `Alignment=8` (the 
> size of the whole vector being loaded or stored).
> 
> Is that intentional? Or accidental? Or have I completely misunderstood what's 
> going on?
> 
> (Whichever of the three, some discussion in the commit message would be a 
> good idea, explaining why this change does or does not make a difference, as 
> appropriate.)
Clang was incorrectly assuming that all the pointers from which loads were 
being generated for vld1 intrinsics were aligned according to according to the 
intrinsics result type. This causes alignment faults on the code generated by 
the backend.
This fixes the issue so that alignment is based on the type of the pointer 
provided to as input to the intrinsic.
 
@pratlucas has done some work on this in parallel 
https://reviews.llvm.org/D79721 which has been approved and may overrule this 
particular line of code. 

I shall add a note to the commit message, and tentatively mark this as fixed, 
given it's liable to adopt the work of Lucas.




Comment at: clang/test/CodeGen/aarch64-bf16-ldst-intrinsics.c:36
+// CHECK32: ret <4 x bfloat> %vld1_lane
+
+bfloat16x8_t test_vld1q_lane_bf16(bfloat16_t const *ptr, bfloat16x8_t src) {

labrinea wrote:
> CHECK-NEXT or CHECK-DAG are preferable for sequences.
Added to be consistent with the rest of the file (ie no CHECK-NEXT, but 
CHECK32/64)



Comment at: clang/test/CodeGen/aarch64-bf16-ldst-intrinsics.c:180
+// %.fca.0.2.insert = insertvalue %struct.bfloat16x4x3_t %.fca.0.1.insert, <4 
x bfloat> %vld3_lane.fca.2.extract, 0, 2
+
+bfloat16x8x3_t test_vld3q_lane_bf16(bfloat16_t const *ptr, bfloat16x8x3_t src) 
{

labrinea wrote:
> where are the check lines?
Added to be consistent with the rest of the file


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80716/new/

https://reviews.llvm.org/D80716



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80928: [BFloat] Add convert/copy instrinsic support

2020-06-01 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

A few nits, it looks largely uncontroversial but I'll let someone else give the 
ok since an external eye is always good :)




Comment at: clang/utils/TableGen/NeonEmitter.cpp:1066
 
   if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" ||
+  Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" ||

I always wondered why we need special treatment for cvt intrinsics here, is 
there a better place to put this?



Comment at: llvm/test/CodeGen/ARM/bf16-intrinsics-nofp16.ll:1
+; RUN: llc < %s -verify-machineinstrs -mtriple=armv8.6a-arm-none-eabi 
-mattr=+neon -mattr=+bf16 | FileCheck %s
+

These files are very small, it's almost not worth having them on their own, can 
you merge them into 1 file unless we need 2 for a specific reason



Comment at: llvm/test/CodeGen/ARM/bf16-intrinsics.ll:1
+; RUN: llc < %s -verify-machineinstrs -mtriple=armv8.6a-arm-none-eabi 
-mattr=+fullfp16 -mattr=+neon -mattr=+bf16 | FileCheck %s
+

same here


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D80928/new/

https://reviews.llvm.org/D80928



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D80752: [AArch64]: BFloat MatMul Intrinsics

2020-05-28 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson created this revision.
LukeGeeson added reviewers: SjoerdMeijer, t.p.northover, sdesmalen, labrinea.
Herald added subscribers: llvm-commits, cfe-commits, danielkiss, hiraditya, 
kristof.beyls.
Herald added projects: clang, LLVM.
LukeGeeson added a parent revision: D80716: [AArch64]: BFloat Load/Store 
Intrinsics

This patch upstreams support for BFloat Matrix Multiplication Intrinsics
and Code Generation from __bf16 to AArch64. This includes IR intrinsics. 
Unittests are
provided as needed. AArch32 Intrinsics + CodeGen will come after this
patch.

This patch is part of a series implementing the Bfloat16 extension of
the
Armv8.6-a architecture, as detailed here:

https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a

The bfloat type, and its properties are specified in the Arm
Architecture
Reference Manual:

https://developer.arm.com/docs/ddi0487/latest/arm-architecture-reference-manual-armv8-for-armv8-a-architecture-profile

The following people contributed to this patch:

- Luke Geeson
- Momchil Velikov
- Mikhail Maltsev
- Luke Cheeseman


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D80752

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-bf16-dotprod-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll

Index: llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
@@ -0,0 +1,149 @@
+; RUN: llc -mtriple aarch64-arm-none-eabi  -mattr=+bf16 %s -o - | FileCheck %s
+
+; CHECK-LABEL: test_vbfdot_f32
+; CHECK: bfdot   v0.2s, v1.4h, v2.4h
+define <2 x float> @test_vbfdot_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
+entry:
+  %0 = bitcast <4 x bfloat> %a to <8 x i8>
+  %1 = bitcast <4 x bfloat> %b to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %0, <8 x i8> %1)
+  ret <2 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdotq_f32
+; CHECK: bfdot   v0.4s, v1.8h, v2.8h
+define <4 x float> @test_vbfdotq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+entry:
+  %0 = bitcast <8 x bfloat> %a to <16 x i8>
+  %1 = bitcast <8 x bfloat> %b to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %0, <16 x i8> %1)
+  ret <4 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdot_lane_f32
+; CHECK: bfdot v0.2s, v1.4h, v2.2h[0]
+define <2 x float> @test_vbfdot_lane_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <2 x i32> zeroinitializer
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2)
+  ret <2 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdotq_laneq_f32
+; CHECK: bfdot v0.4s, v1.8h, v2.2h[3]
+define <4 x float> @test_vbfdotq_laneq_f32(<4 x float> %r, <8 x bfloat> %a, <8 x bfloat> %b) {
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> 
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2)
+  ret <4 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdot_laneq_f32
+; CHECK: bfdot v0.2s, v1.4h, v2.2h[3]
+define <2 x float> @test_vbfdot_laneq_f32(<2 x float> %r, <4 x bfloat> %a, <8 x bfloat> %b) {
+entry:
+  %0 = bitcast <8 x bfloat> %b to <4 x float>
+  %shuffle = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> 
+  %1 = bitcast <4 x bfloat> %a to <8 x i8>
+  %2 = bitcast <2 x float> %shuffle to <8 x i8>
+  %vbfdot1.i = tail call <2 x float> @llvm.aarch64.neon.bfdot.v2f32.v8i8(<2 x float> %r, <8 x i8> %1, <8 x i8> %2)
+  ret <2 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfdotq_lane_f32
+; CHECK: bfdot  v0.4s, v1.8h, v2.2h[0]
+define <4 x float> @test_vbfdotq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) {
+entry:
+  %0 = bitcast <4 x bfloat> %b to <2 x float>
+  %shuffle = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> zeroinitializer
+  %1 = bitcast <8 x bfloat> %a to <16 x i8>
+  %2 = bitcast <4 x float> %shuffle to <16 x i8>
+  %vbfdot1.i = tail call <4 x float> @llvm.aarch64.neon.bfdot.v4f32.v16i8(<4 x float> %r, <16 x i8> %1, <16 x i8> %2)
+  ret <4 x float> %vbfdot1.i
+}
+
+; CHECK-LABEL: test_vbfmmlaq_f32
+; CHECK: bfmmla v0.4s, v1.8h, v2.8h
+define <4 x float>

[PATCH] D80716: [AArch64]: BFloat Load/Store Intrinsics

2020-05-28 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson created this revision.
LukeGeeson added reviewers: fpetrogalli, SjoerdMeijer.
Herald added subscribers: llvm-commits, cfe-commits, danielkiss, hiraditya, 
kristof.beyls.
Herald added projects: clang, LLVM.
LukeGeeson added a parent revision: D79869: [clang][BFloat] Add reinterpret 
cast intrinsics.

This patch upstreams support for ld / st variants of BFloat intrinsics
in from __bf16 to AArch64. This includes IR intrinsics. Unittests are
provided as needed.

This patch is part of a series implementing the Bfloat16 extension of
the
Armv8.6-a architecture, as detailed here:

https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a

The bfloat type, and its properties are specified in the Arm
Architecture
Reference Manual:

https://developer.arm.com/docs/ddi0487/latest/arm-architecture-reference-manual-armv8-for-armv8-a-architecture-profile

The following people contributed to this patch:

- Luke Geeson
- Momchil Velikov
- Luke Cheeseman


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D80716

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-bf16-ldst-intrinsics.c
  clang/test/Sema/aarch64-bf16-ldst-intrinsics.c
  llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.td

Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2233,6 +2233,7 @@
   defm : VecROLoadPat;
   defm : VecROLoadPat;
   defm : VecROLoadPat;
+  defm : VecROLoadPat;
 }
 
 defm : VecROLoadPat;
@@ -2247,6 +2248,7 @@
   defm : VecROLoadPat;
   defm : VecROLoadPat;
   defm : VecROLoadPat;
+  defm : VecROLoadPat;
   defm : VecROLoadPat;
 }
 } // AddedComplexity = 10
@@ -2382,6 +2384,8 @@
 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
   def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+  def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
 }
 def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
   (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
@@ -2405,6 +2409,8 @@
 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
   def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+  def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
 }
 def : Pat<(f128  (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
   (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
@@ -2903,6 +2909,7 @@
   defm : VecROStorePat;
   defm : VecROStorePat;
   defm : VecROStorePat;
+  defm : VecROStorePat;
 }
 
 defm : VecROStorePat;
@@ -2918,6 +2925,7 @@
   defm : VecROStorePat;
   defm : VecROStorePat;
   defm : VecROStorePat;
+  defm : VecROStorePat;
 }
 } // AddedComplexity = 10
 
@@ -3010,6 +3018,9 @@
   def : Pat<(store (v4f16 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+  def : Pat<(store (v4bf16 FPR64:$Rt),
+   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
 }
 
 // Match all store 128 bits width whose type is compatible with FPR128
@@ -3040,6 +3051,9 @@
   def : Pat<(store (v8f16 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+  def : Pat<(store (v8bf16 FPR128:$Rt),
+   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
 }
 
 // truncstore i64
@@ -3147,6 +3161,9 @@
   def : Pat<(store (v4f16 FPR64:$Rt),
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(store (v4bf16 FPR64:$Rt),
+   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+(STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
 }
 
 // Match all store 128 bits width whose type is compatible with FPR128
@@ -3179,6 +3196,9 @@
   def : Pat<(store (v8f16 FPR128:$Rt),
(am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(store (v8bf16 FPR128:$Rt),
+   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+(STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
 }
 
 } // AddedComplexity = 10
@@ -6316,6 +6336,10 @@
   (LD1Rv4h GPR64sp:$Rn)>;
 def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn,
   (LD1Rv8h GPR64sp:$Rn)>;
+def : Pat<(v4bf16 (AArch64dup

[PATCH] D76077: [ARM] Add __bf16 as new Bfloat16 C Type

2020-05-21 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added inline comments.



Comment at: clang/test/CodeGen/arm-mangle-16bit-float.cpp:4
+
+// CHECK64: define {{.*}}void @_Z3foou6__bf16(half %b)
+// CHECK32: define {{.*}}void @_Z3foou6__bf16(i32 %b.coerce)

SjoerdMeijer wrote:
> LukeGeeson wrote:
> > craig.topper wrote:
> > > How can bfloat16 be passed as half? Don't they have a different format?
> > see the above comment about soft-abis
> Probably I am bit confused too now... are the three possible types that we 
> are expecing not bfloat, i32, or i16? 
Hi Sjoerd, Good spot here I forgot to add a default case somewhere, which means 
AArch32 didn't observe `-mfloat-abi softfp` by default - and hence used bfloat 
where i32 was expected. 

I have a local patch for this and will send to Ties to merge into this one :) 


We're not sure what you mean by i16 however?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76077/new/

https://reviews.llvm.org/D76077



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D79710: [clang][BFloat] add create/set/get/dup intrinsics

2020-05-18 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

Can you update the commit message in this differential as well please? Same for 
the other commits :)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79710/new/

https://reviews.llvm.org/D79710



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D76077: [ARM] Add __bf16 as new Bfloat16 C Type

2020-05-13 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

I authored some of the code for this patch, please update the commit message 
with my name on the list :)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76077/new/

https://reviews.llvm.org/D76077



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D79710: [clang][BFloat] add create/set/get/dup intrinsics

2020-05-13 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

I was an author for part of this patch. Please add all authors as a list of 
authors to this commit message. Thanks!

As an aside, it would be worth doing this for all the patches in this series


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79710/new/

https://reviews.llvm.org/D79710



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D79869: [clang][BFloat] Add reinterpret cast intrinsics

2020-05-13 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

As I was an author of this patch (among others) please could you add a list of 
authors to the commit message. Thanks!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D79869/new/

https://reviews.llvm.org/D79869



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D77872: [AArch32] Armv8.6-a Matrix Mult Assembly + Intrinsics

2020-04-25 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson marked an inline comment as done.
LukeGeeson added inline comments.

Comment at: llvm/lib/Target/ARM/ARMInstrNEON.td:4846
+  VDOT {
+  let hasNoSchedulingInfo = 1;
+

dmgreen wrote:
> I don't think that hasNoSchedulingInfo is necessarily the best way to handle 
> this. That flag is intended for instructions that will never be scheduled, 
> like Pseudo instructions.
> 
> If you are running into "Complete Schedule" problems, they might need 
> HasMatMulInt8 added to the list of unsupported features instead.
Since there are no 8.6a cpus in llvm that support this extension, the default 
behaviour is to use Cortex-A57 scheduling - this also has no notion of 8.6a 
matmul.

This falls back to SchedMachineModel in `llvm/lib/Target/ARM/ARMScheduleA57.td` 
and in particular UnsupportedFeatures would be a candidate place to put 
unsupported features like matmul. I took issue with putting all new extensions 
here because there should be a separation of concerns between a particular 
scheduling model, and supporting new behaviour (which could go in a generic 
catch all location that might be slightly more informative ).

Did you have something in particular in mind?

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77872/new/

https://reviews.llvm.org/D77872

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D77540: [PATCH] [ARM]: Armv8.6-a Matrix Mul Asm and Intrinsics Support

2020-04-24 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson abandoned this revision.
LukeGeeson added a comment.

Sub-issues all closed


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77540/new/

https://reviews.llvm.org/D77540



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D77875: [ARM] Armv8.6-a Matrix Mul cmd line support

2020-04-24 Thread Luke Geeson via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rG740a1dd050ee: [ARM] Armv8.6-a Matrix Mul cmd line support 
(authored by LukeGeeson).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77875/new/

https://reviews.llvm.org/D77875

Files:
  clang/lib/Driver/ToolChains/Arch/AArch64.cpp
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/arm-matrix-multiply.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/include/llvm/Support/AArch64TargetParser.h
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/include/llvm/Support/ARMTargetParser.h
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -636,6 +636,7 @@
   {"maverick", "maverick", nullptr, nullptr},
   {"xscale", "noxscale", nullptr, nullptr},
   {"sb", "nosb", "+sb", "-sb"},
+  {"i8mm", "noi8mm", "+i8mm", "-i8mm"},
   {"mve", "nomve", "+mve", "-mve"},
   {"mve.fp", "nomve.fp", "+mve.fp", "-mve.fp"}};
 
@@ -1230,7 +1231,10 @@
   {"tme", "notme", "+tme", "-tme"},
   {"ssbs", "nossbs", "+ssbs", "-ssbs"},
   {"sb", "nosb", "+sb", "-sb"},
-  {"predres", "nopredres", "+predres", "-predres"}
+  {"predres", "nopredres", "+predres", "-predres"},
+  {"i8mm", "noi8mm", "+i8mm", "-i8mm"},
+  {"f32mm", "nof32mm", "+f32mm", "-f32mm"},
+  {"f64mm", "nof64mm", "+f64mm", "-f64mm"},
 };
 
   for (unsigned i = 0; i < array_lengthof(ArchExt); i++) {
Index: llvm/include/llvm/Support/ARMTargetParser.h
===
--- llvm/include/llvm/Support/ARMTargetParser.h
+++ llvm/include/llvm/Support/ARMTargetParser.h
@@ -47,14 +47,15 @@
   AEK_FP_DP   = 1 << 18,
   AEK_LOB = 1 << 19,
   AEK_BF16= 1 << 20,
-  AEK_CDECP0 =  1 << 21,
-  AEK_CDECP1 =  1 << 22,
-  AEK_CDECP2 =  1 << 23,
-  AEK_CDECP3 =  1 << 24,
-  AEK_CDECP4 =  1 << 25,
-  AEK_CDECP5 =  1 << 26,
-  AEK_CDECP6 =  1 << 27,
-  AEK_CDECP7 =  1 << 28,
+  AEK_I8MM= 1 << 21,
+  AEK_CDECP0 =  1 << 22,
+  AEK_CDECP1 =  1 << 23,
+  AEK_CDECP2 =  1 << 24,
+  AEK_CDECP3 =  1 << 25,
+  AEK_CDECP4 =  1 << 26,
+  AEK_CDECP5 =  1 << 27,
+  AEK_CDECP6 =  1 << 28,
+  AEK_CDECP7 =  1 << 29,
 
   // Unsupported extensions.
   AEK_OS   =1ULL << 59,
Index: llvm/include/llvm/Support/ARMTargetParser.def
===
--- llvm/include/llvm/Support/ARMTargetParser.def
+++ llvm/include/llvm/Support/ARMTargetParser.def
@@ -116,7 +116,8 @@
  ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
  (ARM::AEK_SEC| ARM::AEK_MP   | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
   ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP  | ARM::AEK_CRC  | ARM::AEK_RAS |
-  ARM::AEK_DOTPROD| ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES))
+  ARM::AEK_DOTPROD| ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES |
+  ARM::AEK_I8MM))
 ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R,
   FK_NEON_FP_ARMV8,
   (ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
@@ -171,6 +172,7 @@
 ARM_ARCH_EXT_NAME("fp16fml",  ARM::AEK_FP16FML,  "+fp16fml", "-fp16fml")
 ARM_ARCH_EXT_NAME("bf16", ARM::AEK_BF16, "+bf16","-bf16")
 ARM_ARCH_EXT_NAME("sb",   ARM::AEK_SB,   "+sb",  "-sb")
+ARM_ARCH_EXT_NAME("i8mm", ARM::AEK_I8MM, "+i8mm","-i8mm")
 ARM_ARCH_EXT_NAME("lob",  ARM::AEK_LOB,  "+lob",   "-lob")
 ARM_ARCH_EXT_NAME("cdecp0",   ARM::AEK_CDECP0,   "+cdecp0",  "-cdecp0")
 ARM_ARCH_EXT_NAME("cdecp1",   ARM::AEK_CDECP1,   "+cdecp1",  "-cdecp1")
Index: llvm/include/llvm/Support/AArch64TargetParser.h
===
--- llvm/include/llvm/Support/AArch64TargetParser.h
+++ llvm/include/llvm/Support/AArch64TargetParser.h
@@ -24,7 +24,7 @@
 namespace AArch64 {
 
 // Arch extension modifiers for CPUs.
-enum ArchExtKind : unsigned {
+enum ArchExtKind : uint64_t {
   AEK_INVALID = 0,
   AEK_NONE =1,
   AEK_CRC = 1 << 1,
@@ -57,6 +57,8 @@
   AEK_TME = 1 << 28,
   AEK_BF16 =1 << 29,
   AEK_I8MM =1 << 30,
+  AEK_F32MM =   1ULL << 31,
+  AEK_F64MM =   1ULL << 32,
 };
 
 enum class ArchKind {
Index: llvm/include/llvm/Support/AArch64TargetParser.def

[PATCH] D77872: [AArch32] Armv8.6-a Matrix Mult Assembly + Intrinsics

2020-04-24 Thread Luke Geeson via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
LukeGeeson marked an inline comment as done.
Closed by commit rG7da190512532: [AArch32] Armv8.6-a Matrix Mult Assembly + 
Intrinsics (authored by LukeGeeson).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77872/new/

https://reviews.llvm.org/D77872

Files:
  clang/lib/Basic/Targets/ARM.cpp
  clang/lib/Basic/Targets/ARM.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/arm-v8.6a-neon-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMInstrNEON.td
  llvm/lib/Target/ARM/ARMPredicates.td
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/test/CodeGen/ARM/arm-matmul.ll

Index: llvm/test/CodeGen/ARM/arm-matmul.ll
===
--- /dev/null
+++ llvm/test/CodeGen/ARM/arm-matmul.ll
@@ -0,0 +1,83 @@
+; RUN: llc -mtriple=arm-none-linux-gnu -mattr=+neon,+i8mm -float-abi=hard < %s -o -| FileCheck %s
+
+define <4 x i32> @smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+entry:
+; CHECK-LABEL: smmla.v4i32.v16i8
+; CHECK:vsmmla.s8   q0, q1, q2
+  %vmmla1.i = tail call <4 x i32> @llvm.arm.neon.smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3
+  ret <4 x i32> %vmmla1.i
+}
+
+define <4 x i32> @ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+entry:
+; CHECK-LABEL: ummla.v4i32.v16i8
+; CHECK:vummla.u8   q0, q1, q2
+  %vmmla1.i = tail call <4 x i32> @llvm.arm.neon.ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3
+  ret <4 x i32> %vmmla1.i
+}
+
+define <4 x i32> @usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+entry:
+; CHECK-LABEL: usmmla.v4i32.v16i8
+; CHECK:vusmmla.s8   q0, q1, q2
+  %vusmmla1.i = tail call <4 x i32> @llvm.arm.neon.usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3
+  ret <4 x i32> %vusmmla1.i
+}
+
+define <2 x i32> @usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
+entry:
+; CHECK-LABEL: usdot.v2i32.v8i8
+; CHECK:vusdot.s8   d0, d1, d2
+  %vusdot1.i = tail call <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) #3
+  ret <2 x i32> %vusdot1.i
+}
+
+define <2 x i32> @usdot_lane.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
+entry:
+; CHECK-LABEL: usdot_lane.v2i32.v8i8
+; CHECK:vusdot.s8   d0, d1, d2[0]
+  %0 = bitcast <8 x i8> %b to <2 x i32>
+  %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer
+  %1 = bitcast <2 x i32> %shuffle to <8 x i8>
+  %vusdot1.i = tail call <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %1) #3
+  ret <2 x i32> %vusdot1.i
+}
+
+define <2 x i32> @sudot_lane.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
+entry:
+; CHECK-LABEL: sudot_lane.v2i32.v8i8
+; CHECK:vsudot.u8   d0, d1, d2[0]
+  %0 = bitcast <8 x i8> %b to <2 x i32>
+  %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer
+  %1 = bitcast <2 x i32> %shuffle to <8 x i8>
+  %vusdot1.i = tail call <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %1, <8 x i8> %a) #3
+  ret <2 x i32> %vusdot1.i
+}
+
+define <4 x i32> @usdotq_lane.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <8 x i8> %b) {
+entry:
+; CHECK-LABEL: usdotq_lane.v4i32.v16i8
+; CHECK:vusdot.s8   q0, q1, d4[0]
+  %0 = bitcast <8 x i8> %b to <2 x i32>
+  %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> zeroinitializer
+  %1 = bitcast <4 x i32> %shuffle to <16 x i8>
+  %vusdot1.i = tail call <4 x i32> @llvm.arm.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %1) #3
+  ret <4 x i32> %vusdot1.i
+}
+
+define <4 x i32> @sudotq_lane.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <8 x i8> %b) {
+entry:
+; CHECK-LABEL: sudotq_lane.v4i32.v16i8
+; CHECK:vsudot.u8   q0, q1, d4[0]
+  %0 = bitcast <8 x i8> %b to <2 x i32>
+  %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> zeroinitializer
+  %1 = bitcast <4 x i32> %shuffle to <16 x i8>
+  %vusdot1.i = tail call <4 x i32> @llvm.arm.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %1, <16 x i8> %a) #3
+  ret <4 x i32> %vusdot1.i
+}
+
+declare <4 x i32> @llvm.arm.neon.smmla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
+declare <4 x i32> @llvm.arm.neon.ummla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
+declare <4 x i32> @llvm.arm.neon.usmmla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
+declare <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>) #2
+declare <4 x i32> @llvm.arm.neon.usdot.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
Index: llvm/lib/Target/ARM/ARMSubtarget.h
===
--- llvm/lib/Target/ARM/ARMSubtarget.h
+++ llvm/lib/Target/ARM/ARMSubtarget.h
@@ -260,6 +260,9 @@
   /// HasBF16 - True if subtarget supports BFloat16 floating point operations

[PATCH] D77871: [AArch64] Armv8.6-a Matrix Mult Assembly + Intrinsics

2020-04-24 Thread Luke Geeson via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rG832cd749131b: [AArch64] Armv8.6-a Matrix Mult Assembly + 
Intrinsics (authored by LukeGeeson).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77871/new/

https://reviews.llvm.org/D77871

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/Basic/Targets/AArch64.cpp
  clang/lib/Basic/Targets/AArch64.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-matmul.cpp
  clang/test/CodeGen/aarch64-v8.6a-neon-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/test/CodeGen/AArch64/aarch64-matmul.ll
  llvm/test/MC/AArch64/armv8.6a-simd-matmul-error.s
  llvm/test/MC/AArch64/armv8.6a-simd-matmul.s
  llvm/test/MC/Disassembler/AArch64/armv8.6a-simd-matmul.txt

Index: llvm/test/MC/Disassembler/AArch64/armv8.6a-simd-matmul.txt
===
--- /dev/null
+++ llvm/test/MC/Disassembler/AArch64/armv8.6a-simd-matmul.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc -triple=aarch64  -mattr=+i8mm -disassemble < %s   | FileCheck %s
+# RUN: llvm-mc -triple=aarch64  -mattr=+v8.6a -disassemble < %s  | FileCheck %s
+# RUN: not llvm-mc -triple=aarch64  -mattr=+v8.5a -disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOI8MM
+
+[0x01,0xa6,0x9f,0x4e]
+[0x01,0xa6,0x9f,0x6e]
+[0x01,0xae,0x9f,0x4e]
+# CHECK: smmla   v1.4s, v16.16b, v31.16b
+# CHECK: ummla   v1.4s, v16.16b, v31.16b
+# CHECK: usmmla  v1.4s, v16.16b, v31.16b
+# NOI8MM: [[@LINE-6]]:{{[0-9]+}}: warning: invalid instruction encoding
+# NOI8MM: [[@LINE-6]]:{{[0-9]+}}: warning: invalid instruction encoding
+# NOI8MM: [[@LINE-6]]:{{[0-9]+}}: warning: invalid instruction encoding
+
+[0xe3,0x9d,0x9e,0x0e]
+[0xe3,0x9d,0x9e,0x4e]
+# CHECK: usdot   v3.2s, v15.8b, v30.8b
+# CHECK: usdot   v3.4s, v15.16b, v30.16b
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
+
+[0x3f,0xf8,0xa2,0x0f]
+[0x3f,0xf8,0xa2,0x4f]
+# CHECK: usdot   v31.2s, v1.8b, v2.4b[3]
+# CHECK: usdot   v31.4s, v1.16b, v2.4b[3]
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
+
+[0x3f,0xf8,0x22,0x0f]
+[0x3f,0xf8,0x22,0x4f]
+# CHECK: sudot   v31.2s, v1.8b, v2.4b[3]
+# CHECK: sudot   v31.4s, v1.16b, v2.4b[3]
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
Index: llvm/test/MC/AArch64/armv8.6a-simd-matmul.s
===
--- /dev/null
+++ llvm/test/MC/AArch64/armv8.6a-simd-matmul.s
@@ -0,0 +1,43 @@
+// RUN: llvm-mc -triple aarch64 -show-encoding -mattr=+i8mm   < %s  | FileCheck %s
+// RUN: llvm-mc -triple aarch64 -show-encoding -mattr=+v8.6a  < %s  | FileCheck %s
+// RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=+v8.6a-i8mm < %s 2>&1 | FileCheck %s --check-prefix=NOMATMUL
+
+smmla  v1.4s, v16.16b, v31.16b
+ummla  v1.4s, v16.16b, v31.16b
+usmmla v1.4s, v16.16b, v31.16b
+// CHECK: smmla   v1.4s, v16.16b, v31.16b // encoding: [0x01,0xa6,0x9f,0x4e]
+// CHECK: ummla   v1.4s, v16.16b, v31.16b // encoding: [0x01,0xa6,0x9f,0x6e]
+// CHECK: usmmla  v1.4s, v16.16b, v31.16b // encoding: [0x01,0xae,0x9f,0x4e]
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: smmla  v1.4s, v16.16b, v31.16b
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: ummla  v1.4s, v16.16b, v31.16b
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: usmmla  v1.4s, v16.16b, v31.16b
+
+usdot v3.2s, v15.8b, v30.8b
+usdot v3.4s, v15.16b, v30.16b
+// CHECK: usdot   v3.2s, v15.8b, v30.8b   // encoding: [0xe3,0x9d,0x9e,0x0e]
+// CHECK: usdot   v3.4s, v15.16b, v30.16b // encoding: [0xe3,0x9d,0x9e,0x4e]
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: usdot v3.2s, v15.8b, v30.8b
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: usdot v3.4s, v15.16b, v30.16b
+
+usdot v31.2s, v1.8b,  v2.4b[3]
+usdot v31.4s, v1.16b, v2.4b[3]
+// CHECK: usdot   v31.2s, v1.8b, v2.4b[3] // encoding: [0x3f,0xf8,0xa2,0x0f]
+// CHECK: usdot   v31.4s, v1.16b, v2.4b[3] // encoding: [0x3f,0xf8,0xa2,0x4f]
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: usdot   v31.2s, v1.8b, v2.4b[3]
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: usdot   v31.4s, v1.16b, v2.4b[3]
+
+sudot v31.2s, v1.8b,  v2.4b[3]
+sudot v31.4s, v1.16b, v2.4b[3]
+// CHECK: sudot   v31.2s, v1.8b, v2.4b[3] // encoding: [0x3f,0xf8,0x22,0x0f]
+// CHECK: sudot   v31.4s, v1.16b, v2.4b[3] // encoding: [0x3f,0xf8,0x22,0x4f]
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: sudot   v31.2s, v1.8b,

[PATCH] D77871: [AArch64] Armv8.6-a Matrix Mult Assembly + Intrinsics

2020-04-22 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 259327.
LukeGeeson marked 5 inline comments as done.
LukeGeeson added a comment.

- fixed typos
- added sroa as mem2reg arg to reduce redundant mem accesses in tests, 
refactored test
- addressed other comments


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77871/new/

https://reviews.llvm.org/D77871

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/Basic/Targets/AArch64.cpp
  clang/lib/Basic/Targets/AArch64.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-matmul.cpp
  clang/test/CodeGen/aarch64-v8.6a-neon-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/test/CodeGen/AArch64/aarch64-matmul.ll
  llvm/test/MC/AArch64/armv8.6a-simd-matmul-error.s
  llvm/test/MC/AArch64/armv8.6a-simd-matmul.s
  llvm/test/MC/Disassembler/AArch64/armv8.6a-simd-matmul.txt

Index: llvm/test/MC/Disassembler/AArch64/armv8.6a-simd-matmul.txt
===
--- /dev/null
+++ llvm/test/MC/Disassembler/AArch64/armv8.6a-simd-matmul.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc -triple=aarch64  -mattr=+i8mm -disassemble < %s   | FileCheck %s
+# RUN: llvm-mc -triple=aarch64  -mattr=+v8.6a -disassemble < %s  | FileCheck %s
+# RUN: not llvm-mc -triple=aarch64  -mattr=+v8.5a -disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOI8MM
+
+[0x01,0xa6,0x9f,0x4e]
+[0x01,0xa6,0x9f,0x6e]
+[0x01,0xae,0x9f,0x4e]
+# CHECK: smmla   v1.4s, v16.16b, v31.16b
+# CHECK: ummla   v1.4s, v16.16b, v31.16b
+# CHECK: usmmla  v1.4s, v16.16b, v31.16b
+# NOI8MM: [[@LINE-6]]:{{[0-9]+}}: warning: invalid instruction encoding
+# NOI8MM: [[@LINE-6]]:{{[0-9]+}}: warning: invalid instruction encoding
+# NOI8MM: [[@LINE-6]]:{{[0-9]+}}: warning: invalid instruction encoding
+
+[0xe3,0x9d,0x9e,0x0e]
+[0xe3,0x9d,0x9e,0x4e]
+# CHECK: usdot   v3.2s, v15.8b, v30.8b
+# CHECK: usdot   v3.4s, v15.16b, v30.16b
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
+
+[0x3f,0xf8,0xa2,0x0f]
+[0x3f,0xf8,0xa2,0x4f]
+# CHECK: usdot   v31.2s, v1.8b, v2.4b[3]
+# CHECK: usdot   v31.4s, v1.16b, v2.4b[3]
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
+
+[0x3f,0xf8,0x22,0x0f]
+[0x3f,0xf8,0x22,0x4f]
+# CHECK: sudot   v31.2s, v1.8b, v2.4b[3]
+# CHECK: sudot   v31.4s, v1.16b, v2.4b[3]
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
Index: llvm/test/MC/AArch64/armv8.6a-simd-matmul.s
===
--- /dev/null
+++ llvm/test/MC/AArch64/armv8.6a-simd-matmul.s
@@ -0,0 +1,43 @@
+// RUN: llvm-mc -triple aarch64 -show-encoding -mattr=+i8mm   < %s  | FileCheck %s
+// RUN: llvm-mc -triple aarch64 -show-encoding -mattr=+v8.6a  < %s  | FileCheck %s
+// RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=+v8.6a-i8mm < %s 2>&1 | FileCheck %s --check-prefix=NOMATMUL
+
+smmla  v1.4s, v16.16b, v31.16b
+ummla  v1.4s, v16.16b, v31.16b
+usmmla v1.4s, v16.16b, v31.16b
+// CHECK: smmla   v1.4s, v16.16b, v31.16b // encoding: [0x01,0xa6,0x9f,0x4e]
+// CHECK: ummla   v1.4s, v16.16b, v31.16b // encoding: [0x01,0xa6,0x9f,0x6e]
+// CHECK: usmmla  v1.4s, v16.16b, v31.16b // encoding: [0x01,0xae,0x9f,0x4e]
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: smmla  v1.4s, v16.16b, v31.16b
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: ummla  v1.4s, v16.16b, v31.16b
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: usmmla  v1.4s, v16.16b, v31.16b
+
+usdot v3.2s, v15.8b, v30.8b
+usdot v3.4s, v15.16b, v30.16b
+// CHECK: usdot   v3.2s, v15.8b, v30.8b   // encoding: [0xe3,0x9d,0x9e,0x0e]
+// CHECK: usdot   v3.4s, v15.16b, v30.16b // encoding: [0xe3,0x9d,0x9e,0x4e]
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: usdot v3.2s, v15.8b, v30.8b
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: usdot v3.4s, v15.16b, v30.16b
+
+usdot v31.2s, v1.8b,  v2.4b[3]
+usdot v31.4s, v1.16b, v2.4b[3]
+// CHECK: usdot   v31.2s, v1.8b, v2.4b[3] // encoding: [0x3f,0xf8,0xa2,0x0f]
+// CHECK: usdot   v31.4s, v1.16b, v2.4b[3] // encoding: [0x3f,0xf8,0xa2,0x4f]
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: usdot   v31.2s, v1.8b, v2.4b[3]
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: usdot   v31.4s, v1.16b, v2.4b[3]
+
+sudot v31.2s, v1.8b,  v2.4b[3]
+sudot v31.4s, v1.16b, v2.4b[3]
+// CHECK: sudot   v31.2s, v1.8b, v2.4b[3] // encoding: [0x3f,0xf8,0x22,0x0f]
+// CHECK: sudot   v31.4s, v1.16b, v2.4b[3] // encoding: [0x3f,0xf8,0x22,0x4f]
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT:

[PATCH] D77872: [AArch32] Armv8.6-a Matrix Mult Assembly + Intrinsics

2020-04-16 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson marked 3 inline comments as done.
LukeGeeson added inline comments.



Comment at: clang/test/CodeGen/arm-v8.6a-neon-intrinsics.c:3
+// RUN: -fallow-half-arguments-and-returns -S -disable-O0-optnone -emit-llvm 
-o - %s \
+// RUN: | opt -S -mem2reg \
+// RUN: | FileCheck %s

miyuki wrote:
> Can you try -sroa after -mem2reg? I think it should eliminate some more 
> useless stores and loads.
added, test updated



Comment at: llvm/lib/Target/ARM/ARMInstrNEON.td:4839
   let Constraints = "$dst = $Vd";
+  let hasNoSchedulingInfo = 1;
 }

miyuki wrote:
> Why was this added?
Added a vusdot class to handle this


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77872/new/

https://reviews.llvm.org/D77872



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D77872: [AArch32] Armv8.6-a Matrix Mult Assembly + Intrinsics

2020-04-16 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 258030.
LukeGeeson added a comment.

- added -sroa to arm-v8.6a-neon-intrinsics test, updated test to remove 
redundant memory accesses
- made class vusdot for new vusdot instructions with hasNoSchedulinginfo for 
new instructions. Existing instructions retain scheduling via use of VDOT class


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77872/new/

https://reviews.llvm.org/D77872

Files:
  clang/lib/Basic/Targets/ARM.cpp
  clang/lib/Basic/Targets/ARM.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/arm-v8.6a-neon-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMInstrNEON.td
  llvm/lib/Target/ARM/ARMPredicates.td
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/test/CodeGen/ARM/arm-matmul.ll

Index: llvm/test/CodeGen/ARM/arm-matmul.ll
===
--- /dev/null
+++ llvm/test/CodeGen/ARM/arm-matmul.ll
@@ -0,0 +1,83 @@
+; RUN: llc -mtriple=arm-none-linux-gnu -mattr=+neon,+i8mm -float-abi=hard < %s -o -| FileCheck %s
+
+define <4 x i32> @smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+entry:
+; CHECK-LABEL: smmla.v4i32.v16i8
+; CHECK:vsmmla.s8   q0, q1, q2
+  %vmmla1.i = tail call <4 x i32> @llvm.arm.neon.smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3
+  ret <4 x i32> %vmmla1.i
+}
+
+define <4 x i32> @ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+entry:
+; CHECK-LABEL: ummla.v4i32.v16i8
+; CHECK:vummla.u8   q0, q1, q2
+  %vmmla1.i = tail call <4 x i32> @llvm.arm.neon.ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3
+  ret <4 x i32> %vmmla1.i
+}
+
+define <4 x i32> @usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+entry:
+; CHECK-LABEL: usmmla.v4i32.v16i8
+; CHECK:vusmmla.s8   q0, q1, q2
+  %vusmmla1.i = tail call <4 x i32> @llvm.arm.neon.usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3
+  ret <4 x i32> %vusmmla1.i
+}
+
+define <2 x i32> @usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
+entry:
+; CHECK-LABEL: usdot.v2i32.v8i8
+; CHECK:vusdot.s8   d0, d1, d2
+  %vusdot1.i = tail call <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) #3
+  ret <2 x i32> %vusdot1.i
+}
+
+define <2 x i32> @usdot_lane.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
+entry:
+; CHECK-LABEL: usdot_lane.v2i32.v8i8
+; CHECK:vusdot.s8   d0, d1, d2[0]
+  %0 = bitcast <8 x i8> %b to <2 x i32>
+  %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer
+  %1 = bitcast <2 x i32> %shuffle to <8 x i8>
+  %vusdot1.i = tail call <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %1) #3
+  ret <2 x i32> %vusdot1.i
+}
+
+define <2 x i32> @sudot_lane.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
+entry:
+; CHECK-LABEL: sudot_lane.v2i32.v8i8
+; CHECK:vsudot.u8   d0, d1, d2[0]
+  %0 = bitcast <8 x i8> %b to <2 x i32>
+  %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer
+  %1 = bitcast <2 x i32> %shuffle to <8 x i8>
+  %vusdot1.i = tail call <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %1, <8 x i8> %a) #3
+  ret <2 x i32> %vusdot1.i
+}
+
+define <4 x i32> @usdotq_lane.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <8 x i8> %b) {
+entry:
+; CHECK-LABEL: usdotq_lane.v4i32.v16i8
+; CHECK:vusdot.s8   q0, q1, d4[0]
+  %0 = bitcast <8 x i8> %b to <2 x i32>
+  %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> zeroinitializer
+  %1 = bitcast <4 x i32> %shuffle to <16 x i8>
+  %vusdot1.i = tail call <4 x i32> @llvm.arm.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %1) #3
+  ret <4 x i32> %vusdot1.i
+}
+
+define <4 x i32> @sudotq_lane.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <8 x i8> %b) {
+entry:
+; CHECK-LABEL: sudotq_lane.v4i32.v16i8
+; CHECK:vsudot.u8   q0, q1, d4[0]
+  %0 = bitcast <8 x i8> %b to <2 x i32>
+  %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> zeroinitializer
+  %1 = bitcast <4 x i32> %shuffle to <16 x i8>
+  %vusdot1.i = tail call <4 x i32> @llvm.arm.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %1, <16 x i8> %a) #3
+  ret <4 x i32> %vusdot1.i
+}
+
+declare <4 x i32> @llvm.arm.neon.smmla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
+declare <4 x i32> @llvm.arm.neon.ummla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
+declare <4 x i32> @llvm.arm.neon.usmmla.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
+declare <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32>, <8 x i8>, <8 x i8>) #2
+declare <4 x i32> @llvm.arm.neon.usdot.v4i32.v16i8(<4 x i32>, <16 x i8>, <16 x i8>) #2
Index: llvm/lib/Target/ARM/ARMSubtarget.h
===
--- llvm/lib/Target/ARM/ARMSubtarget.h
+++ llvm/lib/Target/ARM/ARMSubtarget.h
@@ -260,6 +260,9 @@
   /// HasBF16

[PATCH] D77875: [ARM] Armv8.6-a Matrix Mul cmd line support

2020-04-14 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 257362.
LukeGeeson added a comment.

- updated commit message to reflect sve=>f32/64 implication
- added comment in AArch64.cpp to that effect


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77875/new/

https://reviews.llvm.org/D77875

Files:
  clang/lib/Driver/ToolChains/Arch/AArch64.cpp
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/arm-matrix-multiply.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/include/llvm/Support/AArch64TargetParser.h
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/include/llvm/Support/ARMTargetParser.h
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -636,6 +636,7 @@
   {"maverick", "maverick", nullptr, nullptr},
   {"xscale", "noxscale", nullptr, nullptr},
   {"sb", "nosb", "+sb", "-sb"},
+  {"i8mm", "noi8mm", "+i8mm", "-i8mm"},
   {"mve", "nomve", "+mve", "-mve"},
   {"mve.fp", "nomve.fp", "+mve.fp", "-mve.fp"}};
 
@@ -1230,7 +1231,10 @@
   {"tme", "notme", "+tme", "-tme"},
   {"ssbs", "nossbs", "+ssbs", "-ssbs"},
   {"sb", "nosb", "+sb", "-sb"},
-  {"predres", "nopredres", "+predres", "-predres"}
+  {"predres", "nopredres", "+predres", "-predres"},
+  {"i8mm", "noi8mm", "+i8mm", "-i8mm"},
+  {"f32mm", "nof32mm", "+f32mm", "-f32mm"},
+  {"f64mm", "nof64mm", "+f64mm", "-f64mm"},
 };
 
   for (unsigned i = 0; i < array_lengthof(ArchExt); i++) {
Index: llvm/include/llvm/Support/ARMTargetParser.h
===
--- llvm/include/llvm/Support/ARMTargetParser.h
+++ llvm/include/llvm/Support/ARMTargetParser.h
@@ -47,14 +47,15 @@
   AEK_FP_DP   = 1 << 18,
   AEK_LOB = 1 << 19,
   AEK_BF16= 1 << 20,
-  AEK_CDECP0 =  1 << 21,
-  AEK_CDECP1 =  1 << 22,
-  AEK_CDECP2 =  1 << 23,
-  AEK_CDECP3 =  1 << 24,
-  AEK_CDECP4 =  1 << 25,
-  AEK_CDECP5 =  1 << 26,
-  AEK_CDECP6 =  1 << 27,
-  AEK_CDECP7 =  1 << 28,
+  AEK_I8MM= 1 << 21,
+  AEK_CDECP0 =  1 << 22,
+  AEK_CDECP1 =  1 << 23,
+  AEK_CDECP2 =  1 << 24,
+  AEK_CDECP3 =  1 << 25,
+  AEK_CDECP4 =  1 << 26,
+  AEK_CDECP5 =  1 << 27,
+  AEK_CDECP6 =  1 << 28,
+  AEK_CDECP7 =  1 << 29,
 
   // Unsupported extensions.
   AEK_OS   =1ULL << 59,
Index: llvm/include/llvm/Support/ARMTargetParser.def
===
--- llvm/include/llvm/Support/ARMTargetParser.def
+++ llvm/include/llvm/Support/ARMTargetParser.def
@@ -116,7 +116,8 @@
  ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
  (ARM::AEK_SEC| ARM::AEK_MP   | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
   ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP  | ARM::AEK_CRC  | ARM::AEK_RAS |
-  ARM::AEK_DOTPROD| ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES))
+  ARM::AEK_DOTPROD| ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES |
+  ARM::AEK_I8MM))
 ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R,
   FK_NEON_FP_ARMV8,
   (ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
@@ -171,6 +172,7 @@
 ARM_ARCH_EXT_NAME("fp16fml",  ARM::AEK_FP16FML,  "+fp16fml", "-fp16fml")
 ARM_ARCH_EXT_NAME("bf16", ARM::AEK_BF16, "+bf16","-bf16")
 ARM_ARCH_EXT_NAME("sb",   ARM::AEK_SB,   "+sb",  "-sb")
+ARM_ARCH_EXT_NAME("i8mm", ARM::AEK_I8MM, "+i8mm","-i8mm")
 ARM_ARCH_EXT_NAME("lob",  ARM::AEK_LOB,  "+lob",   "-lob")
 ARM_ARCH_EXT_NAME("cdecp0",   ARM::AEK_CDECP0,   "+cdecp0",  "-cdecp0")
 ARM_ARCH_EXT_NAME("cdecp1",   ARM::AEK_CDECP1,   "+cdecp1",  "-cdecp1")
Index: llvm/include/llvm/Support/AArch64TargetParser.h
===
--- llvm/include/llvm/Support/AArch64TargetParser.h
+++ llvm/include/llvm/Support/AArch64TargetParser.h
@@ -24,7 +24,7 @@
 namespace AArch64 {
 
 // Arch extension modifiers for CPUs.
-enum ArchExtKind : unsigned {
+enum ArchExtKind : uint64_t {
   AEK_INVALID = 0,
   AEK_NONE =1,
   AEK_CRC = 1 << 1,
@@ -57,6 +57,8 @@
   AEK_TME = 1 << 28,
   AEK_BF16 =1 << 29,
   AEK_I8MM =1 << 30,
+  AEK_F32MM =   1ULL << 31,
+  AEK_F64MM =   1ULL << 32,
 };
 
 enum class ArchKind {
Index: llvm/include/llvm/Support/AArch64TargetParser.def

[PATCH] D77875: [ARM] Armv8.6-a Matrix Mul cmd line support

2020-04-14 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

In D77875#1979980 , @DavidSpickett 
wrote:

> > Note: +f32mm and +f64mm are optional and so have to be enabled by default
>
> I think I know what you mean, but "and so are not enabled by default" would 
> be clearer.
>
> Also to double check, does f64mm imply f32mm? (not sure if there's an updated 
> ARMARM yet that would tell me that)


Since these options can be enabled independently pre-8.6a in general the answer 
is no. I'll add a comment noting why we only only imply f32mm in AArch64.cpp

In D77875#1980488 , @DavidSpickett 
wrote:

> Or in other words, does amrv8.6-a+sve imply +f32mm *and* +f64mm?


Also no here, as an example the xml has different checks for fmmla 
https://developer.arm.com/docs/ddi0596/latest/a64-sve-instructions-alphabetic-order/fmmla-floating-point-matrix-multiply-accumulate


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77875/new/

https://reviews.llvm.org/D77875



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D77875: [ARM] Armv8.6-a Matrix Mul cmd line support

2020-04-10 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

harbour master builds, unit tests passing, failures down to linting


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77875/new/

https://reviews.llvm.org/D77875



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D77871: [AArch64] Armv8.6-a Matrix Mult Assembly + Intrinsics

2020-04-10 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

Removed reliance on parent revision, harbormaster now builds with unit tests 
passing


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77871/new/

https://reviews.llvm.org/D77871



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D77872: [AArch32] Armv8.6-a Matrix Mult Assembly + Intrinsics

2020-04-10 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

Harbourmaster now biulds, and unit tests pass, errors related to linting


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77872/new/

https://reviews.llvm.org/D77872



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D77540: [PATCH] [ARM]: Armv8.6-a Matrix Mul Asm and Intrinsics Support

2020-04-10 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

Hi folks,
Thank you for your help, I have split the patches up as follows:

1. https://reviews.llvm.org/D77871 [AArch64] Armv8.6-a Matrix Mult Assembly + 
Intrinsics
2. https://reviews.llvm.org/D77872 [AArch32] Armv8.6-a Matrix Mult Assembly + 
Intrinsics
3. https://reviews.llvm.org/D77873 [AArch64] Armv8.6-A Mat Mul SVE Assembly
4. https://reviews.llvm.org/D77874 [AArch32] Armv8.6a Matrix Mul Assembly
5. https://reviews.llvm.org/D77875 [ARM] Armv8.6-a Matrix Mul cmd line support

In detail this covers the intrinsics (with minimum llvm backend needed to build 
and pass tests), SVE Assembly, (non-SVE) Assembly, and command-line support to 
enable the use of this extension. I mentioned above that there was so little 
intrinsics work needed and so the natural way to split up this patch was to 
include the llvm code needed to get each patch to build in the order above.

I'd appreciate any help you can give reviewing these!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77540/new/

https://reviews.llvm.org/D77540



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D77875: [ARM] Armv8.6-a Matrix Mul cmd line support

2020-04-10 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson created this revision.
LukeGeeson added a reviewer: t.p.northover.
Herald added subscribers: cfe-commits, danielkiss, kristof.beyls.
Herald added a project: clang.
LukeGeeson added a parent revision: D77874:  [AArch32] Armv8.6a Matrix Mul 
Assembly.

This patch upstreams support for the Armv8.6-a Matrix Multiplication
Extension. A summary of the features can be found here:

https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a

This patch includes:

- Command line options to enable these features with +i8mm, +f32mm, or f64mm

Note: +f32mm and +f64mm are optional and so have
to be enabled by default

This is part of a patch series, starting with BFloat16 support and
the other components in the armv8.6a extension (in previous patches
linked in phabricator)

Based on work by:

- Luke Geeson
- Oliver Stannard
- Luke Cheeseman


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D77875

Files:
  clang/lib/Driver/ToolChains/Arch/AArch64.cpp
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/arm-matrix-multiply.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/include/llvm/Support/AArch64TargetParser.h
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/include/llvm/Support/ARMTargetParser.h
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -636,6 +636,7 @@
   {"maverick", "maverick", nullptr, nullptr},
   {"xscale", "noxscale", nullptr, nullptr},
   {"sb", "nosb", "+sb", "-sb"},
+  {"i8mm", "noi8mm", "+i8mm", "-i8mm"},
   {"mve", "nomve", "+mve", "-mve"},
   {"mve.fp", "nomve.fp", "+mve.fp", "-mve.fp"}};
 
@@ -1230,7 +1231,10 @@
   {"tme", "notme", "+tme", "-tme"},
   {"ssbs", "nossbs", "+ssbs", "-ssbs"},
   {"sb", "nosb", "+sb", "-sb"},
-  {"predres", "nopredres", "+predres", "-predres"}
+  {"predres", "nopredres", "+predres", "-predres"},
+  {"i8mm", "noi8mm", "+i8mm", "-i8mm"},
+  {"f32mm", "nof32mm", "+f32mm", "-f32mm"},
+  {"f64mm", "nof64mm", "+f64mm", "-f64mm"},
 };
 
   for (unsigned i = 0; i < array_lengthof(ArchExt); i++) {
Index: llvm/include/llvm/Support/ARMTargetParser.h
===
--- llvm/include/llvm/Support/ARMTargetParser.h
+++ llvm/include/llvm/Support/ARMTargetParser.h
@@ -47,14 +47,15 @@
   AEK_FP_DP   = 1 << 18,
   AEK_LOB = 1 << 19,
   AEK_BF16= 1 << 20,
-  AEK_CDECP0 =  1 << 21,
-  AEK_CDECP1 =  1 << 22,
-  AEK_CDECP2 =  1 << 23,
-  AEK_CDECP3 =  1 << 24,
-  AEK_CDECP4 =  1 << 25,
-  AEK_CDECP5 =  1 << 26,
-  AEK_CDECP6 =  1 << 27,
-  AEK_CDECP7 =  1 << 28,
+  AEK_I8MM= 1 << 21,
+  AEK_CDECP0 =  1 << 22,
+  AEK_CDECP1 =  1 << 23,
+  AEK_CDECP2 =  1 << 24,
+  AEK_CDECP3 =  1 << 25,
+  AEK_CDECP4 =  1 << 26,
+  AEK_CDECP5 =  1 << 27,
+  AEK_CDECP6 =  1 << 28,
+  AEK_CDECP7 =  1 << 29,
 
   // Unsupported extensions.
   AEK_OS   =1ULL << 59,
Index: llvm/include/llvm/Support/ARMTargetParser.def
===
--- llvm/include/llvm/Support/ARMTargetParser.def
+++ llvm/include/llvm/Support/ARMTargetParser.def
@@ -116,7 +116,8 @@
  ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
  (ARM::AEK_SEC| ARM::AEK_MP   | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
   ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP  | ARM::AEK_CRC  | ARM::AEK_RAS |
-  ARM::AEK_DOTPROD| ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES))
+  ARM::AEK_DOTPROD| ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES |
+  ARM::AEK_I8MM))
 ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R,
   FK_NEON_FP_ARMV8,
   (ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
@@ -171,6 +172,7 @@
 ARM_ARCH_EXT_NAME("fp16fml",  ARM::AEK_FP16FML,  "+fp16fml", "-fp16fml")
 ARM_ARCH_EXT_NAME("bf16", ARM::AEK_BF16, "+bf16","-bf16")
 ARM_ARCH_EXT_NAME("sb",   ARM::AEK_SB,   "+sb",  "-sb")
+ARM_ARCH_EXT_NAME("i8mm", ARM::AEK_I8MM, "+i8mm","-i8mm")
 ARM_ARCH_EXT_NAME("lob",  ARM::AEK_LOB,  "+lob",   "-lob")
 ARM_ARCH_EXT_NAME("cdecp0",   ARM::AEK_CDECP0,   "+cdecp0",  "-cdecp0")
 ARM_ARCH_EXT_NAME("cdecp1",   ARM::AEK_CDECP1,   "+cdecp1",  "-cdecp1")
Index:

[PATCH] D77872: [AArch32] Armv8.6-a Matrix Mult Assembly + Intrinsics

2020-04-10 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson created this revision.
LukeGeeson added a reviewer: t.p.northover.
Herald added subscribers: cfe-commits, danielkiss, hiraditya, kristof.beyls.
Herald added a project: clang.
LukeGeeson added a parent revision: D77871: [AArch64] Armv8.6-a Matrix Mult 
Assembly + Intrinsics.
LukeGeeson added a child revision: D77873: [AArch64] Armv8.6-A Mat Mul SVE 
Assembly.

This patch upstreams support for the Armv8.6-a Matrix Multiplication
Extension. A summary of the features can be found here:

https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a

This patch includes:

- Assembly support for AArch32
- Intrinsics Support for AArch32 Neon Intrinsics for Matrix Multiplication

Note: these extensions are optional in the 8.6a architecture and so have
to be enabled by default

No additional IR types or C Types are needed for this extension.

This is part of a patch series, starting with BFloat16 support and
the other components in the armv8.6a extension (in previous patches
linked in phabricator)

Based on work by:

- Luke Geeson
- Oliver Stannard
- Luke Cheeseman


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D77872

Files:
  clang/lib/Basic/Targets/ARM.cpp
  clang/lib/Basic/Targets/ARM.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/arm-v8.6a-neon-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsARM.td
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMInstrNEON.td
  llvm/lib/Target/ARM/ARMPredicates.td
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/test/CodeGen/ARM/arm-matmul.ll

Index: llvm/test/CodeGen/ARM/arm-matmul.ll
===
--- /dev/null
+++ llvm/test/CodeGen/ARM/arm-matmul.ll
@@ -0,0 +1,83 @@
+; RUN: llc -mtriple=arm-none-linux-gnu -mattr=+neon,+i8mm -float-abi=hard < %s -o -| FileCheck %s
+
+define <4 x i32> @smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+entry:
+; CHECK-LABEL: smmla.v4i32.v16i8
+; CHECK:vsmmla.s8   q0, q1, q2
+  %vmmla1.i = tail call <4 x i32> @llvm.arm.neon.smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3
+  ret <4 x i32> %vmmla1.i
+}
+
+define <4 x i32> @ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+entry:
+; CHECK-LABEL: ummla.v4i32.v16i8
+; CHECK:vummla.u8   q0, q1, q2
+  %vmmla1.i = tail call <4 x i32> @llvm.arm.neon.ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3
+  ret <4 x i32> %vmmla1.i
+}
+
+define <4 x i32> @usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) {
+entry:
+; CHECK-LABEL: usmmla.v4i32.v16i8
+; CHECK:vusmmla.s8   q0, q1, q2
+  %vusmmla1.i = tail call <4 x i32> @llvm.arm.neon.usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) #3
+  ret <4 x i32> %vusmmla1.i
+}
+
+define <2 x i32> @usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
+entry:
+; CHECK-LABEL: usdot.v2i32.v8i8
+; CHECK:vusdot.s8   d0, d1, d2
+  %vusdot1.i = tail call <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) #3
+  ret <2 x i32> %vusdot1.i
+}
+
+define <2 x i32> @usdot_lane.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
+entry:
+; CHECK-LABEL: usdot_lane.v2i32.v8i8
+; CHECK:vusdot.s8   d0, d1, d2[0]
+  %0 = bitcast <8 x i8> %b to <2 x i32>
+  %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer
+  %1 = bitcast <2 x i32> %shuffle to <8 x i8>
+  %vusdot1.i = tail call <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %1) #3
+  ret <2 x i32> %vusdot1.i
+}
+
+define <2 x i32> @sudot_lane.v2i32.v8i8(<2 x i32> %r, <8 x i8> %a, <8 x i8> %b) {
+entry:
+; CHECK-LABEL: sudot_lane.v2i32.v8i8
+; CHECK:vsudot.u8   d0, d1, d2[0]
+  %0 = bitcast <8 x i8> %b to <2 x i32>
+  %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer
+  %1 = bitcast <2 x i32> %shuffle to <8 x i8>
+  %vusdot1.i = tail call <2 x i32> @llvm.arm.neon.usdot.v2i32.v8i8(<2 x i32> %r, <8 x i8> %1, <8 x i8> %a) #3
+  ret <2 x i32> %vusdot1.i
+}
+
+define <4 x i32> @usdotq_lane.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <8 x i8> %b) {
+entry:
+; CHECK-LABEL: usdotq_lane.v4i32.v16i8
+; CHECK:vusdot.s8   q0, q1, d4[0]
+  %0 = bitcast <8 x i8> %b to <2 x i32>
+  %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> zeroinitializer
+  %1 = bitcast <4 x i32> %shuffle to <16 x i8>
+  %vusdot1.i = tail call <4 x i32> @llvm.arm.neon.usdot.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %1) #3
+  ret <4 x i32> %vusdot1.i
+}
+
+define <4 x i32> @sudotq_lane.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <8 x i8> %b) {
+entry:
+; CHECK-LABEL: sudotq_lane.v4i32.v16i8
+; CHECK:vsudot.u8   q0, q1, d4[0]
+  %0 = bitcast <8 x i8> %b to <2 x i32>
+  %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> zeroinitializer
+  %1 = bitcast <4 x i32> %shuffle to <16 x i8>
+  %vusdot1.i = tail

[PATCH] D77871: [AArch64] Armv8.6-a Matrix Mult Assembly + Intrinsics

2020-04-10 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson created this revision.
LukeGeeson added reviewers: ostannard, t.p.northover.
Herald added subscribers: cfe-commits, danielkiss, hiraditya, kristof.beyls.
Herald added a reviewer: rengolin.
Herald added a project: clang.
LukeGeeson added a parent revision: D77540: [PATCH] [ARM]: Armv8.6-a Matrix Mul 
Asm and Intrinsics Support.
LukeGeeson added a child revision: D77872: [AArch32] Armv8.6-a Matrix Mult 
Assembly + Intrinsics.

This patch upstreams support for the Armv8.6-a Matrix Multiplication
Extension. A summary of the features can be found here:

https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a

This patch includes:

- Assembly support for AArch64 only (no SVE or Neon)
- Intrinsics Support for AArch64 Armv8.6a Matrix Multiplication Instructions 
(No bfloat16 matrix multiplication)

No IR types or C Types are needed for this extension.

This is part of a patch series, starting with BFloat16 support and
the other components in the armv8.6a extension (in previous patches
linked in phabricator)

Based on work by:

- Luke Geeson
- Oliver Stannard
- Luke Cheeseman


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D77871

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/Basic/Targets/AArch64.cpp
  clang/lib/Basic/Targets/AArch64.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-matmul.cpp
  clang/test/CodeGen/aarch64-v8.6a-neon-intrinsics.c
  llvm/include/llvm/IR/IntrinsicsAArch64.td
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/test/CodeGen/AArch64/aarch64-matmul.ll
  llvm/test/MC/AArch64/armv8.6a-simd-matmul-error.s
  llvm/test/MC/AArch64/armv8.6a-simd-matmul.s
  llvm/test/MC/Disassembler/AArch64/armv8.6a-simd-matmul.txt

Index: llvm/test/MC/Disassembler/AArch64/armv8.6a-simd-matmul.txt
===
--- /dev/null
+++ llvm/test/MC/Disassembler/AArch64/armv8.6a-simd-matmul.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc -triple=aarch64  -mattr=+i8mm -disassemble < %s   | FileCheck %s
+# RUN: llvm-mc -triple=aarch64  -mattr=+v8.6a -disassemble < %s  | FileCheck %s
+# RUN: not llvm-mc -triple=aarch64  -mattr=+v8.5a -disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOI8MM
+
+[0x01,0xa6,0x9f,0x4e]
+[0x01,0xa6,0x9f,0x6e]
+[0x01,0xae,0x9f,0x4e]
+# CHECK: smmla   v1.4s, v16.16b, v31.16b
+# CHECK: ummla   v1.4s, v16.16b, v31.16b
+# CHECK: usmmla  v1.4s, v16.16b, v31.16b
+# NOI8MM: [[@LINE-6]]:{{[0-9]+}}: warning: invalid instruction encoding
+# NOI8MM: [[@LINE-6]]:{{[0-9]+}}: warning: invalid instruction encoding
+# NOI8MM: [[@LINE-6]]:{{[0-9]+}}: warning: invalid instruction encoding
+
+[0xe3,0x9d,0x9e,0x0e]
+[0xe3,0x9d,0x9e,0x4e]
+# CHECK: usdot   v3.2s, v15.8b, v30.8b
+# CHECK: usdot   v3.4s, v15.16b, v30.16b
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
+
+[0x3f,0xf8,0xa2,0x0f]
+[0x3f,0xf8,0xa2,0x4f]
+# CHECK: usdot   v31.2s, v1.8b, v2.4b[3]
+# CHECK: usdot   v31.4s, v1.16b, v2.4b[3]
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
+
+[0x3f,0xf8,0x22,0x0f]
+[0x3f,0xf8,0x22,0x4f]
+# CHECK: sudot   v31.2s, v1.8b, v2.4b[3]
+# CHECK: sudot   v31.4s, v1.16b, v2.4b[3]
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
+# NOI8MM: [[@LINE-4]]:{{[0-9]+}}: warning: invalid instruction encoding
Index: llvm/test/MC/AArch64/armv8.6a-simd-matmul.s
===
--- /dev/null
+++ llvm/test/MC/AArch64/armv8.6a-simd-matmul.s
@@ -0,0 +1,43 @@
+// RUN: llvm-mc -triple aarch64 -show-encoding -mattr=+i8mm   < %s  | FileCheck %s
+// RUN: llvm-mc -triple aarch64 -show-encoding -mattr=+v8.6a  < %s  | FileCheck %s
+// RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=+v8.6a-i8mm < %s 2>&1 | FileCheck %s --check-prefix=NOMATMUL
+
+smmla  v1.4s, v16.16b, v31.16b
+ummla  v1.4s, v16.16b, v31.16b
+usmmla v1.4s, v16.16b, v31.16b
+// CHECK: smmla   v1.4s, v16.16b, v31.16b // encoding: [0x01,0xa6,0x9f,0x4e]
+// CHECK: ummla   v1.4s, v16.16b, v31.16b // encoding: [0x01,0xa6,0x9f,0x6e]
+// CHECK: usmmla  v1.4s, v16.16b, v31.16b // encoding: [0x01,0xae,0x9f,0x4e]
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: smmla  v1.4s, v16.16b, v31.16b
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: ummla  v1.4s, v16.16b, v31.16b
+// NOMATMUL: instruction requires: i8mm
+// NOMATMUL-NEXT: usmmla  v1.4s, v16.16b, v31.16b
+
+usdot v3.2s, v15.8b, v30.8b
+usdot v3.4s, v15.16b, v30.16b
+// CHECK: usdot   v3.2s, v15.8b, v30.8b   // encoding: [0xe3,0x9d,0x9e,0x0e]
+// CHECK: usdot   v3.4s, v15.16b, v30.16b // encoding:

[PATCH] D77540: [PATCH] [ARM]: Armv8.6-a Matrix Mul Asm and Intrinsics Support

2020-04-08 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

In D77540#1969873 , @fhahn wrote:

> This patch is quite big and I think it would be easier to review if it would 
> be split up into distinct clang/llvm parts and maybe NEON/SVE parts on the 
> LLVM side.

That's a good suggestion, I shouldn't have pushed my luck!

Luckily this patch doesn't introduce any new C types or IR types and so the 
frontend code is light, relatively speaking. That said, there is so little 
intrinsics work needed here that it might make sense to split the subsequent 
patches up to include intrinsics out-of-the-box as AArch64 asm+intrinsics, 
AArch32 asm+intrinsics, AArch64SVE asm + intrinsics etc... instead.

Does that sound ok?

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77540/new/

https://reviews.llvm.org/D77540

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D76062: [PATCH] [ARM] ARMv8.6-a command-line + BFloat16 Asm Support

2020-04-08 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

apologies please ignore adding here, added reviewers to the wrong diff


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76062/new/

https://reviews.llvm.org/D76062



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D77540: [PATCH] [ARM]: Armv8.6-a Matrix Mul Asm and Intrinsics Support

2020-04-08 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson marked 2 inline comments as done.
LukeGeeson added inline comments.



Comment at: clang/lib/Basic/Targets/AArch64.cpp:284
+  if (HasMatMul)
+Builder.defineMacro("__ARM_FEATURE_MATMUL_INT8", "1");
+

DavidSpickett wrote:
> I don't see specific tests for this #define
test added in clang/test/CodeGen/aarch64-matmul.cpp


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77540/new/

https://reviews.llvm.org/D77540



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D77540: [PATCH] [ARM]: Armv8.6-a Matrix Mul Asm and Intrinsics Support

2020-04-08 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added inline comments.



Comment at: clang/lib/Basic/Targets/AArch64.h:39
   bool HasTME;
+  unsigned HasMatMul;
 

DavidSpickett wrote:
> Why is this feature a number for AArch64, does >1 mean something?
It seems this was an artifact from how this is handled in ARM.h where they do 
have special meaning. I'll modify this to be boolean in line with the others in 
AArch64.h


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77540/new/

https://reviews.llvm.org/D77540



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D77540: [PATCH] [ARM]: Armv8.6-a Matrix Mul Asm and Intrinsics Support

2020-04-06 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

build failures related to linting, unit tests passing


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77540/new/

https://reviews.llvm.org/D77540



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D76077: [ARM] Add __bf16 as new Bfloat16 C Type

2020-03-13 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 250267.
LukeGeeson added a comment.

Added a file from a downstream cleanup of the branch.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76077/new/

https://reviews.llvm.org/D76077

Files:
  clang/docs/LanguageExtensions.rst
  clang/include/clang-c/Index.h
  clang/include/clang/AST/ASTContext.h
  clang/include/clang/AST/BuiltinTypes.def
  clang/include/clang/AST/Type.h
  clang/include/clang/Basic/Specifiers.h
  clang/include/clang/Basic/TargetBuiltins.h
  clang/include/clang/Basic/TargetInfo.h
  clang/include/clang/Basic/TokenKinds.def
  clang/include/clang/Sema/DeclSpec.h
  clang/include/clang/Serialization/ASTBitCodes.h
  clang/lib/AST/ASTContext.cpp
  clang/lib/AST/ItaniumMangle.cpp
  clang/lib/AST/MicrosoftMangle.cpp
  clang/lib/AST/NSAPI.cpp
  clang/lib/AST/PrintfFormatString.cpp
  clang/lib/AST/Type.cpp
  clang/lib/AST/TypeLoc.cpp
  clang/lib/Basic/TargetInfo.cpp
  clang/lib/Basic/Targets/AArch64.cpp
  clang/lib/Basic/Targets/AArch64.h
  clang/lib/Basic/Targets/ARM.cpp
  clang/lib/Basic/Targets/ARM.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CGDebugInfo.cpp
  clang/lib/CodeGen/CodeGenTypes.cpp
  clang/lib/CodeGen/ItaniumCXXABI.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Format/FormatToken.cpp
  clang/lib/Index/USRGeneration.cpp
  clang/lib/Parse/ParseDecl.cpp
  clang/lib/Parse/ParseExpr.cpp
  clang/lib/Parse/ParseExprCXX.cpp
  clang/lib/Parse/ParseTentative.cpp
  clang/lib/Sema/DeclSpec.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/lib/Sema/SemaDecl.cpp
  clang/lib/Sema/SemaOverload.cpp
  clang/lib/Sema/SemaTemplateVariadic.cpp
  clang/lib/Sema/SemaType.cpp
  clang/lib/Serialization/ASTCommon.cpp
  clang/lib/Serialization/ASTReader.cpp
  clang/test/CodeGen/arm-mangle-16bit-float.cpp
  clang/test/Sema/arm-bfloat.cpp
  clang/tools/libclang/CXType.cpp

Index: clang/tools/libclang/CXType.cpp
===
--- clang/tools/libclang/CXType.cpp
+++ clang/tools/libclang/CXType.cpp
@@ -607,6 +607,7 @@
 TKIND(Elaborated);
 TKIND(Pipe);
 TKIND(Attributed);
+TKIND(Bfloat16);
 #define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) TKIND(Id);
 #include "clang/Basic/OpenCLImageTypes.def"
 #undef IMAGE_TYPE
Index: clang/test/Sema/arm-bfloat.cpp
===
--- /dev/null
+++ clang/test/Sema/arm-bfloat.cpp
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 \
+// RUN: -triple aarch64-arm-none-eabi -target-cpu cortex-a75 \
+// RUN: -target-feature +bf16 -target-feature +neon %s
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 \
+// RUN: -triple arm-arm-none-eabi -target-cpu cortex-a53 \
+// RUN: -target-feature +bf16 -target-feature +neon %s
+
+void test(bool b) {
+  __bf16 bf16;
+
+  bf16 + bf16; // expected-error {{invalid operands to binary expression ('__bf16' and '__bf16')}}
+  bf16 - bf16; // expected-error {{invalid operands to binary expression ('__bf16' and '__bf16')}}
+  bf16 * bf16; // expected-error {{invalid operands to binary expression ('__bf16' and '__bf16')}}
+  bf16 / bf16; // expected-error {{invalid operands to binary expression ('__bf16' and '__bf16')}}
+
+  __fp16 fp16;
+
+  bf16 + fp16; // expected-error {{invalid operands to binary expression ('__bf16' and '__fp16')}}
+  fp16 + bf16; // expected-error {{invalid operands to binary expression ('__fp16' and '__bf16')}}
+  bf16 - fp16; // expected-error {{invalid operands to binary expression ('__bf16' and '__fp16')}}
+  fp16 - bf16; // expected-error {{invalid operands to binary expression ('__fp16' and '__bf16')}}
+  bf16 * fp16; // expected-error {{invalid operands to binary expression ('__bf16' and '__fp16')}}
+  fp16 * bf16; // expected-error {{invalid operands to binary expression ('__fp16' and '__bf16')}}
+  bf16 / fp16; // expected-error {{invalid operands to binary expression ('__bf16' and '__fp16')}}
+  fp16 / bf16; // expected-error {{invalid operands to binary expression ('__fp16' and '__bf16')}}
+  bf16 = fp16; // expected-error {{assigning to '__bf16' from incompatible type '__fp16'}}
+  fp16 = bf16; // expected-error {{assigning to '__fp16' from incompatible type '__bf16'}}
+  bf16 + (b ? fp16 : bf16); // expected-error {{incompatible operand types ('__fp16' and '__bf16')}}
+}
+
+#include 
+
+void test_vector(bfloat16x4_t a, bfloat16x4_t b, float16x4_t c) {
+  a + b; // expected-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'bfloat16x4_t')}}
+  a - b; // expected-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'bfloat16x4_t')}}
+  a * b; // expected-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'bfloat16x4_t')}}
+  a / b; // expected-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'bfloat16x4_t')}}
+
+  a +

[PATCH] D76077: [ARM] Add __bf16 as new Bfloat16 C Type

2020-03-13 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

In D76077#1919861 , @rjmccall wrote:

> I don't understand why you wouldn't add a new IR type for this; doing so 
> should be totally mechanical.

We had a few reasons for doing it this way.

By adding a new IR type we would need to consider calling conventions, and IR 
optimizations for what is essentially an opaque storage-only type.

Bfloat has no soft ABI, and all the supported operations can be handled through 
intrinsics (in a later patch, removed here due to bloat). If we were to add a 
new IR type, then we would need to handle many operations which would extend 
beyond what the type is supported for. For instance in GCC we had to add a new 
mode in RTL to handle inline memcopy.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76077/new/

https://reviews.llvm.org/D76077

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D76077: [ARM] Add __bf16 as new Bfloat16 C Type

2020-03-13 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson marked an inline comment as done.
LukeGeeson added inline comments.



Comment at: clang/test/CodeGen/arm-mangle-16bit-float.cpp:4
+
+// CHECK64: define {{.*}}void @_Z3foou6__bf16(half %b)
+// CHECK32: define {{.*}}void @_Z3foou6__bf16(i32 %b.coerce)

craig.topper wrote:
> How can bfloat16 be passed as half? Don't they have a different format?
see the above comment about soft-abis


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76077/new/

https://reviews.llvm.org/D76077



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D76077: [ARM] Add __bf16 as new Bfloat16 C Type

2020-03-13 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

In D76077#1919897 , @rjmccall wrote:

> Note that we have an IR type for the PPC double-double format, which isn't 
> even hardware-supported.  This is literally just an IEEE floating-point 
> format with non-standard parameters, right?


Indeed yeah, the details are laid out in a blog post here: 
https://community.arm.com/developer/ip-products/processors/b/ml-ip-blog/posts/bfloat16-processing-for-neural-networks-on-armv8_2d00_a


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76077/new/

https://reviews.llvm.org/D76077



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D76077: [ARM] Add __bf16 as new Bfloat16 C Type

2020-03-12 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson created this revision.
LukeGeeson added reviewers: SjoerdMeijer, stuij, rjmccall, rsmith, liutianle, 
RKSimon, craig.topper.
Herald added subscribers: cfe-commits, danielkiss, arphaman, kristof.beyls.
Herald added a project: clang.
LukeGeeson added a parent revision: D76062: [PATCH] [ARM] ARMv8.6-a 
command-line + BFloat16 Asm Support.

This patch upstreams support for a new storage only bfloat16 C type.
This type is used to implement primitive support for bfloat16 data, in
line with the Bfloat16 extension of the Armv8.6-a architecture, as
detailed here:

https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a

The bfloat type, and its properties is specified in the Arm C language
extension specification:

https://developer.arm.com/docs/ihi0055/d/procedure-call-standard-for-the-arm-64-bit-architecture

In detail this patch:

- introduces an opaque, storage-only C-type __bf16, which does not introduce a 
new LLVM IR type, but maps it to either i16 or half type.

This is part of a patch series, starting with command-line and Bfloat16
assembly support. The subsequent patches will upstream intrinsics
support for BFloat16, followed by Matrix Multiplication and the
remaining Virtualization features of the armv8.6-a architecture.

Based on work by:

- Luke Cheeseman
- Momchil Velikov
- labrinea


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D76077

Files:
  clang/docs/LanguageExtensions.rst
  clang/include/clang-c/Index.h
  clang/include/clang/AST/ASTContext.h
  clang/include/clang/AST/BuiltinTypes.def
  clang/include/clang/AST/Type.h
  clang/include/clang/Basic/Specifiers.h
  clang/include/clang/Basic/TargetBuiltins.h
  clang/include/clang/Basic/TargetInfo.h
  clang/include/clang/Basic/TokenKinds.def
  clang/include/clang/Sema/DeclSpec.h
  clang/include/clang/Serialization/ASTBitCodes.h
  clang/lib/AST/ASTContext.cpp
  clang/lib/AST/ItaniumMangle.cpp
  clang/lib/AST/MicrosoftMangle.cpp
  clang/lib/AST/NSAPI.cpp
  clang/lib/AST/PrintfFormatString.cpp
  clang/lib/AST/Type.cpp
  clang/lib/AST/TypeLoc.cpp
  clang/lib/Basic/TargetInfo.cpp
  clang/lib/Basic/Targets/AArch64.cpp
  clang/lib/Basic/Targets/AArch64.h
  clang/lib/Basic/Targets/ARM.cpp
  clang/lib/Basic/Targets/ARM.h
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/CodeGen/CGDebugInfo.cpp
  clang/lib/CodeGen/CodeGenTypes.cpp
  clang/lib/CodeGen/ItaniumCXXABI.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Format/FormatToken.cpp
  clang/lib/Index/USRGeneration.cpp
  clang/lib/Parse/ParseDecl.cpp
  clang/lib/Parse/ParseExpr.cpp
  clang/lib/Parse/ParseExprCXX.cpp
  clang/lib/Parse/ParseTentative.cpp
  clang/lib/Sema/DeclSpec.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/lib/Sema/SemaDecl.cpp
  clang/lib/Sema/SemaOverload.cpp
  clang/lib/Sema/SemaTemplateVariadic.cpp
  clang/lib/Sema/SemaType.cpp
  clang/lib/Serialization/ASTCommon.cpp
  clang/lib/Serialization/ASTReader.cpp
  clang/test/CodeGen/arm-mangle-16bit-float.cpp
  clang/test/Sema/arm-bfloat.cpp

Index: clang/test/Sema/arm-bfloat.cpp
===
--- /dev/null
+++ clang/test/Sema/arm-bfloat.cpp
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 \
+// RUN: -triple aarch64-arm-none-eabi -target-cpu cortex-a75 \
+// RUN: -target-feature +bf16 -target-feature +neon %s
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 \
+// RUN: -triple arm-arm-none-eabi -target-cpu cortex-a53 \
+// RUN: -target-feature +bf16 -target-feature +neon %s
+
+void test(bool b) {
+  __bf16 bf16;
+
+  bf16 + bf16; // expected-error {{invalid operands to binary expression ('__bf16' and '__bf16')}}
+  bf16 - bf16; // expected-error {{invalid operands to binary expression ('__bf16' and '__bf16')}}
+  bf16 * bf16; // expected-error {{invalid operands to binary expression ('__bf16' and '__bf16')}}
+  bf16 / bf16; // expected-error {{invalid operands to binary expression ('__bf16' and '__bf16')}}
+
+  __fp16 fp16;
+
+  bf16 + fp16; // expected-error {{invalid operands to binary expression ('__bf16' and '__fp16')}}
+  fp16 + bf16; // expected-error {{invalid operands to binary expression ('__fp16' and '__bf16')}}
+  bf16 - fp16; // expected-error {{invalid operands to binary expression ('__bf16' and '__fp16')}}
+  fp16 - bf16; // expected-error {{invalid operands to binary expression ('__fp16' and '__bf16')}}
+  bf16 * fp16; // expected-error {{invalid operands to binary expression ('__bf16' and '__fp16')}}
+  fp16 * bf16; // expected-error {{invalid operands to binary expression ('__fp16' and '__bf16')}}
+  bf16 / fp16; // expected-error {{invalid operands to binary expression ('__bf16' and '__fp16')}}
+  fp16 / bf16; // expected-error {{invalid operands to binary expression ('__fp16' and '__bf16')}}
+  bf16 = fp16; // expected-error {{assigning to '__bf16' from incompatible type '__fp16'}}
+  fp16 = bf16; // expected-error {{assigning to '__fp16' from

[PATCH] D76062: [PATCH] [ARM] ARMv8.6-a command-line + BFloat16 Asm Support

2020-03-12 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson created this revision.
LukeGeeson added reviewers: SjoerdMeijer, craig.topper, rjmccall.
Herald added subscribers: llvm-commits, cfe-commits, danielkiss, dexonsmith, 
hiraditya, kristof.beyls.
Herald added projects: clang, LLVM.
LukeGeeson edited the summary of this revision.
LukeGeeson edited the summary of this revision.

This patch introduces command-line support for the Armv8.6-a architecture and 
assembly support for BFloat16. Details can be found here:

community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a




in addition to the GCC patch for the 8..6-a CLI:

  
https://gcc.gnu.org/legacy-ml/gcc-patches/2019-11/msg02647.html

In detail this patch

- march options for armv8.6-a
- BFloat16 assembly

This is part of a patch series, starting with command-line and Bfloat16 

  
assembly support. The subsequent patches will upstream intrinsics   

  
support for BFloat16, followed by Matrix Multiplication and the 

  
remaining Virtualization features of the armv8.6-a architecture.

Based on work by:

- labrinea
- MarkMurrayARM
- Luke Cheeseman
- Javed Asbar
- Mikhail Maltsev


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D76062

Files:
  clang/lib/Basic/Targets/AArch64.cpp
  clang/lib/Basic/Targets/AArch64.h
  clang/lib/Basic/Targets/ARM.cpp
  clang/test/Driver/aarch64-cpus.c
  clang/test/Driver/arm-cortex-cpus.c
  clang/test/Preprocessor/arm-target-features.c
  llvm/include/llvm/ADT/Triple.h
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/include/llvm/Support/AArch64TargetParser.h
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/include/llvm/Support/ARMTargetParser.h
  llvm/lib/Support/AArch64TargetParser.cpp
  llvm/lib/Support/ARMTargetParser.cpp
  llvm/lib/Support/Triple.cpp
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/lib/Target/AArch64/AArch64InstrInfo.td
  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
  llvm/lib/Target/AArch64/AArch64Subtarget.h
  llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
  llvm/lib/Target/AArch64/SVEInstrFormats.td
  llvm/lib/Target/ARM/ARM.td
  llvm/lib/Target/ARM/ARMInstrNEON.td
  llvm/lib/Target/ARM/ARMInstrVFP.td
  llvm/lib/Target/ARM/ARMPredicates.td
  llvm/lib/Target/ARM/ARMSubtarget.h
  llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
  llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
  llvm/test/MC/AArch64/SVE/bfcvt-diagnostics.s
  llvm/test/MC/AArch64/SVE/bfcvt.s
  llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s
  llvm/test/MC/AArch64/SVE/bfcvtnt.s
  llvm/test/MC/AArch64/SVE/bfdot-diagnostics.s
  llvm/test/MC/AArch64/SVE/bfdot.s
  llvm/test/MC/AArch64/SVE/bfmlal-diagnostics.s
  llvm/test/MC/AArch64/SVE/bfmlal.s
  llvm/test/MC/AArch64/SVE/bfmmla-diagnostics.s
  llvm/test/MC/AArch64/SVE/bfmmla.s
  llvm/test/MC/AArch64/armv8.6a-bf16.s
  llvm/test/MC/ARM/bfloat16-a32-errors.s
  llvm/test/MC/ARM/bfloat16-a32-errors2.s
  llvm/test/MC/ARM/bfloat16-a32.s
  llvm/test/MC/ARM/bfloat16-t32-errors.s
  llvm/test/MC/ARM/bfloat16-t32.s
  llvm/test/MC/Disassembler/AArch64/armv8.6a-bf16.txt
  llvm/test/MC/Disassembler/ARM/bfloat16-a32_1.txt
  llvm/test/MC/Disassembler/ARM/bfloat16-a32_2.txt
  llvm/test/MC/Disassembler/ARM/bfloat16-t32.txt
  llvm/test/MC/Disassembler/ARM/bfloat16-t32_errors.txt
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -26,9 +26,9 @@
 "armv7e-m","armv7em",  "armv8-a", "armv8","armv8a",
 "armv8l",  "armv8.1-a","armv8.1a","armv8.2-a","armv8.2a",
 "armv8.3-a",   "armv8.3a", "armv8.4-a",   "armv8.4a", "armv8.5-a",
-"armv8.5a", "armv8-r", "armv8r",  "armv8-m.base", "armv8m.base",
-"armv8-m.main", "armv8m.main", "iwmmxt",  "iwmmxt2",  "xscale",
-"armv8.1-m.main",
+"armv8.5a", "armv8.6-a",   "armv8.6a", "armv8-r", "armv8r",
+"armv8-m.base", "armv8m.base", "armv8-m.main", "armv8m.main", "iwmmxt",
+"iwmmxt2",  "xscale",  "armv8.1-m.main",
 };
 
 bool testARMCPU(StringRef CPUName, StringRef

[PATCH] D74966: [PATCH] [ARM] Add Cortex-M55 Support for clang and llvm

2020-03-02 Thread Luke Geeson via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
Closed by commit rG7d594cf003d1: [ARM] Add Cortex-M55 Support for clang and 
llvm (authored by LukeGeeson).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74966/new/

https://reviews.llvm.org/D74966

Files:
  clang/test/CodeGen/arm-target-features.c
  clang/test/Driver/arm-cortex-cpus.c
  clang/test/Preprocessor/arm-target-features.c
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Target/ARM/ARM.td
  llvm/test/CodeGen/ARM/build-attributes.ll
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -290,6 +290,11 @@
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, "8-M.Mainline"));
   EXPECT_TRUE(testARMCPU("cortex-m35p", "armv8-m.main", "fpv5-sp-d16",
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, "8-M.Mainline"));
+  EXPECT_TRUE(testARMCPU("cortex-m55", "armv8.1-m.main", "fp-armv8-fullfp16-d16",
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_SIMD |
+ ARM::AEK_FP | ARM::AEK_RAS | ARM::AEK_LOB |
+ ARM::AEK_FP16,
+"8.1-M.Mainline"));
   EXPECT_TRUE(testARMCPU("iwmmxt", "iwmmxt", "none",
  ARM::AEK_NONE, "iwmmxt"));
   EXPECT_TRUE(testARMCPU("xscale", "xscale", "none",
@@ -299,7 +304,7 @@
  "7-S"));
 }
 
-static constexpr unsigned NumARMCPUArchs = 85;
+static constexpr unsigned NumARMCPUArchs = 86;
 
 TEST(TargetParserTest, testARMCPUArchList) {
   SmallVector List;
Index: llvm/test/CodeGen/ARM/build-attributes.ll
===
--- llvm/test/CodeGen/ARM/build-attributes.ll
+++ llvm/test/CodeGen/ARM/build-attributes.ll
@@ -233,6 +233,7 @@
 ; RUN: llc < %s -mtriple=thumbv8.1m.main-none-none-eabi | FileCheck %s --check-prefix=ARMv81M-MAIN
 ; RUN: llc < %s -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve | FileCheck %s --check-prefix=ARMv81M-MAIN-MVEINT
 ; RUN: llc < %s -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=ARMv81M-MAIN-MVEFP
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-m55 | FileCheck %s --check-prefix=CORTEX-M55
 
 ; CPU-SUPPORTED-NOT: is not a recognized processor for this target
 
@@ -1722,6 +1723,28 @@
 ; ARMv81M-MAIN-MVEINT: .eabi_attribute 48, 1 @ Tag_MVE_arch
 ; ARMv81M-MAIN-MVEFP: .eabi_attribute 6, 21 @ Tag_CPU_arch
 ; ARMv81M-MAIN-MVEFP: .eabi_attribute 48, 2 @ Tag_MVE_arch
+
+; CORTEX-M55: .cpu cortex-m55
+; CORTEX-M55: .eabi_attribute 6, 21
+; CORTEX-M55: .eabi_attribute 7, 77
+; CORTEX-M55: .eabi_attribute 8, 0
+; CORTEX-M55: .eabi_attribute 9, 3
+; CORTEX-M55: .fpu fpv5-d16
+; CORTEX-M55: .eabi_attribute 36, 1
+; CORTEX-M55-NOT: .eabi_attribute 44
+; CORTEX-M55: .eabi_attribute 46, 1
+; CORTEX-M55: .eabi_attribute 34, 1
+; CORTEX-M55: .eabi_attribute 17, 1
+; CORTEX-M55-NOT: .eabi_attribute 19
+; CORTEX-M55: .eabi_attribute 20, 1
+; CORTEX-M55: .eabi_attribute 21, 1
+; CORTEX-M55: .eabi_attribute 23, 3
+; CORTEX-M55: .eabi_attribute 24, 1
+; CORTEX-M55: .eabi_attribute 25, 1
+; CORTEX-M55-NOT: .eabi_attribute 28
+; CORTEX-M55: .eabi_attribute 38, 1
+; CORTEX-M55: .eabi_attribute 14, 0
+
 define i32 @f(i64 %z) {
 ret i32 0
 }
Index: llvm/lib/Target/ARM/ARM.td
===
--- llvm/lib/Target/ARM/ARM.td
+++ llvm/lib/Target/ARM/ARM.td
@@ -1124,6 +1124,14 @@
  FeatureUseMISched,
  FeatureHasNoBranchPredictor]>;
 
+def : ProcessorModel<"cortex-m55", CortexM4Model,  [ARMv81mMainline,
+ FeatureDSP,
+ FeatureFPARMv8_D16,
+ FeatureUseMISched,
+ FeatureHasNoBranchPredictor,
+ FeaturePrefLoopAlign32,
+ FeatureHasSlowFPVMLx,
+ HasMVEFloatOps]>;
 
 def : ProcNoItin<"cortex-a32",   [ARMv8a,
  FeatureHWDivThumb,
Index: llvm/lib/Support/Host.cpp
===
--- llvm/lib/Support/Host.cpp
+++ llvm/lib/Support/Host.cpp
@@ -192,6 +192,7 @@
 .Case("0xc20", "cortex-m0")
 .Case("0xc23", "cortex-m3")
 .Case("0xc24", "cortex-m4")
+

[PATCH] D74966: [PATCH] [ARM] Add Cortex-M55 Support for clang and llvm

2020-02-25 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 246427.
LukeGeeson marked an inline comment as done.
LukeGeeson added a comment.

- Addressed dmgreen's comments (removed whitespace, added m55 test)
- Added CPU Part number to Host.cpp


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74966/new/

https://reviews.llvm.org/D74966

Files:
  clang/test/CodeGen/arm-target-features.c
  clang/test/Driver/arm-cortex-cpus.c
  clang/test/Preprocessor/arm-target-features.c
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Target/ARM/ARM.td
  llvm/test/CodeGen/ARM/build-attributes.ll
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -290,6 +290,11 @@
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, "8-M.Mainline"));
   EXPECT_TRUE(testARMCPU("cortex-m35p", "armv8-m.main", "fpv5-sp-d16",
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, "8-M.Mainline"));
+  EXPECT_TRUE(testARMCPU("cortex-m55", "armv8.1-m.main", "fp-armv8-fullfp16-d16",
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_SIMD |
+ ARM::AEK_FP | ARM::AEK_RAS | ARM::AEK_LOB |
+ ARM::AEK_FP16,
+"8.1-M.Mainline"));
   EXPECT_TRUE(testARMCPU("iwmmxt", "iwmmxt", "none",
  ARM::AEK_NONE, "iwmmxt"));
   EXPECT_TRUE(testARMCPU("xscale", "xscale", "none",
@@ -299,7 +304,7 @@
  "7-S"));
 }
 
-static constexpr unsigned NumARMCPUArchs = 85;
+static constexpr unsigned NumARMCPUArchs = 86;
 
 TEST(TargetParserTest, testARMCPUArchList) {
   SmallVector List;
Index: llvm/test/CodeGen/ARM/build-attributes.ll
===
--- llvm/test/CodeGen/ARM/build-attributes.ll
+++ llvm/test/CodeGen/ARM/build-attributes.ll
@@ -233,6 +233,7 @@
 ; RUN: llc < %s -mtriple=thumbv8.1m.main-none-none-eabi | FileCheck %s --check-prefix=ARMv81M-MAIN
 ; RUN: llc < %s -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve | FileCheck %s --check-prefix=ARMv81M-MAIN-MVEINT
 ; RUN: llc < %s -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=ARMv81M-MAIN-MVEFP
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-m55 | FileCheck %s --check-prefix=CORTEX-M55
 
 ; CPU-SUPPORTED-NOT: is not a recognized processor for this target
 
@@ -1722,6 +1723,28 @@
 ; ARMv81M-MAIN-MVEINT: .eabi_attribute 48, 1 @ Tag_MVE_arch
 ; ARMv81M-MAIN-MVEFP: .eabi_attribute 6, 21 @ Tag_CPU_arch
 ; ARMv81M-MAIN-MVEFP: .eabi_attribute 48, 2 @ Tag_MVE_arch
+
+; CORTEX-M55: .cpu cortex-m55
+; CORTEX-M55: .eabi_attribute 6, 21
+; CORTEX-M55: .eabi_attribute 7, 77
+; CORTEX-M55: .eabi_attribute 8, 0
+; CORTEX-M55: .eabi_attribute 9, 3
+; CORTEX-M55: .fpu fpv5-d16
+; CORTEX-M55: .eabi_attribute 36, 1
+; CORTEX-M55-NOT: .eabi_attribute 44
+; CORTEX-M55: .eabi_attribute 46, 1
+; CORTEX-M55: .eabi_attribute 34, 1
+; CORTEX-M55: .eabi_attribute 17, 1
+; CORTEX-M55-NOT: .eabi_attribute 19
+; CORTEX-M55: .eabi_attribute 20, 1
+; CORTEX-M55: .eabi_attribute 21, 1
+; CORTEX-M55: .eabi_attribute 23, 3
+; CORTEX-M55: .eabi_attribute 24, 1
+; CORTEX-M55: .eabi_attribute 25, 1
+; CORTEX-M55-NOT: .eabi_attribute 28
+; CORTEX-M55: .eabi_attribute 38, 1
+; CORTEX-M55: .eabi_attribute 14, 0
+
 define i32 @f(i64 %z) {
 ret i32 0
 }
Index: llvm/lib/Target/ARM/ARM.td
===
--- llvm/lib/Target/ARM/ARM.td
+++ llvm/lib/Target/ARM/ARM.td
@@ -1124,6 +1124,14 @@
  FeatureUseMISched,
  FeatureHasNoBranchPredictor]>;
 
+def : ProcessorModel<"cortex-m55", CortexM4Model,  [ARMv81mMainline,
+ FeatureDSP,
+ FeatureFPARMv8_D16,
+ FeatureUseMISched,
+ FeatureHasNoBranchPredictor,
+ FeaturePrefLoopAlign32,
+ FeatureHasSlowFPVMLx,
+ HasMVEFloatOps]>;
 
 def : ProcNoItin<"cortex-a32",   [ARMv8a,
  FeatureHWDivThumb,
Index: llvm/lib/Support/Host.cpp
===
--- llvm/lib/Support/Host.cpp
+++ llvm/lib/Support/Host.cpp
@@ -192,6 +192,7 @@
 .Case("0xc20", "cortex-m0")
 .Case("0xc23", "cortex-m3")
 .Case("0xc24", "cortex-m4")
+

[PATCH] D74966: [PATCH] [ARM] Add Cortex-M55 Support for clang and llvm

2020-02-21 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson created this revision.
Herald added subscribers: llvm-commits, cfe-commits, hiraditya, kristof.beyls.
Herald added projects: clang, LLVM.

This patch upstreams support for the ARM Armv8.1m cpu Cortex-M55.

In detail adding support for:

- mcpu option in clang
- Arm Target Features in clang
- llvm Arm TargetParser definitions

details of the CPU can be found here:
https://developer.arm.com/ip-products/processors/cortex-m/cortex-m55


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D74966

Files:
  clang/test/CodeGen/arm-target-features.c
  clang/test/Driver/arm-cortex-cpus.c
  clang/test/Preprocessor/arm-target-features.c
  llvm/include/llvm/Support/ARMTargetParser.def
  llvm/lib/Target/ARM/ARM.td
  llvm/test/CodeGen/ARM/build-attributes.ll
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -290,6 +290,11 @@
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, "8-M.Mainline"));
   EXPECT_TRUE(testARMCPU("cortex-m35p", "armv8-m.main", "fpv5-sp-d16",
  ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP, "8-M.Mainline"));
+  EXPECT_TRUE(testARMCPU("cortex-m55", "armv8.1-m.main", "fp-armv8-fullfp16-d16",
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_SIMD |
+ ARM::AEK_FP | ARM::AEK_RAS | ARM::AEK_LOB |
+ ARM::AEK_FP16,
+"8.1-M.Mainline"));
   EXPECT_TRUE(testARMCPU("iwmmxt", "iwmmxt", "none",
  ARM::AEK_NONE, "iwmmxt"));
   EXPECT_TRUE(testARMCPU("xscale", "xscale", "none",
@@ -299,7 +304,7 @@
  "7-S"));
 }
 
-static constexpr unsigned NumARMCPUArchs = 85;
+static constexpr unsigned NumARMCPUArchs = 86;
 
 TEST(TargetParserTest, testARMCPUArchList) {
   SmallVector List;
Index: llvm/test/CodeGen/ARM/build-attributes.ll
===
--- llvm/test/CodeGen/ARM/build-attributes.ll
+++ llvm/test/CodeGen/ARM/build-attributes.ll
@@ -233,6 +233,7 @@
 ; RUN: llc < %s -mtriple=thumbv8.1m.main-none-none-eabi | FileCheck %s --check-prefix=ARMv81M-MAIN
 ; RUN: llc < %s -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve | FileCheck %s --check-prefix=ARMv81M-MAIN-MVEINT
 ; RUN: llc < %s -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp | FileCheck %s --check-prefix=ARMv81M-MAIN-MVEFP
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-m55 | FileCheck %s --check-prefix=CORTEX-M55
 
 ; CPU-SUPPORTED-NOT: is not a recognized processor for this target
 
@@ -1722,6 +1723,9 @@
 ; ARMv81M-MAIN-MVEINT: .eabi_attribute 48, 1 @ Tag_MVE_arch
 ; ARMv81M-MAIN-MVEFP: .eabi_attribute 6, 21 @ Tag_CPU_arch
 ; ARMv81M-MAIN-MVEFP: .eabi_attribute 48, 2 @ Tag_MVE_arch
+
+; CORTEX-M55: .cpu cortex-m55
+
 define i32 @f(i64 %z) {
 ret i32 0
 }
Index: llvm/lib/Target/ARM/ARM.td
===
--- llvm/lib/Target/ARM/ARM.td
+++ llvm/lib/Target/ARM/ARM.td
@@ -622,7 +622,6 @@
 def ProcM3  : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3",
"Cortex-M3 ARM processors", []>;
 
-
 //===--===//
 // ARM Helper classes.
 //
@@ -1124,6 +1123,14 @@
  FeatureUseMISched,
  FeatureHasNoBranchPredictor]>;
 
+def : ProcessorModel<"cortex-m55", CortexM4Model,  [ARMv81mMainline,
+ FeatureDSP,
+ FeatureFPARMv8_D16,
+ FeatureUseMISched,
+ FeatureHasNoBranchPredictor,
+ FeaturePrefLoopAlign32,
+ FeatureHasSlowFPVMLx,
+ HasMVEFloatOps]>;
 
 def : ProcNoItin<"cortex-a32",   [ARMv8a,
  FeatureHWDivThumb,
Index: llvm/include/llvm/Support/ARMTargetParser.def
===
--- llvm/include/llvm/Support/ARMTargetParser.def
+++ llvm/include/llvm/Support/ARMTargetParser.def
@@ -268,6 +268,8 @@
 ARM_CPU_NAME("cortex-m23", ARMV8MBaseline, FK_NONE, false, ARM::AEK_NONE)
 ARM_CPU_NAME("cortex-m33", ARMV8MMainline, FK_FPV5_SP_D16, false, ARM::AEK_DSP)
 ARM_CPU_NAME("cortex-m35p", ARMV8MMainline, FK_FPV5_SP_D16, false, ARM::AEK_DSP)
+ARM_CPU_NAME("cortex-m55", ARMV8_1MMainline, FK_FP_ARMV8_FULLFP16_D16, false,
+

[PATCH] D74483: [AArch64] Add Cortex-A34 Support for clang and llvm

2020-02-18 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson closed this revision.
LukeGeeson marked an inline comment as not done.
LukeGeeson added inline comments.



Comment at: llvm/unittests/Support/TargetParserTest.cpp:784
   EXPECT_TRUE(testAArch64CPU(
+  "cortex-a34", "armv8-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_SIMD |

SjoerdMeijer wrote:
> nit: this looks the same as the a35. Would be better to keep the same order 
> of arch extension and also the formatting (to make it easier to eyeball 
> differences).
Good spot thanks, fixed.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74483/new/

https://reviews.llvm.org/D74483



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D74483: [AArch64] Add Cortex-A34 Support for clang and llvm

2020-02-13 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson updated this revision to Diff 244377.
LukeGeeson marked an inline comment as done.
LukeGeeson added a comment.

- Added MIDR to Host.cpp
- Fixed a clang test I missed (copy/paste error)


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74483/new/

https://reviews.llvm.org/D74483

Files:
  clang/test/Driver/aarch64-cpus.c
  clang/test/Preprocessor/aarch64-target-features.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/lib/Support/Host.cpp
  llvm/lib/Target/AArch64/AArch64.td
  llvm/test/CodeGen/AArch64/cpus.ll
  llvm/test/CodeGen/AArch64/remat.ll
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -781,6 +781,10 @@
   AArch64::AEK_NONE, ""));
 
   EXPECT_TRUE(testAArch64CPU(
+  "cortex-a34", "armv8-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
+  AArch64::AEK_SIMD, "8-A"));
+  EXPECT_TRUE(testAArch64CPU(
   "cortex-a35", "armv8-a", "crypto-neon-fp-armv8",
   AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
   AArch64::AEK_SIMD, "8-A"));
@@ -957,7 +961,7 @@
   "8.2-A"));
 }
 
-static constexpr unsigned NumAArch64CPUArchs = 35;
+static constexpr unsigned NumAArch64CPUArchs = 36;
 
 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector List;
@@ -1002,6 +1006,8 @@
 }
 
 TEST(TargetParserTest, testAArch64Extension) {
+  EXPECT_FALSE(testAArch64Extension("cortex-a34",
+AArch64::ArchKind::INVALID, "ras"));
   EXPECT_FALSE(testAArch64Extension("cortex-a35",
 AArch64::ArchKind::INVALID, "ras"));
   EXPECT_FALSE(testAArch64Extension("cortex-a53",
Index: llvm/test/CodeGen/AArch64/remat.ll
===
--- llvm/test/CodeGen/AArch64/remat.ll
+++ llvm/test/CodeGen/AArch64/remat.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a34 -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a35 -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a53 -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a55 -o - %s | FileCheck %s
Index: llvm/test/CodeGen/AArch64/cpus.ll
===
--- llvm/test/CodeGen/AArch64/cpus.ll
+++ llvm/test/CodeGen/AArch64/cpus.ll
@@ -3,6 +3,7 @@
 
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a35 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a34 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a55 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a57 2>&1 | FileCheck %s
Index: llvm/lib/Target/AArch64/AArch64.td
===
--- llvm/lib/Target/AArch64/AArch64.td
+++ llvm/lib/Target/AArch64/AArch64.td
@@ -853,6 +853,7 @@
  ]>;
 
 def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
+def : ProcessorModel<"cortex-a34", CortexA53Model, [ProcA35]>;
 def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
 def : ProcessorModel<"cortex-a55", CortexA53Model, [ProcA55]>;
 def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
Index: llvm/lib/Support/Host.cpp
===
--- llvm/lib/Support/Host.cpp
+++ llvm/lib/Support/Host.cpp
@@ -178,6 +178,8 @@
 // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
 // values correspond to the "Part number" in the CP15/c0 register. The
 // contents are specified in the various processor manuals.
+// This corresponds to the Main ID Register in Technical Reference Manuals.
+// and is used in programs like sys-utils
 return StringSwitch(Lines[I].substr(8).ltrim("\t :"))
 .Case("0x926", "arm926ej-s")
 .Case("0xb02", "mpcore")
@@ -190,6 +192,7 @@
 .Case("0xc20", "cortex-m0")
 .Case("0xc23", "cortex-m3")
 .Case("0xc24", "cortex-m4")
+.Case("0xd02", "cortex-a34")
 .Case("0xd04", "cortex-a35")
 .Case("0xd03", "cortex-a53")
 .Case("0xd07", "cortex-a57")
Index: llvm/include/llvm/Support/AArch64TargetParser.def
===
--- llvm/include/llvm/Support/AArch64TargetParser.def
+++ llvm/include/llvm/Support/AArch64TargetParser.def
@@ -85,6 +85,8 @@
 #ifndef AARCH64_CPU_NAME
 #define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU,

[PATCH] D74483: [AArch64] Add Cortex-A34 Support for clang and llvm

2020-02-12 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson created this revision.
Herald added subscribers: llvm-commits, cfe-commits, hiraditya, kristof.beyls.
Herald added projects: clang, LLVM.

This patch upstreams support for the AArch64 Armv8-A cpu Cortex-A34.


   

In detail adding support for:

- mcpu option in clang
- AArch64 Target Features in clang
- llvm AArch64 TargetParser definitions

  details of the cpu can be found here: 
https://developer.arm.com/ip-products/processors/cortex-a/cortex-a34


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D74483

Files:
  clang/test/Driver/aarch64-cpus.c
  clang/test/Preprocessor/aarch64-target-features.c
  llvm/include/llvm/Support/AArch64TargetParser.def
  llvm/lib/Target/AArch64/AArch64.td
  llvm/test/CodeGen/AArch64/cpus.ll
  llvm/test/CodeGen/AArch64/remat.ll
  llvm/unittests/Support/TargetParserTest.cpp

Index: llvm/unittests/Support/TargetParserTest.cpp
===
--- llvm/unittests/Support/TargetParserTest.cpp
+++ llvm/unittests/Support/TargetParserTest.cpp
@@ -781,6 +781,11 @@
   AArch64::AEK_NONE, ""));
 
   EXPECT_TRUE(testAArch64CPU(
+  "cortex-a34", "armv8-a", "crypto-neon-fp-armv8",
+  AArch64::AEK_CRC | AArch64::AEK_SIMD |
+  AArch64::AEK_FP | AArch64::AEK_CRYPTO,
+  "8-A"));
+  EXPECT_TRUE(testAArch64CPU(
   "cortex-a35", "armv8-a", "crypto-neon-fp-armv8",
   AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
   AArch64::AEK_SIMD, "8-A"));
@@ -957,7 +962,7 @@
   "8.2-A"));
 }
 
-static constexpr unsigned NumAArch64CPUArchs = 35;
+static constexpr unsigned NumAArch64CPUArchs = 36;
 
 TEST(TargetParserTest, testAArch64CPUArchList) {
   SmallVector List;
@@ -1002,6 +1007,8 @@
 }
 
 TEST(TargetParserTest, testAArch64Extension) {
+  EXPECT_FALSE(testAArch64Extension("cortex-a34",
+AArch64::ArchKind::INVALID, "ras"));
   EXPECT_FALSE(testAArch64Extension("cortex-a35",
 AArch64::ArchKind::INVALID, "ras"));
   EXPECT_FALSE(testAArch64Extension("cortex-a53",
Index: llvm/test/CodeGen/AArch64/remat.ll
===
--- llvm/test/CodeGen/AArch64/remat.ll
+++ llvm/test/CodeGen/AArch64/remat.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a34 -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a35 -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a53 -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a55 -o - %s | FileCheck %s
Index: llvm/test/CodeGen/AArch64/cpus.ll
===
--- llvm/test/CodeGen/AArch64/cpus.ll
+++ llvm/test/CodeGen/AArch64/cpus.ll
@@ -3,6 +3,7 @@
 
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a35 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a34 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a55 2>&1 | FileCheck %s
 ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a57 2>&1 | FileCheck %s
Index: llvm/lib/Target/AArch64/AArch64.td
===
--- llvm/lib/Target/AArch64/AArch64.td
+++ llvm/lib/Target/AArch64/AArch64.td
@@ -853,6 +853,7 @@
  ]>;
 
 def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
+def : ProcessorModel<"cortex-a34", CortexA53Model, [ProcA35]>;
 def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
 def : ProcessorModel<"cortex-a55", CortexA53Model, [ProcA55]>;
 def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
Index: llvm/include/llvm/Support/AArch64TargetParser.def
===
--- llvm/include/llvm/Support/AArch64TargetParser.def
+++ llvm/include/llvm/Support/AArch64TargetParser.def
@@ -85,6 +85,8 @@
 #ifndef AARCH64_CPU_NAME
 #define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT)
 #endif
+AARCH64_CPU_NAME("cortex-a34", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_CRC))
 AARCH64_CPU_NAME("cortex-a35", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
  (AArch64::AEK_CRC))
 AARCH64_CPU_NAME("cortex-a53", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true,
Index: clang/test/Preprocessor/aarch64-target-features.c
===
--- clang/test/Preprocessor/aarch64-target-features.c
+++ clang/test/Preprocessor/aarch64-target-features.c
@@ -152,6 +152,7 @@
 // RUN: %clang -target aarch64 -mcpu=apple-s4 -### -c %s 2>&1 | FileCheck

[PATCH] D60272: [Aarch64] Add v8.2-a half precision element extract intrinsics

2019-04-11 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson added a comment.

couple of nits, besides from that LGTM




Comment at: lib/CodeGen/CGBuiltin.cpp:7813
   }
+  case NEON::BI__builtin_neon_vduph_lane_f16:{
+return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),

nit: spacing between : and { 



Comment at: lib/CodeGen/CGBuiltin.cpp:7817
+  }
+  case NEON::BI__builtin_neon_vduph_laneq_f16:{
+return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),

nit: spacing again


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D60272/new/

https://reviews.llvm.org/D60272



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D48119: [AArch64] Added support for the vcvta_u16_f16 instrinsic for FP16 Armv8.2-A

2018-06-13 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson marked an inline comment as done.
LukeGeeson added a comment.

address Nit's and code comments, please see above.




Comment at: CodeGen/arm-v8.2a-neon-intrinsics.c:170
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvta_u16_f16 (float16x4_t a) {
+   return vcvta_u16_f16(a);

SjoerdMeijer wrote:
> Is this exactly the same test also added in aarch64-v8.2a-neon-intrinsics.c?
Not that I'm aware. One is for AArch platforms and the other is Arm. see the 
second CHECK line of each


Repository:
  rC Clang

https://reviews.llvm.org/D48119



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D48119: [AArch64] Added Clang Codegen+Test Support for FP16 VCVTA_U16 intrinsic

2018-06-13 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson created this revision.
LukeGeeson added a reviewer: SjoerdMeijer.
Herald added a reviewer: javed.absar.
Herald added a subscriber: kristof.beyls.

-Added support for existing IP for vcvta_u16_f16 instrinsic for Arm v8.2a 
-CGBuiltin simply adds cases for this intrinsic to gen the clang IR
-tests added for Aarch64 and Arm test suites
-tested and works using ninja check


Repository:
  rC Clang

https://reviews.llvm.org/D48119

Files:
  CodeGen/CGBuiltin.cpp
  CodeGen/aarch64-v8.2a-neon-intrinsics.c
  CodeGen/arm-v8.2a-neon-intrinsics.c


Index: CodeGen/arm-v8.2a-neon-intrinsics.c
===
--- CodeGen/arm-v8.2a-neon-intrinsics.c
+++ CodeGen/arm-v8.2a-neon-intrinsics.c
@@ -164,6 +164,13 @@
   return vcvta_s16_f16(a);
 }
 
+// CHECK-LABEL: test_vcvta_u16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.fcvtau.v4i16.v4f16(<4 
x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvta_u16_f16 (float16x4_t a) {
+   return vcvta_u16_f16(a);
+}
+
 // CHECK-LABEL: test_vcvtaq_s16_f16
 // CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 
x half> %a)
 // CHECK:  ret <8 x i16> [[VCVT]]
Index: CodeGen/aarch64-v8.2a-neon-intrinsics.c
===
--- CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -164,6 +164,13 @@
   return vcvta_s16_f16(a);
 }
 
+// CHECK-LABEL: test_vcvta_u16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.fcvtau.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvta_u16_f16 (float16x4_t a) {
+  return vcvta_u16_f16(a);
+}
+
 // CHECK-LABEL: test_vcvtaq_s16_f16
 // CHECK:  [[VCVT:%.*]] = call <8 x i16> 
@llvm.aarch64.neon.fcvtas.v8i16.v8f16(<8 x half> %a)
 // CHECK:  ret <8 x i16> [[VCVT]]
Index: CodeGen/CGBuiltin.cpp
===
--- CodeGen/CGBuiltin.cpp
+++ CodeGen/CGBuiltin.cpp
@@ -3998,6 +3998,7 @@
   NEONMAP0(vcvt_u32_v),
   NEONMAP0(vcvt_u64_v),
   NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
+  NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0),
   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
@@ -4882,6 +4883,7 @@
 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
   }
   case NEON::BI__builtin_neon_vcvta_s16_v:
+  case NEON::BI__builtin_neon_vcvta_u16_v:
   case NEON::BI__builtin_neon_vcvta_s32_v:
   case NEON::BI__builtin_neon_vcvta_s64_v:
   case NEON::BI__builtin_neon_vcvta_u32_v:
@@ -7618,6 +7620,7 @@
 return Builder.CreateFPToSI(Ops[0], Ty);
   }
   case NEON::BI__builtin_neon_vcvta_s16_v:
+  case NEON::BI__builtin_neon_vcvta_u16_v:
   case NEON::BI__builtin_neon_vcvta_s32_v:
   case NEON::BI__builtin_neon_vcvtaq_s16_v:
   case NEON::BI__builtin_neon_vcvtaq_s32_v:


Index: CodeGen/arm-v8.2a-neon-intrinsics.c
===
--- CodeGen/arm-v8.2a-neon-intrinsics.c
+++ CodeGen/arm-v8.2a-neon-intrinsics.c
@@ -164,6 +164,13 @@
   return vcvta_s16_f16(a);
 }
 
+// CHECK-LABEL: test_vcvta_u16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.arm.neon.fcvtau.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvta_u16_f16 (float16x4_t a) {
+   return vcvta_u16_f16(a);
+}
+
 // CHECK-LABEL: test_vcvtaq_s16_f16
 // CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half> %a)
 // CHECK:  ret <8 x i16> [[VCVT]]
Index: CodeGen/aarch64-v8.2a-neon-intrinsics.c
===
--- CodeGen/aarch64-v8.2a-neon-intrinsics.c
+++ CodeGen/aarch64-v8.2a-neon-intrinsics.c
@@ -164,6 +164,13 @@
   return vcvta_s16_f16(a);
 }
 
+// CHECK-LABEL: test_vcvta_u16_f16
+// CHECK:  [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtau.v4i16.v4f16(<4 x half> %a)
+// CHECK:  ret <4 x i16> [[VCVT]]
+int16x4_t test_vcvta_u16_f16 (float16x4_t a) {
+  return vcvta_u16_f16(a);
+}
+
 // CHECK-LABEL: test_vcvtaq_s16_f16
 // CHECK:  [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtas.v8i16.v8f16(<8 x half> %a)
 // CHECK:  ret <8 x i16> [[VCVT]]
Index: CodeGen/CGBuiltin.cpp
===
--- CodeGen/CGBuiltin.cpp
+++ CodeGen/CGBuiltin.cpp
@@ -3998,6 +3998,7 @@
   NEONMAP0(vcvt_u32_v),
   NEONMAP0(vcvt_u64_v),
   NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
+  NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0),
   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
@@ -4882,6 +4883,7 @@
 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
   }
   case NEON::BI__builtin_neon_vcvta_s16_v:
+  case NEON::BI__builtin_neon_vcvta_u16_v:
   case NEON::BI__builtin_neon_vcvta_s32_v:
   case NEON::BI__builtin_neon_vcvta_s64_v:
   case

[PATCH] D47592: [AArch64] Corrected FP16 Intrinsic range checks in Clang + added Sema tests

2018-06-12 Thread Luke Geeson via Phabricator via cfe-commits

This revision was automatically updated to reflect the committed changes.
LukeGeeson marked an inline comment as done.
Closed by commit rL334489: [AArch64] Corrected FP16 Intrinsic range checks in 
Clang + added Sema tests (authored by LukeGeeson, committed by ).
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D47592?vs=149737=150912#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D47592

Files:
  cfe/trunk/include/clang/Basic/arm_fp16.td
  cfe/trunk/lib/Sema/SemaChecking.cpp
  cfe/trunk/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
  cfe/trunk/test/Sema/aarch64-neon-fp16-ranges.c
  cfe/trunk/utils/TableGen/NeonEmitter.cpp

Index: cfe/trunk/utils/TableGen/NeonEmitter.cpp
===
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp
@@ -2162,8 +2162,7 @@
   OS << "#endif\n\n";
 }
 
-void
-NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream ,
+void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream ,
 SmallVectorImpl ) {
   OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
 
@@ -2188,11 +2187,15 @@
 Record *R = Def->getRecord();
 if (R->getValueAsBit("isVCVT_N")) {
   // VCVT between floating- and fixed-point values takes an immediate
-  // in the range [1, 32) for f32 or [1, 64) for f64.
+  // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
   LowerBound = "1";
-  if (Def->getBaseType().getElementSizeInBits() == 32)
+	  if (Def->getBaseType().getElementSizeInBits() == 16 ||
+		  Def->getName().find('h') != std::string::npos)
+		// VCVTh operating on FP16 intrinsics in range [1, 16)
+		UpperBound = "15";
+	  else if (Def->getBaseType().getElementSizeInBits() == 32)
 UpperBound = "31";
-  else
+	  else
 UpperBound = "63";
 } else if (R->getValueAsBit("isScalarShift")) {
   // Right shifts have an 'r' in the name, left shifts do not. Convert
Index: cfe/trunk/include/clang/Basic/arm_fp16.td
===
--- cfe/trunk/include/clang/Basic/arm_fp16.td
+++ cfe/trunk/include/clang/Basic/arm_fp16.td
@@ -75,15 +75,15 @@
   def SCALAR_FCVTPUH  : SInst<"vcvtp_u16", "bs", "Sh">;
   def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">;
   def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "Os", "Sh">;
-
-  def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "silUsUiUl">;
-  def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "$si", "Sh">;
-  def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">;
-  def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "Lsi", "Sh">;
-  def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">;
-  def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">;
-  def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "Osi", "Sh">;
-
+  let isVCVT_N = 1 in {
+def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "silUsUiUl">;
+def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "$si", "Sh">;
+def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">;
+def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "Lsi", "Sh">;
+def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">;
+def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">;
+def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "Osi", "Sh">;
+  }
   // Comparison
   def SCALAR_CMEQRH   : SInst<"vceq", "bss", "Sh">;
   def SCALAR_CMEQZH   : SInst<"vceqz", "bs", "Sh">;
Index: cfe/trunk/test/Sema/aarch64-neon-fp16-ranges.c
===
--- cfe/trunk/test/Sema/aarch64-neon-fp16-ranges.c
+++ cfe/trunk/test/Sema/aarch64-neon-fp16-ranges.c
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -triple arm64-linux-gnu -fallow-half-arguments-and-returns -target-feature +neon -target-feature +fullfp16 -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -fallow-half-arguments-and-returns -target-feature +fullfp16 -target-feature +neon -ffreestanding -fsyntax-only -verify %s
+
+#include 
+#include 
+
+void test_vcvt_f16_16(int16_t a){
+  vcvth_n_f16_s16(a, 1);
+  vcvth_n_f16_s16(a, 16);
+  vcvth_n_f16_s16(a, 0);  // expected-error {{argument should be a value from 1 to 16}}
+  vcvth_n_f16_s16(a, 17); // expected-error {{argument should be a value from 1 to 16}}
+
+  vcvth_n_f16_u16(a, 1);
+  vcvth_n_f16_u16(a, 16);
+  vcvth_n_f16_u16(a, 0);  // expected-error {{argument should be a value from 1 to 16}}
+  vcvth_n_f16_u16(a, 17); // expected-error {{argument should be a value from 1 to 16}}
+}
+
+void test_vcvt_f16_32(int32_t a){
+  vcvth_n_f16_u32(a, 1);
+  vcvth_n_f16_u32(a, 16);
+  vcvth_n_f16_u32(a, 0);  // expected-error {{argument should be a value from 1 to 16}}
+  vcvth_n_f16_u32(a, 17); // expected-error {{argument should be a value from 1 to 16}}
+
+  vcvth_n_f16_s32(a, 1);
+  vcvth_n_f16_s32(a, 16);
+  vcvth_n_f16_s32(a, 0);  // expected-error {{argument should be a value from 1 to 16}}
+  vcvth_n_f16_s32(a, 17); //

[PATCH] D47592: [AArch64] Corrected FP16 Intrinsic range checks in Clang + added Sema tests

2018-06-04 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson marked 2 inline comments as done.
LukeGeeson added inline comments.



Comment at: lib/Sema/SemaChecking.cpp:1409
-  switch (BuiltinID) {
-#define GET_NEON_OVERLOAD_CHECK
-#include "clang/Basic/arm_neon.inc"

SjoerdMeijer wrote:
> Why do we need to remove this?
We don't actually need this one, inspecting include/clang/Basic/arm_fp16.inc 
after building shows an empty set of #ifdefs  pragmas - ie nothing is generated 
and hence nothing happens.
Will leave in for continuity, however this may be redundant 



Comment at: test/Sema/aarch64-neon-fp16-ranges.c:1
+// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon 
-fallow-half-arguments-and-returns -target-feature +fullfp16 -ffreestanding 
-fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon 
-fallow-half-arguments-and-returns -target-feature +fullfp16 -ffreestanding 
-fsyntax-only -verify %s

SjoerdMeijer wrote:
> Nit: target feature fullfp16 implies ARMv8 FP, so I think you can remove 
> +neon; just a tiny optimisation to make the command line shorter (same below).
That doesn't seem to work, removing neon introduces hundreds of errors around 
incompatible types (including things this diff doesn't change) which would 
suggest maybe neon doesn't cover everything?



Comment at: test/Sema/aarch64-neon-fp16-ranges.c:39
+
+void test_vcvt_su_f(int64_t a){
+  vcvth_n_s16_f16(a, 1);

SjoerdMeijer wrote:
> why is this is 'a' an int64_t? Should this not be float16_t?
Agreed. Fixed.


https://reviews.llvm.org/D47592



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D47592: [AArch64] Corrected FP16 Intrinsic range checks in Clang + added Sema tests

2018-05-31 Thread Luke Geeson via Phabricator via cfe-commits

LukeGeeson created this revision.
LukeGeeson added a reviewer: SjoerdMeijer.
Herald added a reviewer: javed.absar.
Herald added a subscriber: kristof.beyls.

This fixes the ranges for the vcvth family of FP16 intrinsics in the clang 
front end. Previously it was accepting incorrect ranges
-Changed builtin range checking in SemaChecking
-added tests SemaCheck changes - included in  their own file since no similar 
one exists
-modified existing tests to reflect new ranges


https://reviews.llvm.org/D47592

Files:
  lib/Sema/SemaChecking.cpp
  test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
  test/Sema/aarch64-neon-fp16-ranges.c

Index: test/Sema/aarch64-neon-fp16-ranges.c
===
--- /dev/null
+++ test/Sema/aarch64-neon-fp16-ranges.c
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -fallow-half-arguments-and-returns -target-feature +fullfp16 -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -fallow-half-arguments-and-returns -target-feature +fullfp16 -ffreestanding -fsyntax-only -verify %s
+
+#include 
+#include 
+
+void test_vcvt_f16_16(int16_t a){
+  vcvth_n_f16_s16(a, 1);
+  vcvth_n_f16_s16(a, 16);
+  vcvth_n_f16_s16(a, 0);  // expected-error {{argument should be a value from 1 to 16}}
+  vcvth_n_f16_s16(a, 17); // expected-error {{argument should be a value from 1 to 16}}
+
+  vcvth_n_f16_u16(a, 1);
+  vcvth_n_f16_u16(a, 16);
+  vcvth_n_f16_u16(a, 0);  // expected-error {{argument should be a value from 1 to 16}}
+  vcvth_n_f16_u16(a, 17); // expected-error {{argument should be a value from 1 to 16}}
+}
+
+void test_vcvt_f16_32(int32_t a){
+  vcvth_n_f16_u32(a, 1);
+  vcvth_n_f16_u32(a, 16);
+  vcvth_n_f16_u32(a, 0);  // expected-error {{argument should be a value from 1 to 16}}
+  vcvth_n_f16_u32(a, 17); // expected-error {{argument should be a value from 1 to 16}}
+
+  vcvth_n_f16_s32(a, 1);
+  vcvth_n_f16_s32(a, 16);
+  vcvth_n_f16_s32(a, 0);  // expected-error {{argument should be a value from 1 to 16}}
+  vcvth_n_f16_s32(a, 17); // expected-error {{argument should be a value from 1 to 16}}
+}
+
+void test_vcvt_f16_64(int64_t a){
+  vcvth_n_f16_s64(a, 1);
+  vcvth_n_f16_s64(a, 16);
+  vcvth_n_f16_s64(a, 0);  // expected-error {{argument should be a value from 1 to 16}}
+  vcvth_n_f16_s64(a, 17); // expected-error {{argument should be a value from 1 to 16}}
+}
+
+
+void test_vcvt_su_f(int64_t a){
+  vcvth_n_s16_f16(a, 1);
+  vcvth_n_s16_f16(a, 16);
+  vcvth_n_s16_f16(a, 0);  // expected-error {{argument should be a value from 1 to 16}}
+  vcvth_n_s16_f16(a, 17); // expected-error {{argument should be a value from 1 to 16}}
+
+  vcvth_n_s32_f16(a, 1);
+  vcvth_n_s32_f16(a, 16);
+  vcvth_n_s32_f16(a, 0);  // expected-error {{argument should be a value from 1 to 16}}
+  vcvth_n_s32_f16(a, 17); // expected-error {{argument should be a value from 1 to 16}}
+
+  vcvth_n_s64_f16(a, 1);
+  vcvth_n_s64_f16(a, 32);
+  vcvth_n_s64_f16(a, 0);  // expected-error {{argument should be a value from 1 to 32}}
+  vcvth_n_s64_f16(a, 33); // expected-error {{argument should be a value from 1 to 32}}
+
+  vcvth_n_u16_f16(a, 1);
+  vcvth_n_u16_f16(a, 16);
+  vcvth_n_u16_f16(a, 0);  // expected-error {{argument should be a value from 1 to 16}}
+  vcvth_n_u16_f16(a, 17); // expected-error {{argument should be a value from 1 to 16}}
+
+  vcvth_n_u32_f16(a, 1);
+  vcvth_n_u32_f16(a, 16);
+  vcvth_n_u32_f16(a, 0);  // expected-error {{argument should be a value from 1 to 16}}
+  vcvth_n_u32_f16(a, 17); // expected-error {{argument should be a value from 1 to 16}}
+}
Index: test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
===
--- test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
+++ test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
@@ -486,90 +486,90 @@
 
 // CHECK-LABEL: test_vcvth_n_f16_s16
 // CHECK: [[SEXT:%.*]] = sext i16 %a to i32
-// CHECK:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 [[SEXT]], i32 0)
+// CHECK:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 [[SEXT]], i32 1)
 // CHECK:  ret half [[CVT]]
 float16_t test_vcvth_n_f16_s16(int16_t a) {
-  return vcvth_n_f16_s16(a, 0);
+  return vcvth_n_f16_s16(a, 1);
 }
 
 // CHECK-LABEL: test_vcvth_n_f16_s32
-// CHECK:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 0)
+// CHECK:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 1)
 // CHECK:  ret half [[CVT]]
 float16_t test_vcvth_n_f16_s32(int32_t a) {
-  return vcvth_n_f16_s32(a, 0);
+  return vcvth_n_f16_s32(a, 1);
 }
 
 // CHECK-LABEL: test_vcvth_n_f16_s64
-// CHECK:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 0)
+// CHECK:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 1)
 // CHECK:  ret half [[CVT]]
 float16_t test_vcvth_n_f16_s64(int64_t a) {
-  return vcvth_n_f16_s64(a,

85 matches

Mail list logo