[PATCH] D139073: [X86] AMD Zen 4 Initial enablement

2022-12-17 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG1f057e365f1f: [X86] AMD Zen 4 Initial enablement (authored 
by GGanesh).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139073/new/

https://reviews.llvm.org/D139073

Files:
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/target-builtin-noerror.c
  clang/test/Driver/x86-march.c
  clang/test/Frontend/x86-target-cpu.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86PfmCounters.td
  llvm/test/CodeGen/X86/cpus-amd.ll
  llvm/test/CodeGen/X86/rdpru.ll
  llvm/test/CodeGen/X86/slow-unaligned-mem.ll
  llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
  llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
  llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
  llvm/test/MC/X86/x86_long_nop.s

Index: llvm/test/MC/X86/x86_long_nop.s
===
--- llvm/test/MC/X86/x86_long_nop.s
+++ llvm/test/MC/X86/x86_long_nop.s
@@ -17,6 +17,8 @@
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver2 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver3 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver3 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver4 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver4 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=westmere %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
Index: llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
===
--- llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
+++ llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
@@ -15,6 +15,7 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s
 
 ; Verify that for the X86_64 processors that are known to have poor latency
 ; double precision shift instructions we do not generate 'shld' or 'shrd'
Index: llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
===
--- llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
+++ llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
@@ -7,6 +7,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver1 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver2 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver3 | FileCheck %s --check-prefixes=FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=skx | FileCheck %s --check-prefixes=FAST
 
Index: llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
===
--- llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
+++ llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
@@ -5,6 +5,7 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1  | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3  | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4  | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64  | FileCheck %s --check-prefixes=X86-64
 
 define float @f32_no_daz(float %f) #0 {
Index: llvm/test/CodeGen/X86/slow-unaligned-mem.ll
===

[PATCH] D139073: [X86] AMD Zen 4 Initial enablement

2022-12-15 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh updated this revision to Diff 483141.
GGanesh added a comment.

Fixed the tests

tools/llvm-mca/X86/cpus.s
tools/llvm-mca/X86/read-after-ld-1.s
tools/llvm-mca/X86/register-file-statistics.s
tools/llvm-mca/X86/scheduler-queue-usage.s


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139073/new/

https://reviews.llvm.org/D139073

Files:
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/target-builtin-noerror.c
  clang/test/Driver/x86-march.c
  clang/test/Frontend/x86-target-cpu.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86PfmCounters.td
  llvm/test/CodeGen/X86/cpus-amd.ll
  llvm/test/CodeGen/X86/rdpru.ll
  llvm/test/CodeGen/X86/slow-unaligned-mem.ll
  llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
  llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
  llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
  llvm/test/MC/X86/x86_long_nop.s

Index: llvm/test/MC/X86/x86_long_nop.s
===
--- llvm/test/MC/X86/x86_long_nop.s
+++ llvm/test/MC/X86/x86_long_nop.s
@@ -17,6 +17,8 @@
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver2 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver3 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver3 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver4 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver4 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=westmere %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
Index: llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
===
--- llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
+++ llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
@@ -15,6 +15,7 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s
 
 ; Verify that for the X86_64 processors that are known to have poor latency
 ; double precision shift instructions we do not generate 'shld' or 'shrd'
Index: llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
===
--- llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
+++ llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
@@ -7,6 +7,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver1 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver2 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver3 | FileCheck %s --check-prefixes=FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell | FileCheck %s --check-prefixes=FAST
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=skx | FileCheck %s --check-prefixes=FAST
 
Index: llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
===
--- llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
+++ llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
@@ -5,6 +5,7 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1  | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3  | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4  | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64  | FileCheck %s --check-prefixes=X86-64
 
 define float @f32_no_daz(float %f) #0 {
Index: llvm/test/CodeGen/X86/slow-unaligned-mem.ll

[PATCH] D139073: [X86] AMD Zen 4 Initial enablement

2022-12-15 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh added a comment.

In D139073#3997418 , @RKSimon wrote:

> Thanks @GGanesh  - LGTM with one minor - the orphan ZNVER4 checks from the 
> llvm-mca tests need removing

Thank you! I left them intentionally as this is a stop-gap patch. Agreed! Will 
remove them.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139073/new/

https://reviews.llvm.org/D139073

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139073: [X86] AMD Zen 4 Initial enablement

2022-12-15 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh updated this revision to Diff 483092.
GGanesh added a comment.

Update to use 'no' processor model.
Fixed the below tests
tools/llvm-mca/X86/cpus.s
tools/llvm-mca/X86/read-after-ld-1.s
tools/llvm-mca/X86/register-file-statistics.s
tools/llvm-mca/X86/scheduler-queue-usage.s

Addressed comments from @RKSimon


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139073/new/

https://reviews.llvm.org/D139073

Files:
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/target-builtin-noerror.c
  clang/test/Driver/x86-march.c
  clang/test/Frontend/x86-target-cpu.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86PfmCounters.td
  llvm/test/CodeGen/X86/cpus-amd.ll
  llvm/test/CodeGen/X86/rdpru.ll
  llvm/test/CodeGen/X86/slow-unaligned-mem.ll
  llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
  llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
  llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
  llvm/test/MC/X86/x86_long_nop.s
  llvm/test/tools/llvm-mca/X86/cpus.s
  llvm/test/tools/llvm-mca/X86/read-after-ld-1.s
  llvm/test/tools/llvm-mca/X86/register-file-statistics.s
  llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s

Index: llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
===
--- llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
+++ llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
@@ -112,6 +112,12 @@
 # ZNVER3-NEXT: [3] Maximum number of used buffer entries.
 # ZNVER3-NEXT: [4] Total number of buffer entries.
 
+# ZNVER4:  Scheduler's queue usage:
+# ZNVER4-NEXT: [1] Resource name.
+# ZNVER4-NEXT: [2] Average number of used buffer entries.
+# ZNVER4-NEXT: [3] Maximum number of used buffer entries.
+# ZNVER4-NEXT: [4] Total number of buffer entries.
+
 # BARCELONA:[1][2][3][4]
 # BARCELONA-NEXT:  SBPortAny0  1  54
 
@@ -165,3 +171,9 @@
 # ZNVER3-NEXT: Zn3Int   0  1  96
 # ZNVER3-NEXT: Zn3Load  0  0  72
 # ZNVER3-NEXT: Zn3Store 0  0  64
+
+# ZNVER4:   [1][2][3][4]
+# ZNVER4-NEXT: Zn3FP0  0  64
+# ZNVER4-NEXT: Zn3Int   0  1  96
+# ZNVER4-NEXT: Zn3Load  0  0  72
+# ZNVER4-NEXT: Zn3Store 0  0  64
Index: llvm/test/tools/llvm-mca/X86/register-file-statistics.s
===
--- llvm/test/tools/llvm-mca/X86/register-file-statistics.s
+++ llvm/test/tools/llvm-mca/X86/register-file-statistics.s
@@ -54,6 +54,11 @@
 # ZNVER3-NEXT:   Total number of mappings created: 0
 # ZNVER3-NEXT:   Max number of mappings used:  0
 
+# ZNVER4: *  Register File #1 -- Zn3FpPRF:
+# ZNVER4-NEXT:   Number of physical registers: 160
+# ZNVER4-NEXT:   Total number of mappings created: 0
+# ZNVER4-NEXT:   Max number of mappings used:  0
+
 # BDVER2: *  Register File #2 -- PdIntegerPRF:
 # BDVER2-NEXT:   Number of physical registers: 96
 # BDVER2-NEXT:   Total number of mappings created: 2
@@ -78,3 +83,8 @@
 # ZNVER3-NEXT:   Number of physical registers: 192
 # ZNVER3-NEXT:   Total number of mappings created: 2
 # ZNVER3-NEXT:   Max number of mappings used:  2
+
+# ZNVER4: *  Register File #2 -- Zn3IntegerPRF:
+# ZNVER4-NEXT:   Number of physical registers: 192
+# ZNVER4-NEXT:   Total number of mappings created: 2
+# ZNVER4-NEXT:   Max number of mappings used:  2
Index: llvm/test/tools/llvm-mca/X86/read-after-ld-1.s
===
--- llvm/test/tools/llvm-mca/X86/read-after-ld-1.s
+++ llvm/test/tools/llvm-mca/X86/read-after-ld-1.s
@@ -47,6 +47,9 @@
 # ZNVER3-NEXT:Total Cycles:  17
 # ZNVER3-NEXT:Total uOps:2
 
+# ZNVER4-NEXT:Total Cycles:  17
+# ZNVER4-NEXT:Total uOps:2
+
 # BARCELONA:  Dispatch Width:4
 # BARCELONA-NEXT: uOps Per Cycle:0.15
 # BARCELONA-NEXT: IPC:   0.10
@@ -97,6 +100,11 @@
 # ZNVER3-NEXT:IPC:   0.12
 # ZNVER3-NEXT:Block RThroughput: 3.0
 
+# ZNVER4: Dispatch Width:6
+# ZNVER4-NEXT:uOps Per Cycle:0.12
+# ZNVER4-NEXT:IPC:   0.12
+# ZNVER4-NEXT:Block RThroughput: 3.0
+
 # ALL:Timeline view:
 
 # BARCELONA-NEXT: 0123456789
@@ -129,6 +137,9 @@
 # ZNVER3-NEXT:0123456
 # ZNVER3-NEXT:Index 0123456789
 
+# ZNVER4-NEXT:

[PATCH] D139073: [X86] AMD Zen 4 Initial enablement

2022-12-14 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh added a comment.

In D139073#3995741 , @RKSimon wrote:

> @GGanesh reverse-ping

@RKSimon Thanks a lot! We were trying to get the libpfm patch posted and 
subsequently enabling the zen4 scheduler model. Probably a week or two from 
there. The intel model isn't approved for obvious reasons. Yes I will post a 
stopgap patch!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139073/new/

https://reviews.llvm.org/D139073

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D139073: [X86] AMD Zen 4 Initial enablement

2022-11-30 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh created this revision.
GGanesh added reviewers: RKSimon, craig.topper, andreadb.
Herald added subscribers: Enna1, StephenFan, pengfei, gbedwell, javed.absar, 
hiraditya.
Herald added a project: All.
GGanesh requested review of this revision.
Herald added projects: clang, Sanitizers, LLVM.
Herald added subscribers: llvm-commits, Sanitizers, cfe-commits.

This patch has the initial skeleton that enables AMD znver4!

AMD znver4 belongs to Family 19h with model numbers as below
Models 0x10 to 0x1f 
Models 0x60 to 0x74
Models 0x78 to 0x7b
Models 0xA0 to 0xAf

The patch

1. Adds the ISAs that already have target descriptions.
2. Uses the znver3 scheduler model as of now. (We have to update this later.)
3. Updates a few tests as per the initial enablement.
4. ISAs that are added are

avx512f, 
avx512dq, 
avx512ifma, 
avx512cd, 
avx512bw, 
avx512vl, 
avx512_bf16, 
avx512vbmi, 
avx512vbmi2,
avx512vl,
avx512_vnni, 
avx512_bitalg, 
avx512_vpopcntdq/vl


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D139073

Files:
  clang/lib/Basic/Targets/X86.cpp
  clang/test/CodeGen/target-builtin-noerror.c
  clang/test/Driver/x86-march.c
  clang/test/Frontend/x86-target-cpu.c
  clang/test/Misc/target-invalid-cpu-note.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/include/llvm/Support/X86TargetParser.h
  llvm/lib/Support/Host.cpp
  llvm/lib/Support/X86TargetParser.cpp
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86PfmCounters.td
  llvm/test/CodeGen/X86/rdpru.ll
  llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
  llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
  llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
  llvm/test/MC/X86/x86_long_nop.s
  llvm/test/tools/llvm-mca/X86/cpus.s
  llvm/test/tools/llvm-mca/X86/read-after-ld-1.s
  llvm/test/tools/llvm-mca/X86/register-file-statistics.s
  llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s

Index: llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
===
--- llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
+++ llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
@@ -5,6 +5,7 @@
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ZNVER1 %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ZNVER2 %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver3 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ZNVER3 %s
+# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ZNVER4 %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,SNB %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,IVB %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,HSW %s
@@ -112,6 +113,12 @@
 # ZNVER3-NEXT: [3] Maximum number of used buffer entries.
 # ZNVER3-NEXT: [4] Total number of buffer entries.
 
+# ZNVER4:  Scheduler's queue usage:
+# ZNVER4-NEXT: [1] Resource name.
+# ZNVER4-NEXT: [2] Average number of used buffer entries.
+# ZNVER4-NEXT: [3] Maximum number of used buffer entries.
+# ZNVER4-NEXT: [4] Total number of buffer entries.
+
 # BARCELONA:[1][2][3][4]
 # BARCELONA-NEXT:  SBPortAny0  1  54
 
@@ -165,3 +172,9 @@
 # ZNVER3-NEXT: Zn3Int   0  1  96
 # ZNVER3-NEXT: Zn3Load  0  0  72
 # ZNVER3-NEXT: Zn3Store 0  0  64
+
+# ZNVER4:   [1][2][3][4]
+# ZNVER4-NEXT: Zn3FP0  0  64
+# ZNVER4-NEXT: Zn3Int   0  1  96
+# ZNVER4-NEXT: Zn3Load  0  0  72
+# ZNVER4-NEXT: Zn3Store 0  0  64
Index: llvm/test/tools/llvm-mca/X86/register-file-statistics.s
===
--- llvm/test/tools/llvm-mca/X86/register-file-statistics.s
+++ llvm/test/tools/llvm-mca/X86/register-file-statistics.s
@@ -6,6 +6,7 @@
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL,ZNVER1 %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown 

[PATCH] D92812: [X86] Update tests for znver3

2021-01-06 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGdbfc1ac4d86c: [X86] Update tests for znver3 (authored by 
GGanesh).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92812/new/

https://reviews.llvm.org/D92812

Files:
  clang/test/Driver/x86-march.c
  clang/test/Frontend/x86-target-cpu.c
  llvm/test/MC/X86/x86_long_nop.s


Index: llvm/test/MC/X86/x86_long_nop.s
===
--- llvm/test/MC/X86/x86_long_nop.s
+++ llvm/test/MC/X86/x86_long_nop.s
@@ -15,6 +15,8 @@
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s 
-mcpu=znver1 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s 
--check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu 
-mcpu=znver2 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s 
--check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s 
-mcpu=znver2 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s 
--check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu 
-mcpu=znver3 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s 
--check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s 
-mcpu=znver3 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s 
--check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem 
%s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu 
-mcpu=westmere %s | llvm-objdump -d --no-show-raw-insn - | FileCheck 
--check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu 
-mcpu=sandybridge %s | llvm-objdump -d --no-show-raw-insn - | FileCheck 
--check-prefix=LNOP15 %s
Index: clang/test/Frontend/x86-target-cpu.c
===
--- clang/test/Frontend/x86-target-cpu.c
+++ clang/test/Frontend/x86-target-cpu.c
@@ -36,5 +36,6 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu btver2 -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver1 -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver2 -verify %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver3 -verify %s
 //
 // expected-no-diagnostics
Index: clang/test/Driver/x86-march.c
===
--- clang/test/Driver/x86-march.c
+++ clang/test/Driver/x86-march.c
@@ -179,6 +179,10 @@
 // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver2 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=znver2
 // znver2: "-target-cpu" "znver2"
+//
+// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver3 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=znver3
+// znver3: "-target-cpu" "znver3"
 
 // RUN: %clang -target x86_64 -c -### %s -march=x86-64 2>&1 | FileCheck %s 
--check-prefix=x86-64
 // x86-64: "-target-cpu" "x86-64"


Index: llvm/test/MC/X86/x86_long_nop.s
===
--- llvm/test/MC/X86/x86_long_nop.s
+++ llvm/test/MC/X86/x86_long_nop.s
@@ -15,6 +15,8 @@
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver1 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver2 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver2 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver3 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver3 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=westmere %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
Index: clang/test/Frontend/x86-target-cpu.c
===
--- clang/test/Frontend/x86-target-cpu.c
+++ clang/test/Frontend/x86-target-cpu.c
@@ -36,5 +36,6 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu btver2 -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver1 -verify %s
 // RUN: %clang_cc1 

[PATCH] D92812: [X86] AMD Znver3 (Family 19H) Enablement

2021-01-05 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh updated this revision to Diff 314703.
GGanesh edited the summary of this revision.
GGanesh added a comment.

Updating the patch so that the simplified patch adds only a few missing znver3 
tests. The subsequent patches will comprehensively enable other znver3 features.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92812/new/

https://reviews.llvm.org/D92812

Files:
  clang/test/Driver/x86-march.c
  clang/test/Frontend/x86-target-cpu.c
  llvm/test/MC/X86/x86_long_nop.s


Index: llvm/test/MC/X86/x86_long_nop.s
===
--- llvm/test/MC/X86/x86_long_nop.s
+++ llvm/test/MC/X86/x86_long_nop.s
@@ -15,6 +15,8 @@
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s 
-mcpu=znver1 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s 
--check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu 
-mcpu=znver2 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s 
--check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s 
-mcpu=znver2 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s 
--check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu 
-mcpu=znver3 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s 
--check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s 
-mcpu=znver3 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s 
--check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem 
%s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu 
-mcpu=westmere %s | llvm-objdump -d --no-show-raw-insn - | FileCheck 
--check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu 
-mcpu=sandybridge %s | llvm-objdump -d --no-show-raw-insn - | FileCheck 
--check-prefix=LNOP15 %s
Index: clang/test/Frontend/x86-target-cpu.c
===
--- clang/test/Frontend/x86-target-cpu.c
+++ clang/test/Frontend/x86-target-cpu.c
@@ -36,5 +36,6 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu btver2 -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver1 -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver2 -verify %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver3 -verify %s
 //
 // expected-no-diagnostics
Index: clang/test/Driver/x86-march.c
===
--- clang/test/Driver/x86-march.c
+++ clang/test/Driver/x86-march.c
@@ -179,6 +179,10 @@
 // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver2 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=znver2
 // znver2: "-target-cpu" "znver2"
+//
+// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver3 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=znver3
+// znver3: "-target-cpu" "znver3"
 
 // RUN: %clang -target x86_64 -c -### %s -march=x86-64 2>&1 | FileCheck %s 
--check-prefix=x86-64
 // x86-64: "-target-cpu" "x86-64"


Index: llvm/test/MC/X86/x86_long_nop.s
===
--- llvm/test/MC/X86/x86_long_nop.s
+++ llvm/test/MC/X86/x86_long_nop.s
@@ -15,6 +15,8 @@
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver1 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver2 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver2 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver3 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver3 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=westmere %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
Index: clang/test/Frontend/x86-target-cpu.c
===
--- clang/test/Frontend/x86-target-cpu.c
+++ clang/test/Frontend/x86-target-cpu.c
@@ -36,5 +36,6 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu btver2 

[PATCH] D92812: [X86] AMD Znver3 (Family 19H) Enablement

2020-12-08 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh added a comment.

In D92812#2439324 , @RKSimon wrote:

> it looks like a very bad merge imo.

Yep, Thank you! I will post smaller incremental patches.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92812/new/

https://reviews.llvm.org/D92812

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D92812: [X86] AMD Znver3 (Family 19H) Enablement

2020-12-07 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh created this revision.
GGanesh added reviewers: RKSimon, craig.topper, lebedev.ri, courbet.
GGanesh added projects: LLVM, clang.
Herald added subscribers: mstojanovic, pengfei, jfb, gbedwell, hiraditya.
Herald added a reviewer: andreadb.
GGanesh requested review of this revision.
Herald added subscribers: llvm-commits, Sanitizers, cfe-commits.
Herald added a project: Sanitizers.

This patch enables the new AMD family 19H architecture

1. Introduced a new command line switch -march="znver3"
2. The following ISAs are added to the znver3 arch. New ISAs are added to the 
instruction tables.

•   INVPCID,
•   PKU
•   VAES,
•   VPCLMULQDQ
•   SNP
•   INVLPGB
•   TLBSYNC

3. Enables the "march=native" detection to znver3.
4. Adds testcases to test the latency, throughput and execution pipelines.
5. Enables llvm exegesis tool.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D92812

Files:
  clang/lib/Basic/Targets/X86.cpp
  clang/test/Driver/x86-march.c
  clang/test/Frontend/x86-target-cpu.c
  clang/test/Preprocessor/predefined-arch-macros.c
  compiler-rt/lib/builtins/cpu_model.c
  llvm/lib/Target/X86/X86.td
  llvm/lib/Target/X86/X86InstrInfo.td
  llvm/lib/Target/X86/X86InstrSNP.td
  llvm/lib/Target/X86/X86PfmCounters.td
  llvm/lib/Target/X86/X86ScheduleZnver3.td
  llvm/lib/Target/X86/X86Subtarget.h
  llvm/test/CodeGen/X86/cpus-amd.ll
  llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
  llvm/test/CodeGen/X86/slow-unaligned-mem.ll
  llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
  llvm/test/MC/Disassembler/X86/x86-32.txt
  llvm/test/MC/X86/SNP-32.s
  llvm/test/MC/X86/SNP-64.s
  llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-2.s
  llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-3.s
  llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-4.s
  llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-5.s
  llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-6.s
  llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-7.s
  llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-adx.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-aes.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi1.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-clflushopt.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-clzero.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-cmov.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-cmpxchg.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-f16c.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-fma.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-fsgsbase.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-lea.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-lzcnt.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-mmx.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-movbe.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-mwaitx.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-pclmul.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-popcnt.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-prefetchw.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-rdrand.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-rdseed.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-sse1.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-sse2.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-sse3.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-sse41.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-sse42.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-sse4a.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-ssse3.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_32.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s
  llvm/test/tools/llvm-mca/X86/Znver3/resources-x87.s

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D58343: Enablement for AMD znver2 architecture - skeleton patch

2019-02-19 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh updated this revision to Diff 187389.

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58343/new/

https://reviews.llvm.org/D58343

Files:
  include/llvm/Support/X86TargetParser.def
  lib/Support/Host.cpp
  lib/Target/X86/X86.td
  test/CodeGen/X86/cpus-amd.ll
  test/CodeGen/X86/lzcnt-zext-cmp.ll
  test/CodeGen/X86/slow-unaligned-mem.ll
  test/CodeGen/X86/x86-64-double-shifts-var.ll

Index: test/CodeGen/X86/x86-64-double-shifts-var.ll
===
--- test/CodeGen/X86/x86-64-double-shifts-var.ll
+++ test/CodeGen/X86/x86-64-double-shifts-var.ll
@@ -13,8 +13,9 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver3 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s
 
-; Verify that for the X86_64 processors that are known to have poor latency 
+; Verify that for the X86_64 processors that are known to have poor latency
 ; double precision shift instructions we do not generate 'shld' or 'shrd'
 ; instructions.
 
@@ -25,7 +26,7 @@
 
 define i64 @lshift(i64 %a, i64 %b, i32 %c) nounwind readnone {
 entry:
-; CHECK-NOT: shld 
+; CHECK-NOT: shld
   %sh_prom = zext i32 %c to i64
   %shl = shl i64 %a, %sh_prom
   %sub = sub nsw i32 64, %c
Index: test/CodeGen/X86/slow-unaligned-mem.ll
===
--- test/CodeGen/X86/slow-unaligned-mem.ll
+++ test/CodeGen/X86/slow-unaligned-mem.ll
@@ -47,6 +47,7 @@
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=FAST
 
 ; Other chips with slow unaligned memory accesses
 
Index: test/CodeGen/X86/lzcnt-zext-cmp.ll
===
--- test/CodeGen/X86/lzcnt-zext-cmp.ll
+++ test/CodeGen/X86/lzcnt-zext-cmp.ll
@@ -5,6 +5,8 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
 
 ; Test one 32-bit input, output is 32-bit, no transformations expected.
 define i32 @test_zext_cmp0(i32 %a) {
Index: test/CodeGen/X86/cpus-amd.ll
===
--- test/CodeGen/X86/cpus-amd.ll
+++ test/CodeGen/X86/cpus-amd.ll
@@ -26,6 +26,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 define void @foo() {
   ret void
Index: lib/Target/X86/X86.td
===
--- lib/Target/X86/X86.td
+++ lib/Target/X86/X86.td
@@ -1144,8 +1144,8 @@
   FeatureMacroFusion
 ]>;
 
-// Znver1
-def: ProcessorModel<"znver1", Znver1Model, [
+// AMD Zen Processors common ISAs
+def ZNFeatures : ProcessorFeatures<[], [
   FeatureADX,
   FeatureAES,
   FeatureAVX2,
@@ -1184,6 +1184,19 @@
   FeatureXSAVEOPT,
   FeatureXSAVES]>;
 
+class Znver1Proc : ProcModel;
+def : Znver1Proc<"znver1">;
+
+class Znver2Proc : ProcModel;
+def : Znver2Proc<"znver2">;
+
 def : Proc<"geode",   [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
 
 def : Proc<"winchip-c6",  [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
Index: lib/Support/Host.cpp
===
--- lib/Support/Host.cpp
+++ lib/Support/Host.cpp
@@ -916,7 +916,14 @@
 break; // "btver2"
   case 23:
 *Type = X86::AMDFAM17H;
-*Subtype = X86::AMDFAM17H_ZNVER1;
+if (Model >= 0x30 && Model <= 0x3f) {
+  *Subtype = X86::AMDFAM17H_ZNVER2;
+  break; // "znver2"; 30h-3fh: Zen2
+}
+if (Model <= 0x0f) {
+  *Subtype = 

[PATCH] D58343: Enablement for AMD znver2 architecture - skeleton patch

2019-02-19 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh updated this revision to Diff 187386.

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58343/new/

https://reviews.llvm.org/D58343

Files:
  include/llvm/Support/X86TargetParser.def
  lib/Support/Host.cpp
  lib/Target/X86/X86.td
  test/CodeGen/X86/cpus-amd.ll
  test/CodeGen/X86/lzcnt-zext-cmp.ll
  test/CodeGen/X86/slow-unaligned-mem.ll
  test/CodeGen/X86/x86-64-double-shifts-var.ll

Index: test/CodeGen/X86/x86-64-double-shifts-var.ll
===
--- test/CodeGen/X86/x86-64-double-shifts-var.ll
+++ test/CodeGen/X86/x86-64-double-shifts-var.ll
@@ -13,8 +13,9 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver3 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s
 
-; Verify that for the X86_64 processors that are known to have poor latency 
+; Verify that for the X86_64 processors that are known to have poor latency
 ; double precision shift instructions we do not generate 'shld' or 'shrd'
 ; instructions.
 
@@ -25,7 +26,7 @@
 
 define i64 @lshift(i64 %a, i64 %b, i32 %c) nounwind readnone {
 entry:
-; CHECK-NOT: shld 
+; CHECK-NOT: shld
   %sh_prom = zext i32 %c to i64
   %shl = shl i64 %a, %sh_prom
   %sub = sub nsw i32 64, %c
Index: test/CodeGen/X86/slow-unaligned-mem.ll
===
--- test/CodeGen/X86/slow-unaligned-mem.ll
+++ test/CodeGen/X86/slow-unaligned-mem.ll
@@ -47,6 +47,7 @@
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=FAST
 
 ; Other chips with slow unaligned memory accesses
 
Index: test/CodeGen/X86/lzcnt-zext-cmp.ll
===
--- test/CodeGen/X86/lzcnt-zext-cmp.ll
+++ test/CodeGen/X86/lzcnt-zext-cmp.ll
@@ -5,6 +5,8 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
 
 ; Test one 32-bit input, output is 32-bit, no transformations expected.
 define i32 @test_zext_cmp0(i32 %a) {
Index: test/CodeGen/X86/cpus-amd.ll
===
--- test/CodeGen/X86/cpus-amd.ll
+++ test/CodeGen/X86/cpus-amd.ll
@@ -26,6 +26,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 define void @foo() {
   ret void
Index: lib/Target/X86/X86.td
===
--- lib/Target/X86/X86.td
+++ lib/Target/X86/X86.td
@@ -1144,15 +1144,14 @@
   FeatureMacroFusion
 ]>;
 
-// Znver1
-def: ProcessorModel<"znver1", Znver1Model, [
+// AMD Zen Processors common ISAs
+def ZNFeatures : ProcessorFeatures<[], [
   FeatureADX,
   FeatureAES,
   FeatureAVX2,
   FeatureBMI,
   FeatureBMI2,
   FeatureCLFLUSHOPT,
-  FeatureCLZERO,
   FeatureCMOV,
   Feature64Bit,
   FeatureCMPXCHG16B,
@@ -1184,6 +1183,21 @@
   FeatureXSAVEOPT,
   FeatureXSAVES]>;
 
+class Znver1Proc : ProcModel;
+def : Znver1Proc<"znver1">;
+
+class Znver2Proc : ProcModel;
+def : Znver2Proc<"znver2">;
+
 def : Proc<"geode",   [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
 
 def : Proc<"winchip-c6",  [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
Index: lib/Support/Host.cpp
===
--- lib/Support/Host.cpp
+++ lib/Support/Host.cpp
@@ -916,7 +916,14 @@
 break; // "btver2"
   case 23:
 *Type = X86::AMDFAM17H;
-*Subtype = X86::AMDFAM17H_ZNVER1;
+if (Model >= 0x30 && Model <= 0x3f) {
+  

[PATCH] D58344: Enablement for AMD znver2 architecture - skeleton

2019-02-19 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh updated this revision to Diff 187387.

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58344/new/

https://reviews.llvm.org/D58344

Files:
  include/clang/Basic/X86Target.def
  lib/Basic/Targets/X86.cpp
  test/CodeGen/target-builtin-noerror.c
  test/Driver/x86-march.c
  test/Frontend/x86-target-cpu.c
  test/Misc/target-invalid-cpu-note.c
  test/Preprocessor/predefined-arch-macros.c

Index: test/Preprocessor/predefined-arch-macros.c
===
--- test/Preprocessor/predefined-arch-macros.c
+++ test/Preprocessor/predefined-arch-macros.c
@@ -2676,6 +2676,100 @@
 // CHECK_ZNVER1_M64: #define __znver1 1
 // CHECK_ZNVER1_M64: #define __znver1__ 1
 
+// RUN: %clang -march=znver2 -m32 -E -dM %s -o - 2>&1 \
+// RUN: -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER2_M32
+// CHECK_ZNVER2_M32-NOT: #define __3dNOW_A__ 1
+// CHECK_ZNVER2_M32-NOT: #define __3dNOW__ 1
+// CHECK_ZNVER2_M32: #define __ADX__ 1
+// CHECK_ZNVER2_M32: #define __AES__ 1
+// CHECK_ZNVER2_M32: #define __AVX2__ 1
+// CHECK_ZNVER2_M32: #define __AVX__ 1
+// CHECK_ZNVER2_M32: #define __BMI2__ 1
+// CHECK_ZNVER2_M32: #define __BMI__ 1
+// CHECK_ZNVER2_M32: #define __CLFLUSHOPT__ 1
+// CHECK_ZNVER2_M32: #define __CLWB__ 1
+// CHECK_ZNVER2_M32: #define __CLZERO__ 1
+// CHECK_ZNVER2_M32: #define __F16C__ 1
+// CHECK_ZNVER2_M32: #define __FMA__ 1
+// CHECK_ZNVER2_M32: #define __FSGSBASE__ 1
+// CHECK_ZNVER2_M32: #define __LZCNT__ 1
+// CHECK_ZNVER2_M32: #define __MMX__ 1
+// CHECK_ZNVER2_M32: #define __PCLMUL__ 1
+// CHECK_ZNVER2_M32: #define __POPCNT__ 1
+// CHECK_ZNVER2_M32: #define __PRFCHW__ 1
+// CHECK_ZNVER2_M32: #define __RDPID__ 1
+// CHECK_ZNVER2_M32: #define __RDRND__ 1
+// CHECK_ZNVER2_M32: #define __RDSEED__ 1
+// CHECK_ZNVER2_M32: #define __SHA__ 1
+// CHECK_ZNVER2_M32: #define __SSE2_MATH__ 1
+// CHECK_ZNVER2_M32: #define __SSE2__ 1
+// CHECK_ZNVER2_M32: #define __SSE3__ 1
+// CHECK_ZNVER2_M32: #define __SSE4A__ 1
+// CHECK_ZNVER2_M32: #define __SSE4_1__ 1
+// CHECK_ZNVER2_M32: #define __SSE4_2__ 1
+// CHECK_ZNVER2_M32: #define __SSE_MATH__ 1
+// CHECK_ZNVER2_M32: #define __SSE__ 1
+// CHECK_ZNVER2_M32: #define __SSSE3__ 1
+// CHECK_ZNVER2_M32: #define __WBNOINVD__ 1
+// CHECK_ZNVER2_M32: #define __XSAVEC__ 1
+// CHECK_ZNVER2_M32: #define __XSAVEOPT__ 1
+// CHECK_ZNVER2_M32: #define __XSAVES__ 1
+// CHECK_ZNVER2_M32: #define __XSAVE__ 1
+// CHECK_ZNVER2_M32: #define __i386 1
+// CHECK_ZNVER2_M32: #define __i386__ 1
+// CHECK_ZNVER2_M32: #define __tune_znver2__ 1
+// CHECK_ZNVER2_M32: #define __znver2 1
+// CHECK_ZNVER2_M32: #define __znver2__ 1
+
+// RUN: %clang -march=znver2 -m64 -E -dM %s -o - 2>&1 \
+// RUN: -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER2_M64
+// CHECK_ZNVER2_M64-NOT: #define __3dNOW_A__ 1
+// CHECK_ZNVER2_M64-NOT: #define __3dNOW__ 1
+// CHECK_ZNVER2_M64: #define __ADX__ 1
+// CHECK_ZNVER2_M64: #define __AES__ 1
+// CHECK_ZNVER2_M64: #define __AVX2__ 1
+// CHECK_ZNVER2_M64: #define __AVX__ 1
+// CHECK_ZNVER2_M64: #define __BMI2__ 1
+// CHECK_ZNVER2_M64: #define __BMI__ 1
+// CHECK_ZNVER2_M64: #define __CLFLUSHOPT__ 1
+// CHECK_ZNVER2_M64: #define __CLWB__ 1
+// CHECK_ZNVER2_M64: #define __CLZERO__ 1
+// CHECK_ZNVER2_M64: #define __F16C__ 1
+// CHECK_ZNVER2_M64: #define __FMA__ 1
+// CHECK_ZNVER2_M64: #define __FSGSBASE__ 1
+// CHECK_ZNVER2_M64: #define __LZCNT__ 1
+// CHECK_ZNVER2_M64: #define __MMX__ 1
+// CHECK_ZNVER2_M64: #define __PCLMUL__ 1
+// CHECK_ZNVER2_M64: #define __POPCNT__ 1
+// CHECK_ZNVER2_M64: #define __PRFCHW__ 1
+// CHECK_ZNVER2_M64: #define __RDPID__ 1
+// CHECK_ZNVER2_M64: #define __RDRND__ 1
+// CHECK_ZNVER2_M64: #define __RDSEED__ 1
+// CHECK_ZNVER2_M64: #define __SHA__ 1
+// CHECK_ZNVER2_M64: #define __SSE2_MATH__ 1
+// CHECK_ZNVER2_M64: #define __SSE2__ 1
+// CHECK_ZNVER2_M64: #define __SSE3__ 1
+// CHECK_ZNVER2_M64: #define __SSE4A__ 1
+// CHECK_ZNVER2_M64: #define __SSE4_1__ 1
+// CHECK_ZNVER2_M64: #define __SSE4_2__ 1
+// CHECK_ZNVER2_M64: #define __SSE_MATH__ 1
+// CHECK_ZNVER2_M64: #define __SSE__ 1
+// CHECK_ZNVER2_M64: #define __SSSE3__ 1
+// CHECK_ZNVER2_M64: #define __WBNOINVD__ 1
+// CHECK_ZNVER2_M64: #define __XSAVEC__ 1
+// CHECK_ZNVER2_M64: #define __XSAVEOPT__ 1
+// CHECK_ZNVER2_M64: #define __XSAVES__ 1
+// CHECK_ZNVER2_M64: #define __XSAVE__ 1
+// CHECK_ZNVER2_M64: #define __amd64 1
+// CHECK_ZNVER2_M64: #define __amd64__ 1
+// CHECK_ZNVER2_M64: #define __tune_znver2__ 1
+// CHECK_ZNVER2_M64: #define __x86_64 1
+// CHECK_ZNVER2_M64: #define __x86_64__ 1
+// CHECK_ZNVER2_M64: #define __znver2 1
+// CHECK_ZNVER2_M64: #define __znver2__ 1
+
 // End X86/GCC/Linux tests --
 
 // Begin PPC/GCC/Linux tests 
Index: test/Misc/target-invalid-cpu-note.c
===
--- test/Misc/target-invalid-cpu-note.c

[PATCH] D58343: Enablement for AMD znver2 architecture - skeleton patch

2019-02-19 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh updated this revision to Diff 187340.
GGanesh added a comment.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Addressed the comments from Craig Topper


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58343/new/

https://reviews.llvm.org/D58343

Files:
  include/clang/Basic/X86Target.def
  lib/Basic/Targets/X86.cpp
  test/CodeGen/target-builtin-noerror.c
  test/Driver/x86-march.c
  test/Frontend/x86-target-cpu.c
  test/Misc/target-invalid-cpu-note.c
  test/Preprocessor/predefined-arch-macros.c

Index: test/Preprocessor/predefined-arch-macros.c
===
--- test/Preprocessor/predefined-arch-macros.c
+++ test/Preprocessor/predefined-arch-macros.c
@@ -2676,6 +2676,100 @@
 // CHECK_ZNVER1_M64: #define __znver1 1
 // CHECK_ZNVER1_M64: #define __znver1__ 1
 
+// RUN: %clang -march=znver2 -m32 -E -dM %s -o - 2>&1 \
+// RUN: -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER2_M32
+// CHECK_ZNVER2_M32-NOT: #define __3dNOW_A__ 1
+// CHECK_ZNVER2_M32-NOT: #define __3dNOW__ 1
+// CHECK_ZNVER2_M32: #define __ADX__ 1
+// CHECK_ZNVER2_M32: #define __AES__ 1
+// CHECK_ZNVER2_M32: #define __AVX2__ 1
+// CHECK_ZNVER2_M32: #define __AVX__ 1
+// CHECK_ZNVER2_M32: #define __BMI2__ 1
+// CHECK_ZNVER2_M32: #define __BMI__ 1
+// CHECK_ZNVER2_M32: #define __CLFLUSHOPT__ 1
+// CHECK_ZNVER2_M32: #define __CLWB__ 1
+// CHECK_ZNVER2_M32: #define __CLZERO__ 1
+// CHECK_ZNVER2_M32: #define __F16C__ 1
+// CHECK_ZNVER2_M32: #define __FMA__ 1
+// CHECK_ZNVER2_M32: #define __FSGSBASE__ 1
+// CHECK_ZNVER2_M32: #define __LZCNT__ 1
+// CHECK_ZNVER2_M32: #define __MMX__ 1
+// CHECK_ZNVER2_M32: #define __PCLMUL__ 1
+// CHECK_ZNVER2_M32: #define __POPCNT__ 1
+// CHECK_ZNVER2_M32: #define __PRFCHW__ 1
+// CHECK_ZNVER2_M32: #define __RDPID__ 1
+// CHECK_ZNVER2_M32: #define __RDRND__ 1
+// CHECK_ZNVER2_M32: #define __RDSEED__ 1
+// CHECK_ZNVER2_M32: #define __SHA__ 1
+// CHECK_ZNVER2_M32: #define __SSE2_MATH__ 1
+// CHECK_ZNVER2_M32: #define __SSE2__ 1
+// CHECK_ZNVER2_M32: #define __SSE3__ 1
+// CHECK_ZNVER2_M32: #define __SSE4A__ 1
+// CHECK_ZNVER2_M32: #define __SSE4_1__ 1
+// CHECK_ZNVER2_M32: #define __SSE4_2__ 1
+// CHECK_ZNVER2_M32: #define __SSE_MATH__ 1
+// CHECK_ZNVER2_M32: #define __SSE__ 1
+// CHECK_ZNVER2_M32: #define __SSSE3__ 1
+// CHECK_ZNVER2_M32: #define __WBNOINVD__ 1
+// CHECK_ZNVER2_M32: #define __XSAVEC__ 1
+// CHECK_ZNVER2_M32: #define __XSAVEOPT__ 1
+// CHECK_ZNVER2_M32: #define __XSAVES__ 1
+// CHECK_ZNVER2_M32: #define __XSAVE__ 1
+// CHECK_ZNVER2_M32: #define __i386 1
+// CHECK_ZNVER2_M32: #define __i386__ 1
+// CHECK_ZNVER2_M32: #define __tune_znver2__ 1
+// CHECK_ZNVER2_M32: #define __znver2 1
+// CHECK_ZNVER2_M32: #define __znver2__ 1
+
+// RUN: %clang -march=znver2 -m64 -E -dM %s -o - 2>&1 \
+// RUN: -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER2_M64
+// CHECK_ZNVER2_M64-NOT: #define __3dNOW_A__ 1
+// CHECK_ZNVER2_M64-NOT: #define __3dNOW__ 1
+// CHECK_ZNVER2_M64: #define __ADX__ 1
+// CHECK_ZNVER2_M64: #define __AES__ 1
+// CHECK_ZNVER2_M64: #define __AVX2__ 1
+// CHECK_ZNVER2_M64: #define __AVX__ 1
+// CHECK_ZNVER2_M64: #define __BMI2__ 1
+// CHECK_ZNVER2_M64: #define __BMI__ 1
+// CHECK_ZNVER2_M64: #define __CLFLUSHOPT__ 1
+// CHECK_ZNVER2_M64: #define __CLWB__ 1
+// CHECK_ZNVER2_M64: #define __CLZERO__ 1
+// CHECK_ZNVER2_M64: #define __F16C__ 1
+// CHECK_ZNVER2_M64: #define __FMA__ 1
+// CHECK_ZNVER2_M64: #define __FSGSBASE__ 1
+// CHECK_ZNVER2_M64: #define __LZCNT__ 1
+// CHECK_ZNVER2_M64: #define __MMX__ 1
+// CHECK_ZNVER2_M64: #define __PCLMUL__ 1
+// CHECK_ZNVER2_M64: #define __POPCNT__ 1
+// CHECK_ZNVER2_M64: #define __PRFCHW__ 1
+// CHECK_ZNVER2_M64: #define __RDPID__ 1
+// CHECK_ZNVER2_M64: #define __RDRND__ 1
+// CHECK_ZNVER2_M64: #define __RDSEED__ 1
+// CHECK_ZNVER2_M64: #define __SHA__ 1
+// CHECK_ZNVER2_M64: #define __SSE2_MATH__ 1
+// CHECK_ZNVER2_M64: #define __SSE2__ 1
+// CHECK_ZNVER2_M64: #define __SSE3__ 1
+// CHECK_ZNVER2_M64: #define __SSE4A__ 1
+// CHECK_ZNVER2_M64: #define __SSE4_1__ 1
+// CHECK_ZNVER2_M64: #define __SSE4_2__ 1
+// CHECK_ZNVER2_M64: #define __SSE_MATH__ 1
+// CHECK_ZNVER2_M64: #define __SSE__ 1
+// CHECK_ZNVER2_M64: #define __SSSE3__ 1
+// CHECK_ZNVER2_M64: #define __WBNOINVD__ 1
+// CHECK_ZNVER2_M64: #define __XSAVEC__ 1
+// CHECK_ZNVER2_M64: #define __XSAVEOPT__ 1
+// CHECK_ZNVER2_M64: #define __XSAVES__ 1
+// CHECK_ZNVER2_M64: #define __XSAVE__ 1
+// CHECK_ZNVER2_M64: #define __amd64 1
+// CHECK_ZNVER2_M64: #define __amd64__ 1
+// CHECK_ZNVER2_M64: #define __tune_znver2__ 1
+// CHECK_ZNVER2_M64: #define __x86_64 1
+// CHECK_ZNVER2_M64: #define __x86_64__ 1
+// CHECK_ZNVER2_M64: #define __znver2 1
+// CHECK_ZNVER2_M64: #define __znver2__ 1
+
 // End X86/GCC/Linux tests --
 
 // Begin PPC/GCC/Linux tests 
Index: 

[PATCH] D58344: Enablement for AMD znver2 architecture - skeleton

2019-02-18 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh created this revision.
GGanesh added reviewers: RKSimon, craig.topper.
GGanesh created this object with visibility "All Users".
GGanesh added a project: clang.
Herald added a subscriber: cfe-commits.

This patch enables the following

1. AMD family 17h "znver2" tune flag (-march, -mcpu).
2. ISAs that are enabled for "znver2" architecture.
3. For the time being, it uses the znver1 scheduler model.
4. Tests are updated.


Repository:
  rC Clang

https://reviews.llvm.org/D58344

Files:
  include/clang/Basic/X86Target.def
  lib/Basic/Targets/X86.cpp
  test/CodeGen/attr-target-mv.c
  test/CodeGen/target-builtin-noerror.c
  test/Driver/x86-march.c
  test/Frontend/x86-target-cpu.c
  test/Misc/target-invalid-cpu-note.c
  test/Preprocessor/predefined-arch-macros.c

Index: test/Preprocessor/predefined-arch-macros.c
===
--- test/Preprocessor/predefined-arch-macros.c
+++ test/Preprocessor/predefined-arch-macros.c
@@ -2676,8 +2676,102 @@
 // CHECK_ZNVER1_M64: #define __znver1 1
 // CHECK_ZNVER1_M64: #define __znver1__ 1

+// RUN: %clang -march=znver2 -m32 -E -dM %s -o - 2>&1 \
+// RUN: -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER2_M32
+// CHECK_ZNVER2_M32-NOT: #define __3dNOW_A__ 1
+// CHECK_ZNVER2_M32-NOT: #define __3dNOW__ 1
+// CHECK_ZNVER2_M32: #define __ADX__ 1
+// CHECK_ZNVER2_M32: #define __AES__ 1
+// CHECK_ZNVER2_M32: #define __AVX2__ 1
+// CHECK_ZNVER2_M32: #define __AVX__ 1
+// CHECK_ZNVER2_M32: #define __BMI2__ 1
+// CHECK_ZNVER2_M32: #define __BMI__ 1
+// CHECK_ZNVER2_M32: #define __CLFLUSHOPT__ 1
+// CHECK_ZNVER2_M32: #define __CLWB__ 1
+// CHECK_ZNVER2_M32: #define __CLZERO__ 1
+// CHECK_ZNVER2_M32: #define __F16C__ 1
+// CHECK_ZNVER2_M32: #define __FMA__ 1
+// CHECK_ZNVER2_M32: #define __FSGSBASE__ 1
+// CHECK_ZNVER2_M32: #define __LZCNT__ 1
+// CHECK_ZNVER2_M32: #define __MMX__ 1
+// CHECK_ZNVER2_M32: #define __PCLMUL__ 1
+// CHECK_ZNVER2_M32: #define __POPCNT__ 1
+// CHECK_ZNVER2_M32: #define __PRFCHW__ 1
+// CHECK_ZNVER2_M32: #define __RDPID__ 1
+// CHECK_ZNVER2_M32: #define __RDRND__ 1
+// CHECK_ZNVER2_M32: #define __RDSEED__ 1
+// CHECK_ZNVER2_M32: #define __SHA__ 1
+// CHECK_ZNVER2_M32: #define __SSE2_MATH__ 1
+// CHECK_ZNVER2_M32: #define __SSE2__ 1
+// CHECK_ZNVER2_M32: #define __SSE3__ 1
+// CHECK_ZNVER2_M32: #define __SSE4A__ 1
+// CHECK_ZNVER2_M32: #define __SSE4_1__ 1
+// CHECK_ZNVER2_M32: #define __SSE4_2__ 1
+// CHECK_ZNVER2_M32: #define __SSE_MATH__ 1
+// CHECK_ZNVER2_M32: #define __SSE__ 1
+// CHECK_ZNVER2_M32: #define __SSSE3__ 1
+// CHECK_ZNVER2_M32: #define __WBNOINVD__ 1
+// CHECK_ZNVER2_M32: #define __XSAVEC__ 1
+// CHECK_ZNVER2_M32: #define __XSAVEOPT__ 1
+// CHECK_ZNVER2_M32: #define __XSAVES__ 1
+// CHECK_ZNVER2_M32: #define __XSAVE__ 1
+// CHECK_ZNVER2_M32: #define __i386 1
+// CHECK_ZNVER2_M32: #define __i386__ 1
+// CHECK_ZNVER2_M32: #define __tune_znver2__ 1
+// CHECK_ZNVER2_M32: #define __znver2 1
+// CHECK_ZNVER2_M32: #define __znver2__ 1
+
+// RUN: %clang -march=znver2 -m64 -E -dM %s -o - 2>&1 \
+// RUN: -target i386-unknown-linux \
+// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER2_M64
+// CHECK_ZNVER2_M64-NOT: #define __3dNOW_A__ 1
+// CHECK_ZNVER2_M64-NOT: #define __3dNOW__ 1
+// CHECK_ZNVER2_M64: #define __ADX__ 1
+// CHECK_ZNVER2_M64: #define __AES__ 1
+// CHECK_ZNVER2_M64: #define __AVX2__ 1
+// CHECK_ZNVER2_M64: #define __AVX__ 1
+// CHECK_ZNVER2_M64: #define __BMI2__ 1
+// CHECK_ZNVER2_M64: #define __BMI__ 1
+// CHECK_ZNVER2_M64: #define __CLFLUSHOPT__ 1
+// CHECK_ZNVER2_M64: #define __CLWB__ 1
+// CHECK_ZNVER2_M64: #define __CLZERO__ 1
+// CHECK_ZNVER2_M64: #define __F16C__ 1
+// CHECK_ZNVER2_M64: #define __FMA__ 1
+// CHECK_ZNVER2_M64: #define __FSGSBASE__ 1
+// CHECK_ZNVER2_M64: #define __LZCNT__ 1
+// CHECK_ZNVER2_M64: #define __MMX__ 1
+// CHECK_ZNVER2_M64: #define __PCLMUL__ 1
+// CHECK_ZNVER2_M64: #define __POPCNT__ 1
+// CHECK_ZNVER2_M64: #define __PRFCHW__ 1
+// CHECK_ZNVER2_M64: #define __RDPID__ 1
+// CHECK_ZNVER2_M64: #define __RDRND__ 1
+// CHECK_ZNVER2_M64: #define __RDSEED__ 1
+// CHECK_ZNVER2_M64: #define __SHA__ 1
+// CHECK_ZNVER2_M64: #define __SSE2_MATH__ 1
+// CHECK_ZNVER2_M64: #define __SSE2__ 1
+// CHECK_ZNVER2_M64: #define __SSE3__ 1
+// CHECK_ZNVER2_M64: #define __SSE4A__ 1
+// CHECK_ZNVER2_M64: #define __SSE4_1__ 1
+// CHECK_ZNVER2_M64: #define __SSE4_2__ 1
+// CHECK_ZNVER2_M64: #define __SSE_MATH__ 1
+// CHECK_ZNVER2_M64: #define __SSE__ 1
+// CHECK_ZNVER2_M64: #define __SSSE3__ 1
+// CHECK_ZNVER2_M64: #define __WBNOINVD__ 1
+// CHECK_ZNVER2_M64: #define __XSAVEC__ 1
+// CHECK_ZNVER2_M64: #define __XSAVEOPT__ 1
+// CHECK_ZNVER2_M64: #define __XSAVES__ 1
+// CHECK_ZNVER2_M64: #define __XSAVE__ 1
+// CHECK_ZNVER2_M64: #define __amd64 1
+// CHECK_ZNVER2_M64: #define __amd64__ 1
+// CHECK_ZNVER2_M64: #define __tune_znver2__ 1
+// CHECK_ZNVER2_M64: #define __x86_64 1
+// CHECK_ZNVER2_M64: 

[PATCH] D29386: Clzero flag addition and inclusion under znver1

2017-02-08 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh added a comment.

Thank you @craig.topper.

I just want to check whether the patch can be committed to the 4.0 release branch as well.
I understand that the trunk is in the blocker bug-fix stage, but I just want to get it 
confirmed.


Repository:
  rL LLVM

https://reviews.llvm.org/D29386



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D29386: Clzero flag addition and inclusion under znver1

2017-02-07 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh updated this revision to Diff 87437.
GGanesh added a comment.

Updated the builtins test for "__builtin_ia32_clzero"


Repository:
  rL LLVM

https://reviews.llvm.org/D29386

Files:
  include/clang/Basic/BuiltinsX86.def
  include/clang/Driver/Options.td
  lib/Basic/Targets.cpp
  lib/Headers/CMakeLists.txt
  lib/Headers/clzerointrin.h
  lib/Headers/module.modulemap
  lib/Headers/x86intrin.h
  test/CodeGen/builtins-x86.c

Index: test/CodeGen/builtins-x86.c
===
--- test/CodeGen/builtins-x86.c
+++ test/CodeGen/builtins-x86.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -DUSE_64 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -emit-llvm -o %t %s
-// RUN: %clang_cc1 -DUSE_ALL -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -fsyntax-only -o %t %s
+// RUN: %clang_cc1 -DUSE_64 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -emit-llvm -o %t %s
+// RUN: %clang_cc1 -DUSE_ALL -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -fsyntax-only -o %t %s
 
 #ifdef USE_ALL
 #define USE_3DNOW
@@ -285,6 +285,7 @@
 
   (void) __builtin_ia32_monitorx(tmp_vp, tmp_Ui, tmp_Ui);
   (void) __builtin_ia32_mwaitx(tmp_Ui, tmp_Ui, tmp_Ui);
+  (void) __builtin_ia32_clzero(tmp_vp);
 
   tmp_V4f = __builtin_ia32_cvtpi2ps(tmp_V4f, tmp_V2i);
   tmp_V2i = __builtin_ia32_cvtps2pi(tmp_V4f);
Index: lib/Headers/x86intrin.h
===
--- lib/Headers/x86intrin.h
+++ lib/Headers/x86intrin.h
@@ -80,6 +80,10 @@
 #include <mwaitxintrin.h>
 #endif
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__)
+#include <clzerointrin.h>
+#endif
+
 /* FIXME: LWP */
 
 #endif /* __X86INTRIN_H */
Index: lib/Headers/module.modulemap
===
--- lib/Headers/module.modulemap
+++ lib/Headers/module.modulemap
@@ -61,6 +61,7 @@
 textual header "xopintrin.h"
 textual header "fma4intrin.h"
 textual header "mwaitxintrin.h"
+textual header "clzerointrin.h"
 
 explicit module mm_malloc {
   requires !freestanding
Index: lib/Headers/clzerointrin.h
===
--- lib/Headers/clzerointrin.h
+++ lib/Headers/clzerointrin.h
@@ -0,0 +1,50 @@
+/*===--- clzerointrin.h - CLZERO --===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===---===
+ */
+#ifndef __X86INTRIN_H
+#error "Never use <clzerointrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _CLZEROINTRIN_H
+#define _CLZEROINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS \
+  __attribute__((__always_inline__, __nodebug__,  __target__("clzero")))
+
+/// \brief Loads the cache line address and zero's out the cacheline
+///
+/// \headerfile <clzerointrin.h>
+///
+/// This intrinsic corresponds to the <c> CLZERO </c> instruction.
+///
+/// \param __line
+///A pointer to a cacheline which needs to be zeroed out.
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_clzero (void * __line)
+{
+  __builtin_ia32_clzero ((void *)__line);
+}
+
+#undef __DEFAULT_FN_ATTRS 
+
+#endif /* _CLZEROINTRIN_H */
Index: lib/Headers/CMakeLists.txt
===
--- lib/Headers/CMakeLists.txt
+++ lib/Headers/CMakeLists.txt
@@ -28,6 +28,7 @@
   

[PATCH] D29386: Clzero flag addition and inclusion under znver1

2017-02-07 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh updated this revision to Diff 87386.
GGanesh added a comment.

Updated for review comments.


Repository:
  rL LLVM

https://reviews.llvm.org/D29386

Files:
  include/clang/Basic/BuiltinsX86.def
  include/clang/Driver/Options.td
  lib/Basic/Targets.cpp
  lib/Headers/CMakeLists.txt
  lib/Headers/clzerointrin.h
  lib/Headers/module.modulemap
  lib/Headers/x86intrin.h

Index: lib/Headers/x86intrin.h
===
--- lib/Headers/x86intrin.h
+++ lib/Headers/x86intrin.h
@@ -80,6 +80,10 @@
 #include <mwaitxintrin.h>
 #endif
 
+#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLZERO__)
+#include <clzerointrin.h>
+#endif
+
 /* FIXME: LWP */
 
 #endif /* __X86INTRIN_H */
Index: lib/Headers/module.modulemap
===
--- lib/Headers/module.modulemap
+++ lib/Headers/module.modulemap
@@ -61,6 +61,7 @@
 textual header "xopintrin.h"
 textual header "fma4intrin.h"
 textual header "mwaitxintrin.h"
+textual header "clzerointrin.h"
 
 explicit module mm_malloc {
   requires !freestanding
Index: lib/Headers/clzerointrin.h
===
--- lib/Headers/clzerointrin.h
+++ lib/Headers/clzerointrin.h
@@ -0,0 +1,50 @@
+/*===--- clzerointrin.h - CLZERO --===
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===---===
+ */
+#ifndef __X86INTRIN_H
+#error "Never use <clzerointrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _CLZEROINTRIN_H
+#define _CLZEROINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS \
+  __attribute__((__always_inline__, __nodebug__,  __target__("clzero")))
+
+/// \brief Loads the cache line address and zero's out the cacheline
+///
+/// \headerfile <clzerointrin.h>
+///
+/// This intrinsic corresponds to the <c> CLZERO </c> instruction.
+///
+/// \param __line
+///A pointer to a cacheline which needs to be zeroed out.
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_clzero (void * __line)
+{
+  __builtin_ia32_clzero ((void *)__line);
+}
+
+#undef __DEFAULT_FN_ATTRS 
+
+#endif /* _CLZEROINTRIN_H */
Index: lib/Headers/CMakeLists.txt
===
--- lib/Headers/CMakeLists.txt
+++ lib/Headers/CMakeLists.txt
@@ -28,6 +28,7 @@
   __clang_cuda_intrinsics.h
   __clang_cuda_math_forward_declares.h
   __clang_cuda_runtime_wrapper.h
+  clzerointrin.h
   cpuid.h
   clflushoptintrin.h
   emmintrin.h
Index: lib/Basic/Targets.cpp
===
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -2489,6 +2489,7 @@
   bool HasXSAVEC = false;
   bool HasXSAVES = false;
   bool HasMWAITX = false;
+  bool HasCLZERO = false;
   bool HasPKU = false;
   bool HasCLFLUSHOPT = false;
   bool HasPCOMMIT = false;
@@ -3205,6 +3206,7 @@
 setFeatureEnabledImpl(Features, "bmi", true);
 setFeatureEnabledImpl(Features, "bmi2", true);
 setFeatureEnabledImpl(Features, "clflushopt", true);
+setFeatureEnabledImpl(Features, "clzero", true);
 setFeatureEnabledImpl(Features, "cx16", true);
 setFeatureEnabledImpl(Features, "f16c", true);
 setFeatureEnabledImpl(Features, "fma", true);
Index: include/clang/Driver/Options.td
===
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -1714,6 +1714,7 @@
 def mno_xsavec : Flag<["-"], "mno-xsavec">, Group;
 def mno_xsaves : Flag<["-"], "mno-xsaves">, Group;
 def mno_mwaitx : Flag<["-"], "mno-mwaitx">, Group;
+def mno_clzero : Flag<["-"], "mno-clzero">, Group;
 def mno_pku : Flag<["-"], "mno-pku">, Group;
 
 def munaligned_access : Flag<["-"], "munaligned-access">, Group,
@@ -1907,6 +1908,7 @@
 def mxsavec : Flag<["-"], "mxsavec">, Group;
 def 

[PATCH] D28018: AMD family 17h (znver1) enablement

2017-01-09 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh added a comment.

If this is okay, could you please commit these on my behalf? I don't have write access.


https://reviews.llvm.org/D28018



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D28018: AMD family 17h (znver1) enablement

2017-01-09 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh added a comment.

Yes, that's true. I mentioned that with regard to the grouping and the order of 
the enabled features. These initFeatureMap entries are based on the intrinsics 
and the CodeGen part.


https://reviews.llvm.org/D28018



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D28018: AMD family 17h (znver1) enablement

2017-01-09 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh updated this revision to Diff 83626.
GGanesh added a comment.

Falling back to CK_BTVER1 is OK, but falling back to CK_BTVER2 is not possible 
because of the partial YMM writes: on znver1, these behave differently for AVX 
instructions and their legacy SIMD counterparts. So, for now, I am leaving the 
features in alphabetical order.


https://reviews.llvm.org/D28018

Files:
  lib/Basic/Targets.cpp


Index: lib/Basic/Targets.cpp
===
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -2651,6 +2651,12 @@
 CK_BDVER4,
 //@}
 
+/// \name zen
+/// Zen architecture processors.
+//@{
+CK_ZNVER1,
+//@}
+
 /// This specification is deprecated and will be removed in the future.
 /// Users should prefer \see CK_K8.
 // FIXME: Warn on this when the CPU is set to it.
@@ -2732,6 +2738,7 @@
 .Case("bdver2", CK_BDVER2)
 .Case("bdver3", CK_BDVER3)
 .Case("bdver4", CK_BDVER4)
+.Case("znver1", CK_ZNVER1)
 .Case("x86-64", CK_x86_64)
 .Case("geode", CK_Geode)
 .Default(CK_Generic);
@@ -2931,6 +2938,7 @@
 case CK_BDVER2:
 case CK_BDVER3:
 case CK_BDVER4:
+case CK_ZNVER1:
 case CK_x86_64:
   return true;
 }
@@ -3178,6 +3186,33 @@
 setFeatureEnabledImpl(Features, "cx16", true);
 setFeatureEnabledImpl(Features, "fxsr", true);
 break;
+  case CK_ZNVER1:
+setFeatureEnabledImpl(Features, "adx", true);
+setFeatureEnabledImpl(Features, "aes", true);
+setFeatureEnabledImpl(Features, "avx2", true);
+setFeatureEnabledImpl(Features, "bmi", true);
+setFeatureEnabledImpl(Features, "bmi2", true);
+setFeatureEnabledImpl(Features, "clflushopt", true);
+setFeatureEnabledImpl(Features, "cx16", true);
+setFeatureEnabledImpl(Features, "f16c", true);
+setFeatureEnabledImpl(Features, "fma", true);
+setFeatureEnabledImpl(Features, "fsgsbase", true);
+setFeatureEnabledImpl(Features, "fxsr", true);
+setFeatureEnabledImpl(Features, "lzcnt", true);
+setFeatureEnabledImpl(Features, "mwaitx", true);
+setFeatureEnabledImpl(Features, "movbe", true);
+setFeatureEnabledImpl(Features, "pclmul", true);
+setFeatureEnabledImpl(Features, "popcnt", true);
+setFeatureEnabledImpl(Features, "prfchw", true);
+setFeatureEnabledImpl(Features, "rdrnd", true);
+setFeatureEnabledImpl(Features, "rdseed", true);
+setFeatureEnabledImpl(Features, "sha", true);
+setFeatureEnabledImpl(Features, "sse4a", true);
+setFeatureEnabledImpl(Features, "xsave", true);
+setFeatureEnabledImpl(Features, "xsavec", true);
+setFeatureEnabledImpl(Features, "xsaveopt", true);
+setFeatureEnabledImpl(Features, "xsaves", true);
+break;
   case CK_BDVER4:
 setFeatureEnabledImpl(Features, "avx2", true);
 setFeatureEnabledImpl(Features, "bmi2", true);
@@ -3729,6 +3764,9 @@
   case CK_BDVER4:
 defineCPUMacros(Builder, "bdver4");
 break;
+  case CK_ZNVER1:
+defineCPUMacros(Builder, "znver1");
+break;
   case CK_Geode:
 defineCPUMacros(Builder, "geode");
 break;


Index: lib/Basic/Targets.cpp
===
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -2651,6 +2651,12 @@
 CK_BDVER4,
 //@}
 
+/// \name zen
+/// Zen architecture processors.
+//@{
+CK_ZNVER1,
+//@}
+
 /// This specification is deprecated and will be removed in the future.
 /// Users should prefer \see CK_K8.
 // FIXME: Warn on this when the CPU is set to it.
@@ -2732,6 +2738,7 @@
 .Case("bdver2", CK_BDVER2)
 .Case("bdver3", CK_BDVER3)
 .Case("bdver4", CK_BDVER4)
+.Case("znver1", CK_ZNVER1)
 .Case("x86-64", CK_x86_64)
 .Case("geode", CK_Geode)
 .Default(CK_Generic);
@@ -2931,6 +2938,7 @@
 case CK_BDVER2:
 case CK_BDVER3:
 case CK_BDVER4:
+case CK_ZNVER1:
 case CK_x86_64:
   return true;
 }
@@ -3178,6 +3186,33 @@
 setFeatureEnabledImpl(Features, "cx16", true);
 setFeatureEnabledImpl(Features, "fxsr", true);
 break;
+  case CK_ZNVER1:
+setFeatureEnabledImpl(Features, "adx", true);
+setFeatureEnabledImpl(Features, "aes", true);
+setFeatureEnabledImpl(Features, "avx2", true);
+setFeatureEnabledImpl(Features, "bmi", true);
+setFeatureEnabledImpl(Features, "bmi2", true);
+setFeatureEnabledImpl(Features, "clflushopt", true);
+setFeatureEnabledImpl(Features, "cx16", true);
+setFeatureEnabledImpl(Features, "f16c", true);
+setFeatureEnabledImpl(Features, "fma", true);
+setFeatureEnabledImpl(Features, "fsgsbase", true);
+setFeatureEnabledImpl(Features, "fxsr", true);
+setFeatureEnabledImpl(Features, "lzcnt", true);
+setFeatureEnabledImpl(Features, "mwaitx", true);
+setFeatureEnabledImpl(Features, "movbe", true);
+setFeatureEnabledImpl(Features, "pclmul", true);
+

[PATCH] D28018: AMD family 17h (znver1) enablement

2017-01-08 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh added inline comments.



Comment at: lib/Basic/Targets.cpp:3189
 break;
+  case CK_ZNVER1:
+setFeatureEnabledImpl(Features, "adx", true);

RKSimon wrote:
> Same as what I asked on D28017 - is there an accepted order that we should be 
> using here?
Some of them seem to be in chronological order.
Some of them are in alphabetical order.

I personally don't have a preference either way.
Alphabetical order suits a long list.
I would like to hear your suggestion.


https://reviews.llvm.org/D28018



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D28018: AMD family 17h (znver1) enablement

2017-01-08 Thread Ganesh Gopalasubramanian via Phabricator via cfe-commits
GGanesh removed rL LLVM as the repository for this revision.
GGanesh updated this revision to Diff 83566.
GGanesh added a comment.

The clzero builtins and feature addition will be handled separately in another 
patch.
SSE4a and movbe are added to the ISA list.


https://reviews.llvm.org/D28018

Files:
  lib/Basic/Targets.cpp


Index: lib/Basic/Targets.cpp
===
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -2651,6 +2651,12 @@
 CK_BDVER4,
 //@}
 
+/// \name zen
+/// Zen architecture processors.
+//@{
+CK_ZNVER1,
+//@}
+
 /// This specification is deprecated and will be removed in the future.
 /// Users should prefer \see CK_K8.
 // FIXME: Warn on this when the CPU is set to it.
@@ -2732,6 +2738,7 @@
 .Case("bdver2", CK_BDVER2)
 .Case("bdver3", CK_BDVER3)
 .Case("bdver4", CK_BDVER4)
+.Case("znver1", CK_ZNVER1)
 .Case("x86-64", CK_x86_64)
 .Case("geode", CK_Geode)
 .Default(CK_Generic);
@@ -2931,6 +2938,7 @@
 case CK_BDVER2:
 case CK_BDVER3:
 case CK_BDVER4:
+case CK_ZNVER1:
 case CK_x86_64:
   return true;
 }
@@ -3178,6 +3186,33 @@
 setFeatureEnabledImpl(Features, "cx16", true);
 setFeatureEnabledImpl(Features, "fxsr", true);
 break;
+  case CK_ZNVER1:
+setFeatureEnabledImpl(Features, "adx", true);
+setFeatureEnabledImpl(Features, "aes", true);
+setFeatureEnabledImpl(Features, "avx2", true);
+setFeatureEnabledImpl(Features, "bmi", true);
+setFeatureEnabledImpl(Features, "bmi2", true);
+setFeatureEnabledImpl(Features, "clflushopt", true);
+setFeatureEnabledImpl(Features, "cx16", true);
+setFeatureEnabledImpl(Features, "f16c", true);
+setFeatureEnabledImpl(Features, "fma", true);
+setFeatureEnabledImpl(Features, "fsgsbase", true);
+setFeatureEnabledImpl(Features, "fxsr", true);
+setFeatureEnabledImpl(Features, "lzcnt", true);
+setFeatureEnabledImpl(Features, "mwaitx", true);
+setFeatureEnabledImpl(Features, "movbe", true);
+setFeatureEnabledImpl(Features, "pclmul", true);
+setFeatureEnabledImpl(Features, "popcnt", true);
+setFeatureEnabledImpl(Features, "prfchw", true);
+setFeatureEnabledImpl(Features, "rdrnd", true);
+setFeatureEnabledImpl(Features, "rdseed", true);
+setFeatureEnabledImpl(Features, "sha", true);
+setFeatureEnabledImpl(Features, "sse4a", true);
+setFeatureEnabledImpl(Features, "xsave", true);
+setFeatureEnabledImpl(Features, "xsavec", true);
+setFeatureEnabledImpl(Features, "xsaveopt", true);
+setFeatureEnabledImpl(Features, "xsaves", true);
+break;
   case CK_BDVER4:
 setFeatureEnabledImpl(Features, "avx2", true);
 setFeatureEnabledImpl(Features, "bmi2", true);
@@ -3729,6 +3764,9 @@
   case CK_BDVER4:
 defineCPUMacros(Builder, "bdver4");
 break;
+  case CK_ZNVER1:
+defineCPUMacros(Builder, "znver1");
+break;
   case CK_Geode:
 defineCPUMacros(Builder, "geode");
 break;


Index: lib/Basic/Targets.cpp
===
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -2651,6 +2651,12 @@
 CK_BDVER4,
 //@}
 
+/// \name zen
+/// Zen architecture processors.
+//@{
+CK_ZNVER1,
+//@}
+
 /// This specification is deprecated and will be removed in the future.
 /// Users should prefer \see CK_K8.
 // FIXME: Warn on this when the CPU is set to it.
@@ -2732,6 +2738,7 @@
 .Case("bdver2", CK_BDVER2)
 .Case("bdver3", CK_BDVER3)
 .Case("bdver4", CK_BDVER4)
+.Case("znver1", CK_ZNVER1)
 .Case("x86-64", CK_x86_64)
 .Case("geode", CK_Geode)
 .Default(CK_Generic);
@@ -2931,6 +2938,7 @@
 case CK_BDVER2:
 case CK_BDVER3:
 case CK_BDVER4:
+case CK_ZNVER1:
 case CK_x86_64:
   return true;
 }
@@ -3178,6 +3186,33 @@
 setFeatureEnabledImpl(Features, "cx16", true);
 setFeatureEnabledImpl(Features, "fxsr", true);
 break;
+  case CK_ZNVER1:
+setFeatureEnabledImpl(Features, "adx", true);
+setFeatureEnabledImpl(Features, "aes", true);
+setFeatureEnabledImpl(Features, "avx2", true);
+setFeatureEnabledImpl(Features, "bmi", true);
+setFeatureEnabledImpl(Features, "bmi2", true);
+setFeatureEnabledImpl(Features, "clflushopt", true);
+setFeatureEnabledImpl(Features, "cx16", true);
+setFeatureEnabledImpl(Features, "f16c", true);
+setFeatureEnabledImpl(Features, "fma", true);
+setFeatureEnabledImpl(Features, "fsgsbase", true);
+setFeatureEnabledImpl(Features, "fxsr", true);
+setFeatureEnabledImpl(Features, "lzcnt", true);
+setFeatureEnabledImpl(Features, "mwaitx", true);
+setFeatureEnabledImpl(Features, "movbe", true);
+setFeatureEnabledImpl(Features, "pclmul", true);
+setFeatureEnabledImpl(Features, "popcnt", true);
+