Hi all,

In ARMv8-A there's a general expectation that AESE/AESMC and AESD/AESIMC sequences of the form:

AESE Vn, _
AESMC Vn, Vn

will issue both instructions in a single cycle on super-scalar implementations (and likewise for AESD followed by AESIMC). It would be nice to model that in our pipeline descriptions. This patch defines a function to detect such pairs and uses it in the Cortex-A53 pipeline description for these instructions.

The patch also adds some missing Advanced SIMD information to the pipeline description for the Cortex-A53.

Bootstrapped and tested on arm-none-linux-gnueabihf and aarch64-none-linux-gnu.

Cortex-A53 scheduling is the default scheduling description on aarch64 so this patch can change default behaviour. That's an argument for taking this in stage1 or maybe backporting it into 4.9.1 once the release is made.

What do people think?

Thanks,
Kyrill


2014-03-25  Kyrylo Tkachov  <kyrylo.tkac...@arm.com>

    * config/arm/aarch-common.c (aarch_crypto_can_dual_issue): New function.
    * config/arm/aarch-common-protos.h (aarch_crypto_can_dual_issue): Declare
    extern.
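    * config/arm/cortex-a53.md (cortex_a53_simd0, cortex_a53_simd1): New
    cpu units.  Add reservations and a bypass for crypto instructions,
    reservations for AdvancedSIMD loads and a crude generic AdvancedSIMD
    reservation.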
commit b89802221229e8ca7fac5fcd6d552392301edde0
Author: Kyrylo Tkachov <kyrylo.tkac...@arm.com>
Date:   Mon Jan 27 11:29:44 2014 +0000

     Crypto scheduling for A53

diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index 056fe56..2b33626 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -23,6 +23,7 @@
 #ifndef GCC_AARCH_COMMON_PROTOS_H
 #define GCC_AARCH_COMMON_PROTOS_H
 
+extern int aarch_crypto_can_dual_issue (rtx, rtx);
 extern int arm_early_load_addr_dep (rtx, rtx);
 extern int arm_early_store_addr_dep (rtx, rtx);
 extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
index c11f7e9..17c4924 100644
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -31,6 +31,42 @@
 #include "c-family/c-common.h"
 #include "rtl.h"
 
+/* Return nonzero if PRODUCER and CONSUMER are an AESE/AESMC or
+   AESD/AESIMC pair of the form:
+
+   AESE Vn, _
+   AESMC Vn, Vn
+
+   which ARMv8-A super-scalar implementations are generally expected
+   to issue in a single cycle.  Return 0 for any other pair of insns.  */
+
+int
+aarch_crypto_can_dual_issue (rtx producer, rtx consumer)
+{
+  rtx producer_src, consumer_src;
+
+  producer = single_set (producer);
+  consumer = single_set (consumer);
+
+  producer_src = producer ? SET_SRC (producer) : NULL;
+  consumer_src = consumer ? SET_SRC (consumer) : NULL;
+
+  if (producer_src && consumer_src
+      && GET_CODE (producer_src) == UNSPEC && GET_CODE (consumer_src) == UNSPEC
+      && ((XINT (producer_src, 1) == UNSPEC_AESE
+           && XINT (consumer_src, 1) == UNSPEC_AESMC)
+          || (XINT (producer_src, 1) == UNSPEC_AESD
+              && XINT (consumer_src, 1) == UNSPEC_AESIMC))
+      /* REGNO is only valid on a REG, so reject e.g. SUBREG operands.  */
+      && REG_P (SET_DEST (producer)) && REG_P (SET_DEST (consumer))
+      && REG_P (XVECEXP (consumer_src, 0, 0)))
+    return (REGNO (SET_DEST (consumer)) == REGNO (SET_DEST (producer))
+            && (REGNO (XVECEXP (consumer_src, 0, 0))
+                == REGNO (SET_DEST (producer))));
+
+  return 0;
+}
+
 typedef struct
 {
   rtx_code search_code;
diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md
index deae8eb..b131c81 100644
--- a/gcc/config/arm/cortex-a53.md
+++ b/gcc/config/arm/cortex-a53.md
@@ -61,6 +61,11 @@
 
 (define_cpu_unit "cortex_a53_fp_div_sqrt" "cortex_a53")
 
+;; The two Advanced SIMD pipelines of the Cortex-A53 model.
+
+(define_cpu_unit "cortex_a53_simd0" "cortex_a53")
+(define_cpu_unit "cortex_a53_simd1" "cortex_a53")
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; ALU instructions.
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -248,6 +253,39 @@
   "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 28")
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ARMv8-A Cryptographic extensions: AES/SHA reservations, AES pair bypass.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "cortex_a53_crypto_aese" 2
+  (and (eq_attr "tune" "cortexa53")
+       (eq_attr "type" "crypto_aese"))
+  "cortex_a53_simd0")
+
+(define_insn_reservation "cortex_a53_crypto_aesmc" 2
+  (and (eq_attr "tune" "cortexa53")
+       (eq_attr "type" "crypto_aesmc"))
+  "cortex_a53_simd0 | cortex_a53_simd1")
+
+(define_insn_reservation "cortex_a53_crypto_sha_fast" 2
+  (and (eq_attr "tune" "cortexa53")
+       (eq_attr "type" "crypto_sha1_fast, crypto_sha256_fast"))
+  "cortex_a53_simd0")
+
+(define_insn_reservation "cortex_a53_crypto_sha1_xor" 3
+  (and (eq_attr "tune" "cortexa53")
+       (eq_attr "type" "crypto_sha1_xor"))
+  "cortex_a53_simd0")
+
+(define_insn_reservation "cortex_a53_crypto_sha_slow" 5
+  (and (eq_attr "tune" "cortexa53")
+       (eq_attr "type" "crypto_sha1_slow, crypto_sha256_slow"))
+  "cortex_a53_simd0")
+
+(define_bypass 0 "cortex_a53_crypto_aese"
+                 "cortex_a53_crypto_aesmc"
+                 "aarch_crypto_can_dual_issue")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; VFP to/from core transfers.
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -284,6 +322,16 @@
        (eq_attr "type" "f_loadd"))
   "cortex_a53_slot0")
 
+(define_insn_reservation "cortex_a53_f_load_2reg" 5
+  (and (eq_attr "tune" "cortexa53")
+       (eq_attr "type" "neon_load2_2reg_q"))
+  "(cortex_a53_slot_any+cortex_a53_ls)*2")  ; Reserved for 2 cycles.
+
+(define_insn_reservation "cortex_a53_f_loadq" 5
+  (and (eq_attr "tune" "cortexa53")
+       (eq_attr "type" "neon_load1_1reg_q"))
+  "cortex_a53_slot_any+cortex_a53_ls")  ; Reserved for 1 cycle.
+
 (define_insn_reservation "cortex_a53_f_stores" 0
   (and (eq_attr "tune" "cortexa53")
        (eq_attr "type" "f_stores"))
@@ -307,3 +355,11 @@
 		  cortex_a53_fdivs, cortex_a53_fdivd,\
 		  cortex_a53_f2r")
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Crude Advanced SIMD approximation (latency 4 on cortex_a53_simd0).
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn_reservation "cortex_a53_advsimd" 4
+  (and (eq_attr "tune" "cortexa53")
+       (eq_attr "is_neon_type" "yes"))
+  "cortex_a53_simd0")

Reply via email to