This patch adds clobber_high expressions to the SVE tlsdesc pattern (tlsdesc_small_sve_<mode>) for aarch64. It also adds three tests.
In addition, I tested the patch by taking the gcc torture test suite and making all global variables __thread. I then amended the suite to compile with -fpic, save the .s file, and run at only one given optimization level. I ran this before and after the patch and compared the resulting .s files, ensuring that there were no ASM changes. I discarded the 10% of tests that failed to compile (due to the code in the test now being invalid C). I did this for -O0, -O2 and -O3 on both x86 and aarch64 and observed no difference between the ASM files before and after the patch. Alan. 2018-07-25 Alan Hayward <alan.hayw...@arm.com> gcc/ * config/aarch64/aarch64.md: Add clobber highs to tls_desc. gcc/testsuite/ * gcc.target/aarch64/sve_tls_preserve_1.c: New test. * gcc.target/aarch64/sve_tls_preserve_2.c: New test. * gcc.target/aarch64/sve_tls_preserve_3.c: New test. --- gcc/config/aarch64/aarch64.md | 69 ++++++++++++++++++---- .../gcc.target/aarch64/sve_tls_preserve_1.c | 19 ++++++ .../gcc.target/aarch64/sve_tls_preserve_2.c | 24 ++++++++ .../gcc.target/aarch64/sve_tls_preserve_3.c | 24 ++++++++ 4 files changed, 124 insertions(+), 12 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_3.c diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index e9c16f9697b..a41d6e15bc8 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -57,14 +57,36 @@ (LR_REGNUM 30) (SP_REGNUM 31) (V0_REGNUM 32) + (V1_REGNUM 33) + (V2_REGNUM 34) + (V3_REGNUM 35) (V4_REGNUM 36) + (V5_REGNUM 37) + (V6_REGNUM 38) + (V7_REGNUM 39) (V8_REGNUM 40) + (V9_REGNUM 41) + (V10_REGNUM 42) + (V11_REGNUM 43) (V12_REGNUM 44) + (V13_REGNUM 45) + (V14_REGNUM 46) (V15_REGNUM 47) (V16_REGNUM 48) + (V17_REGNUM 49) + (V18_REGNUM 50) + (V19_REGNUM 51) (V20_REGNUM 52) + (V21_REGNUM 53) + (V22_REGNUM 54) + (V23_REGNUM 55) (V24_REGNUM 56) + 
(V25_REGNUM 57) + (V26_REGNUM 58) + (V27_REGNUM 59) (V28_REGNUM 60) + (V29_REGNUM 61) + (V30_REGNUM 62) (V31_REGNUM 63) (LAST_SAVED_REGNUM 63) (SFP_REGNUM 64) @@ -6302,24 +6324,47 @@ [(set_attr "type" "call") (set_attr "length" "16")]) -;; For SVE, model tlsdesc calls as clobbering all vector and predicate -;; registers, on top of the usual R0 and LR. In reality the calls -;; preserve the low 128 bits of the vector registers, but we don't -;; yet have a way of representing that in the instruction pattern. +;; For SVE, model tlsdesc calls as clobbering the bits above the lower 128 +;; bits of all vector registers, and clobbering all predicate registers, on +;; top of the usual R0 and LR. (define_insn "tlsdesc_small_sve_<mode>" [(set (reg:PTR R0_REGNUM) (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")] UNSPEC_TLSDESC)) (clobber (reg:DI LR_REGNUM)) (clobber (reg:CC CC_REGNUM)) - (clobber (reg:XI V0_REGNUM)) - (clobber (reg:XI V4_REGNUM)) - (clobber (reg:XI V8_REGNUM)) - (clobber (reg:XI V12_REGNUM)) - (clobber (reg:XI V16_REGNUM)) - (clobber (reg:XI V20_REGNUM)) - (clobber (reg:XI V24_REGNUM)) - (clobber (reg:XI V28_REGNUM)) + (clobber_high (reg:TI V0_REGNUM)) + (clobber_high (reg:TI V1_REGNUM)) + (clobber_high (reg:TI V2_REGNUM)) + (clobber_high (reg:TI V3_REGNUM)) + (clobber_high (reg:TI V4_REGNUM)) + (clobber_high (reg:TI V5_REGNUM)) + (clobber_high (reg:TI V6_REGNUM)) + (clobber_high (reg:TI V7_REGNUM)) + (clobber_high (reg:TI V8_REGNUM)) + (clobber_high (reg:TI V9_REGNUM)) + (clobber_high (reg:TI V10_REGNUM)) + (clobber_high (reg:TI V11_REGNUM)) + (clobber_high (reg:TI V12_REGNUM)) + (clobber_high (reg:TI V13_REGNUM)) + (clobber_high (reg:TI V14_REGNUM)) + (clobber_high (reg:TI V15_REGNUM)) + (clobber_high (reg:TI V16_REGNUM)) + (clobber_high (reg:TI V17_REGNUM)) + (clobber_high (reg:TI V18_REGNUM)) + (clobber_high (reg:TI V19_REGNUM)) + (clobber_high (reg:TI V20_REGNUM)) + (clobber_high (reg:TI V21_REGNUM)) + (clobber_high (reg:TI V22_REGNUM)) + (clobber_high (reg:TI 
V23_REGNUM)) + (clobber_high (reg:TI V24_REGNUM)) + (clobber_high (reg:TI V25_REGNUM)) + (clobber_high (reg:TI V26_REGNUM)) + (clobber_high (reg:TI V27_REGNUM)) + (clobber_high (reg:TI V28_REGNUM)) + (clobber_high (reg:TI V29_REGNUM)) + (clobber_high (reg:TI V30_REGNUM)) + (clobber_high (reg:TI V31_REGNUM)) (clobber (reg:VNx2BI P0_REGNUM)) (clobber (reg:VNx2BI P1_REGNUM)) (clobber (reg:VNx2BI P2_REGNUM)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_1.c b/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_1.c new file mode 100644 index 00000000000..3bb1725e5e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fpic -march=armv8-a+sve" } */ + +/* Clobber highs do not need to be spilled around tls usage. */ + +typedef float v4si __attribute__ ((vector_size (16))); + +__thread v4si tx; + +v4si foo (v4si a, v4si b, v4si c) +{ + v4si y; + + y = a + tx + b + c; + + return y + 7; +} + +/* { dg-final { scan-assembler-not {\tst[rp]\t[dqv]} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_2.c b/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_2.c new file mode 100644 index 00000000000..69e8829287b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_2.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fpic -march=armv8-a+sve -msve-vector-bits=256 -fno-schedule-insns" } */ + +/* Clobber highs must be spilled around tls usage. */ + +typedef float v8si __attribute__ ((vector_size (32))); + +__thread v8si tx; + +v8si foo (v8si a, v8si b, v8si c) +{ + v8si y; + + /* There is nothing stopping the compiler from making the tls call before + loading the input variables off the stack. However, there appears to + be no way in C of enforcing this. Thankfully the compiler doesn't + do this reordering. 
*/ + + y = a + tx + b + c; + + return y + 7; +} + +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+,} 3 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_3.c b/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_3.c new file mode 100644 index 00000000000..b6aa59a3c73 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve_tls_preserve_3.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fpic -march=armv8-a+sve -msve-vector-bits=512 -fno-schedule-insns" } */ + +/* Clobber highs must be spilled around tls usage. */ + +typedef float v16si __attribute__ ((vector_size (64))); + +__thread v16si tx; + +v16si foo (v16si a, v16si b, v16si c) +{ + v16si y; + + /* There is nothing stopping the compiler from making the tls call before + loading the input variables off the stack. However, there appears to + be no way in C of enforcing this. Thankfully the compiler doesn't + do this reordering. */ + + y = a + tx + b + c; + + return y + 7; +} + +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+,} 3 } } */ -- 2.15.2 (Apple Git-101.1)