Hi Richard,

> - Why do we rewrite the constant moves after reload into ldr_got_small_sidi
>   and ldr_got_small_<mode>?  Couldn't we just get the move patterns to
>   output the sequence directly?

That's possible too, however it makes the movsi/di patterns more complex.
See version v4 below.

> - I think we should leave out the rtx_costs part and deal with that
>   separately.  This patch should just be about whether we emit two
>   separate define_insns for the move or whether we keep a single one
>   (to support relaxation).

As the title and description explain, code quality improves significantly by
keeping the instructions together before we even consider linker relaxation.
The cost improvements can be done separately but they are important to
get the measured code quality gains.

Here is v4:

Improve GOT addressing by treating the instructions as a pair.  This reduces
register pressure and improves code quality significantly.  SPECINT2017 improves
by 0.6% with -fPIC and codesize is 0.73% smaller.  Perlbench has 0.9% smaller
codesize, 1.5% fewer executed instructions and is 1.8% faster on Neoverse N1.

Passes bootstrap and regress. OK for commit?

ChangeLog:
2021-11-02  Wilco Dijkstra  <wdijk...@arm.com>

        * config/aarch64/aarch64.md (movsi): Add alternative for GOT accesses.
        (movdi): Likewise.
        (ldr_got_small_<mode>): Remove pattern.
        (ldr_got_small_sidi): Likewise.
        * config/aarch64/aarch64.c (aarch64_load_symref_appropriately): Keep
        GOT accesses as moves.
        (aarch64_print_operand): Correctly print got_lo12 in L specifier.
        (aarch64_mov_operand_p): Make GOT accesses valid move operands.
        * config/aarch64/constraints.md: Add new constraint Usw for GOT access.

---
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
6d26218a61bd0f9e369bb32388b7f8643b632172..ab2954d11333b3f5a419c55d0a74f13d1df70680
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3753,47 +3753,8 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
       }
 
     case SYMBOL_SMALL_GOT_4G:
-      {
-       /* In ILP32, the mode of dest can be either SImode or DImode,
-          while the got entry is always of SImode size.  The mode of
-          dest depends on how dest is used: if dest is assigned to a
-          pointer (e.g. in the memory), it has SImode; it may have
-          DImode if dest is dereferenced to access the memeory.
-          This is why we have to handle three different ldr_got_small
-          patterns here (two patterns for ILP32).  */
-
-       rtx insn;
-       rtx mem;
-       rtx tmp_reg = dest;
-       machine_mode mode = GET_MODE (dest);
-
-       if (can_create_pseudo_p ())
-         tmp_reg = gen_reg_rtx (mode);
-
-       emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
-       if (mode == ptr_mode)
-         {
-           if (mode == DImode)
-             insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
-           else
-             insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
-
-           mem = XVECEXP (SET_SRC (insn), 0, 0);
-         }
-       else
-         {
-           gcc_assert (mode == Pmode);
-
-           insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
-           mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
-         }
-
-       gcc_assert (MEM_P (mem));
-       MEM_READONLY_P (mem) = 1;
-       MEM_NOTRAP_P (mem) = 1;
-       emit_insn (insn);
-       return;
-      }
+      emit_insn (gen_rtx_SET (dest, imm));
+      return;
 
     case SYMBOL_SMALL_TLSGD:
       {
@@ -11159,7 +11120,7 @@ aarch64_print_operand (FILE *f, rtx x, int code)
       switch (aarch64_classify_symbolic_expression (x))
        {
        case SYMBOL_SMALL_GOT_4G:
-         asm_fprintf (asm_out_file, ":lo12:");
+         asm_fprintf (asm_out_file, ":got_lo12:");
          break;
 
        case SYMBOL_SMALL_TLSGD:
@@ -20241,6 +20202,11 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
       return aarch64_simd_valid_immediate (x, NULL);
     }
 
+  /* GOT accesses are valid moves.  */
+  if (SYMBOL_REF_P (x)
+      && aarch64_classify_symbolic_expression (x) == SYMBOL_SMALL_GOT_4G)
+    return true;
+
   x = strip_salt (x);
   if (SYMBOL_REF_P (x) && mode == DImode && CONSTANT_ADDRESS_P (x))
     return true;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 
7085cd4a51dc4c22def9b95f2221b3847603a9e5..9d38269e9429597b825838faf9f241216cc6ab47
 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1263,8 +1263,8 @@ (define_expand "mov<mode>"
 )
 
 (define_insn_and_split "*movsi_aarch64"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,  
r,  r, w,r,w, w")
-       (match_operand:SI 1 "aarch64_mov_operand"  " 
r,r,k,M,n,Usv,m,m,rZ,w,Usa,Ush,rZ,w,w,Ds"))]
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,  
r,  r,  r, w,r,w, w")
+       (match_operand:SI 1 "aarch64_mov_operand"  " 
r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
   "(register_operand (operands[0], SImode)
     || aarch64_reg_or_zero (operands[1], SImode))"
   "@
@@ -1278,6 +1278,7 @@ (define_insn_and_split "*movsi_aarch64"
    ldr\\t%s0, %1
    str\\t%w1, %0
    str\\t%s1, %0
+   * return \"adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]\";
    adr\\t%x0, %c1
    adrp\\t%x0, %A1
    fmov\\t%s0, %w1
@@ -1293,13 +1294,15 @@ (define_insn_and_split "*movsi_aarch64"
     }"
   ;; The "mov_imm" type for CNT is just a placeholder.
   [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
-                   load_4,store_4,store_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
-   (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,fp,fp,fp,simd")]
+                   
load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
+   (set_attr "arch"   "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
+   (set_attr "length" "4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")
+]
 )
 
 (define_insn_and_split "*movdi_aarch64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m,  
r,  r, w,r,w, w")
-       (match_operand:DI 1 "aarch64_mov_operand"  " 
r,r,k,N,M,n,Usv,m,m,rZ,w,Usa,Ush,rZ,w,w,Dd"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m,  
 r,  r,  r, w,r,w, w")
+       (match_operand:DI 1 "aarch64_mov_operand"  " 
r,r,k,N,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
   "(register_operand (operands[0], DImode)
     || aarch64_reg_or_zero (operands[1], DImode))"
   "@
@@ -1314,13 +1317,14 @@ (define_insn_and_split "*movdi_aarch64"
    ldr\\t%d0, %1
    str\\t%x1, %0
    str\\t%d1, %0
+   * return TARGET_ILP32 ? \"adrp\\t%0, %A1\;ldr\\t%w0, [%0, %L1]\" : 
\"adrp\\t%0, %A1\;ldr\\t%0, [%0, %L1]\";
    adr\\t%x0, %c1
    adrp\\t%x0, %A1
    fmov\\t%d0, %x1
    fmov\\t%x0, %d1
    fmov\\t%d0, %d1
    * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);"
-   "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), 
DImode))
+   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), 
DImode)
     && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
    [(const_int 0)]
    "{
@@ -1329,9 +1333,10 @@ (define_insn_and_split "*movdi_aarch64"
     }"
   ;; The "mov_imm" type for CNTD is just a placeholder.
   [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm,
-                    load_8,load_8,store_8,store_8,adr,adr,f_mcr,f_mrc,fmov,
-                    neon_move")
-   (set_attr "arch" "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,fp,fp,fp,simd")]
+                    load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
+                    fmov,neon_move")
+   (set_attr "arch"   "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
+   (set_attr "length" "4,4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")]
 )
 
 (define_insn "insv_imm<mode>"
@@ -6707,29 +6712,6 @@ (define_insn "add_losym_<mode>"
   [(set_attr "type" "alu_imm")]
 )
 
-(define_insn "ldr_got_small_<mode>"
-  [(set (match_operand:PTR 0 "register_operand" "=r")
-       (unspec:PTR [(mem:PTR (lo_sum:PTR
-                             (match_operand:PTR 1 "register_operand" "r")
-                             (match_operand:PTR 2 "aarch64_valid_symref" 
"S")))]
-                   UNSPEC_GOTSMALLPIC))]
-  ""
-  "ldr\\t%<w>0, [%1, #:got_lo12:%c2]"
-  [(set_attr "type" "load_<ldst_sz>")]
-)
-
-(define_insn "ldr_got_small_sidi"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-       (zero_extend:DI
-        (unspec:SI [(mem:SI (lo_sum:DI
-                            (match_operand:DI 1 "register_operand" "r")
-                            (match_operand:DI 2 "aarch64_valid_symref" "S")))]
-                   UNSPEC_GOTSMALLPIC)))]
-  "TARGET_ILP32"
-  "ldr\\t%w0, [%1, #:got_lo12:%c2]"
-  [(set_attr "type" "load_4")]
-)
-
 (define_insn "ldr_got_small_28k_<mode>"
   [(set (match_operand:PTR 0 "register_operand" "=r")
        (unspec:PTR [(mem:PTR (lo_sum:PTR
diff --git a/gcc/config/aarch64/constraints.md 
b/gcc/config/aarch64/constraints.md
index 
3b49b452119c49320020fa9183314d9a25b92491..985fa2217e6a044b1eb2adc886864f63a1f9f9b2
 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -152,6 +152,14 @@ (define_constraint "Usa"
        (match_test "aarch64_symbolic_address_p (op)")
        (match_test "aarch64_mov_operand_p (op, GET_MODE (op))")))
 
+(define_constraint "Usw"
+  "@internal
+   A constraint that matches a small GOT access."
+  (and (match_code "symbol_ref")
+       (match_test "aarch64_symbolic_address_p (op)")
+       (match_test "aarch64_classify_symbolic_expression (op)
+                    == SYMBOL_SMALL_GOT_4G")))
+
 (define_constraint "Uss"
   "@internal
   A constraint that matches an immediate shift constant in SImode."

Reply via email to