Re: [SH] PR 50751 - Add support for SH2A movu.b and movu.w insns

2012-08-14 Thread Kaz Kojima
Oleg Endo oleg.e...@t-online.de wrote:
 This adds support for the SH2A instructions movu.b and movu.w for
 zero-extending mem loads with displacement addressing.
 Tested on rev 190332 with
 make -k check RUNTESTFLAGS=--target_board=sh-sim
 \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}
 
 and no new failures.
 OK?

OK.

Regards,
kaz


[SH] PR 50751 - Add support for SH2A movu.b and movu.w insns

2012-08-13 Thread Oleg Endo
Hello,

This adds support for the SH2A instructions movu.b and movu.w for
zero-extending mem loads with displacement addressing.
Tested on rev 190332 with
make -k check RUNTESTFLAGS=--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}

and no new failures.
OK?

Cheers,
Oleg

ChangeLog:

PR target/50751
* config/sh/constraints.md (Sra): New constraint.
* config/sh/predicates.md (simple_mem_operand, 
displacement_mem_operand, zero_extend_movu_operand): New 
predicates.
(zero_extend_operand): Check zero_extend_movu_operand for SH2A.
* config/sh/sh.md (*zero_extendqisi2_disp_mem, 
*zero_extendhisi2_disp_mem): Add new insns and two new related 
peephole2 patterns.

testsuite/ChangeLog:

PR target/50751
* gcc.target/sh/pr50751-8.c: New.
Index: gcc/config/sh/constraints.md
===
--- gcc/config/sh/constraints.md	(revision 190332)
+++ gcc/config/sh/constraints.md	(working copy)
@@ -49,6 +49,7 @@
 ;;  Sbw: QImode address with 12 bit displacement
 ;;  Snd: address without displacement
 ;;  Sdd: address with displacement
+;;  Sra: simple register address
 ;; W: vector
 ;; Z: zero in any mode
 ;;
@@ -307,3 +308,8 @@
(match_test GET_MODE (op) == QImode)
(match_test satisfies_constraint_K12 (XEXP (XEXP (op, 0), 1)
 
+(define_memory_constraint Sra
+  A memory reference that uses a simple register addressing.
+  (and (match_test MEM_P (op))
+   (match_test REG_P (XEXP (op, 0)
+
Index: gcc/config/sh/sh.md
===
--- gcc/config/sh/sh.md	(revision 190332)
+++ gcc/config/sh/sh.md	(working copy)
@@ -4842,6 +4842,88 @@
   extu.b	%1,%0
   [(set_attr type arith)])
 
+;; SH2A supports two zero extending load instructions: movu.b and movu.w.
+;; They could also be used for simple memory addresses like @Rn by setting
+;; the displacement value to zero.  However, doing so too early results in
+;; missed opportunities for other optimizations such as post-inc or index
+;; addressing loads.
+;; Although the 'zero_extend_movu_operand' predicate does not allow simple
+;; register addresses (an address without a displacement, index, post-inc),
+;; zero-displacement addresses might be generated during reload, wich are
+;; simplified to simple register addresses in turn.  Thus, we have to
+;; provide the Sdd and Sra alternatives in the patterns.
+(define_insn *zero_extendqisi2_disp_mem
+  [(set (match_operand:SI 0 arith_reg_dest =r,r)
+	(zero_extend:SI
+	  (match_operand:QI 1 zero_extend_movu_operand Sdd,Sra)))]
+  TARGET_SH2A
+  @
+	movu.b	%1,%0
+	movu.b	@(0,%t1),%0
+  [(set_attr type load)
+   (set_attr length 4)])
+
+(define_insn *zero_extendhisi2_disp_mem
+  [(set (match_operand:SI 0 arith_reg_dest =r,r)
+	(zero_extend:SI
+	  (match_operand:HI 1 zero_extend_movu_operand Sdd,Sra)))]
+  TARGET_SH2A
+  @
+	movu.w	%1,%0
+	movu.w	@(0,%t1),%0
+  [(set_attr type load)
+   (set_attr length 4)])
+
+;; Convert the zero extending loads in sequences such as:
+;;	movu.b	@(1,r5),r0	movu.w	@(2,r5),r0
+;;	mov.b	r0,@(1,r4)	mov.b	r0,@(1,r4)
+;;
+;; back to sign extending loads like:
+;;	mov.b	@(1,r5),r0	mov.w	@(2,r5),r0
+;;	mov.b	r0,@(1,r4)	mov.b	r0,@(1,r4)
+;;
+;; if the extension type is irrelevant.  The sign extending mov.{b|w} insn
+;; is only 2 bytes in size if the displacement is {K04|K05}.
+;; If the displacement is greater it doesn't matter, so we convert anyways.
+(define_peephole2
+  [(set (match_operand:SI 0 arith_reg_dest )
+	(zero_extend:SI (match_operand 1 displacement_mem_operand )))
+   (set (match_operand 2 general_operand )
+	(match_operand 3 arith_reg_operand ))]
+  TARGET_SH2A
+REGNO (operands[0]) == REGNO (operands[3])
+peep2_reg_dead_p (2, operands[0])
+GET_MODE_SIZE (GET_MODE (operands[2]))
+  = GET_MODE_SIZE (GET_MODE (operands[1]))
+  [(set (match_dup 0) (sign_extend:SI (match_dup 1)))
+   (set (match_dup 2) (match_dup 3))])
+
+;; Fold sequences such as
+;;	mov.b	@r3,r7
+;;	extu.b	r7,r7
+;; into
+;;	movu.b	@(0,r3),r7
+;; This does not reduce the code size but the number of instructions is
+;; halved, which results in faster code.
+(define_peephole2
+  [(set (match_operand:SI 0 arith_reg_dest )
+	(sign_extend:SI (match_operand 1 simple_mem_operand )))
+   (set (match_operand:SI 2 arith_reg_dest )
+	(zero_extend:SI (match_operand 3 arith_reg_operand )))]
+  TARGET_SH2A
+GET_MODE (operands[1]) == GET_MODE (operands[3])
+(GET_MODE (operands[1]) == QImode || GET_MODE (operands[1]) == HImode)
+REGNO (operands[0]) == REGNO (operands[3])
+(REGNO (operands[2]) == REGNO (operands[0])
+   || peep2_reg_dead_p (2, operands[0]))
+  [(set (match_dup 2) (zero_extend:SI (match_dup 4)))]
+{
+  operands[4]
+= replace_equiv_address (operands[1],
+			 gen_rtx_PLUS (SImode, XEXP (operands[1], 0),
+	   const0_rtx));
+})
+
 ;; 

Re: [SH] PR 50751

2012-08-09 Thread Kaz Kojima
Oleg Endo oleg.e...@t-online.de wrote:
 This patch fixes a minor issue related to the displacement addressing
 patterns, which leads to useless movt exts.* sequences and one of the
 predicates wrongly accepting non-mem ops.
 
 Tested on rev 190151 with
  make -k check RUNTESTFLAGS=--target_board=sh-sim
 \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}
 
 and no new failures.
 OK?

OK.

Regards,
kaz


[SH] PR 50751

2012-08-08 Thread Oleg Endo
Hello,

This patch fixes a minor issue related to the displacement addressing
patterns, which leads to useless movt exts.* sequences and one of the
predicates wrongly accepting non-mem ops.

Tested on rev 190151 with
 make -k check RUNTESTFLAGS=--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}

and no new failures.
OK?

Cheers,
Oleg

ChangeLog:

PR target/50751
* config/sh/sh.md (*extendqisi2_compact_reg, 
*extendhisi2_compact_reg): Use arith_reg_operand predicate 
instead of register_operand.
* config/sh/predicates.md (movsrc_no_disp_mem_operand): Accept 
only mem, simplify.
Index: gcc/config/sh/sh.md
===
--- gcc/config/sh/sh.md	(revision 190151)
+++ gcc/config/sh/sh.md	(working copy)
@@ -4819,14 +4819,14 @@
 
 (define_insn *extendqisi2_compact_reg
   [(set (match_operand:SI 0 arith_reg_dest =r)
-	(sign_extend:SI (match_operand:QI 1 register_operand r)))]
+	(sign_extend:SI (match_operand:QI 1 arith_reg_operand r)))]
   TARGET_SH1
   exts.b	%1,%0
   [(set_attr type arith)])
 
 (define_insn *extendhisi2_compact_reg
   [(set (match_operand:SI 0 arith_reg_dest =r)
-	(sign_extend:SI (match_operand:HI 1 register_operand r)))]
+	(sign_extend:SI (match_operand:HI 1 arith_reg_operand r)))]
   TARGET_SH1
   exts.w	%1,%0
   [(set_attr type arith)])
Index: gcc/config/sh/predicates.md
===
--- gcc/config/sh/predicates.md	(revision 190151)
+++ gcc/config/sh/predicates.md	(working copy)
@@ -428,28 +428,12 @@
   return general_operand (op, mode);
 })
 
-;; Same as movsrc_operand, but rejects displacement addressing.
+;; Returns 1 if OP is a MEM that does not use displacement addressing.
 
 (define_predicate movsrc_no_disp_mem_operand
-  (match_code subreg,reg,const_int,const_double,mem,symbol_ref,label_ref,const,const_vector)
+  (match_code mem)
 {
-  if (!general_movsrc_operand (op, mode))
-return 0;
-
-  if ((mode == QImode || mode == HImode)
-   mode == GET_MODE (op)
-   (MEM_P (op)
-	  || (GET_CODE (op) == SUBREG  MEM_P (SUBREG_REG (op)
-{
-  rtx x = XEXP ((MEM_P (op) ? op : SUBREG_REG (op)), 0);
-
-  if (GET_CODE (x) == PLUS
-	   REG_P (XEXP (x, 0))
-	   CONST_INT_P (XEXP (x, 1)))
-	return 0;
-}
-
-  return 1;
+  return general_movsrc_operand (op, mode)  satisfies_constraint_Snd (op);
 })
 
 ;; Returns 1 if OP can be a destination of a move. Same as


Re: [SH] PR 50751 - add HImode displacement addressing support

2012-04-11 Thread Kaz Kojima
Oleg Endo oleg.e...@t-online.de wrote:
 BTW, do you have the numbers of CSiBE with this?
 
 
 Only for -m4-single -ml -O2 -mpretend-cmove so far.
 Not so spectacular :T
 I'll also do a comparison of more variants to see if something went
 really bad.  It's a bit difficult to isolate the degradations because
 there's quite some code reordering happening after the patch... 

Thanks for numbers!  Looks good considering that HImode would be
less frequently used than QImode in the usual working set.

Regards,
kaz


Re: [SH] PR 50751 - add HImode displacement addressing support

2012-04-11 Thread Oleg Endo
On Wed, 2012-04-11 at 22:10 +0900, Kaz Kojima wrote:
 Oleg Endo oleg.e...@t-online.de wrote:
  BTW, do you have the numbers of CSiBE with this?
  
  
  Only for -m4-single -ml -O2 -mpretend-cmove so far.
  Not so spectacular :T
  I'll also do a comparison of more variants to see if something went
  really bad.  It's a bit difficult to isolate the degradations because
  there's quite some code reordering happening after the patch... 
 
 Thanks for numbers!  Looks good considering that HImode would be
 less frequently used than QImode in the usual working set.
 
Sure, no problem.
I think there is some room for improvement in the
'sh_find_mov_disp_adjust' function.  If it was a bit smarter, all the
displacement move insns would instantly benefit from it.

Cheers,
Oleg



[SH] PR 50751- add HImode testcases

2012-04-11 Thread Oleg Endo
Hi,

The attached patch adds some more test cases for PR 50751.

Tested on sh-sim with 
make check-gcc RUNTESTFLAGS=sh.exp=pr50751* --target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a-single/-mb,-m4-single/-ml,
-m4-single/-mb,-m4a-single/-ml,-m4a-single/-mb}

to confirm that the tests pass as expected.
OK?

Cheers,
Oleg

testsuite/ChangeLog:

PR target/50751
* gcc/target/sh/pr50751-4.c: New.
* gcc/target/sh/pr50751-5.c: New.
* gcc/target/sh/pr50751-6.c: New.
* gcc/target/sh/pr50751-7.c: New.

Index: gcc/testsuite/gcc.target/sh/pr50751-4.c
===
--- gcc/testsuite/gcc.target/sh/pr50751-4.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr50751-4.c	(revision 0)
@@ -0,0 +1,30 @@
+/* Check that the mov.w displacement addressing insn is generated.
+   If the insn is generated as expected, there should be no address 
+   calculations outside the mov insns.  */
+/* { dg-do compile { target sh*-*-* } } */
+/* { dg-options -O1 } */
+/* { dg-skip-if  { sh*-*-* } { -m5*} {  } } */
+/* { dg-final { scan-assembler-not add|sub } } */
+
+void
+testfunc_00 (const short* ap, short* bp, short val)
+{
+  bp[0] = ap[15];
+  bp[2] = ap[5];
+  bp[9] = ap[7];
+  bp[0] = ap[15];
+  bp[4] = val;
+  bp[14] = val;
+}
+
+void
+testfunc_01 (volatile const short* ap, volatile short* bp, short val)
+{
+  bp[0] = ap[15];
+  bp[2] = ap[5];
+  bp[9] = ap[7];
+  bp[0] = ap[15];
+  bp[4] = val;
+  bp[14] = val;
+}
+
Index: gcc/testsuite/gcc.target/sh/pr50751-5.c
===
--- gcc/testsuite/gcc.target/sh/pr50751-5.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr50751-5.c	(revision 0)
@@ -0,0 +1,27 @@
+/* Check that the mov.w displacement addressing insn is generated and the 
+   base address is adjusted only once.  On SH2A this test is skipped because
+   there is a 4 byte mov.w insn that can handle larger displacements.  Thus
+   on SH2A the base address will not be adjusted in this case.  */
+/* { dg-do compile { target sh*-*-* } } */
+/* { dg-options -O1 } */
+/* { dg-skip-if  { sh*-*-* } { -m5* -m2a* } {  } } */
+/* { dg-final { scan-assembler-times add 2 } } */
+
+void
+testfunc_00 (const short* ap, short* bp)
+{
+  bp[0] = ap[15];
+  bp[2] = ap[5];
+  bp[9] = ap[7];
+  bp[0] = ap[25];
+}
+
+void
+testfunc_01 (volatile const short* ap, volatile short* bp)
+{
+  bp[0] = ap[15];
+  bp[2] = ap[5];
+  bp[9] = ap[7];
+  bp[0] = ap[25];
+}
+
Index: gcc/testsuite/gcc.target/sh/pr50751-6.c
===
--- gcc/testsuite/gcc.target/sh/pr50751-6.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr50751-6.c	(revision 0)
@@ -0,0 +1,26 @@
+/* Check that on SH2A the 4 byte mov.w displacement insn is generated to
+   handle larger displacements.  If it is generated correctly, there should
+   be no base address adjustments outside the mov.w insns.  */
+/* { dg-do compile { target sh*-*-* } } */
+/* { dg-options -O1 } */
+/* { dg-skip-if  { sh*-*-* } { * } { -m2a* } } */
+/* { dg-final { scan-assembler-not add|sub } } */
+
+void
+testfunc_00 (const short* ap, short* bp)
+{
+  bp[100] = ap[15];
+  bp[200] = ap[50];
+  bp[900] = ap[71];
+  bp[0] = ap[25];
+}
+
+void
+testfunc_01 (volatile const short* ap, volatile short* bp)
+{
+  bp[100] = ap[15];
+  bp[200] = ap[50];
+  bp[900] = ap[71];
+  bp[0] = ap[25];
+}
+
Index: gcc/testsuite/gcc.target/sh/pr50751-7.c
===
--- gcc/testsuite/gcc.target/sh/pr50751-7.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr50751-7.c	(revision 0)
@@ -0,0 +1,35 @@
+/* Check that mov.b and mov.w displacement insns are generated.
+   If this is working properly, there should be no base address adjustments
+   outside the mov insns.  */
+/* { dg-do compile { target sh*-*-* } } */
+/* { dg-options -O1 } */
+/* { dg-skip-if  { sh*-*-* } { -m5*} {  } } */
+/* { dg-final { scan-assembler-not add|sub } } */
+
+typedef struct 
+{
+  char	a;
+  char	b;
+  char	c;
+  char	d;
+
+  short e;
+  short f;
+
+  int g;
+  int h;
+} X;
+
+void
+testfunc_00 (X* x)
+{
+  x-g = x-b | x-c;
+  x-h = x-e | x-f;
+  x-d = x-g;
+  x-f = x-h;
+}
+
+int testfunc_01 (X* x)
+{
+  return x-b | x-e | x-g;
+}


Re: [SH] PR 50751- add HImode testcases

2012-04-11 Thread Kaz Kojima
Oleg Endo oleg.e...@t-online.de wrote:
 The attached patch adds some more test cases for PR 50751.
 
 Tested on sh-sim with 
 make check-gcc RUNTESTFLAGS=sh.exp=pr50751* --target_board=sh-sim
 \{-m2/-ml,-m2/-mb,-m2a-single/-mb,-m4-single/-ml,
 -m4-single/-mb,-m4a-single/-ml,-m4a-single/-mb}
 
 to confirm that the tests pass as expected.
 OK?

OK.

Regards,
kaz


Re: [SH] PR 50751 - add HImode displacement addressing support

2012-04-10 Thread Kaz Kojima
Oleg Endo oleg.e...@t-online.de wrote:
 The attached patch adds HImode addressing support.
 Tested against rev. 186243 with
 sudo make -k check RUNTESTFLAGS=--target_board=sh-sim
 \{-m2/-ml,-m2/-mb,-m2a/-mb,-m2a-single/-mb,-m4/-ml,-m4/-mb,
 -m4-single/-ml,-m4-single/-mb,-m4a-single/-ml,-m4a-single/-mb}
 
 and no new failures.
 Test cases will follow soon.

The patch is OK for trunk.

BTW, do you have the numbers of CSiBE with this?

Regards,
kaz


Re: [SH] PR 50751 - add HImode displacement addressing support

2012-04-10 Thread Oleg Endo
On Tue, 2012-04-10 at 22:42 +0900, Kaz Kojima wrote:
 Oleg Endo oleg.e...@t-online.de wrote:
  The attached patch adds HImode addressing support.
  Tested against rev. 186243 with
  sudo make -k check RUNTESTFLAGS=--target_board=sh-sim
  \{-m2/-ml,-m2/-mb,-m2a/-mb,-m2a-single/-mb,-m4/-ml,-m4/-mb,
  -m4-single/-ml,-m4-single/-mb,-m4a-single/-ml,-m4a-single/-mb}
  
  and no new failures.
  Test cases will follow soon.
 
 The patch is OK for trunk.
 
 BTW, do you have the numbers of CSiBE with this?
 

Only for -m4-single -ml -O2 -mpretend-cmove so far.
Not so spectacular :T
I'll also do a comparison of more variants to see if something went
really bad.  It's a bit difficult to isolate the degradations because
there's quite some code reordering happening after the patch... 

Cheers,
Oleg



OpenTCP-1.0.4
  arp 2089 - 2061 -28 / -1.340354 %
  bootp/bootp  740 - 704  -36 / -4.864865 %
  demo/main_demo   372 - 368   -4 / -1.075269 %
  demo/tcp_client_demo 396 - 396 
  demo/tcp_server_demo 468 - 468 
  demo/udp_demo268 - 268 
  dhcp/dhcpc  1588 - 1588
  dns/dns 1340 - 1328 -12 / -0.895522 %
  ethernet1240 - 1228 -12 / -0.967742 %
  http/http_server1784 - 1764 -20 / -1.121076 %
  http/https_callbacks 656 - 656 
  icmp 348 - 352   +4 / +1.149425 %
  ip  1780 - 1668-112 / -6.292135 %
  pop3/pop3_client3796 - 3728 -68 / -1.791359 %
  pop3/pop3c_callbacks  28 - 28  
  smtp/smtp_client2284 - 2212 -72 / -3.152364 %
  smtp/smtpc_callbacks  32 - 32  
  system   980 - 980 
  tcp 4966 - 4970  +4 / +0.080548 %
  tftp/tftps  1024 - 920 -104 / -10.156250 %
  timers   228 - 228 
  udp 1362 - 1322 -40 / -2.936858 %

 total: 27769 - 27269  -500 / -1.800569 %


bzip2-1.0.2
  blocksort   7262 - 7258  -4 / -0.055081 %
  bzip2  17288 - 17248-40 / -0.231374 %
  bzip2recover3868 - 3868
  bzlib  10668 - 10636-32 / -0.299963 %
  compress   14208 - 14192-16 / -0.112613 %
  crctable1024 - 1024
  decompress  8380 - 8380
  huffman 1436 - 1436
  randtable   2048 - 2048

 total: 66182 - 66090  -92 / -0.139011 %


cg_compiler_opensrc
  atom3728 - 3728
  binding 1752 - 1728 -24 / -1.369863 %
  cgcmain 2388 - 2388
  cgstruct 128 - 128 
  check   3300 - 3300
  compile14052 - 14052   
  constfold   3996 - 3996
  cpp 6776 - 6768  -8 / -0.118064 %
  generic_hal 2516 - 2516
  hal 2720 - 2720
  inline  2724 - 2724
  memory   548 - 548 
  parser 15796 - 15796   
  printutils 11888 - 11888   
  scanner 6716 - 6672 -44 / -0.655152 %
  semantic4364 - 4364
  stdlib  3224 - 3224
  support24328 - 24332 +4 / +0.016442 %
  support_iter1832 - 1832
  symbols 6324 - 6312 -12 / -0.189753 %
  tokenize 624 - 624 
  tokens  3112 - 3120  +8 / +0.257069 %

 total:122836 - 122760 -76 / -0.061871 %


compiler
  cg 2868 - 2868
  main   2600 - 2600
  parser 4524 - 4528  +4 / +0.088417 %
  scanner3788 - 3788
  vam5264 - 5264
  vas6292 - 6300  +8 / +0.127146 %

 total: 25336 - 25348  +12 / +0.047363 %


flex-2.5.31
  buf  1100 - 1100
  ccl  1220 - 1220
  dfa  6952 - 6952
  ecs   612 - 612 
  filter   2360 - 2360
  gen 24512 - 24488-24 / -0.097911 %
  libmain28 - 28  
  libyywrap   4 - 4   
  main19804 - 19792-12 / -0.060594 %
  misc 6480 - 6480
  nfa  3812 - 3812
  options  2768 - 2768
  parse9524 - 9528  +4 / +0.041999 %
  regex 596 - 596 
  scan47012 - 47004 -8 / -0.017017 %
  scanopt  3672 - 3672
  skel96652 - 96652   
  sym  1012 - 1012
  tables   2080 - 2064 -16 / -0.769231 %
  tables_shared  28 - 28  
  tblcmp   3764 - 3764
  yylex1292 - 1292

 total:235284 - 235228 -56 / -0.023801 %


jikespg-1.3
  src/ctabs  49912 - 49876-36 / -0.072127 %
  src/globals  288 - 288 
  src/lpgparse   47340 - 47036   -304 / -0.642163 %
  src/lpgutil 5152 - 5120 -32 / -0.621118 %
  src/main5440 - 5440
  src/mkfirst

Re: [SH] PR 50751 - add HImode displacement addressing support

2012-04-10 Thread Nathan Froyd
- Original Message -
  BTW, do you have the numbers of CSiBE with this?
 
 Only for -m4-single -ml -O2 -mpretend-cmove so far.
 Not so spectacular :T
 I'll also do a comparison of more variants to see if something went
 really bad.  It's a bit difficult to isolate the degradations because
 there's quite some code reordering happening after the patch...

Are you looking at execution time or code size?  If the latter, you could try 
disabling scheduling for comparison purposes (-fno-schedule-insns 
-fno-schedule-insns2).  Even if you're looking at execution time, disabling 
scheduling might make it easier to see where things are going wrong in the 
patched code.

-Nathan


Re: [SH] PR 50751 - add HImode displacement addressing support

2012-04-10 Thread Oleg Endo
On Tue, 2012-04-10 at 07:14 -0700, Nathan Froyd wrote:
 - Original Message -
   BTW, do you have the numbers of CSiBE with this?
  
  Only for -m4-single -ml -O2 -mpretend-cmove so far.
  Not so spectacular :T
  I'll also do a comparison of more variants to see if something went
  really bad.  It's a bit difficult to isolate the degradations because
  there's quite some code reordering happening after the patch...
 
 Are you looking at execution time or code size?  If the latter, you
 could try disabling scheduling for comparison purposes
 (-fno-schedule-insns -fno-schedule-insns2).  Even if you're looking at
 execution time, disabling scheduling might make it easier to see where
 things are going wrong in the patched code.

Code size only.  Thanks for the hint, will try it out.

Cheers,
Oleg



[SH] PR 50751 - add HImode displacement addressing support

2012-04-09 Thread Oleg Endo
Hello,

The attached patch adds HImode addressing support.
Tested against rev. 186243 with
sudo make -k check RUNTESTFLAGS=--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m2a-single/-mb,-m4/-ml,-m4/-mb,
-m4-single/-ml,-m4-single/-mb,-m4a-single/-ml,-m4a-single/-mb}

and no new failures.
Test cases will follow soon.

Cheers,
Oleg

ChangeLog:

PR target/50751
* config/sh/sh-protos.h (sh_legitimate_index_p): Add
new arguments consider_sh2a and allow_zero.
* config/sh/sh.c (sh_legitimate_index_p): Likewise.
(disp_addr_displacement): New function.
(sh_address_cost): Use disp_addr_displacement function instead
of DISP_ADDR_OFFSET.
(sh_legitimate_address_p): Adapt to changed
sh_legitimate_index_p declaration.
(sh_find_mov_disp_adjust): Remove HImode check.
(sh_secondary_reload): Add HImode case.  Use 
satisfies_constraint_Sdd, disp_addr_displacement and
max_mov_insn_displacement.
(max_mov_insn_displacement): Remove HImode check.
* config/sh/sh.h (CONST_OK_FOR_K04, CONST_OK_FOR_K12, 
DISP_ADDR_P, DISP_ADDR_OFFSET): Remove.
* config/sh/constraints.md (K05, K13): New constraints.
(K12): Correct comment.
(Sdd): Do not use DISP_ADDR_P macro.
(Snd): Use satisfies_constraint_Sdd.
(Sbw): Likewise.
* config/sh/sh.md (extendhisi2): Remove constraints from
expander.
(*extendhisi2_compact, movhi_i): Remove.
(*extendhisi2_compact_reg, *extendhisi2_compact_mem_disp,
*extendhisi2_compact_mem_disp, *extendhisi2_compact_snd,
*movhi_reg_reg, *movhi_store_mem_disp05,
*movhi_store_mem_disp13, *movhi_load_mem_disp, 
*movhi_load_mem_disp, *movhi): New insns.
(*extendqisi2_compact_mem_disp, *extendqisi2_compact_mem_disp,
*movqi_store_mem_disp04, *movqi_store_mem_disp12,
*movqi_load_mem_disp, *movqi_load_mem_disp): Use 
sh_legitimate_index_p instead of CONST_OK_FOR_Kxx.
Add new peepholes for HImode displacement addressing.
Index: gcc/config/sh/predicates.md
===
--- gcc/config/sh/predicates.md	(revision 186233)
+++ gcc/config/sh/predicates.md	(working copy)
@@ -404,7 +404,7 @@
   if (GET_CODE (x) == PLUS
 	   REG_P (XEXP (x, 0))
 	   CONST_INT_P (XEXP (x, 1)))
-	return sh_legitimate_index_p (mode, XEXP (x, 1));
+	return sh_legitimate_index_p (mode, XEXP (x, 1), TARGET_SH2A, false);
 }
 
   if (TARGET_SHMEDIA
@@ -466,7 +466,7 @@
   if (GET_CODE (x) == PLUS
 	   REG_P (XEXP (x, 0))
 	   CONST_INT_P (XEXP (x, 1)))
-	return sh_legitimate_index_p (mode, XEXP (x, 1));
+	return sh_legitimate_index_p (mode, XEXP (x, 1), TARGET_SH2A, false);
 }
 
   return general_operand (op, mode);
Index: gcc/config/sh/sh-protos.h
===
--- gcc/config/sh/sh-protos.h	(revision 186233)
+++ gcc/config/sh/sh-protos.h	(working copy)
@@ -56,7 +56,7 @@
 extern bool fp_zero_operand (rtx);
 extern bool fp_one_operand (rtx);
 extern rtx get_fpscr_rtx (void);
-extern bool sh_legitimate_index_p (enum machine_mode, rtx);
+extern bool sh_legitimate_index_p (enum machine_mode, rtx, bool, bool);
 extern bool sh_legitimize_reload_address (rtx *, enum machine_mode, int, int);
 extern rtx legitimize_pic_address (rtx, enum machine_mode, rtx);
 extern bool nonpic_symbol_mentioned_p (rtx);
Index: gcc/config/sh/sh.c
===
--- gcc/config/sh/sh.c	(revision 186233)
+++ gcc/config/sh/sh.c	(working copy)
@@ -304,6 +304,7 @@
 static int mov_insn_size (enum machine_mode, bool);
 static int max_mov_insn_displacement (enum machine_mode, bool);
 static int mov_insn_alignment_mask (enum machine_mode, bool);
+static HOST_WIDE_INT disp_addr_displacement (rtx);
 
 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
 
@@ -3160,11 +3161,6 @@
  scale the max. displacement value accordingly.  */
   const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
 
-  /* FIXME: HImode with displacement addressing is not supported yet.
- Make it purposefully fail for now.  */
-  if (mode == HImode)
-return 0;
-
   /* SH2A supports FPU move insns with 12 bit displacements.
  Other variants to do not support any kind of displacements for
  FPU move insns.  */
@@ -3194,15 +3190,24 @@
   return mov_insn_sz  0 ? (mov_insn_sz - 1) : 0;
 }
 
+/* Return the displacement value of a displacement address.  */
+
+static inline HOST_WIDE_INT
+disp_addr_displacement (rtx x)
+{
+  gcc_assert (satisfies_constraint_Sdd (x));
+  return INTVAL (XEXP (XEXP (x, 0), 1));
+}
+
 /* Compute the cost of an address.  */
 
 static int
 sh_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
 {
   /* 'reg + disp' addressing.  */
-  if (DISP_ADDR_P (x))
+  if (satisfies_constraint_Sdd (x))
 {
-  const HOST_WIDE_INT offset = 

Re: [SH] PR 50751 - rework displacement calculations

2012-03-27 Thread Kaz Kojima
Oleg Endo oleg.e...@t-online.de wrote:
 The attached patch generalizes the move insn displacement calculations a
 little bit.  Before, the same address rebasing code was present in
 sh_legitimize_address as well as sh_legitimize_reload_address.  I've
 pulled those out into a separate function as a preparation step for
 adding HImode displacement addressing support.
 
 Tested by doing 'make all' (c,c++), compiling newlib and by making sure
 that for the following variants the CSiBE set code size did not change:
 
 -m2a-single -mb -O2, -m2a-single -mb -Os,
 -m4a-single -mb -O2, -m4a-single -mb -Os,
 -m4a-single -ml -O2, -m4a-single -ml -Os,
 -m4-single -mb -O2, -m4-single -mb -Os,
 -m4-single -ml -O2, -m4-single -ml -Os
 
 I'm now also running the usual reg tests on sh-sim, just in case.
 Other than that, OK?

OK with removing the first hunk

 @@ -9663,10 +9663,12 @@
  {
if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
  return true;
 +
else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
   ! TARGET_SHMEDIA
   MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
  return true;
 +
else if (GET_CODE (x) == PLUS
   (mode != PSImode || reload_completed))
  {

which adds extra empty lines.

Regards,
kaz


[SH] PR 50751 - rework displacement calculations

2012-03-26 Thread Oleg Endo
Hi,

The attached patch generalizes the move insn displacement calculations a
little bit.  Before, the same address rebasing code was present in
sh_legitimize_address as well as sh_legitimize_reload_address.  I've
pulled those out into a separate function as a preparation step for
adding HImode displacement addressing support.

Tested by doing 'make all' (c,c++), compiling newlib and by making sure
that for the following variants the CSiBE set code size did not change:

-m2a-single -mb -O2, -m2a-single -mb -Os,
-m4a-single -mb -O2, -m4a-single -mb -Os,
-m4a-single -ml -O2, -m4a-single -ml -Os,
-m4-single -mb -O2, -m4-single -mb -Os,
-m4-single -ml -O2, -m4-single -ml -Os

I'm now also running the usual reg tests on sh-sim, just in case.
Other than that, OK?

Cheers,
Oleg


ChangeLog:

PR target/50751
* config/sh/sh.c (sh_legitimize_address,
sh_legitimize_reload_address):  Rearrange conditional logic.
Move displacement address calculations to ...
(sh_find_mov_disp_adjust): ... this new function.
Index: gcc/config/sh/sh.c
===
--- gcc/config/sh/sh.c	(revision 185768)
+++ gcc/config/sh/sh.c	(working copy)
@@ -9663,10 +9663,12 @@
 {
   if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
 return true;
+
   else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
 	! TARGET_SHMEDIA
 	MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
 return true;
+
   else if (GET_CODE (x) == PLUS
 	(mode != PSImode || reload_completed))
 {
@@ -9779,80 +9781,114 @@
   return orig;
 }
 
-/* Try machine-dependent ways of modifying an illegitimate address
-   to be legitimate.  If we find one, return the new, valid address.
-   Otherwise, return X.
+/* Given a (logical) mode size and an offset in bytes, try to find a the
+   appropriate displacement value for a mov insn.  On SH the displacements
+   are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
+   15 bytes in QImode.  To compensate this we create a new base address by
+   adding an adjustment value to it.
 
-   For the SH, if X is almost suitable for indexing, but the offset is
-   out of range, convert it into a normal form so that CSE has a chance
-   of reducing the number of address registers used.  */
+   If the originally requested offset is greater than 127 we prefer using
+   values 124..127 over 128..131 to increase opportunities to use the
+   add #imm, Rn insn.
 
+   In some cases it is possible that a requested offset might seem unaligned
+   or inappropriate for the mode size, like offset = 2 and mode size = 4.
+   This is compensated by adjusting the base address so that the effective
+   address of the displacement move insn will be aligned. 
+
+   This is not the best possible way of rebasing the base address, as it
+   does not look at other present displacement addressings around it.
+   In some cases this can create more base address adjustments than would
+   actually be necessary.  */
+
+struct disp_adjust
+{
+  rtx offset_adjust;
+  rtx mov_disp;
+  int max_mov_disp;
+};
+
+static struct disp_adjust
+sh_find_mov_disp_adjust (int mode_sz, HOST_WIDE_INT offset)
+{
+  struct disp_adjust res = { NULL_RTX, NULL_RTX, 0 };
+
+  /* The max. available mode for actual move insns is SImode.
+ Larger accesses will be split into multiple loads/stores.  */
+  const int max_mov_sz = GET_MODE_SIZE (SImode);
+
+  const int mov_insn_size = mode_sz = max_mov_sz ? max_mov_sz : mode_sz;
+  const HOST_WIDE_INT max_disp = 15 * mov_insn_size;
+  HOST_WIDE_INT align_modifier = offset  127 ? mov_insn_size : 0;
+
+  HOST_WIDE_INT offset_adjust;
+
+  /* In some cases this actually does happen and we must check for it.  */
+  if (mode_sz  1 || mode_sz  8)
+return res;
+
+  /* FIXME: HImode with displacement addressing is not supported yet.
+ Make it purposefully fail for now.  */
+  if (mov_insn_size == 2)
+return res;
+
+  /* Keeps the previous behavior for QImode displacement addressing.
+ This just decides how the offset is re-based.  Removing this special
+ case will result in slightly bigger code on average, but it's not that
+ bad actually.  */
+  if (mov_insn_size == 1)
+align_modifier = 0;
+
+  res.max_mov_disp = max_disp + mov_insn_size;
+
+  offset_adjust = ((offset + align_modifier)  ~max_disp) - align_modifier;
+
+  if (mode_sz + offset - offset_adjust = res.max_mov_disp)
+{
+  res.offset_adjust = GEN_INT (offset_adjust);
+  res.mov_disp = GEN_INT (offset - offset_adjust);
+}
+
+  return res;
+}
+
+/* Try to modify an illegitimate address and make it legitimate.
+   If we find one, return the new, valid address.
+   Otherwise, return the original address.  */
+
 static rtx
 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
 {
   if (flag_pic)
 x = legitimize_pic_address (oldx, mode, NULL_RTX);
 
-  if (GET_CODE (x) == PLUS
-   (GET_MODE_SIZE (mode) == 4
-	  

[SH] PR 50751 - some test cases

2012-03-20 Thread Oleg Endo
Hi,

The attached patch adds some test cases for PR 50751 to check whether
mov.b insns are generated.

Tested on sh-sim with 
make check-gcc RUNTESTFLAGS=sh.exp=pr50751* --target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a-single/-mb,-m4-single/-ml,
-m4-single/-mb,-m4a-single/-ml,-m4a-single/-mb}

to confirm that the tests pass as expected.

Cheers,
Oleg

testsuite/ChangeLog:

PR target/50751
* gcc/target/sh/pr50751-1.c: New.
* gcc/target/sh/pr50751-2.c: New.
* gcc/target/sh/pr50751-3.c: New.

Index: gcc/testsuite/gcc.target/sh/pr50751-1.c
===
--- gcc/testsuite/gcc.target/sh/pr50751-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr50751-1.c	(revision 0)
@@ -0,0 +1,30 @@
+/* Check that the mov.b displacement addressing insn is generated.
+   If the insn is generated as expected, there should be no address 
+   calculations outside the mov insns.  */
+/* { dg-do compile { target sh*-*-* } } */
+/* { dg-options -O1 } */
+/* { dg-skip-if  { sh*-*-* } { -m5*} {  } } */
+/* { dg-final { scan-assembler-not add|sub } } */
+
+void
+testfunc_00 (const char* ap, char* bp, char val)
+{
+  bp[0] = ap[15];
+  bp[2] = ap[5];
+  bp[9] = ap[7];
+  bp[0] = ap[15];
+  bp[4] = val;
+  bp[14] = val;
+}
+
+void
+testfunc_01 (volatile const char* ap, volatile char* bp, char val)
+{
+  bp[0] = ap[15];
+  bp[2] = ap[5];
+  bp[9] = ap[7];
+  bp[0] = ap[15];
+  bp[4] = val;
+  bp[14] = val;
+}
+
Index: gcc/testsuite/gcc.target/sh/pr50751-2.c
===
--- gcc/testsuite/gcc.target/sh/pr50751-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr50751-2.c	(revision 0)
@@ -0,0 +1,27 @@
+/* Check that the mov.b displacement addressing insn is generated and the 
+   base address is adjusted only once.  On SH2A this test is skipped because
+   there is a 4 byte mov.b insn that can handle larger displacements.  Thus
+   on SH2A the base address will not be adjusted in this case.  */
+/* { dg-do compile { target sh*-*-* } } */
+/* { dg-options -O1 } */
+/* { dg-skip-if  { sh*-*-* } { -m5* -m2a* } {  } } */
+/* { dg-final { scan-assembler-times add 2 } } */
+
+void
+testfunc_00 (const char* ap, char* bp)
+{
+  bp[0] = ap[15];
+  bp[2] = ap[5];
+  bp[9] = ap[7];
+  bp[0] = ap[25];
+}
+
+void
+testfunc_01 (volatile const char* ap, volatile char* bp)
+{
+  bp[0] = ap[15];
+  bp[2] = ap[5];
+  bp[9] = ap[7];
+  bp[0] = ap[25];
+}
+
Index: gcc/testsuite/gcc.target/sh/pr50751-3.c
===
--- gcc/testsuite/gcc.target/sh/pr50751-3.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr50751-3.c	(revision 0)
@@ -0,0 +1,26 @@
+/* Check that on SH2A the 4 byte mov.b displacement insn is generated to
+   handle larger displacements.  If it is generated correctly, there should
+   be no base address adjustments outside the mov.b insns.  */
+/* { dg-do compile { target sh*-*-* } } */
+/* { dg-options -O1 } */
+/* { dg-skip-if  { sh*-*-* } { * } { -m2a* } } */
+/* { dg-final { scan-assembler-not add|sub } } */
+
+void
+testfunc_00 (const char* ap, char* bp)
+{
+  bp[100] = ap[15];
+  bp[200] = ap[50];
+  bp[900] = ap[71];
+  bp[0] = ap[25];
+}
+
+void
+testfunc_01 (volatile const char* ap, volatile char* bp)
+{
+  bp[100] = ap[15];
+  bp[200] = ap[50];
+  bp[900] = ap[71];
+  bp[0] = ap[25];
+}
+


Re: [SH] PR 50751 - some test cases

2012-03-20 Thread Kaz Kojima
Oleg Endo oleg.e...@t-online.de wrote:
 The attached patch adds some test cases for PR 50751 to check whether
 mov.b insns are generated.

OK.

Regards,
kaz


[SH] PR 50751 - Add QImode displacement addressing

2012-03-19 Thread Oleg Endo
Hi, 

This is an updated version of the QImode displacement addressing patch
from the PR that applies to rev 185405.  Tested on sh-sim with no new
failures.

The issue as a whole requires some more work and I'd like to split it in
smaller incremental changes and separate patches.

Cheers,
Oleg

ChangeLog:

PR target/50751
* config/sh/sh.h (CONST_OK_FOR_K04, CONST_OK_FOR_K12,
DISP_ADDR_P, DISP_ADDR_OFFSET): New macros.
* config/sh/sh.c (sh_address_cost): Add SH2A special case.
(sh_legitimate_index_p): Allow QImode displacements for 
non-SH2A.
(sh_legitimize_address): Add QImode displacement handling.
(sh_cannot_change_mode_class): Disallow GENERAL_REGS for SFmode 
vector subregs.
(sh_secondary_reload): Add QImode displacement handling.
* config/sh/predicates.md (movsrc_no_disp_mem_operand): New 
predicate.
* config/sh/constraints.md (K04, Snd, Sdd): New constraints.
* config/sh/sh.md (extendqisi2): Remove constraints from 
expander.
(*extendqisi2_compact): Rename to *extendqisi2_compact_reg, 
restrict to register operands only.
(*extendqisi2_compact_mem_disp, *extendqisi2_compact_snd): New 
insns.
(extendqihi2): Change insn to expander.
(*extendqihi2_compact_reg): New insn.
(movqi_i, movqi): Replace with ... 
(movqi, *movqi_reg_reg, *movqi_store_mem_disp12,
*movqi_load_mem_disp, *movqi_load_mem_disp): ... these.
Add new peepholes for QImode displacement addressing.

Index: gcc/config/sh/predicates.md
===
--- gcc/config/sh/predicates.md	(revision 185405)
+++ gcc/config/sh/predicates.md	(working copy)
@@ -418,6 +418,30 @@
   return general_operand (op, mode);
 })
 
+;; Same as movsrc_operand, but rejects displacement addressing.
+
+(define_predicate movsrc_no_disp_mem_operand
+  (match_code subreg,reg,const_int,const_double,mem,symbol_ref,label_ref,const,const_vector)
+{
+  if (!general_movsrc_operand (op, mode))
+return 0;
+
+  if ((mode == QImode || mode == HImode)
+   mode == GET_MODE (op)
+   (MEM_P (op)
+	  || (GET_CODE (op) == SUBREG  MEM_P (SUBREG_REG (op)
+{
+  rtx x = XEXP ((MEM_P (op) ? op : SUBREG_REG (op)), 0);
+
+  if (GET_CODE (x) == PLUS
+	   REG_P (XEXP (x, 0))
+	   CONST_INT_P (XEXP (x, 1)))
+	return 0;
+}
+
+  return 1;
+})
+
 ;; Returns 1 if OP can be a destination of a move. Same as
 ;; general_operand, but no preinc allowed.
 
Index: gcc/config/sh/sh.c
===
--- gcc/config/sh/sh.c	(revision 185405)
+++ gcc/config/sh/sh.c	(working copy)
@@ -3137,6 +3137,11 @@
 sh_address_cost (rtx X,
 	 bool speed ATTRIBUTE_UNUSED)
 {
+  /*  SH2A supports 4 byte displacement mov insns with higher offsets.
+  Consider those as more expensive than 2 byte insns.  */
+  if (DISP_ADDR_P (X)  GET_MODE (X) == QImode)
+return DISP_ADDR_OFFSET (X)  16 ? 0 : 1;
+
   return (GET_CODE (X) == PLUS
 	   ! CONSTANT_P (XEXP (X, 1))
 	   ! TARGET_SHMEDIA ? 1 : 0);
@@ -9606,11 +9611,13 @@
 
   if (TARGET_SH2A)
 	{
-	  if (GET_MODE_SIZE (mode) == 1
-		 (unsigned) INTVAL (op)  4096)
+	  if (mode == QImode  (unsigned) INTVAL (op)  4096)
 	return true;
 	}
 
+  if (mode == QImode  (unsigned) INTVAL (op)  16)
+	return true;
+
   if ((GET_MODE_SIZE (mode) == 4
 	(unsigned) INTVAL (op)  64
 	!(INTVAL (op)  3)
@@ -9816,6 +9823,25 @@
 	}
 }
 
+  /* This could be generalized for SImode, HImode, QImode displacement
+ addressing.  */
+  if (mode == QImode  GET_CODE (x) == PLUS
+   BASE_REGISTER_RTX_P (XEXP (x, 0))  CONST_INT_P (XEXP (x, 1)))
+{
+  rtx index_rtx = XEXP (x, 1);
+  HOST_WIDE_INT offset = INTVAL (index_rtx);
+  HOST_WIDE_INT offset_base = offset  ~15;
+
+  if (offset - offset_base = 16)
+	{
+	  rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
+			  GEN_INT (offset_base), NULL_RTX, 0,
+			  OPTAB_LIB_WIDEN);
+
+	  return gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));
+	}
+}
+
   return x;
 }
 
@@ -11444,8 +11470,13 @@
 {
   /* We want to enable the use of SUBREGs as a means to
  VEC_SELECT a single element of a vector.  */
+
+  /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
+ This can be problematic when SFmode vector subregs need to be accessed
+ on the stack with displacement addressing, as it happens with -O0.
+ Thus we disallow the mode change for -O0.  */
   if (to == SFmode  VECTOR_MODE_P (from)  GET_MODE_INNER (from) == SFmode)
-return (reg_classes_intersect_p (GENERAL_REGS, rclass));
+return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
 
   if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
 {
@@ -11460,7 +11491,7 @@
 	return reg_classes_intersect_p (DF_HI_REGS, rclass);
 	}
 

Re: [SH] PR 50751 - Add QImode displacement addressing

2012-03-19 Thread Kaz Kojima
Oleg Endo oleg.e...@t-online.de wrote:
 This is an updated version of the QImode displacement addressing patch
 from the PR that applies to rev 185405.  Tested on sh-sim with no new
 failures.
 
 The issue as a whole requires some more work and I'd like to split it in
 smaller incremental changes and separate patches.

Either way, the patch is OK.

Regards,
kaz