fold strchr (e, 0) to e + strlen (e)

2014-02-15 Thread Prathamesh Kulkarni
This patch folds strchr (e, 0) to e + strlen (e), if e has no side-effects.
Bootstrapped, regtested on x86_64-unknown-linux-gnu
Ok for trunk ?

[gcc]
* gcc/builtins.c (fold_builtin_strchr):  returns tree for s1 + strlen (s1)
if TREE_SIDE_EFFECTS (s1) is false and integer_zerop (s2) is true.

[gcc/testsuite]
* gcc.dg/strlenopt-5.c: modified dg-final
scan-tree-dump-times for strchr and strlen
* gcc.dg/strlenopt-7.c: Likewise
* gcc.dg/strlenopt-9.c: Likewise
* gcc.dg/strlenopt-20.c: Likewise
* gcc.dg/strlenopt-21.c: Likewise
* gcc.dg/strlenopt-22.c: Likewise
* gcc.dg/strlenopt-22g.c: Likewise
* gcc.dg/strlenopt-26.c: Likewise
* gcc.c-torture/execute/builtins/strchr.c: added test case

Thanks and Regards,
Prathamesh
Index: gcc/builtins.c
===
--- gcc/builtins.c	(revision 207700)
+++ gcc/builtins.c	(working copy)
@@ -11587,6 +11587,17 @@ fold_builtin_strchr (location_t loc, tre
 	  tem = fold_build_pointer_plus_hwi_loc (loc, s1, r - p1);
 	  return fold_convert_loc (loc, type, tem);
 	}
+  else if (integer_zerop (s2) && !TREE_SIDE_EFFECTS (s1))  // simplify strchr (s1, '\0') to s1 + strlen (s1)
+  {
+tree fn = builtin_decl_implicit (BUILT_IN_STRLEN);
+
+if (!fn)
+  return NULL_TREE;
+
+tree call_expr = build_call_expr_loc (loc, fn, 1, s1);
+tree ptr_plus_expr = build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (s1), s1, call_expr);
+return fold_convert_loc (loc, type, ptr_plus_expr);
+  }
   return NULL_TREE;
 }
 }
Index: gcc/testsuite/gcc.dg/strlenopt-20.c
===
--- gcc/testsuite/gcc.dg/strlenopt-20.c	(revision 207700)
+++ gcc/testsuite/gcc.dg/strlenopt-20.c	(working copy)
@@ -48,7 +48,7 @@ __attribute__((noinline, noclone)) char
 fn3 (char *p)
 {
   char *c;
-  /* The strcpy call can be optimized into memcpy, strchr needs to stay,
+  /* The strcpy call can be optimized into memcpy, strchr (p, '\0') is converted to p + strlen (p), 
  strcat is optimized into memcpy.  */
   strcpy (p, abc);
   p[3] = 'd';
@@ -86,10 +86,10 @@ main ()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times strlen \\( 1 strlen } } */
+/* { dg-final { scan-tree-dump-times strlen \\( 2 strlen } } */
 /* { dg-final { scan-tree-dump-times memcpy \\( 4 strlen } } */
 /* { dg-final { scan-tree-dump-times strcpy \\( 0 strlen } } */
 /* { dg-final { scan-tree-dump-times strcat \\( 0 strlen } } */
-/* { dg-final { scan-tree-dump-times strchr \\( 1 strlen } } */
+/* { dg-final { scan-tree-dump-times strchr \\( 0 strlen } } */
 /* { dg-final { scan-tree-dump-times stpcpy \\( 0 strlen } } */
 /* { dg-final { cleanup-tree-dump strlen } } */
Index: gcc/testsuite/gcc.dg/strlenopt-21.c
===
--- gcc/testsuite/gcc.dg/strlenopt-21.c	(revision 207700)
+++ gcc/testsuite/gcc.dg/strlenopt-21.c	(working copy)
@@ -10,6 +10,7 @@ foo (char *x, int n)
 {
   int i;
   char a[64];
+  /* strchr (x, '\0') is converted to x + strlen (x) */
   char *p = strchr (x, '\0');
   struct S s;
   /* strcpy here is optimized into memcpy, length computed as p - x + 1.  */
@@ -57,10 +58,10 @@ main ()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times strlen \\( 1 strlen } } */
+/* { dg-final { scan-tree-dump-times strlen \\( 2 strlen } } */
 /* { dg-final { scan-tree-dump-times memcpy \\( 3 strlen } } */
 /* { dg-final { scan-tree-dump-times strcpy \\( 0 strlen } } */
 /* { dg-final { scan-tree-dump-times strcat \\( 0 strlen } } */
-/* { dg-final { scan-tree-dump-times strchr \\( 1 strlen } } */
+/* { dg-final { scan-tree-dump-times strchr \\( 0 strlen } } */
 /* { dg-final { scan-tree-dump-times stpcpy \\( 0 strlen } } */
 /* { dg-final { cleanup-tree-dump strlen } } */
Index: gcc/testsuite/gcc.dg/strlenopt-22.c
===
--- gcc/testsuite/gcc.dg/strlenopt-22.c	(revision 207700)
+++ gcc/testsuite/gcc.dg/strlenopt-22.c	(working copy)
@@ -9,7 +9,7 @@ bar (char *p, char *q)
   size_t l1, l2, l3;
   char *r = strchr (p, '\0');
   strcpy (r, abcde);
-  char *s = strchr (r, '\0');
+  char *s = strchr (r, '\0');  /* strchr (r, '\0') is converted to r + strlen (r) */
   strcpy (s, q);
   l1 = strlen (p);
   l2 = strlen (r);
@@ -31,10 +31,10 @@ main ()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times strlen \\( 3 strlen } } */
+/* { dg-final { scan-tree-dump-times strlen \\( 4 strlen } } */
 /* { dg-final { scan-tree-dump-times memcpy \\( 1 strlen } } */
 /* { dg-final { scan-tree-dump-times strcpy \\( 1 strlen } } */
 /* { dg-final { scan-tree-dump-times strcat \\( 0 strlen } } */
-/* { dg-final { scan-tree-dump-times strchr \\( 1 strlen } } */
-/* { dg-final { scan-tree-dump-times stpcpy \\( 0 strlen } } *
+/* { dg-final { scan-tree-dump-times strchr \\( 0 strlen } } */
+/* { dg-final { scan-tree-dump-times stpcpy \\( 0 strlen } } */
 /* { dg-final { cleanup-tree-dump strlen } } */
Index: 

Re: fold strchr (e, 0) to e + strlen (e)

2014-02-15 Thread Jakub Jelinek
On Sat, Feb 15, 2014 at 02:23:24PM +0530, Prathamesh Kulkarni wrote:
 This patch folds strchr (e, 0) to e + strlen (e), if e has no side-effects.
 Bootstrapped, regtested on x86_64-unknown-linux-gnu
 Ok for trunk ?

Why do you think it is a good idea?  It is often very much the opposite.

Jakub


Re: fold strchr (e, 0) to e + strlen (e)

2014-02-15 Thread Prathamesh Kulkarni
On Sat, Feb 15, 2014 at 2:28 PM, Jakub Jelinek ja...@redhat.com wrote:
 On Sat, Feb 15, 2014 at 02:23:24PM +0530, Prathamesh Kulkarni wrote:
 This patch folds strchr (e, 0) to e + strlen (e), if e has no side-effects.
 Bootstrapped, regtested on x86_64-unknown-linux-gnu
 Ok for trunk ?

 Why do you think it is a good idea?  It is often very much the opposite.
I may be completely wrong, but since strchr (p, 0) compares each
character of p with c until '\0' is found, I thought it would be
faster to call strlen, since strlen would just skip over characters up to '\0'.
Also, I saw this committed in llvm trunk recently, and thought it
might be a good idea:
http://llvm-reviews.chandlerc.com/rL200736


 Jakub


Re: fold strchr (e, 0) to e + strlen (e)

2014-02-15 Thread Richard Biener
On Sat, Feb 15, 2014 at 10:45 AM, Prathamesh Kulkarni
bilbotheelffri...@gmail.com wrote:
 On Sat, Feb 15, 2014 at 2:28 PM, Jakub Jelinek ja...@redhat.com wrote:
 On Sat, Feb 15, 2014 at 02:23:24PM +0530, Prathamesh Kulkarni wrote:
 This patch folds strchr (e, 0) to e + strlen (e), if e has no side-effects.
 Bootstrapped, regtested on x86_64-unknown-linux-gnu
 Ok for trunk ?

 Why do you think it is a good idea?  It is often very much the opposite.
 I maybe completely wrong, but since  strchr(p, 0), matches each
 character of p with c until '\0' is found, I thought it would be
 faster to call strlen, since strlen would just skip over characters upto '\0'.
 Also, I saw this committed in llvm trunk recently, and thought it
 might be a good idea:
 http://llvm-reviews.chandlerc.com/rL200736

If it ends up being a good idea then please add this transform to
tree-ssa-strlen.c,
not to GENERIC builtins folding.

Richard.


 Jakub


RE: [PATCH] Fix Cilk+ ICEs in the alias oracle

2014-02-15 Thread Richard Biener
On Fri, 14 Feb 2014, Iyer, Balaji V wrote:

 
 
  -Original Message-
  From: Jeff Law [mailto:l...@redhat.com]
  Sent: Friday, February 14, 2014 12:34 PM
  To: Richard Biener; gcc-patches@gcc.gnu.org
  Cc: Iyer, Balaji V
  Subject: Re: [PATCH] Fix Cilk+ ICEs in the alias oracle
  
  On 02/13/14 05:47, Richard Biener wrote:
   On Thu, 13 Feb 2014, Richard Biener wrote:
  
  
   Cilk+ builds INDIRECT_REFs when expanding builtins (oops) and thus
   those can leak into MEM_EXPRs which will lead to ICEs later.
   The following patch properly builds a MEM_REF instead.  Grepping for
   INDIRECT_REF I found another suspicious use (just removed, it cannot
   have triggered and it looks bogus) and the use of a langhook instead
   of proper GIMPLE interfaces (function also used during expansion).
  
   Bootstrap / testing in progress together with some other stuff.
  
   Ok?
  
   Btw, this exposes that Cilk+ is LTO-ignorant - it doesn't properly
   register its global trees (bah, more global trees...).  So the
   types_compatible_p call ICEs.  Trying to process them in
   lto/lto.c:read_cgraph_and_symbols doesn't seem to work though.
  
   So I'm opting to remove the assert and leave fixing LTO for somebody
   who cares about Cilk+.
  
   Simplifies the patch as follows, bootstrapped and tested on
   x86_64-unknown-linux-gnu.
  
   Richard.
  
   2014-02-13  Richard Biener  rguent...@suse.de
  
 * cilk-common.c (cilk_arrow): Build a MEM_REF, not an
  INDIRECT_REF.
 (get_frame_arg): Drop the assert with langhook
  types_compatible_p.
 Do not strip INDIRECT_REFs.
  FWIW, I see a recurring issue here.  Specifically I'm regularly seeing
  cases where submissions are not playing well with LTO.   Speaking
  strictly for myself, I'm not LTO-aware enough to spot them in patches as 
  they
  fly by.
 
 I thought I had handled LTO correctly. I apologize if I made a mistake. 
 I assure you that it was not deliberate. I even had my tests use -flto 
 flags to make sure it is going through it correctly...

By using the langhook types_compatible_p you by-passed the failure
on LTO (because that langhook is not implemented there).

As it's only builtins expansion the mismatches don't really matter.

Richard.


Re: [Patch, fortran] PR 59599 ICE on intrinsic ichar

2014-02-15 Thread Richard Biener
On Fri, 14 Feb 2014, Mikael Morin wrote:

 Hello,
 
 this bug is not a regression, but the patch shouldn't wreck the compiler
 too much on the other hand.
 The problem is a wrong number of arguments while generating code for the
 ichar intrinsic.  The correct number is 2 without the kind argument and
 3 with it.
 The attached patch uses the gfc_intrinsic_argument_list_length function
 like it's done for other intrinsics.
 
 Regression tested on x86_64-unknown-linux-gnu. OK for trunk/4.8/4.7?

Generally wrong-code non-regression fixes for Fortran are fine
if Fortran maintainers think so.

Richard.


Re: fold strchr (e, 0) to e + strlen (e)

2014-02-15 Thread Ondřej Bílka
On Sat, Feb 15, 2014 at 10:50:02AM +0100, Richard Biener wrote:
 On Sat, Feb 15, 2014 at 10:45 AM, Prathamesh Kulkarni
 bilbotheelffri...@gmail.com wrote:
  On Sat, Feb 15, 2014 at 2:28 PM, Jakub Jelinek ja...@redhat.com wrote:
  On Sat, Feb 15, 2014 at 02:23:24PM +0530, Prathamesh Kulkarni wrote:
  This patch folds strchr (e, 0) to e + strlen (e), if e has no 
  side-effects.
  Bootstrapped, regtested on x86_64-unknown-linux-gnu
  Ok for trunk ?
 
  Why do you think it is a good idea?  It is often very much the opposite.
  I maybe completely wrong, but since  strchr(p, 0), matches each
  character of p with c until '\0' is found, I thought it would be
  faster to call strlen, since strlen would just skip over characters upto 
  '\0'.
  Also, I saw this committed in llvm trunk recently, and thought it
  might be a good idea:
  http://llvm-reviews.chandlerc.com/rL200736
 
 If it ends up being a good idea then please add this transform to
 tree-ssa-strlen.c,
 not to GENERIC builtins folding.
 
No, if that transform is a good idea it should be added to the headers. Also,
this will not match anything, as with -O1 and higher it is already
transformed to rawmemchr (x, 0), as in the program below.

#include <string.h>
char *
foo (char *x)
{
   return strchr (x, 0);
}



Re: [Patch, fortran] PR 59599 ICE on intrinsic ichar

2014-02-15 Thread Mikael Morin
Le 15/02/2014 11:04, Richard Biener a écrit :
 On Fri, 14 Feb 2014, Mikael Morin wrote:
 
 Hello,

 this bug is not a regression, but the patch shouldn't wreck the compiler
 too much on the other hand.
 The problem is a wrong number of arguments while generating code for the
 ichar intrinsic.  The correct number is 2 without the kind argument and
 3 with it.
 The attached patch uses the gfc_intrinsic_argument_list_length function
 like it's done for other intrinsics.

 Regression tested on x86_64-unknown-linux-gnu. OK for trunk/4.8/4.7?
 
 Generally wrong-code non-regression fixes for Fortran are fine
 if Fortran maintainers think so.
 
Technically it's an ICE, not a wrong-code; but I bet it would be a
wrong-code if the scalarizer didn't notice a problem.
I'll proceed with the committal. Thanks for the review(s).

Mikael


Re: fold strchr (e, 0) to e + strlen (e)

2014-02-15 Thread Jakub Jelinek
On Sat, Feb 15, 2014 at 12:15:22PM +0100, Ondřej Bílka wrote:
 On Sat, Feb 15, 2014 at 10:50:02AM +0100, Richard Biener wrote:
  On Sat, Feb 15, 2014 at 10:45 AM, Prathamesh Kulkarni
  bilbotheelffri...@gmail.com wrote:
   On Sat, Feb 15, 2014 at 2:28 PM, Jakub Jelinek ja...@redhat.com wrote:
   On Sat, Feb 15, 2014 at 02:23:24PM +0530, Prathamesh Kulkarni wrote:
   This patch folds strchr (e, 0) to e + strlen (e), if e has no 
   side-effects.
   Bootstrapped, regtested on x86_64-unknown-linux-gnu
   Ok for trunk ?
  
   Why do you think it is a good idea?  It is often very much the opposite.
   I maybe completely wrong, but since  strchr(p, 0), matches each
   character of p with c until '\0' is found, I thought it would be
   faster to call strlen, since strlen would just skip over characters upto 
   '\0'.
   Also, I saw this committed in llvm trunk recently, and thought it
   might be a good idea:
   http://llvm-reviews.chandlerc.com/rL200736
  
  If it ends up being a good idea then please add this transform to
  tree-ssa-strlen.c,
  not to GENERIC builtins folding.
  
 No, if that transform is good idea it should be added to headers. Also
 this will not match anything as with O1 and higher it already is
 transformed to rawmemchr(x, 0) as in program below.
 
 #include string.h
 char *
 foo (char *x)
 {
return strchr (x, 0);
 }

Of course rawmemchr(x, 0) is a much better implementation of strchr(x, 0) than
x + strlen(x).  The reason why gcc doesn't transform it that way is because
rawmemchr is just a GNU extension, not part of C or POSIX.

Jakub


Re: [PATCH RFC] MIPS add support for MIPS SIMD ARCHITECTURE V1.07

2014-02-15 Thread Richard Sandiford
Graham Stott graham.st...@imgtec.com writes:
 +(define_constraint YC
 +  @internal
 +   A constant vector with each element is a unsigned bitimm-bit integer with 
 only one bit set

Maybe:

  A replicated vector constant in which the replicated value has a single
  bit set

Likewise YZ and clear bits.

 +(define_constraint Y5
 +  @internal
 +   A constant vector with each element is a signed 6-bit integer
 +  (and (match_code const_vector)
 +   (match_test mips_const_vector_any_int_p (op, mode, -32, 31

Maybe use Usv6.

  A replicated vector constant in which the replicated value is a signed
  6-bit number.

 +(define_constraint Y6
 +  @internal
 +   A constant vector with each element a unsigned 6-bit integer
 +  (and (match_code const_vector)
 +   (match_test mips_const_vector_any_int_p (op, mode, 0, 31

Similarly here for Uuv6.  Upper bound should be 63 for a 6-bit integer.
Would be good to have a test for that.

 +(define_constraint Y8
 +  @internal
 +   A constant vector with each element a unsigned 0-bit integer
 +  (and (match_code const_vector)
 +   (match_test mips_const_vector_any_int_p (op, mode, 0, 255

Similarly here for Uuv8.

 @@ -127,3 +351,4 @@
  DEF_MIPS_FTYPE (1, (VOID, USI))
  DEF_MIPS_FTYPE (2, (VOID, V2HI, V2HI))
  DEF_MIPS_FTYPE (2, (VOID, V4QI, V4QI))
 +

No newline here.

 +(define_c_enum unspec [
 +UNSPEC_MSA_ADDVI
 +UNSPEC_MSA_ANDI_B
 +UNSPEC_MSA_ASUB_S
 +  UNSPEC_MSA_ASUB_U
 +  UNSPEC_MSA_AVE_S
 +  UNSPEC_MSA_AVE_U

Formatting (second is right).

 +(define_mode_iterator MODE128_2 [V2DF V4SF V2DI V4SI V8HI V16QI])
 +(define_mode_iterator IMODE128 [V2DI V4SI V8HI V16QI])

These two aren't used and I can't see where MODE128_2 would come in useful.
Let's drop these for now.

 +(define_mode_attr VHALFMODE 
 +  [(V8HI V16QI)
 +   (V4SI V8HI)
 +   (V2SI V4SI)
 +   (V2DI V4SI)
 +   (V2DF V4SF)])
 +
 +;; This attribute gives the integer mode for selection mask in vec_perm.
 +;; vcond also uses MSA_I for operand 0, 1, and 2.
 +(define_mode_attr MSA_I
 +  [(V2DF V2DI)
 +   (V4SF V4SI)
 +   (V2DI V2DI)
 +   (V4SI V4SI)
 +   (V8HI V8HI)
 +   (V16QI V16QI)])
 +
 +;; The attribute give the integer vector mode with same size
 +(define_mode_attr MODE_I
 +  [(V2DF V2DI)
 +   (V4SF V4SI)
 +   (V2DI V2DI)
 +   (V4SI V4SI)
 +   (V8HI V8HI)
 +   (V16QI V16QI)])

Let's call this VIMODE for consistency with both IMODE in mips.md
and the HALFMODE/VHALFMODE pair.  VIMODE can be used in place of MSA_I;
no need for both.

 +;; This attribute qives suffix gives the mode of the result for copy_s_b, 
 copy_u_b etc.
 +(define_mode_attr RES
 +  [(V2DF DF)
 +   (V4SF SF)
 +   (V2DI DI)
 +   (V4SI SI)
 +   (V8HI SI)
 +   (V16QI SI)])

Why do we need to promote sub-SI values to SI for this?  I'd prefer
that we use the correct mode (i.e. UNITMODE) instead.

 +;; This is used in msa_cast* to output mov.s or mov.d.
 +(define_mode_attr msafmt2
 +  [(V2DF d)
 +   (V4SF s)])

Not really an MSA format.  Maybe unitfmt?

 +;; This attribute qives define_insn suffix for MSA instructions 
 +;; with need distinction between integer and floating point.
 +(define_mode_attr msafmt3
 +  [(V2DF d_f)
 +   (V4SF w_f)
 +   (V2DI d)
 +   (V4SI w)
 +   (V8HI h)
 +   (V16QI b)])

msafmt_f might be more mnemonic than msafmt3.

 +;; The maximum index inside a vector.
 +(define_mode_attr max_elem_index
 +  [(V2DF 1)
 +   (V4SF 3)
 +   (V2DI 1)
 +   (V4SI 3)
 +   (V8HI 7)
 +   (V16QI 15)])

In the asserts where this is used it could just be
GET_MODE_NUNITS (MODEmode)

 +;; This is used to form an immediate operand constraint 
 +;; using const_imm_operand.
 +(define_mode_attr imm
 +  [(V2DF 0_or_1)
 +   (V4SF 0_to_3)
 +   (V2DI 0_or_1)
 +   (V4SI 0_to_3)
 +   (V8HI uimm3)
 +   (V16QI uimm4)])

Maybe indeximm rather than imm, for consistency with bitimm?

 +;; This attribute is used to form the MODE for reg_or_0_operand
 +;; constraint.
 +(define_mode_attr REGOR0
 +  [(V2DF DF)
 +   (V4SF SF)
 +   (V2DI DI)
 +   (V4SI SI)
 +   (V8HI SI)
 +   (V16QI SI)])

Same as RES, and same comment.

 +(define_expand vec_extractmode
 +  [(match_operand:UNITMODE 0 register_operand)
 +   (match_operand:IMSA 1 register_operand)
 +   (match_operand 2 const_int_operand)]
 +  ISA_HAS_MSA
 +{
 +  gcc_assert (UINTVAL (operands[2]) = max_elem_index);
 +  enum machine_mode mode0 = GET_MODE (operands[0]);
 +  if (mode0 == QImode || mode0 == HImode)
 +emit_move_insn (operands[0],
 + gen_lowpart (mode0, gen_reg_rtx (SImode)));
 +  else
 +emit_insn (gen_msa_copy_s_msafmt (operands[0], operands[1], 
 operands[2]));
 +  DONE;
 +})

The QImode/HImode case isn't right -- the source of the move is an
uninitialised register.  Please make sure there's a testcase for this.

You should be able to use UNITMODEmode instead of mode0.

 +(define_expand vec_extractmode
 +  [(match_operand:UNITMODE 0 register_operand)
 +   (match_operand:FMSA 1 register_operand)
 +   (match_operand 2 const_int_operand)]
 +  ISA_HAS_MSA
 +{
 +  rtx temp;

[PATCH] Fixing SEH exceptions for languages != C++

2014-02-15 Thread Jonathan Schleifer
Hi!

The following patch fixes a bug in SEH exception handling that made it
crash with ObjC (and most likely other languages as well). The problem
is that the SEH exception handler always passes the unwind exception as
4th parameter to RtlUnwindEx, which RtlUnwindEx then later passes to
the landing pad as argument. This works for C++, as libstdc++ sets data
register 0 to the unwind exception anyway, but it crashes for ObjC as
the landing pad expects the thrown object to be in data register 0. The
solution is of course to fix the SEH wrapper to get the value that was
set for data register 0 using _Unwind_SetGR and pass that to
RtlUnwindEx, so that later on the correct value is passed to the
landing pad.

The patch was tested for C++ and ObjC, the latter with both the GNU
libobjc runtime and my own. (With -O0, it still crashed and complained
about invalid frames, but that is another issue.)

I don't think this patch needs transfer of copyright, as it is small
enough, so would it be possible to please include that in GCC 4.8.3?
This would finally make ObjC usable on Windows again - and most likely
other languages using exceptions as well.

Thanks!

PS: Please CC me as I'm not on the list!

-- 
Jonathan
--- libgcc/unwind-seh.c.orig	2014-02-15 17:01:59.012396423 +0100
+++ libgcc/unwind-seh.c	2014-02-15 17:03:54.064755427 +0100
@@ -313,8 +313,9 @@
 	  ms_exc-ExceptionInformation[3] = gcc_context.reg[1];
 
 	  /* Begin phase 2.  Perform the unwinding.  */
-	  RtlUnwindEx (this_frame, gcc_context.ra, ms_exc, gcc_exc,
-		   ms_orig_context, ms_disp-HistoryTable);
+	  RtlUnwindEx (this_frame, gcc_context.ra, ms_exc,
+		   (PVOID)gcc_context.reg[0], ms_orig_context,
+		   ms_disp-HistoryTable);
 	}
 
   /* In _Unwind_RaiseException we return _URC_FATAL_PHASE1_ERROR.  */


Re: [PATCH][ARM] add HFmode to arm_preferred_simd_mode

2014-02-15 Thread Kugan


On 14/02/14 14:34, Kugan wrote:
 
 
 On 14/02/14 11:24, Andrew Pinski wrote:
 On Thu, Feb 13, 2014 at 4:15 PM, Kugan
 kugan.vivekanandara...@linaro.org wrote:
 Hi,

 Is there any reason why HFmode is not there in arm_preferred_simd_mode?
 NEON does support this.

 Most likely because there is no support for Half-float in the vectorizer.

 
 I can see that get_vectype_for_scalar_type_and_size failing while
 building vector type (with build_vector_type) for Half-float. I guess we
 should add support there first.

Just for the record, I was wrong here. ARM NEON, where it supports half float,
does not support vector arithmetic operations in half float; it only supports
vector conversions to float and operates on 32-bit values. Therefore HFmode is
not really a preferred mode.

Also, the GCC vectorizer can support half floats. That is not valid in
this case, and hence the ARM backend does not provide the required patterns and
hooks.

Thanks,
Kugan