Re: [PATCH] xtensa: add HW FPU sequences for DIV/SQRT/RECIP/RSQRT

2016-10-18 Thread Max Filippov
On Tue, Oct 18, 2016 at 11:22 AM, augustine.sterl...@gmail.com
 wrote:
> On Fri, Oct 14, 2016 at 12:14 PM, Max Filippov  wrote:
>>
>> Use new FPU instruction sequences documented in the ISA book to
>> implement __divsf3, __divdf3, __recipsf2, __recipdf2, __rsqrtsf2,
>> __rsqrtdf2 and __ieee754_sqrtf and __ieee754_sqrt.
>>
>> 2013-02-12  Ding-Kai Chen  
>> libgcc/
>> * config/xtensa/ieee754-df.S (__recipdf2, __rsqrtdf2,
>> __ieee754_sqrt): New functions.
>> (__divdf3): Add implementation with new FPU instructions under
>> #if XCHAL_HAVE_DFP_DIV.
>> * config/xtensa/ieee754-sf.S (__recipsf2, __rsqrtsf2,
>> __ieee754_sqrtf): New functions.
>> (__divsf3): Add implementation with new FPU instructions under
>> #if XCHAL_HAVE_FP_DIV.
>> * config/xtensa/t-xtensa (LIB1ASMFUNCS): Add _sqrtf, _recipsf2
>> _rsqrtsf2, _sqrt, _recipdf2 and _rsqrtdf2
>
>
> Approved, please apply.

Applied to trunk. Thank you!

-- Max


Re: [PATCH] xtensa: add HW FPU sequences for DIV/SQRT/RECIP/RSQRT

2016-10-18 Thread augustine.sterl...@gmail.com
On Fri, Oct 14, 2016 at 12:14 PM, Max Filippov  wrote:
>
> Use new FPU instruction sequences documented in the ISA book to
> implement __divsf3, __divdf3, __recipsf2, __recipdf2, __rsqrtsf2,
> __rsqrtdf2 and __ieee754_sqrtf and __ieee754_sqrt.
>
> 2013-02-12  Ding-Kai Chen  
> libgcc/
> * config/xtensa/ieee754-df.S (__recipdf2, __rsqrtdf2,
> __ieee754_sqrt): New functions.
> (__divdf3): Add implementation with new FPU instructions under
> #if XCHAL_HAVE_DFP_DIV.
> * config/xtensa/ieee754-sf.S (__recipsf2, __rsqrtsf2,
> __ieee754_sqrtf): New functions.
> (__divsf3): Add implementation with new FPU instructions under
> #if XCHAL_HAVE_FP_DIV.
> * config/xtensa/t-xtensa (LIB1ASMFUNCS): Add _sqrtf, _recipsf2
> _rsqrtsf2, _sqrt, _recipdf2 and _rsqrtdf2


Approved, please apply.


[PATCH] xtensa: add HW FPU sequences for DIV/SQRT/RECIP/RSQRT

2016-10-14 Thread Max Filippov
Use the new FPU instruction sequences documented in the ISA book to
implement __divsf3, __divdf3, __recipsf2, __recipdf2, __rsqrtsf2,
__rsqrtdf2, __ieee754_sqrtf and __ieee754_sqrt.

2013-02-12  Ding-Kai Chen  
libgcc/
* config/xtensa/ieee754-df.S (__recipdf2, __rsqrtdf2,
__ieee754_sqrt): New functions.
(__divdf3): Add implementation with new FPU instructions under
#if XCHAL_HAVE_DFP_DIV.
* config/xtensa/ieee754-sf.S (__recipsf2, __rsqrtsf2,
__ieee754_sqrtf): New functions.
(__divsf3): Add implementation with new FPU instructions under
#if XCHAL_HAVE_FP_DIV.
* config/xtensa/t-xtensa (LIB1ASMFUNCS): Add _sqrtf, _recipsf2,
_rsqrtsf2, _sqrt, _recipdf2 and _rsqrtdf2.
---
 libgcc/config/xtensa/ieee754-df.S | 179 +-
 libgcc/config/xtensa/ieee754-sf.S | 156 -
 libgcc/config/xtensa/t-xtensa |   4 +-
 3 files changed, 336 insertions(+), 3 deletions(-)

diff --git a/libgcc/config/xtensa/ieee754-df.S b/libgcc/config/xtensa/ieee754-df.S
index 1d9ef46..efb3c41 100644
--- a/libgcc/config/xtensa/ieee754-df.S
+++ b/libgcc/config/xtensa/ieee754-df.S
@@ -1217,8 +1217,59 @@ __muldf3:
 
 #ifdef L_divdf3
 
-   .literal_position
    /* Division */
+
+#if XCHAL_HAVE_DFP_DIV
+
+.text
+.align 4
+.global __divdf3
+.type  __divdf3, @function
+__divdf3:                       /* Double-precision divide using the HW FPU sequence from the ISA book. */
+   leaf_entry  sp, 16
+
+   wfrd        f1, xh, xl      /* f1 <- first operand (xh:xl); presumably the dividend -- confirm */
+   wfrd        f2, yh, yl      /* f2 <- second operand (yh:yl); presumably the divisor -- confirm */
+
+
+   div0.d      f3, f2          /* Sequence start. NOTE(review): order presumably must match the ISA book. */
+   nexp01.d    f4, f2
+   const.d     f0, 1
+   maddn.d     f0, f4, f3
+   const.d     f5, 0
+   mov.d       f7, f2
+   mkdadj.d    f7, f1          /* f7 is consumed only by addexpm.d/addexp.d at the end of the sequence */
+   maddn.d     f3, f0, f3
+   maddn.d     f5, f0, f0
+   nexp01.d    f1, f1
+   div0.d      f2, f2
+   maddn.d     f3, f5, f3
+   const.d     f5, 1
+   const.d     f0, 0
+   neg.d       f6, f1
+   maddn.d     f5, f4, f3
+   maddn.d     f0, f6, f2
+   maddn.d     f3, f5, f3
+   maddn.d     f6, f4, f0
+   const.d     f2, 1
+   maddn.d     f2, f4, f3
+   maddn.d     f0, f6, f3
+   neg.d       f1, f1
+   maddn.d     f3, f2, f3
+   maddn.d     f1, f4, f0
+   addexpm.d   f0, f7
+   addexp.d    f3, f7
+   divn.d      f0, f1, f3      /* Last step of the sequence; the result is left in f0. */
+
+   rfr         xl, f0          /* Return value: xl <- f0 (rfr; presumably the low word -- confirm) */
+   rfrd        xh, f0          /* Return value: xh <- f0 (rfrd; presumably the high word -- confirm) */
+
+   leaf_return
+
+#else
+
+   .literal_position
+
 __divdf3_aux:
 
    /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
@@ -1537,6 +1588,8 @@ __divdf3:
    movi    xl, 0
    leaf_return
 
+#endif /* XCHAL_HAVE_DFP_DIV */
+
 #endif /* L_divdf3 */
 
 #ifdef L_cmpdf2
@@ -2388,3 +2441,127 @@ __extendsfdf2:
 #endif /* L_extendsfdf2 */
 
 
+#if XCHAL_HAVE_DFP_SQRT
+#ifdef L_sqrt
+
+.text
+.align 4
+.global __ieee754_sqrt
+.type  __ieee754_sqrt, @function
+__ieee754_sqrt:                 /* Double-precision square root using the HW FPU sequence from the ISA book. */
+   leaf_entry  sp, 16
+
+   wfrd        f1, xh, xl      /* f1 <- the single operand, taken from the xh:xl register pair */
+
+   sqrt0.d     f2, f1          /* Sequence start. NOTE(review): order presumably must match the ISA book. */
+   const.d     f4, 0
+   maddn.d     f4, f2, f2
+   nexp01.d    f3, f1
+   const.d     f0, 3
+   addexp.d    f3, f0
+   maddn.d     f0, f4, f3
+   nexp01.d    f4, f1
+   maddn.d     f2, f0, f2
+   const.d     f5, 0
+   maddn.d     f5, f2, f3
+   const.d     f0, 3
+   maddn.d     f0, f5, f2
+   neg.d       f6, f4
+   maddn.d     f2, f0, f2
+   const.d     f0, 0
+   const.d     f5, 0
+   const.d     f7, 0
+   maddn.d     f0, f6, f2
+   maddn.d     f5, f2, f3
+   const.d     f3, 3
+   maddn.d     f7, f3, f2
+   maddn.d     f4, f0, f0
+   maddn.d     f3, f5, f2
+   neg.d       f2, f7
+   maddn.d     f0, f4, f2
+   maddn.d     f7, f3, f7
+   mksadj.d    f2, f1          /* f2 is consumed only by addexpm.d/addexp.d at the end of the sequence */
+   nexp01.d    f1, f1
+   maddn.d     f1, f0, f0
+   neg.d       f3, f7
+   addexpm.d   f0, f2
+   addexp.d    f3, f2
+   divn.d      f0, f1, f3      /* Last step of the sequence; the result is left in f0. */
+
+   rfr         xl, f0          /* Return value: xl <- f0 (rfr; presumably the low word -- confirm) */
+   rfrd        xh, f0          /* Return value: xh <- f0 (rfrd; presumably the high word -- confirm) */
+
+   leaf_return
+
+#endif /* L_sqrt */
+#endif /* XCHAL_HAVE_DFP_SQRT */
+
+#if XCHAL_HAVE_DFP_RECIP
+#ifdef L_recipdf2
+   /* Reciprocal */
+
+   .align  4
+   .global __recipdf2
+   .type   __recipdf2, @function
+__recipdf2:
+   leaf_entry  sp, 16
+
+   wfrdf1, xh, xl
+
+   recip0.df0, f1
+   const.d f2, 2
+   msub.d  f2, f1, f0
+   mul.d   f3, f1, f0
+   const.d f4, 2
+   mul.d   f5, f0, f2
+   msub.d  f4, f3, f2
+   const.d f2, 1
+   mul.d   f0, f5, f4
+   msub.d  f2, f1, f0
+   maddn.d