On Penryn, from 255 to 174 cycles. Unrolling yields no gain.
---
 libavcodec/x86/sbrdsp.asm    |   21 +++++++++++++++++++++
 libavcodec/x86/sbrdsp_init.c |    2 ++
 2 files changed, 23 insertions(+), 0 deletions(-)

diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index 11a6faf..2b90100 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -24,6 +24,8 @@
 SECTION_RODATA
 ; mask equivalent for multiply by -1.0 1.0
 ps_mask         times 2 dd 1<<31, 0
+ps_mask2        times 2 dd 0, 1<<31 ; dwords {0, 1<<31, 0, 1<<31}: sign bit set on odd lanes only; not referenced in the visible part of this patch
+ps_neg          times 4 dd 1<<31 ; sign bit set in all four dwords; XORing a packed-float vector with it negates every lane
 
 SECTION_TEXT
 
@@ -203,3 +205,22 @@ cglobal sbr_sum64x5, 1,2,4,z
   cmp     zq, r1q
   jne  .loop
   REP_RET
+
+cglobal sbr_qmf_post_shuffle, 2,3,3,W,z ; for k = 0..31 writes W[k] = { -z[63-k], z[k] }, four pairs per iteration
+  lea       r2q, [zq + (64-4)*4] ; r2 -> z[60], the last group of four floats (descending read pointer)
+.loop:
+  mova       m0, [r2q] ; m0 = four floats from the tail of z (mova: presumably requires 16-byte aligned z - confirm)
+  mova       m1, [zq ] ; m1 = four floats from the head of z
+  xorps      m0, [ps_neg] ; flip the sign bit of every lane: negate the tail values
+  shufps     m0, m0, 0x1B ; 0x1B selects lanes 3,2,1,0: reverse the order of the negated tail values
+  mova       m2, m0 ; keep a copy, unpcklps below overwrites m0
+  unpcklps   m0, m1 ; m0 = { -tail[3], head[0], -tail[2], head[1] }
+  unpckhps   m2, m1 ; m2 = { -tail[1], head[2], -tail[0], head[3] }
+  mova  [Wq +  0], m0 ; store output pairs 0 and 1 of this iteration
+  mova  [Wq + 16], m2 ; store output pairs 2 and 3 of this iteration
+  add        Wq, 32 ; four output pairs = 32 bytes written
+  sub       r2q, 16 ; tail pointer moves back by four floats
+  add        zq, 16 ; head pointer moves forward by four floats
+  cmp        zq, r2q ; the pointers cross after 8 iterations (head at +128 bytes, tail at +112)
+  jl      .loop ; NOTE(review): signed compare on addresses; jb would be the unsigned form - confirm the buffer can never straddle the sign boundary on x86-32
+  REP_RET
diff --git a/libavcodec/x86/sbrdsp_init.c b/libavcodec/x86/sbrdsp_init.c
index 108a681..3f6dd97 100644
--- a/libavcodec/x86/sbrdsp_init.c
+++ b/libavcodec/x86/sbrdsp_init.c
@@ -31,6 +31,7 @@ void ff_sbr_hf_gen_sse(float (*X_high)[2], const float (*X_low)[2],
                        const float alpha0[2], const float alpha1[2],
                        float bw, int start, int end);
 void ff_sbr_sum64x5_sse(float *z);
+void ff_sbr_qmf_post_shuffle_sse(float W[32][2], const float *z); /* asm routine added to sbrdsp.asm by this patch */
 
 void ff_sbrdsp_init_x86(SBRDSPContext *s)
 {
@@ -41,5 +42,6 @@ void ff_sbrdsp_init_x86(SBRDSPContext *s)
         s->hf_g_filt  = ff_sbr_hf_g_filt_sse;
         s->hf_gen     = ff_sbr_hf_gen_sse;
         s->sum64x5    = ff_sbr_sum64x5_sse;
+        s->qmf_post_shuffle = ff_sbr_qmf_post_shuffle_sse;
     }
 }
-- 
1.7.7.msysgit.0

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to