On Fri, 30 Nov 2012, Christophe Gisquet wrote:

> 497 to 253 cycles under Win64.

The CPU is more relevant than the OS for cycle counts.

> +; r0q=Y   r1q=s_m   r2q=q_filt   r3q=noise  r4q=max_m
> +cglobal hf_apply_noise_main

You can invoke DEFINE_ARGS even if not generating a prologue.

> +  dec       r3q
> +  shl       r4q, 2
> +  lea       r0q, [r0q + 2*r4q]
> +  add       r1q, r4q
> +  add       r2q, r4q
> +  shl       r3q, 3
> +  xorps      m5, m5
> +  neg       r4q
> +.loop:
> +  add       r3q, 16
> +  and       r3q, 0x1ff<<3
> +  movh       m1, [r2q + r4q]
> +  movu       m3, [r3q + sbr_noise_table]
> +  movh       m2, [r2q + r4q + 8]
> +  add       r3q, 16
> +  and       r3q, 0x1ff<<3
> +  movu       m4, [r3q + sbr_noise_table]
> +  unpcklps   m1, m1
> +  unpcklps   m2, m2
> +  mulps      m1, m3 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
> +  mulps      m2, m4 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
> +  movh       m3, [r1q + r4q]
> +  movh       m4, [r1q + r4q + 8]

Can these be a single aligned load?

> +  unpcklps   m3, m3
> +  unpcklps   m4, m4
> +  mova       m6, m3
> +  mova       m7, m4
> +  mulps      m3, m0 ; s_m[m] * phi_sign
> +  mulps      m4, m0 ; s_m[m] * phi_sign
> +  cmpps      m6, m5, 0 ; m1 == 0
> +  cmpps      m7, m5, 0 ; m1 == 0

You mean m6 == 0 and m7 == 0 (not m1)?

> +  andps      m1, m6
> +  andps      m2, m7
> +  movu       m6, [r0q + 2*r4q]
> +  movu       m7, [r0q + 2*r4q + 16]
> +  addps      m6, m1
> +  addps      m7, m2
> +  addps      m6, m3
> +  addps      m7, m4
> +  movu    [r0q + 2*r4q], m6
> +  movu    [r0q + 2*r4q + 16], m7
> +  add       r4q, 16
> +  jl      .loop
> +  ret
> +
> +; sbr_hf_apply_noise_0(float (*Y)[2], const float *s_m,
> +;                      const float *q_filt, int noise,
> +;                      int kx, int m_max)
> +cglobal sbr_hf_apply_noise_0, 4,5,8, Y,s_m,q_filt,noise,kx,m_max
> +  mova       m0, [ps_noise0]
> +  mov       r4d, m_maxm
> +  call      hf_apply_noise_main
> +  RET

Use "TAIL_CALL hf_apply_noise_main, 1" instead of the call + RET pair.

--Loren Merritt
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to