On Sun, 2 Dec 2012, Justin Ruggles wrote:

> +; NOTE: This is not bit-identical with the C version because it clips to
> +;       [-INT_MAX, INT_MAX] instead of [INT_MIN, INT_MAX]
> +
> +INIT_XMM ssse3

INIT_XMM ssse3, atom

> +cglobal scale_samples_s32, 4,4,8, dst, src, len, volume
> +    movd        m4, volumem
> +    pshufd      m4, m4, 0
> +    mova        m5, [pq_128]
> +    pxor        m6, m6
> +    lea       lenq, [lend*4-mmsize]
> +.loop:
> +    ; src[i] = av_clipl_int32((src[i] * volume + 128) >> 8);
> +    mova        m7, [srcq+lenq]
> +    pabsd       m3, m7
> +    pshufd      m0, m3, q0100
> +    pshufd      m1, m3, q0302
> +    pmuludq     m0, m4
> +    pmuludq     m1, m4
> +    paddq       m0, m5
> +    paddq       m1, m5
> +    psrlq       m0, 7
> +    psrlq       m1, 7
> +    shufps      m2, m0, m1, q3131
> +    shufps      m0, m0, m1, q2020
> +    pcmpgtd     m2, m6
> +    por         m0, m2
> +    psrld       m0, 1
> +    psignd      m0, m7
> +    mova  [dstq+lenq], m0
> +    sub       lenq, mmsize
> +    jge .loop
> +    REP_RET

> --- a/libavutil/x86/x86inc.asm
> +++ b/libavutil/x86/x86inc.asm
> @@ -957,6 +957,7 @@ AVX_INSTR cmpsd, 1, 0, 0
>  AVX_INSTR cmpss, 1, 0, 0
>  AVX_INSTR cvtdq2ps, 1, 0, 0
>  AVX_INSTR cvtps2dq, 1, 0, 0
> +AVX_INSTR cvtpd2dq, 1, 0, 0
>  AVX_INSTR divpd, 1, 0, 0
>  AVX_INSTR divps, 1, 0, 0
>  AVX_INSTR divsd, 1, 0, 0

Alphabetical order: cvtpd2dq sorts before cvtps2dq, so the new line should go
directly after cvtdq2ps.

--Loren Merritt
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to