On Sun, 2 Dec 2012, Justin Ruggles wrote:

> +; NOTE: This is not bit-identical with the C version because it clips to
> +; [-INT_MAX, INT_MAX] instead of [INT_MIN, INT_MAX]
> +
> +INIT_XMM ssse3
INIT_XMM ssse3, atom

> +cglobal scale_samples_s32, 4,4,8, dst, src, len, volume
> + movd m4, volumem
> + pshufd m4, m4, 0
> + mova m5, [pq_128]
> + pxor m6, m6
> + lea lenq, [lend*4-mmsize]
> +.loop:
> + ; src[i] = av_clipl_int32((src[i] * volume + 128) >> 8);
> + mova m7, [srcq+lenq]
> + pabsd m3, m7
> + pshufd m0, m3, q0100
> + pshufd m1, m3, q0302
> + pmuludq m0, m4
> + pmuludq m1, m4
> + paddq m0, m5
> + paddq m1, m5
> + psrlq m0, 7
> + psrlq m1, 7
> + shufps m2, m0, m1, q3131
> + shufps m0, m0, m1, q2020
> + pcmpgtd m2, m6
> + por m0, m2
> + psrld m0, 1
> + psignd m0, m7
> + mova [dstq+lenq], m0
> + sub lenq, mmsize
> + jge .loop
> + REP_RET

> --- a/libavutil/x86/x86inc.asm
> +++ b/libavutil/x86/x86inc.asm
> @@ -957,6 +957,7 @@ AVX_INSTR cmpsd, 1, 0, 0
> AVX_INSTR cmpss, 1, 0, 0
> AVX_INSTR cvtdq2ps, 1, 0, 0
> AVX_INSTR cvtps2dq, 1, 0, 0
> +AVX_INSTR cvtpd2dq, 1, 0, 0
> AVX_INSTR divpd, 1, 0, 0
> AVX_INSTR divps, 1, 0, 0
> AVX_INSTR divsd, 1, 0, 0

Alphabetical order

--Loren Merritt
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel
