On 21/08/15 4:19 AM, Anton Khirnov wrote: > + > + add dstq, dststrideq > + add srcq, srcstrideq > + > +%assign i (i + 1) > +%endrep > + > + dec heightq
This and every other case should be heightd. There's no guarantee the high bits will be zero on every x86_64 target. This is the source of the crashes I was getting. > + jg .loop > + RET > +%endmacro > + > +INIT_XMM sse2 > +GET_PIXELS 4, 8, 1 > +GET_PIXELS 8, 8, 1 > +GET_PIXELS 12, 8, 3 > +GET_PIXELS 16, 8, 2 > +GET_PIXELS 24, 8, 3 > +GET_PIXELS 32, 8, 3 > +GET_PIXELS 48, 8, 3 > +GET_PIXELS 64, 8, 3 > + > +GET_PIXELS 4, 10, 1 > +GET_PIXELS 8, 10, 1 > +GET_PIXELS 12, 10, 3 > +GET_PIXELS 16, 10, 2 > +GET_PIXELS 24, 10, 3 > +GET_PIXELS 32, 10, 3 > +GET_PIXELS 48, 10, 3 > +GET_PIXELS 64, 10, 3 > + > +; hevc_qpel_h/v_<w>_8(int16_t *dst, ptrdiff_t dststride, > +; uint8_t *src, ptrdiff_t srcstride, > +; int height, int mx, int my, int *mcbuffer) > + > +; 8-bit qpel interpolation > +; %1: block width > +; %2: 0 - horizontal; 1 - vertical > +%macro QPEL_8 2 > +%if %2 > + %define postfix v > + %define mvfrac myq Same here and below the else, rename this to mvfracq and add a mvfracd. > + %define pixstride srcstrideq > + %define pixstride3 sstride3q > + %define src_m3 srcm3q > +%else > + %define postfix h > + %define mvfrac mxq > + %define pixstride 1 > + %define pixstride3 3 > + %define src_m3 (srcq - 3) > +%endif > + > +cglobal hevc_qpel_ %+ postfix %+ _ %+ %1 %+ _8, 8, 10, 7, dst, dststride, > src, srcstride, height, mx, my, sstride3, srcm3, coeffsreg > +%if %2 > + and mvfrac, 0x3 > +%endif > + dec mvfrac > + shl mvfrac, 4 Use mvfracd on these three; it will clear the high bits for the mova below. > + lea coeffsregq, [hevc_qpel_coeffs8] > + mova m0, [coeffsregq + mvfrac] Then use mvfracq here. Replicate this on every function, of course. 
> + > +%macro PUT_WEIGHTED_PRED 3 > +%if %1 > +cglobal hevc_put_weighted_pred_avg_ %+ %2 %+ _ %+ %3, 11, 11, 8, denom, > weight0, weight1, offset0, offset1, dst, dststride, src0, src1, srcstride, > height > +%else > +cglobal hevc_put_weighted_pred_ %+ %2 %+ _ %+ %3, 8, 8, 8, denom, weight0, > offset0, dst, dststride, src0, srcstride, height > +%endif > + and heightq, 0x7fffffff You should be able to remove this after the above changes. _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel