I forgot to change the line mova coef2, [tab_coeff + 16] (I was only testing with coeffIdx 1). I will make it work for an arbitrary index, e.g. mova coef2, [tab_coeff + height * 16]. Please ignore this patch.
Regards, Praveen On Fri, Oct 11, 2013 at 10:20 PM, <[email protected]> wrote: > # HG changeset patch > # User Praveen Tiwari > # Date 1381510220 -19800 > # Node ID 5a9160e8b0bdc3117c2417bc29453077488efd8e > # Parent c6d89dc62e191f56f63dbcb1781a6494da50a70d > chroma 4XN block, coeffIdex insted of coeff pointer > > diff -r c6d89dc62e19 -r 5a9160e8b0bd source/common/x86/ipfilter8.asm > --- a/source/common/x86/ipfilter8.asm Fri Oct 11 01:47:53 2013 -0500 > +++ b/source/common/x86/ipfilter8.asm Fri Oct 11 22:20:20 2013 +0530 > @@ -26,107 +26,58 @@ > %include "x86inc.asm" > %include "x86util.asm" > > -%if ARCH_X86_64 == 0 > - > SECTION_RODATA 32 > -tab_leftmask: db -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 > - > tab_Tm: db 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 > - db 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 > > tab_c_512: times 8 dw 512 > > +tab_coeff: db 0, 64, 0, 0, 0, 64, 0, 0, 0, 64, 0, 0, 0, 64, 0, 0 > + db -2, 58, 10, -2, -2, 58, 10, -2, -2, 58, 10, -2, -2, 58, > 10, -2 > + db -4, 54, 16, -2, -4, 54, 16, -2, -4, 54, 16, -2, -4, 54, > 16, -2 > + db -6, 46, 28, -4, -6, 46, 28, -4, -6, 46, 28, -4, -6, 46, > 28, -4 > + db -4, 36, 36, -4, -4, 36, 36, -4, -4, 36, 36, -4, -4, 36, > 36, -4 > + db -4, 28, 46, -6, -4, 28, 46, -6, -4, 28, 46, -6, -4, 28, > 46, -6 > + db -2, 16, 54, -4, -2, 16, 54, -4, -2, 16, 54, -4, -2, 16, > 54, -4 > + db -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, > 58, -2 > + > SECTION .text > > -%macro FILTER_H4 3 > - movu %1, [src + col - 1] > - pshufb %2, %1, Tm4 > +%macro FILTER_H4_w4 3 > + movu %1, [srcq - 1] > + pshufb %2, %1, Tm0 > pmaddubsw %2, coef2 > - pshufb %1, %1, Tm5 > - pmaddubsw %1, coef2 > phaddw %2, %1 > pmulhrsw %2, %3 > packuswb %2, %2 > %endmacro > > +%macro FILTER_H4_w4_CALL 0 > + FILTER_H4_w4 x0, x1, x2 > + > + movd [dstq], x1 > + > + add srcq, srcstrideq > + add dstq, dststrideq > +%endmacro > + > > > ;----------------------------------------------------------------------------- > -; void 
filterHorizontal_p_p_4(pixel *src, intptr_t srcStride, pixel *dst, > intptr_t dstStride, int width, int height, short const *coeff) > +; void interp_4tap_horiz_pp_w4(pixel *src, intptr_t srcStride, pixel > *dst, intptr_t dstStride, int height, int coeffIdx) > > > ;----------------------------------------------------------------------------- > INIT_XMM sse4 > -cglobal filterHorizontal_p_p_4, 0, 7, 8 > -%define src r0 > -%define dst r1 > -%define row r2 > -%define col r3 > -%define width r4 > -%define widthleft r5 > -%define mask_offset r6 > -%define coef2 m7 > -%define x3 m6 > -%define Tm5 m5 > -%define Tm4 m4 > -%define x2 m3 > -%define x1 m2 > -%define x0 m1 > -%define leftmask m0 > -%define tmp r0 > -%define tmp1 r1 > - > - mov tmp, r6m > - movu coef2, [tmp] > - packsswb coef2, coef2 > - pshufd coef2, coef2, 0 > +cglobal interp_4tap_horiz_pp_w4, 6, 6, 5, src, srcstride, dst, dststride, > height, coeffIdx > +%define coef2 m4 > +%define Tm0 m3 > +%define x2 m2 > +%define x1 m1 > +%define x0 m0 > > - mova x3, [tab_c_512] > + mova coef2, [tab_coeff + 16] > + mova x2, [tab_c_512] > + mova Tm0, [tab_Tm] > > - mov width, r4m > - mov widthleft, width > - and width, ~7 > - and widthleft, 7 > - mov mask_offset, widthleft > - neg mask_offset > +.loop > +FILTER_H4_w4_CALL > +dec r4d > +jnz .loop > +RET > > - movq leftmask, [tab_leftmask + (7 + mask_offset)] > - mova Tm4, [tab_Tm] > - mova Tm5, [tab_Tm + 16] > - > - mov src, r0m > - mov dst, r2m > - mov row, r5m > - > -_loop_row: > - xor col, col > - > -_loop_col: > - FILTER_H4 x0, x1, x3 > - movh [dst + col], x1 > - > - add col, 8 > - > - cmp col, width > - jl _loop_col > - > -_end_col: > - test widthleft, widthleft > - jz _next_row > - > - movq x2, [dst + col] > - FILTER_H4 x0, x1, x3 > - pblendvb x2, x2, x1, leftmask > - movh [dst + col], x2 > - > -_next_row: > - add src, r1m > - add dst, r3m > - dec row > - > - test row, row > - jz _end_row > - > - jmp _loop_row > - > -_end_row: > - > - RET > - > -%endif ; ARCH_X86_64 == 
0 >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
