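Oops, correction: the line should be mova coef2, [tab_coeff + coeffIdx * 16], i.e. the table is indexed by coeffIdx (each filter entry in tab_coeff is 16 bytes), not by height.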
On Fri, Oct 11, 2013 at 11:21 PM, Praveen Tiwari < [email protected]> wrote: > I have just missed to change the line mova coef2, > [tab_coeff + 16] (I was just testing for coeffIdex 1 ) I will make it for > random like mova coef2, [tab_coeff + height * 16]. Please > Ignore this. > > Regards, > Praveen > > > On Fri, Oct 11, 2013 at 10:20 PM, <[email protected]> wrote: > >> # HG changeset patch >> # User Praveen Tiwari >> # Date 1381510220 -19800 >> # Node ID 5a9160e8b0bdc3117c2417bc29453077488efd8e >> # Parent c6d89dc62e191f56f63dbcb1781a6494da50a70d >> chroma 4XN block, coeffIdex insted of coeff pointer >> >> diff -r c6d89dc62e19 -r 5a9160e8b0bd source/common/x86/ipfilter8.asm >> --- a/source/common/x86/ipfilter8.asm Fri Oct 11 01:47:53 2013 -0500 >> +++ b/source/common/x86/ipfilter8.asm Fri Oct 11 22:20:20 2013 +0530 >> @@ -26,107 +26,58 @@ >> %include "x86inc.asm" >> %include "x86util.asm" >> >> -%if ARCH_X86_64 == 0 >> - >> SECTION_RODATA 32 >> -tab_leftmask: db -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 >> - >> tab_Tm: db 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 >> - db 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 >> >> tab_c_512: times 8 dw 512 >> >> +tab_coeff: db 0, 64, 0, 0, 0, 64, 0, 0, 0, 64, 0, 0, 0, 64, 0, 0 >> + db -2, 58, 10, -2, -2, 58, 10, -2, -2, 58, 10, -2, -2, 58, >> 10, -2 >> + db -4, 54, 16, -2, -4, 54, 16, -2, -4, 54, 16, -2, -4, 54, >> 16, -2 >> + db -6, 46, 28, -4, -6, 46, 28, -4, -6, 46, 28, -4, -6, 46, >> 28, -4 >> + db -4, 36, 36, -4, -4, 36, 36, -4, -4, 36, 36, -4, -4, 36, >> 36, -4 >> + db -4, 28, 46, -6, -4, 28, 46, -6, -4, 28, 46, -6, -4, 28, >> 46, -6 >> + db -2, 16, 54, -4, -2, 16, 54, -4, -2, 16, 54, -4, -2, 16, >> 54, -4 >> + db -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, >> 58, -2 >> + >> SECTION .text >> >> -%macro FILTER_H4 3 >> - movu %1, [src + col - 1] >> - pshufb %2, %1, Tm4 >> +%macro FILTER_H4_w4 3 >> + movu %1, [srcq - 1] >> + pshufb %2, %1, Tm0 >> pmaddubsw %2, coef2 >> - pshufb %1, %1, 
Tm5 >> - pmaddubsw %1, coef2 >> phaddw %2, %1 >> pmulhrsw %2, %3 >> packuswb %2, %2 >> %endmacro >> >> +%macro FILTER_H4_w4_CALL 0 >> + FILTER_H4_w4 x0, x1, x2 >> + >> + movd [dstq], x1 >> + >> + add srcq, srcstrideq >> + add dstq, dststrideq >> +%endmacro >> + >> >> >> ;----------------------------------------------------------------------------- >> -; void filterHorizontal_p_p_4(pixel *src, intptr_t srcStride, pixel >> *dst, intptr_t dstStride, int width, int height, short const *coeff) >> +; void interp_4tap_horiz_pp_w4(pixel *src, intptr_t srcStride, pixel >> *dst, intptr_t dstStride, int height, int coeffIdx) >> >> >> ;----------------------------------------------------------------------------- >> INIT_XMM sse4 >> -cglobal filterHorizontal_p_p_4, 0, 7, 8 >> -%define src r0 >> -%define dst r1 >> -%define row r2 >> -%define col r3 >> -%define width r4 >> -%define widthleft r5 >> -%define mask_offset r6 >> -%define coef2 m7 >> -%define x3 m6 >> -%define Tm5 m5 >> -%define Tm4 m4 >> -%define x2 m3 >> -%define x1 m2 >> -%define x0 m1 >> -%define leftmask m0 >> -%define tmp r0 >> -%define tmp1 r1 >> - >> - mov tmp, r6m >> - movu coef2, [tmp] >> - packsswb coef2, coef2 >> - pshufd coef2, coef2, 0 >> +cglobal interp_4tap_horiz_pp_w4, 6, 6, 5, src, srcstride, dst, >> dststride, height, coeffIdx >> +%define coef2 m4 >> +%define Tm0 m3 >> +%define x2 m2 >> +%define x1 m1 >> +%define x0 m0 >> >> - mova x3, [tab_c_512] >> + mova coef2, [tab_coeff + 16] >> + mova x2, [tab_c_512] >> + mova Tm0, [tab_Tm] >> >> - mov width, r4m >> - mov widthleft, width >> - and width, ~7 >> - and widthleft, 7 >> - mov mask_offset, widthleft >> - neg mask_offset >> +.loop >> +FILTER_H4_w4_CALL >> +dec r4d >> +jnz .loop >> +RET >> >> - movq leftmask, [tab_leftmask + (7 + mask_offset)] >> - mova Tm4, [tab_Tm] >> - mova Tm5, [tab_Tm + 16] >> - >> - mov src, r0m >> - mov dst, r2m >> - mov row, r5m >> - >> -_loop_row: >> - xor col, col >> - >> -_loop_col: >> - FILTER_H4 x0, x1, x3 >> - movh [dst + 
col], x1 >> - >> - add col, 8 >> - >> - cmp col, width >> - jl _loop_col >> - >> -_end_col: >> - test widthleft, widthleft >> - jz _next_row >> - >> - movq x2, [dst + col] >> - FILTER_H4 x0, x1, x3 >> - pblendvb x2, x2, x1, leftmask >> - movh [dst + col], x2 >> - >> -_next_row: >> - add src, r1m >> - add dst, r3m >> - dec row >> - >> - test row, row >> - jz _end_row >> - >> - jmp _loop_row >> - >> -_end_row: >> - >> - RET >> - >> -%endif ; ARCH_X86_64 == 0 >> > >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
