Replaced in both the C and asm code; fix sent.

Regards,
Praveen Tiwari


On Wed, Nov 20, 2013 at 6:11 PM, chen <[email protected]> wrote:

>
> >+;-----------------------------------------------------------------------------
>
> >+; void pixel_add_ps_%1x%2(pixel *dest, int destride, pixel *src0, int16_t 
> >*scr1, int srcStride0, int srcStride1)
>
> >+;-----------------------------------------------------------------------------
> Using the intptr_t type for the strides would be better.
>
> >+%macro PIXEL_ADD_PS_W4_H4 2
> >+INIT_XMM sse4
>
> >+cglobal pixel_add_ps_%1x%2, 6, 7, 2, dest, destride, src0, scr1, 
> >srcStride0, srcStride1
> >+
> >+add         r5,            r5
> >+
> >+mov         r6d,           %2/4
> >+
> >+.loop
> >+      movd        m0,            [r2]
> >+      pmovzxbw    m0,            m0
> >+      movh        m1,            [r3]
> >+
> >+      paddw       m0,            m1
> >+      packuswb    m0,            m0
> >+
> >+      movd        [r0],          m0
> >+
> >+      movd        m0,            [r2 + r4]
> >+      pmovzxbw    m0,            m0
> >+      movh        m1,            [r3 + r5]
> >+
> >+      paddw       m0,            m1
> >+      packuswb    m0,            m0
> >+
> >+      movd        [r0 + r1],     m0
> >+
> >+      movd        m0,            [r2 + 2 * r4]
> >+      pmovzxbw    m0,            m0
> >+      movh        m1,            [r3 + 2 * r5]
> >+
> >+      paddw       m0,            m1
> >+      packuswb    m0,            m0
> >+
> >+      movd        [r0 + 2 * r1], m0
> >+
> >+      lea         r0,            [r0 + 2 * r1]
> >+      lea         r2,            [r2 + 2 * r4]
> >+      lea         r3,            [r3 + 2 * r5]
> >+
> >+      movd        m0,            [r2 + r4]
> >+      pmovzxbw    m0,            m0
> >+      movh        m1,            [r3 + r5]
> >+
> >+      paddw       m0,            m1
> >+      packuswb    m0,            m0
> >+
> >+      movd        [r0 + r1],     m0
> >+
> >+      lea         r0,            [r0 + 2 * r1]
> >+      lea         r2,            [r2 + 2 * r4]
> >+      lea         r3,            [r3 + 2 * r5]
> >+
> >+      dec         r6d
> >+      jnz         .loop
> >+
> >+RET
> >+%endmacro
> >+
> >+PIXEL_ADD_PS_W4_H4   4,  8
> >+PIXEL_ADD_PS_W4_H4   4, 16
> >_______________________________________________
> >x265-devel mailing list
> >[email protected]
> >https://mailman.videolan.org/listinfo/x265-devel
>
> _______________________________________________
> x265-devel mailing list
> [email protected]
> https://mailman.videolan.org/listinfo/x265-devel
>
>
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel

Reply via email to