On Tue, Nov 21, 2017 at 10:37 AM, <vign...@multicorewareinc.com> wrote:
> # HG changeset patch
> # User Vignesh Vijayakumar<vign...@multicorewareinc.com>
> # Date 1509595798 -19800
> # Thu Nov 02 09:39:58 2017 +0530
> # Node ID 182bfd0d5af929a801a08b35ee863d79eadb2833
> # Parent dae558b40d9901d5498bb989c96ae8acc5b63cdf
> x86: Modify asm codes for NASM compatibility

Pushed series to default branch.

> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/blockcopy8.asm
> --- a/source/common/x86/blockcopy8.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/blockcopy8.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -3850,7 +3850,7 @@
>  mov r4d, %2/4
>  add r1, r1
>  add r3, r3
> -.loop
> +.loop:
>  movu m0, [r2]
>  movu m1, [r2 + 16]
>  movu m2, [r2 + 32]
> @@ -3905,7 +3905,7 @@
>  lea r5, [3 * r3]
>  lea r6, [3 * r1]
>
> -.loop
> +.loop:
>  movu m0, [r2]
>  movu xm1, [r2 + 32]
>  movu [r0], m0
> @@ -5085,7 +5085,7 @@
>  pxor m4, m4
>  pxor m5, m5
>
> -.loop
> +.loop:
>  ; row 0
>  movu m0, [r1]
>  movu m1, [r1 + 16]
> @@ -5196,7 +5196,7 @@
>  pxor m4, m4
>  pxor m5, m5
>
> -.loop
> +.loop:
>  ; row 0
>  movu m0, [r1]
>  movu m1, [r1 + 16]
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/intrapred8.asm
> --- a/source/common/x86/intrapred8.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/intrapred8.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -2148,7 +2148,7 @@
>  paddw m0, m1
>  packuswb m0, m0
>
> - movd r2, m0
> + movd r2d, m0
>  mov [r0], r2b
>  shr r2, 8
>  mov [r0 + r1], r2b
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/ipfilter16.asm
> --- a/source/common/x86/ipfilter16.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/ipfilter16.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -9103,7 +9103,7 @@
>  ; load constant
>  mova m2, [pw_2000]
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + r1]
>  psllw m0, (14 - BIT_DEPTH)
> @@ -9156,7 +9156,7 @@
>  ; load constant
>  mova m1, [pw_2000]
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  psllw m0, (14 - BIT_DEPTH)
>  psubw m0, m1
> @@ -9277,7 +9277,7 @@
>  ; load constant
>  mova m2, [pw_2000]
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + r1]
>  psllw m0, (14 - BIT_DEPTH)
> @@ -9351,7 +9351,7 @@
>  ; load constant
>  mova m2, [pw_2000]
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + r1]
>  psllw m0, (14 - BIT_DEPTH)
> @@ -9405,7 +9405,7 @@
>  ; load constant
>  mova m4, [pw_2000]
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + r1]
>  movu m2, [r0 + r1 * 2]
> @@ -9510,7 +9510,7 @@
>  ; load constant
>  mova m2, [pw_2000]
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + r1]
>  psllw m0, (14 - BIT_DEPTH)
> @@ -9583,7 +9583,7 @@
>  ; load constant
>  mova m4, [pw_2000]
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + r1]
>  movu m2, [r0 + r1 * 2]
> @@ -9758,7 +9758,7 @@
>  ; load constant
>  mova m2, [pw_2000]
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + r1]
>  psllw m0, (14 - BIT_DEPTH)
> @@ -9869,7 +9869,7 @@
>  ; load constant
>  mova m4, [pw_2000]
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + r1]
>  movu m2, [r0 + r1 * 2]
> @@ -9952,7 +9952,7 @@
>  ; load constant
>  mova m2, [pw_2000]
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + 32]
>  psllw m0, (14 - BIT_DEPTH)
> @@ -10017,7 +10017,7 @@
>  ; load constant
>  mova m2, [pw_2000]
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + r1]
>  psllw m0, (14 - BIT_DEPTH)
> @@ -10081,7 +10081,7 @@
>  ; load constant
>  mova m4, [pw_2000]
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + r1]
>  movu m2, [r0 + r1 * 2]
> @@ -10214,7 +10214,7 @@
>  ; load constant
>  mova m3, [pw_2000]
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + 32]
>  movu m2, [r0 + 64]
> @@ -10314,7 +10314,7 @@
>
>  .preloop:
>  lea r6, [r3 * 3]
> -.loop
> +.loop:
>  ; Row 0
>  movu xm3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
>  movu xm4, [r0 + 2] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> @@ -10381,7 +10381,7 @@
>  packssdw xm4, xm4
>
>  movq [r2], xm3 ;row 0
> -.end
> +.end:
>  RET
>  %endif
>  %endmacro
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/ipfilter8.asm
> --- a/source/common/x86/ipfilter8.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/ipfilter8.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -324,7 +324,7 @@
>  paddw m0, m5
>  psraw m0, 6
>  packuswb m0, m0
> - movd r4, m0
> + movd r4d, m0
>  mov [dstq], r4w
>  shr r4, 16
>  mov [dstq + dststrideq], r4w
> @@ -3471,7 +3471,7 @@
>  phaddw %2, %2
>  pmulhrsw %2, %3
>  packuswb %2, %2
> - movd r4, %2
> + movd r4d, %2
>  mov [dstq], r4w
>  shr r4, 16
>  mov [dstq + dststrideq], r4w
> @@ -5336,7 +5336,7 @@
>  sub r0 , r1
>  add r6d , 3
>
> -.loop
> +.loop:
>  ; Row 0
>  vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
>  pshufb m3, m1
> @@ -5441,7 +5441,7 @@
>
>  .preloop:
>  lea r6, [r3 * 3]
> -.loop
> +.loop:
>  ; Row 0-1
>  vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
>  pshufb m3, m1 ; shuffled based on the col order tab_Lm
> @@ -5502,7 +5502,7 @@
>  movq [r2], xm3
>  movhps [r2 + r3], xm3
>  movq [r2 + r3 * 2], xm4
> -.end
> +.end:
>  RET
>  %endif
>  %endmacro
> @@ -5592,7 +5592,7 @@
>  paddw xm1, xm2
>  psubw xm1, xm0
>  movu [r2], xm1 ;row 0
> -.end
> +.end:
>  RET
>  %endif
>  %endmacro ; IPFILTER_LUMA_PS_8xN_AVX2
> @@ -5634,7 +5634,7 @@
>  sub r0, r8 ; r0(src)-r8
>  add r9, 7 ; blkheight += N - 1 (7 - 1 = 6 ; since the last one row not in loop)
>
> -.label
> +.label:
>  ; Row 0
>  vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
>  pshufb m4, m3, m6 ; row 0 (col 4 to 7)
> @@ -12374,7 +12374,7 @@
>  mova m4, [pb_128]
>  mova m5, [tab_c_64_n64]
>
> -.loop
> +.loop:
>  movh m0, [r0]
>  punpcklbw m0, m4
>  pmaddubsw m0, m5
> @@ -25491,7 +25491,7 @@
>  sub r0, r1
>  add r4d, 3
>
> -.loop
> +.loop:
>  ; Row 0
>  movu m2, [r0]
>  movu m3, [r0 + 1]
> @@ -25553,7 +25553,7 @@
>  sub r0 , r1
>  add r6d , 3
>
> -.loop
> +.loop:
>  ; Row 0
>  vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
>  pshufb m3, m1
> @@ -25607,7 +25607,7 @@
>  sub r0 , r1
>  add r6d , 3
>
> -.loop
> +.loop:
>  ; Row 0
>  vbroadcasti128 m3, [r0]
>  pshufb m3, m1
> @@ -25670,7 +25670,7 @@
>  sub r0 , r1
>  add r6d , 3
>
> -.loop
> +.loop:
>  ; Row 0
>  vbroadcasti128 m3, [r0]
>  pshufb m3, m1
> @@ -25743,7 +25743,7 @@
>  je .label
>  sub r0 , r1
>
> -.label
> +.label:
>  ; Row 0-1
>  movu xm3, [r0]
>  vinserti128 m3, m3, [r0 + r1], 1
> @@ -25795,7 +25795,7 @@
>  movq [r2+r3], xm4
>  lea r2, [r2 + r3 * 2]
>  movhps [r2], xm3
> -.end
> +.end:
>  RET
>
>  cglobal interp_4tap_horiz_ps_4x2, 4,7,5
> @@ -25823,7 +25823,7 @@
>  je .label
>  sub r0 , r1
>
> -.label
> +.label:
>  ; Row 0-1
>  movu xm3, [r0]
>  vinserti128 m3, m3, [r0 + r1], 1
> @@ -25864,7 +25864,7 @@
>  movq [r2+r3], xm4
>  lea r2, [r2 + r3 * 2]
>  movhps [r2], xm3
> -.end
> +.end:
>  RET
>
>  ;-----------------------------------------------------------------------------------------------------------------------------
> @@ -25899,7 +25899,7 @@
>  sub r0 , r1
>
>
> -.loop
> +.loop:
>  sub r4d, 4
>  ; Row 0-1
>  movu xm3, [r0]
> @@ -25955,7 +25955,7 @@
>  movq [r2+r3], xm4
>  lea r2, [r2 + r3 * 2]
>  movhps [r2], xm3
> -.end
> +.end:
>  RET
>  %endmacro
>
> @@ -25993,7 +25993,7 @@
>  sub r0 , r1
>  add r6d , 1
>
> -.loop
> +.loop:
>  dec r6d
>  ; Row 0
>  vbroadcasti128 m3, [r0]
> @@ -26032,7 +26032,7 @@
>  psubw m3, m5
>  vpermq m3, m3, 11011000b
>  movu [r2], xm3
> -.end
> +.end:
>  RET
>
>  INIT_YMM avx2
> @@ -26237,7 +26237,7 @@
>
>  dec r0
>
> -.loop
> +.loop:
>  sub r4d, 4
>  ; Row 0-1
>  movu xm3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
> @@ -26306,9 +26306,9 @@
>  sub r0, r6
>  add r4d, 7
>
> -.label
> +.label:
>  lea r6, [pw_2000]
> -.loop
> +.loop:
>  ; Row 0
>  vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
>  pshufb m4, m3, m6 ; row 0 (col 4 to 7)
> @@ -26405,9 +26405,9 @@
>  sub r0, r6 ; r0(src)-r6
>  add r4d, 7 ; blkheight += N - 1 (7 - 1 = 6 ; since the last one row not in loop)
>
> -.label
> +.label:
>  lea r6, [interp8_hps_shuf]
> -.loop
> +.loop:
>  ; Row 0
>  vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
>  pshufb m4, m3, m6 ; row 0 (col 4 to 7)
> @@ -26736,9 +26736,9 @@
>  sub r0, r6 ; r0(src)-r6
>  add r4d, 7 ; blkheight += N - 1
>
> -.label
> +.label:
>  lea r6, [pw_2000]
> -.loop
> +.loop:
>  ; Row 0
>  vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
>  pshufb m4, m3, m6 ; row 0 (col 4 to 7)
> @@ -26880,7 +26880,7 @@
>  sub r0 , r1
>  inc r6d
>
> -.loop
> +.loop:
>  ; Row 0
>  vbroadcasti128 m3, [r0]
>  pshufb m3, m1
> @@ -26915,7 +26915,7 @@
>  psubw m3, m5
>  vpermq m3, m3, 11011000b
>  movu [r2], xm3
> -.end
> +.end:
>  RET
>  %endmacro
>
> @@ -26945,7 +26945,7 @@
>  jz .label
>  sub r0, r1
>
> -.label
> +.label:
>  lea r6, [r1 * 3]
>  movq xm1, [r0]
>  movhps xm1, [r0 + r1]
> @@ -26985,7 +26985,7 @@
>  movd [r2], xm1
>  pextrd [r2 + r3], xm1, 1
>  pextrd [r2 + r3 * 2], xm1, 2
> -.end
> +.end:
>  RET
>
>  INIT_YMM avx2
> @@ -27005,7 +27005,7 @@
>  jz .label
>  sub r0, r1
>
> -.label
> +.label:
>  mova m4, [interp4_hpp_shuf]
>  mova m5, [pw_1]
>  dec r0
> @@ -27062,7 +27062,7 @@
>  movd [r2], xm1
>  pextrd [r2 + r3], xm1, 1
>  movd [r2 + r3 * 2], xm2
> -.end
> +.end:
>  RET
>
>  INIT_YMM avx2
> @@ -27217,7 +27217,7 @@
>  sub r0 , r1
>  inc r6d
>
> -.loop
> +.loop:
>  ; Row 0
>  vbroadcasti128 m3, [r0]
>  pshufb m3, m1
> @@ -27254,7 +27254,7 @@
>  vextracti128 xm4, m3, 1
>  movq [r2], xm3
>  movd [r2+8], xm4
> -.end
> +.end:
>  RET
>
>  INIT_YMM avx2
> @@ -27285,7 +27285,7 @@
>  lea r6, [r1 * 3] ; r6 = (N / 2 - 1) * srcStride
>  sub r0, r6 ; r0(src)-r6
>  add r4d, 7
> -.loop
> +.loop:
>
>  ; Row 0
>
> @@ -27350,9 +27350,9 @@
>  sub r0, r6 ; r0(src)-r6
>  add r4d, 7 ; blkheight += N - 1 (7 - 1 = 6 ; since the last one row not in loop)
>
> -.label
> +.label:
>  lea r6, [interp8_hps_shuf]
> -.loop
> +.loop:
>  ; Row 0
>  vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
>  pshufb m4, m3, m6 ; row 0 (col 4 to 7)
> @@ -27430,7 +27430,7 @@
>  sub r0 , r1
>  add r6d , 3
>
> -.loop
> +.loop:
>  ; Row 0
>  vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
>  pshufb m3, m1
> @@ -27988,7 +27988,7 @@
>  sub r0 , r1
>  add r6d , 3
>
> -.loop
> +.loop:
>  ; Row 0
>  vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
>  pshufb m3, m1
> @@ -28067,7 +28067,7 @@
>  sub r0 , r1
>  add r6d , 3
>
> -.loop
> +.loop:
>  ; Row 0
>  vbroadcasti128 m3, [r0] ; [x x x x x A 9 8 7 6 5 4 3 2 1 0]
>  pshufb m3, m1
> @@ -28114,7 +28114,7 @@
>  jz .label
>  sub r0, r1
>
> -.label
> +.label:
>  mova m4, [interp4_hps_shuf]
>  mova m5, [pw_1]
>  dec r0
> @@ -28209,7 +28209,7 @@
>  movd [r2], xm1
>  pextrd [r2 + r3], xm1, 1
>  movd [r2 + r3 * 2], xm2
> -.end
> +.end:
>  RET
>
>  INIT_YMM avx2
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/loopfilter.asm
> --- a/source/common/x86/loopfilter.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/loopfilter.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -374,7 +374,7 @@
>  pxor m0, m0 ; m0 = 0
>  mova m6, [pb_2] ; m6 = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
>  shr r4d, 4
> -.loop
> +.loop:
>  movu m7, [r0]
>  movu m5, [r0 + 16]
>  movu m3, [r0 + r3]
> @@ -430,7 +430,7 @@
>  mova m6, [pb_2] ; m6 = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
>  mova m7, [pb_128]
>  shr r4d, 4
> -.loop
> +.loop:
>  movu m1, [r0] ; m1 = pRec[x]
>  movu m2, [r0 + r3] ; m2 = pRec[x + iStride]
>
> @@ -478,7 +478,7 @@
>  mova m4, [pb_2]
>  shr r4d, 4
>  mova m0, [pw_pixel_max]
> -.loop
> +.loop:
>  movu m5, [r0]
>  movu m3, [r0 + r3]
>
> @@ -523,7 +523,7 @@
>  mova xm6, [pb_2] ; xm6 = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
>  mova xm7, [pb_128]
>  shr r4d, 4
> -.loop
> +.loop:
>  movu xm1, [r0] ; xm1 = pRec[x]
>  movu xm2, [r0 + r3] ; xm2 = pRec[x + iStride]
>
> @@ -572,7 +572,7 @@
>  mov r5d, r4d
>  shr r4d, 4
>  mov r6, r0
> -.loop
> +.loop:
>  movu m7, [r0]
>  movu m5, [r0 + 16]
>  movu m3, [r0 + r3]
> @@ -674,7 +674,7 @@
>  pxor m0, m0 ; m0 = 0
>  mova m7, [pb_128]
>  shr r4d, 4
> -.loop
> +.loop:
>  movu m1, [r0] ; m1 = pRec[x]
>  movu m2, [r0 + r3] ; m2 = pRec[x + iStride]
>
> @@ -748,7 +748,7 @@
>  mova m4, [pw_pixel_max]
>  vbroadcasti128 m6, [r2] ; m6 = m_iOffsetEo
>  shr r4d, 4
> -.loop
> +.loop:
>  movu m7, [r0]
>  movu m5, [r0 + r3]
>  movu m1, [r0 + r3 * 2]
> @@ -804,7 +804,7 @@
>  vbroadcasti128 m5, [pb_128]
>  vbroadcasti128 m6, [r2] ; m6 = m_iOffsetEo
>  shr r4d, 4
> -.loop
> +.loop:
>  movu xm1, [r0] ; m1 = pRec[x]
>  movu xm2, [r0 + r3] ; m2 = pRec[x + iStride]
>  vinserti128 m1, m1, xm2, 1
> @@ -859,7 +859,7 @@
>  movh m6, [r0 + r4 * 2]
>  movhps m6, [r1 + r4]
>
> -.loop
> +.loop:
>  movu m7, [r0]
>  movu m5, [r0 + 16]
>  movu m3, [r0 + r5 + 2]
> @@ -918,7 +918,7 @@
>  movh m5, [r0 + r4]
>  movhps m5, [r1 + r4]
>
> -.loop
> +.loop:
>  movu m1, [r0] ; m1 = rec[x]
>  movu m2, [r0 + r5 + 1] ; m2 = rec[x + stride + 1]
>  pxor m3, m1, m7
> @@ -970,7 +970,7 @@
>  movhps xm4, [r1 + r4]
>  vbroadcasti128 m5, [r3]
>  mova m6, [pw_pixel_max]
> -.loop
> +.loop:
>  movu m1, [r0]
>  movu m3, [r0 + r5 + 2]
>
> @@ -1061,7 +1061,7 @@
>  movhps xm4, [r1 + r4]
>  vbroadcasti128 m5, [r3]
>
> -.loop
> +.loop:
>  movu m1, [r0]
>  movu m7, [r0 + 32]
>  movu m3, [r0 + r5 + 2]
> @@ -1567,11 +1567,11 @@
>  movu m4, [r1 + 16] ; offset[16-31]
>  pxor m7, m7
>
> -.loopH
> +.loopH:
>  mov r5d, r2d
>  xor r6, r6
>
> -.loopW
> +.loopW:
>  movu m2, [r0 + r6]
>  movu m5, [r0 + r6 + 16]
>  psrlw m0, m2, (BIT_DEPTH - 5)
> @@ -1617,11 +1617,11 @@
>  movu m3, [r1 + 0] ; offset[0-15]
>  movu m4, [r1 + 16] ; offset[16-31]
>  pxor m7, m7 ; m7 =[0]
> -.loopH
> +.loopH:
>  mov r5d, r2d
>  xor r6, r6
>
> -.loopW
> +.loopW:
>  movu m2, [r0 + r6] ; m0 = [rec]
>  psrlw m1, m2, 3
>  pand m1, [pb_31] ; m1 = [index]
> @@ -1670,9 +1670,9 @@
>  mov r6d, r3d
>  shr r3d, 1
>
> -.loopH
> +.loopH:
>  mov r5d, r2d
> -.loopW
> +.loopW:
>  movu m2, [r0]
>  movu m5, [r0 + r4]
>  psrlw m0, m2, (BIT_DEPTH - 5)
> @@ -1751,9 +1751,9 @@
>  shr r2d, 4
>  mov r1d, r3d
>  shr r3d, 1
> -.loopH
> +.loopH:
>  mov r5d, r2d
> -.loopW
> +.loopW:
>  movu xm2, [r0] ; m2 = [rec]
>  vinserti128 m2, m2, [r0 + r4], 1
>  psrlw m1, m2, 3
> @@ -1789,7 +1789,7 @@
>  test r1b, 1
>  jz .end
>  mov r5d, r2d
> -.loopW1
> +.loopW1:
>  movu xm2, [r0] ; m2 = [rec]
>  psrlw xm1, xm2, 3
>  pand xm1, xm7 ; m1 = [index]
> @@ -1811,7 +1811,7 @@
>  add r0, 16
>  dec r5d
>  jnz .loopW1
> -.end
> +.end:
>  RET
>  %endif
>
> @@ -1827,7 +1827,7 @@
>  add r3d, 1
>  mov r5, r0
>  movu m4, [r0 + r4]
> -.loop
> +.loop:
>  movu m1, [r1] ; m2 = pRec[x]
>  movu m2, [r2] ; m3 = pTmpU[x]
>
> @@ -1921,7 +1921,7 @@
>  mov r5, r0
>  movu m4, [r0 + r4]
>
> -.loop
> +.loop:
>  movu m1, [r1] ; m2 = pRec[x]
>  movu m2, [r2] ; m3 = pTmpU[x]
>
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/mc-a.asm
> --- a/source/common/x86/mc-a.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/mc-a.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -4115,7 +4115,7 @@
>  lea r7, [r5 * 3]
>  lea r8, [r1 * 3]
>  mov r9d, 4
> -.loop
> +.loop:
>  pixel_avg_W8
>  dec r9d
>  jnz .loop
> @@ -4129,7 +4129,7 @@
>  lea r7, [r5 * 3]
>  lea r8, [r1 * 3]
>  mov r9d, 8
> -.loop
> +.loop:
>  pixel_avg_W8
>  dec r9d
>  jnz .loop
> @@ -4697,7 +4697,7 @@
>  lea r8, [r1 * 3]
>  mov r9d, 4
>
> -.loop
> +.loop:
>  movu m0, [r2]
>  movu m1, [r4]
>  pavgw m0, m1
> @@ -4834,7 +4834,7 @@
>  lea r7, [r5 * 3]
>  lea r8, [r1 * 3]
>  mov r9d, 4
> -.loop
> +.loop:
>  pixel_avg_H16
>  dec r9d
>  jnz .loop
> @@ -4848,7 +4848,7 @@
>  lea r7, [r5 * 3]
>  lea r8, [r1 * 3]
>  mov r9d, 4
> -.loop
> +.loop:
>  pixel_avg_H16
>  pixel_avg_H16
>  dec r9d
> @@ -4863,7 +4863,7 @@
>  lea r7, [r5 * 3]
>  lea r8, [r1 * 3]
>  mov r9d, 4
> -.loop
> +.loop:
>  pixel_avg_H16
>  pixel_avg_H16
>  pixel_avg_H16
> @@ -4887,7 +4887,7 @@
>  lea r8, [r1 * 3]
>  mov r9d, 8
>
> -.loop
> +.loop:
>  movu m0, [r2]
>  movu m1, [r4]
>  pavgw m0, m1
> @@ -4987,7 +4987,7 @@
>  lea r7, [r5 * 3]
>  lea r8, [r1 * 3]
>  mov r9d, 2
> -.loop
> +.loop:
>  pixel_avg_W32
>  dec r9d
>  jnz .loop
> @@ -5001,7 +5001,7 @@
>  lea r7, [r5 * 3]
>  lea r8, [r1 * 3]
>  mov r9d, 4
> -.loop
> +.loop:
>  pixel_avg_W32
>  dec r9d
>  jnz .loop
> @@ -5015,7 +5015,7 @@
>  lea r7, [r5 * 3]
>  lea r8, [r1 * 3]
>  mov r9d, 6
> -.loop
> +.loop:
>  pixel_avg_W32
>  dec r9d
>  jnz .loop
> @@ -5029,7 +5029,7 @@
>  lea r7, [r5 * 3]
>  lea r8, [r1 * 3]
>  mov r9d, 8
> -.loop
> +.loop:
>  pixel_avg_W32
>  dec r9d
>  jnz .loop
> @@ -5043,7 +5043,7 @@
>  lea r7, [r5 * 3]
>  lea r8, [r1 * 3]
>  mov r9d, 16
> -.loop
> +.loop:
>  pixel_avg_W32
>  dec r9d
>  jnz .loop
> @@ -5141,7 +5141,7 @@
>  lea r7, [r5 * 3]
>  lea r8, [r1 * 3]
>  mov r9d, 4
> -.loop
> +.loop:
>  pixel_avg_W64
>  dec r9d
>  jnz .loop
> @@ -5155,7 +5155,7 @@
>  lea r7, [r5 * 3]
>  lea r8, [r1 * 3]
>  mov r9d, 8
> -.loop
> +.loop:
>  pixel_avg_W64
>  dec r9d
>  jnz .loop
> @@ -5169,7 +5169,7 @@
>  lea r7, [r5 * 3]
>  lea r8, [r1 * 3]
>  mov r9d, 12
> -.loop
> +.loop:
>  pixel_avg_W64
>  dec r9d
>  jnz .loop
> @@ -5183,7 +5183,7 @@
>  lea r7, [r5 * 3]
>  lea r8, [r1 * 3]
>  mov r9d, 16
> -.loop
> +.loop:
>  pixel_avg_W64
>  dec r9d
>  jnz .loop
> @@ -5204,7 +5204,7 @@
>  lea r8, [r1 * 3]
>  mov r9d, 16
>
> -.loop
> +.loop:
>  movu m0, [r2]
>  movu m1, [r4]
>  pavgw m0, m1
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/pixel-util8.asm
> --- a/source/common/x86/pixel-util8.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/pixel-util8.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -1785,7 +1785,7 @@
>  movu [r1], xm7
>  je .nextH
>
> -.width6
> +.width6:
>  cmp r6d, 6
>  jl .width4
>  movq [r1], xm7
> @@ -4937,7 +4937,7 @@
>  lea r9, [r4 * 3]
>  lea r8, [r5 * 3]
>
> -.loop
> +.loop:
>  pmovzxbw m0, [r2]
>  pmovzxbw m1, [r3]
>  pmovzxbw m2, [r2 + r4]
> @@ -5150,7 +5150,7 @@
>  lea r7, [r4 * 3]
>  lea r8, [r5 * 3]
>
> -.loop
> +.loop:
>  movu m0, [r2]
>  movu m1, [r2 + 32]
>  movu m2, [r3]
> @@ -5557,7 +5557,7 @@
>  lea r7, [r4 * 3]
>  lea r8, [r5 * 3]
>
> -.loop
> +.loop:
>  movu m0, [r2]
>  movu m1, [r2 + 32]
>  movu m2, [r2 + 64]
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/sad-a.asm
> --- a/source/common/x86/sad-a.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/sad-a.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -5631,7 +5631,7 @@
>  xorps m5, m5
>  mov r4d, 4
>
> -.loop
> +.loop:
>  movu m1, [r0] ; row 0 of pix0
>  movu m2, [r2] ; row 0 of pix1
>  movu m3, [r0 + r1] ; row 1 of pix0
> @@ -5676,7 +5676,7 @@
>  mov r4d, 6
>  lea r5, [r1 * 3]
>  lea r6, [r3 * 3]
> -.loop
> +.loop:
>  movu m1, [r0] ; row 0 of pix0
>  movu m2, [r2] ; row 0 of pix1
>  movu m3, [r0 + r1] ; row 1 of pix0
> @@ -5718,7 +5718,7 @@
>  lea r5, [r1 * 3]
>  lea r6, [r3 * 3]
>
> -.loop
> +.loop:
>  movu m1, [r0] ; row 0 of pix0
>  movu m2, [r2] ; row 0 of pix1
>  movu m3, [r0 + r1] ; row 1 of pix0
> @@ -5759,7 +5759,7 @@
>  lea r5, [r1 * 3]
>  lea r6, [r3 * 3]
>
> -.loop
> +.loop:
>  movu m1, [r0] ; row 0 of pix0
>  movu m2, [r2] ; row 0 of pix1
>  movu m3, [r0 + r1] ; row 1 of pix0
> @@ -5822,7 +5822,7 @@
>  mov r4d, 64/4
>  lea r5, [r1 * 3]
>  lea r6, [r3 * 3]
> -.loop
> +.loop:
>  movu m1, [r0] ; row 0 of pix0
>  movu m2, [r2] ; row 0 of pix1
>  movu m3, [r0 + r1] ; row 1 of pix0
> @@ -5873,7 +5873,7 @@
>  xorps m0, m0
>  xorps m5, m5
>  mov r4d, 4
> -.loop
> +.loop:
>  movu m1, [r0] ; first 32 of row 0 of pix0
>  movu m2, [r2] ; first 32 of row 0 of pix1
>  movu m3, [r0 + 32] ; second 32 of row 0 of pix0
> @@ -5936,7 +5936,7 @@
>  xorps m0, m0
>  xorps m5, m5
>  mov r4d, 16
> -.loop
> +.loop:
>  movu m1, [r0] ; first 32 of row 0 of pix0
>  movu m2, [r2] ; first 32 of row 0 of pix1
>  movu m3, [r0 + 32] ; second 32 of row 0 of pix0
> @@ -5978,7 +5978,7 @@
>  mov r4d, 12
>  lea r5, [r1 * 3]
>  lea r6, [r3 * 3]
> -.loop
> +.loop:
>  movu m1, [r0] ; first 32 of row 0 of pix0
>  movu m2, [r2] ; first 32 of row 0 of pix1
>  movu m3, [r0 + 32] ; second 32 of row 0 of pix0
> @@ -6040,7 +6040,7 @@
>  mov r4d, 8
>  lea r5, [r1 * 3]
>  lea r6, [r3 * 3]
> -.loop
> +.loop:
>  movu m1, [r0] ; first 32 of row 0 of pix0
>  movu m2, [r2] ; first 32 of row 0 of pix1
>  movu m3, [r0 + 32] ; second 32 of row 0 of pix0
> diff -r dae558b40d99 -r 182bfd0d5af9 source/common/x86/seaintegral.asm
> --- a/source/common/x86/seaintegral.asm Tue Nov 21 09:40:16 2017 +0530
> +++ b/source/common/x86/seaintegral.asm Thu Nov 02 09:39:58 2017 +0530
> @@ -36,7 +36,7 @@
>  mov r2, r1
>  shl r2, 4
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + r2]
>  psubd m1, m0
> @@ -54,7 +54,7 @@
>  mov r2, r1
>  shl r2, 5
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + r2]
>  psubd m1, m0
> @@ -75,7 +75,7 @@
>  shl r3, 4
>  add r2, r3
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + r2]
>  psubd m1, m0
> @@ -93,7 +93,7 @@
>  mov r2, r1
>  shl r2, 6
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + r2]
>  psubd m1, m0
> @@ -114,7 +114,7 @@
>  shl r3, 5
>  add r2, r3
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + r2]
>  psubd m1, m0
> @@ -132,7 +132,7 @@
>  mov r2, r1
>  shl r2, 7
>
> -.loop
> +.loop:
>  movu m0, [r0]
>  movu m1, [r0 + r2]
>  psubd m1, m0
> @@ -264,7 +264,7 @@
>  movu [r0 + r3], xm0
>  jmp .end
>
> -.end
> +.end:
>  RET
>  %endif
>
> @@ -379,7 +379,7 @@
>  movu [r0 + r3], m0
>  jmp .end
>
> -.end
> +.end:
>  RET
>  %endif
>
> @@ -577,7 +577,7 @@
>  movu [r0 + r3], xm0
>  jmp .end
>
> -.end
> +.end:
>  RET
>  %endif
>
> @@ -740,7 +740,7 @@
>  movu [r0 + r3], m0
>  jmp .end
>
> -.end
> +.end:
>  RET
>  %endif
>
> @@ -883,7 +883,7 @@
>  movu [r0 + r3], m0
>  jmp .end
>
> -.end
> +.end:
>  RET
>
>  %macro INTEGRAL_THIRTYTWO_HORIZONTAL_16 0
> @@ -1058,5 +1058,5 @@
>  movu [r0 + r3], m0
>  jmp .end
>
> -.end
> +.end:
>  RET
> _______________________________________________
> x265-devel mailing list
> x265-devel@videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
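For anyone skimming the diff: the changes are mechanical and fall into two groups. NASM warns when a label sits alone on a line without a trailing colon ("label alone on a line without a colon might be in error"), where YASM built these files silently, so every .loop/.label/.end/.loopH-style label gains a colon. And NASM, unlike YASM, rejects movd between an XMM register and a 64-bit GPR (it expects movq there), so destinations are narrowed to the 32-bit aliases (r2d, r4d); the byte/word stores and shifts that follow are unaffected. A minimal standalone sketch of both rules (hypothetical file and labels, not code from the patch; it should assemble with "nasm -f elf64 nasm_compat.asm"):

    ; nasm_compat.asm - illustration only, not part of the x265 source
    BITS 64
    SECTION .text
    global store_two_bytes

    ; rdi = dst, rsi = iteration count, xmm0 = packed byte results
    store_two_bytes:
    .loop:                   ; trailing colon keeps NASM quiet; a bare ".loop"
                             ; line triggers the orphan-label warning
        movd   eax, xmm0     ; 32-bit GPR destination: NASM rejects
                             ; "movd rax, xmm0" (it wants movq for r64)
        mov    [rdi], al     ; store low byte
        shr    eax, 8
        mov    [rdi + 1], al ; store next byte
        add    rdi, 2
        psrldq xmm0, 2       ; shift the next byte pair into the low word
        dec    rsi
        jnz    .loop
        ret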
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel