>+cglobal intra_pred_ang16_33, 4,7,8
>+    xor         r6d,       r6d
>+    inc         r6d
>+    xchg        r2,        r3
>+    lea         r3,        [ang_table + 16 * 16]
You don't need the old value of r3 anymore (the next 'lea' overwrites r3), so 'mov r2, r3' is faster than 'xchg r2, r3'.
 
>+    add         r1,        r1
>+    lea         r4,        [r1 * 3]
>+
>+    call        ang16_mode_3_33
Moving ang16_mode_3_33 next to this function would give better cache performance.


>+    lea         r2,        [r2 + 16]
>+    lea         r0,        [r0 + 16]
>+
>+    call        ang16_mode_3_33
>+
>+    RET
>+
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel

Reply via email to