> +%macro rgbToY_fn 2
Capitalized macro names please, unless libav has decided to drop this
standard convention for some reason.
> +cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w
> +%ifdef ARCH_X86_64
> + movsxd wq, wd
> +%endif
> + add dstq, wq
> + neg wq
> + pxor m7, m7
> + mova m4, [rgb_Yrnd]
> +%if mmsize == 8
> + mova m5, [%2_Ycoeff_12x4]
> + mova m6, [%2_Ycoeff_3x56]
> +%define coeff1 m5
> +%define coeff2 m6
> +%else ; mmsize == 16
> +%ifdef ARCH_X86_64
> + mova m8, [%2_Ycoeff_12x4]
> + mova m9, [%2_Ycoeff_3x56]
Comma alignment.
> +%define coeff1 m8
> +%define coeff2 m9
> +%else ; x86-32
> +%define coeff1 [%2_Ycoeff_12x4]
> +%define coeff2 [%2_Ycoeff_3x56]
> +%endif ; x86-32/64
> +%endif ; mmsize = 8/16
> +.loop:
> + movd m0, [srcq+0] ; (byte) { B0, G0, R0, B1 }
> + movd m1, [srcq+2] ; (byte) { R0, B1, G1, R1 }
> + movd m2, [srcq+6] ; (byte) { B2, G2, R2, B3 }
> + movd m3, [srcq+8] ; (byte) { R2, B3, G3, R3 }
> +%if mmsize == 16
> + punpckldq m0, m2
> + punpckldq m1, m3
> + movd m2, [srcq+12] ; (byte) { B4, G4, R4, B5 }
> + movd m3, [srcq+14] ; (byte) { R4, B5, G5, R5 }
> + movd m5, [srcq+18] ; (byte) { B6, G6, R6, B7 }
> + movd m6, [srcq+20] ; (byte) { R6, B7, G7, R7 }
> + punpckldq m2, m5
> + punpckldq m3, m6
> +%endif ; mmsize == 16
When the pigs can't be beaten, you need to call the Mighty
Eagle^H^H^H^H^Hpshufb. Seriously, this code looks kinda awful.
> + add srcq, 3 * mmsize / 2
> + punpcklbw m0, m7 ; (word) { B0, G0, R0, B1 }
> + punpcklbw m1, m7 ; (word) { R0, B1, G1, R1 }
> + punpcklbw m2, m7 ; (word) { B2, G2, R2, B3 }
> + punpcklbw m3, m7 ; (word) { R2, B3, G3, R3 }
> + pmaddwd m0, coeff1 ; (dword) { B0*BY + G0*GY, B1*BY }
> + pmaddwd m1, coeff2 ; (dword) { R0*RY, G1*GY + R1*RY }
> + pmaddwd m2, coeff1 ; (dword) { B2*BY + G2*GY, B3*BY }
> + pmaddwd m3, coeff2 ; (dword) { R2*RY, G3*GY + R3*RY }
A lower-precision SSSE3-based maddubsw version might be applicable
here for later work?
Would this be faster with pmulhw and a different byte ordering?
Jason
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel