# HG changeset patch
# User Praveen Tiwari
# Date 1410952504 -19800
# Node ID 530c1824c585870c07ba13623cb92b21637a8514
# Parent a2dcc12bd36f41a99c346870cc4c23c1e313665b
denoiseDct asm code: remove faulty code, need a new SSE version
diff -r a2dcc12bd36f -r 530c1824c585 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Wed Sep 17 16:33:52 2014 +0530 +++ b/source/common/x86/asm-primitives.cpp Wed Sep 17 16:45:04 2014 +0530 @@ -1565,7 +1565,6 @@ p.idct[IDCT_4x4] = x265_idct4_sse2; p.idct[IDST_4x4] = x265_idst4_sse2; p.planecopy_sp = x265_downShift_16_sse2; - //p.denoiseDct = x265_denoise_dct_sse2; p.copy_shl[BLOCK_4x4] = x265_copy_shl_4_sse2; p.copy_shl[BLOCK_8x8] = x265_copy_shl_8_sse2; p.copy_shl[BLOCK_16x16] = x265_copy_shl_16_sse2; @@ -1605,7 +1604,6 @@ p.dct[DST_4x4] = x265_dst4_ssse3; p.idct[IDCT_8x8] = x265_idct8_ssse3; p.count_nonzero = x265_count_nonzero_ssse3; - //p.denoiseDct = x265_denoise_dct_ssse3; } if (cpuMask & X265_CPU_SSE4) { @@ -1709,7 +1707,6 @@ p.ssim_4x4x2_core = x265_pixel_ssim_4x4x2_core_avx; p.ssim_end_4 = x265_pixel_ssim_end4_avx; - //p.denoiseDct = x265_denoise_dct_avx; } if (cpuMask & X265_CPU_XOP) { diff -r a2dcc12bd36f -r 530c1824c585 source/common/x86/dct8.asm --- a/source/common/x86/dct8.asm Wed Sep 17 16:33:52 2014 +0530 +++ b/source/common/x86/dct8.asm Wed Sep 17 16:45:04 2014 +0530 @@ -1054,102 +1054,6 @@ RET -; TODO: split into two version after coeff_t changed -%if 1 ;HIGH_BIT_DEPTH -;----------------------------------------------------------------------------- -; void denoise_dct( int32_t *dct, uint32_t *sum, uint32_t *offset, int size ) -;----------------------------------------------------------------------------- -%macro DENOISE_DCT 0 -cglobal denoise_dct, 4,4,6 - pxor m5, m5 - movsxdifnidn r3, r3d -.loop: - mova m2, [r0+r3*4-2*mmsize] - mova m3, [r0+r3*4-1*mmsize] - ABSD m0, m2 - ABSD m1, m3 - paddd m4, m0, [r1+r3*4-2*mmsize] - psubd m0, [r2+r3*4-2*mmsize] - mova [r1+r3*4-2*mmsize], m4 - paddd m4, m1, [r1+r3*4-1*mmsize] - psubd m1, [r2+r3*4-1*mmsize] - mova [r1+r3*4-1*mmsize], m4 - pcmpgtd m4, m0, m5 - pand m0, m4 - pcmpgtd m4, m1, m5 - pand m1, m4 - PSIGND m0, m2 - PSIGND m1, m3 - mova [r0+r3*4-2*mmsize], m0 - mova 
[r0+r3*4-1*mmsize], m1 - sub r3d, mmsize/2 - jg .loop - RET -%endmacro - -%if ARCH_X86_64 == 0 -INIT_MMX mmx -DENOISE_DCT -%endif -INIT_XMM sse2 -DENOISE_DCT -INIT_XMM ssse3 -DENOISE_DCT -INIT_XMM avx -DENOISE_DCT -INIT_YMM avx2 -DENOISE_DCT - -%else ; !HIGH_BIT_DEPTH - -;----------------------------------------------------------------------------- -; void denoise_dct( int16_t *dct, uint32_t *sum, uint16_t *offset, int size ) -;----------------------------------------------------------------------------- -%macro DENOISE_DCT 0 -cglobal denoise_dct, 4,4,7 - pxor m6, m6 - movsxdifnidn r3, r3d -.loop: - mova m2, [r0+r3*2-2*mmsize] - mova m3, [r0+r3*2-1*mmsize] - ABSW m0, m2, sign - ABSW m1, m3, sign - psubusw m4, m0, [r2+r3*2-2*mmsize] - psubusw m5, m1, [r2+r3*2-1*mmsize] - PSIGNW m4, m2 - PSIGNW m5, m3 - mova [r0+r3*2-2*mmsize], m4 - mova [r0+r3*2-1*mmsize], m5 - punpcklwd m2, m0, m6 - punpcklwd m3, m1, m6 - punpckhwd m0, m6 - punpckhwd m1, m6 - paddd m2, [r1+r3*4-4*mmsize] - paddd m0, [r1+r3*4-3*mmsize] - paddd m3, [r1+r3*4-2*mmsize] - paddd m1, [r1+r3*4-1*mmsize] - mova [r1+r3*4-4*mmsize], m2 - mova [r1+r3*4-3*mmsize], m0 - mova [r1+r3*4-2*mmsize], m3 - mova [r1+r3*4-1*mmsize], m1 - sub r3, mmsize - jg .loop -%if (mmsize == 8) - EMMS -%endif - RET -%endmacro - -%if ARCH_X86_64 == 0 -INIT_MMX mmx -DENOISE_DCT -%endif -INIT_XMM sse2 -DENOISE_DCT -INIT_XMM ssse3 -DENOISE_DCT -INIT_XMM avx -DENOISE_DCT INIT_YMM avx2 cglobal denoise_dct, 4,4,4 @@ -1172,7 +1076,6 @@ jg .loop RET -%endif ; !HIGH_BIT_DEPTH %macro DCT16_PASS_1_E 2 vpbroadcastq m7, [r7 + %1] _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel