Dynamic range: 10 + 10 + 6 + 6 = 32 bits,
so we didn't need to upgrade sse_ret_t to 64 bits. At 2015-09-30 18:56:51,"Divya Manivannan" <[email protected]> wrote: ># HG changeset patch ># User Divya Manivannan <[email protected]> ># Date 1443607299 -19800 ># Wed Sep 30 15:31:39 2015 +0530 ># Node ID aadec6615a3d5f33b4fdb00079e236b019ef1e95 ># Parent 6e7761bdfe23addb862483f8407b388800de7d92 >sse: fix overflow in sse_ss for 10 bit > >diff -r 6e7761bdfe23 -r aadec6615a3d source/common/common.h >--- a/source/common/common.h Wed Sep 30 14:57:15 2015 +0530 >+++ b/source/common/common.h Wed Sep 30 15:31:39 2015 +0530 >@@ -135,7 +135,7 @@ > typedef int32_t ssum2_t; // Signed sum > #endif // if HIGH_BIT_DEPTH > >-#if X265_DEPTH <= 10 >+#if X265_DEPTH < 10 > typedef uint32_t sse_ret_t; > #else > typedef uint64_t sse_ret_t; >diff -r 6e7761bdfe23 -r aadec6615a3d source/common/x86/asm-primitives.cpp >--- a/source/common/x86/asm-primitives.cpp Wed Sep 30 14:57:15 2015 +0530 >+++ b/source/common/x86/asm-primitives.cpp Wed Sep 30 15:31:39 2015 +0530 >@@ -1006,10 +1006,11 @@ > p.chroma[X265_CSP_I422].cu[BLOCK_422_4x8].sse_pp = > (pixel_sse_t)PFX(pixel_ssd_ss_4x8_mmx2); > p.chroma[X265_CSP_I422].cu[BLOCK_422_8x16].sse_pp = > (pixel_sse_t)PFX(pixel_ssd_ss_8x16_sse2); > p.chroma[X265_CSP_I422].cu[BLOCK_422_16x32].sse_pp = > (pixel_sse_t)PFX(pixel_ssd_ss_16x32_sse2); >-#if X265_DEPTH <= 10 >- p.cu[BLOCK_4x4].sse_ss = PFX(pixel_ssd_ss_4x4_mmx2); >- ALL_LUMA_CU(sse_ss, pixel_ssd_ss, sse2); >-#endif >+ >+ // sse_ss primitive need to be fixed for 10 and 12 bit >+ //p.cu[BLOCK_4x4].sse_ss = PFX(pixel_ssd_ss_4x4_mmx2); >+ //ALL_LUMA_CU(sse_ss, pixel_ssd_ss, sse2); >+ > p.cu[BLOCK_4x4].dct = PFX(dct4_sse2); > p.cu[BLOCK_8x8].dct = PFX(dct8_sse2); > p.cu[BLOCK_4x4].idct = PFX(idct4_sse2); >@@ -1535,11 +1536,12 @@ > p.cu[BLOCK_16x16].ssd_s = PFX(pixel_ssd_s_16_avx2); > p.cu[BLOCK_32x32].ssd_s = PFX(pixel_ssd_s_32_avx2); > >+ // sse_ss primitive need to be fixed for 10 and 12 bit >+ //p.cu[BLOCK_16x16].sse_ss = PFX(pixel_ssd_ss_16x16_avx2); 
>+ //p.cu[BLOCK_32x32].sse_ss = PFX(pixel_ssd_ss_32x32_avx2); >+ //p.cu[BLOCK_64x64].sse_ss = PFX(pixel_ssd_ss_64x64_avx2); >+ > #if X265_DEPTH <= 10 >- p.cu[BLOCK_16x16].sse_ss = PFX(pixel_ssd_ss_16x16_avx2); >- p.cu[BLOCK_32x32].sse_ss = PFX(pixel_ssd_ss_32x32_avx2); >- p.cu[BLOCK_64x64].sse_ss = PFX(pixel_ssd_ss_64x64_avx2); >- > p.cu[BLOCK_16x16].sse_pp = PFX(pixel_ssd_16x16_avx2); > p.cu[BLOCK_32x32].sse_pp = PFX(pixel_ssd_32x32_avx2); > p.cu[BLOCK_64x64].sse_pp = PFX(pixel_ssd_64x64_avx2); >diff -r 6e7761bdfe23 -r aadec6615a3d source/common/x86/pixel.h >--- a/source/common/x86/pixel.h Wed Sep 30 14:57:15 2015 +0530 >+++ b/source/common/x86/pixel.h Wed Sep 30 15:31:39 2015 +0530 >@@ -39,7 +39,7 @@ > pixel PFX(planeClipAndMax_avx2)(pixel *src, intptr_t stride, int width, int > height, uint64_t *outsum, const pixel minPix, const pixel maxPix); > > #define DECL_PIXELS(cpu) \ >- FUNCDEF_PU(uint32_t, pixel_ssd, cpu, const pixel*, intptr_t, const >pixel*, intptr_t); \ >+ FUNCDEF_PU(sse_ret_t, pixel_ssd, cpu, const pixel*, intptr_t, const >pixel*, intptr_t); \ > FUNCDEF_PU(int, pixel_sa8d, cpu, const pixel*, intptr_t, const pixel*, > intptr_t); \ > FUNCDEF_PU(void, pixel_sad_x3, cpu, const pixel*, const pixel*, const > pixel*, const pixel*, intptr_t, int32_t*); \ > FUNCDEF_PU(void, pixel_sad_x4, cpu, const pixel*, const pixel*, const > pixel*, const pixel*, const pixel*, intptr_t, int32_t*); \ >@@ -48,7 +48,7 @@ > FUNCDEF_PU(void, pixel_sub_ps, cpu, int16_t* a, intptr_t dstride, const > pixel* b0, const pixel* b1, intptr_t sstride0, intptr_t sstride1); \ > FUNCDEF_CHROMA_PU(int, pixel_satd, cpu, const pixel*, intptr_t, const > pixel*, intptr_t); \ > FUNCDEF_CHROMA_PU(int, pixel_sad, cpu, const pixel*, intptr_t, const > pixel*, intptr_t); \ >- FUNCDEF_CHROMA_PU(uint32_t, pixel_ssd_ss, cpu, const int16_t*, intptr_t, >const int16_t*, intptr_t); \ >+ FUNCDEF_CHROMA_PU(sse_ret_t, pixel_ssd_ss, cpu, const int16_t*, intptr_t, >const int16_t*, intptr_t); \ > 
FUNCDEF_CHROMA_PU(void, addAvg, cpu, const int16_t*, const int16_t*, > pixel*, intptr_t, intptr_t, intptr_t); \ > FUNCDEF_CHROMA_PU(int, pixel_ssd_s, cpu, const int16_t*, intptr_t); \ > FUNCDEF_TU_S(int, pixel_ssd_s, cpu, const int16_t*, intptr_t); \ >diff -r 6e7761bdfe23 -r aadec6615a3d source/encoder/rdcost.h >--- a/source/encoder/rdcost.h Wed Sep 30 14:57:15 2015 +0530 >+++ b/source/encoder/rdcost.h Wed Sep 30 15:31:39 2015 +0530 >@@ -91,7 +91,7 @@ > > inline uint64_t calcRdCost(sse_ret_t distortion, uint32_t bits) const > { >-#if X265_DEPTH <= 10 >+#if X265_DEPTH < 10 > X265_CHECK(bits <= (UINT64_MAX - 128) / m_lambda2, > "calcRdCost wrap detected dist: %u, bits %u, lambda: " > X265_LL "\n", > distortion, bits, m_lambda2); >@@ -130,7 +130,7 @@ > > inline sse_ret_t scaleChromaDist(uint32_t plane, sse_ret_t dist) const > { >-#if X265_DEPTH <= 10 >+#if X265_DEPTH < 10 > X265_CHECK(dist <= (UINT64_MAX - 128) / m_chromaDistWeight[plane - 1], > "scaleChromaDist wrap detected dist: %u, lambda: %u\n", > dist, m_chromaDistWeight[plane - 1]); >_______________________________________________ >x265-devel mailing list >[email protected] >https://mailman.videolan.org/listinfo/x265-devel
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
