I understand. If HIGH_BIT_DEPTH = 1, you could still choose the uint32 version of the primitives, but in ssim_end_1(int..) you might get an overflow when the bit depth is > 9, as mentioned in the comments there. I could use a 64-bit int there in all cases to store the intermediate calculations, if you want to remove those if conditions.
On Sat, Oct 5, 2013 at 12:53 AM, Steve Borho <[email protected]> wrote: > > > > On Fri, Oct 4, 2013 at 6:21 AM, Aarthi Thirumalai < > [email protected]> wrote: > >> # HG changeset patch >> # User Aarthi Thirumalai >> # Date 1380885375 -19800 >> # Fri Oct 04 16:46:15 2013 +0530 >> # Node ID 92641f3d3195b8da2275cfc44b1921d8f81a54bc >> # Parent bf14f75b8cf99806c75cdc1a50b28b6cf265e3bd >> primitives: added C primitives to compute SSIM >> >> diff -r bf14f75b8cf9 -r 92641f3d3195 source/common/pixel.cpp >> --- a/source/common/pixel.cpp Fri Oct 04 01:39:22 2013 -0500 >> +++ b/source/common/pixel.cpp Fri Oct 04 16:46:15 2013 +0530 >> @@ -653,6 +653,85 @@ >> } >> } >> >> +/* structural similarity metric */ >> +template<class T1> >> +void ssim_4x4x2_core(const pixel *pix1, intptr_t stride1, const pixel >> *pix2, intptr_t stride2, T1 sums[2][4]) >> +{ >> + for (int z = 0; z < 2; z++) >> + { >> + T1 s1 = 0, s2 = 0, ss = 0, s12 = 0; >> + for (int y = 0; y < 4; y++) >> + { >> + for (int x = 0; x < 4; x++) >> + { >> + T1 a = pix1[x + y * stride1]; >> + T1 b = pix2[x + y * stride2]; >> + s1 += a; >> + s2 += b; >> + ss += a * a; >> + ss += b * b; >> + s12 += a * b; >> + } >> + } >> + >> + sums[z][0] = s1; >> + sums[z][1] = s2; >> + sums[z][2] = ss; >> + sums[z][3] = s12; >> + pix1 += 4; >> + pix2 += 4; >> + } >> +} >> + >> +template<class T1> >> +float ssim_end_4(T1 sum0[5][4], T1 sum1[5][4], int width) >> +{ >> + float ssim = 0.0; >> + >> + for (int i = 0; i < width; i++) >> + { >> + ssim += ssim_end_1(sum0[i][0] + sum0[i + 1][0] + sum1[i][0] + >> sum1[i + 1][0], >> + sum0[i][1] + sum0[i + 1][1] + sum1[i][1] + >> sum1[i + 1][1], >> + sum0[i][2] + sum0[i + 1][2] + sum1[i][2] + >> sum1[i + 1][2], >> + sum0[i][3] + sum0[i + 1][3] + sum1[i][3] + >> sum1[i + 1][3]); >> + } >> + >> + return ssim; >> +} >> + >> +float ssim_end_1(int s1, int s2, int ss, int s12) >> +{ >> + static const uint32_t pixelMax = (1 << X265_DEPTH) - 1; >> + >> + /* Maximum value for 10-bit is: ss*64 = 
(2^10-1)^2*16*4*64 = >> 4286582784, which will overflow in some cases. >> + * s1*s1, s2*s2, and s1*s2 also obtain this value for edge cases: >> ((2^10-1)*16*4)^2 = 4286582784. >> + * Maximum value for 9-bit is: ss*64 = (2^9-1)^2*16*4*64 = >> 1069551616, which will not overflow. */ >> + >> +#if X265_DEPTH > 9 >> +#define type int64_t >> +#else >> +#define type int32_t >> +#endif >> > > We keep cycling on this issue. > > With HIGH_BIT_DEPTH=1, X265_DEPTH evaluates to g_bitDepth (a global > variable) > With HIGH_BIT_DEPTH=0, X265_DEPTH evaluates to 8 > > So with high bit depth builds the bit depth is a runtime option (it could > be 8, 10, or 12). For 8bpp builds the bit depth must be 8. > > HIGH_BIT_DEPTH=0 builds will probably always use the uint32 version of > this function. The HIGH_BIT_DEPTH=1 builds will have to choose between > uint32 or float at runtime. > > >> + >> + static const type ssim_c1 = (type)(.01 * .01 * pixelMax * pixelMax * >> 64 + .5); >> + static const type ssim_c2 = (type)(.03 * .03 * pixelMax * pixelMax * >> 64 * 63 + .5); >> + type vars = ss * 64 - s1 * s1 - s2 * s2; >> + type covar = s12 * 64 - s1 * s2; >> + return (float)(2 * s1 * s2 + ssim_c1) * (float)(2 * covar + ssim_c2) >> + / ((float)(s1 * s1 + s2 * s2 + ssim_c1) * (float)(vars + >> ssim_c2)); >> +} >> + >> +float ssim_end_1(float s1, float s2, float ss, float s12) >> +{ >> + static const float pixelMax = (1 << X265_DEPTH) - 1; >> + static const float ssim_c1 = (float)(.01 * .01 * pixelMax * pixelMax >> * 64); >> + static const float ssim_c2 = (float)(.03 * .03 * pixelMax * pixelMax >> * 64 * 63); >> + float vars = ss * 64 - s1 * s1 - s2 * s2; >> + float covar = s12 * 64 - s1 * s2; >> + >> + return (2 * s1 * s2 + ssim_c1) * (2 * covar + ssim_c2) >> + / ((s1 * s1 + s2 * s2 + ssim_c1) * (vars + ssim_c2)); >> +} >> } // end anonymous namespace >> >> namespace x265 { >> @@ -870,5 +949,10 @@ >> p.scale1D_128to64 = scale1D_128to64; >> p.scale2D_64to32 = scale2D_64to32; >> 
p.frame_init_lowres_core = frame_init_lowres_core; >> + >> + p.ssim_4x4x2_core_float = ssim_4x4x2_core<float>; >> + p.ssim_4x4x2_core_int = ssim_4x4x2_core<int>; >> + p.ssim_end4_float = ssim_end_4<float>; >> + p.ssim_end4_int = ssim_end_4<int>; >> } >> } >> diff -r bf14f75b8cf9 -r 92641f3d3195 source/common/primitives.h >> --- a/source/common/primitives.h Fri Oct 04 01:39:22 2013 -0500 >> +++ b/source/common/primitives.h Fri Oct 04 16:46:15 2013 +0530 >> @@ -235,6 +235,10 @@ >> typedef void (*scale_t)(pixel *dst, pixel *src, intptr_t stride); >> typedef void (*downscale_t)(pixel *src0, pixel *dstf, pixel *dsth, pixel >> *dstv, pixel *dstc, >> intptr_t src_stride, intptr_t dst_stride, >> int width, int height); >> +typedef void (*ssim_4x4x2_core_int_t)(const pixel *pix1, intptr_t >> stride1, const pixel *pix2, intptr_t stride2, int sums[2][4]); >> +typedef void (*ssim_4x4x2_core_float_t)(const pixel *pix1, intptr_t >> stride1, const pixel *pix2, intptr_t stride2, float sums[2][4]); >> +typedef float (*ssim_end4_int_t)(int sum0[5][4], int sum1[5][4], int >> width); >> +typedef float (*ssim_end4_float_t)(float sum0[5][4], float sum1[5][4], >> int width); >> >> /* Define a structure containing function pointers to optimized encoder >> * primitives. Each pointer can reference either an assembly routine, >> @@ -301,6 +305,13 @@ >> scale_t scale1D_128to64; >> scale_t scale2D_64to32; >> downscale_t frame_init_lowres_core; >> + >> +/* If the pixel depth >15 , use the ssim_float primitives to prevent >> overflow, else >> + * ssim_int primitves should be sufficient. */ >> + ssim_4x4x2_core_int_t ssim_4x4x2_core_int; >> + ssim_4x4x2_core_float_t ssim_4x4x2_core_float; >> + ssim_end4_int_t ssim_end4_int; >> + ssim_end4_float_t ssim_end4_float; >> }; >> >> /* This copy of the table is what gets used by the encoder. 
>> _______________________________________________ >> x265-devel mailing list >> [email protected] >> https://mailman.videolan.org/listinfo/x265-devel >> > > > > -- > Steve Borho > > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel > >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
