On Wed, Feb 12, 2020 at 3:55 PM Srikanth Kurapati < srikanth.kurap...@multicorewareinc.com> wrote:
> > Addressed all the comments. > I would like to understand the reason for computing the edge histogram on > pixel depth. Why can't we do it on source depth? > Ans: edge pixel range is determined by x265 build bit-depth and not > source depth and all pictures are modified to match with the x265 build bit > depth and not the source depth. > I understand this. But why is the edge info computed on internal pixel depth? Do you any accuracy difference in computing edge info for internal bit depth? I see this depth conversion as a redundant step here as it is done again during picture to frame conversion phase. I would also like to understand the performance impact of this conversion. Please clarify. > > On Wed, Feb 12, 2020 at 12:24 PM Aruna Matheswaran < > ar...@multicorewareinc.com> wrote: > >> >> >> On Tue, Feb 11, 2020 at 12:01 PM <srikanth.kurap...@multicorewareinc.com> >> wrote: >> >>> # HG changeset patch >>> # User Srikanth Kurapati >>> # Date 1580113966 -19800 >>> # Mon Jan 27 14:02:46 2020 +0530 >>> # Node ID a542cabea72ac65f8e19da1bd2e5e75c871d3d42 >>> # Parent 30d303b38c7bd1733aedd01a3f738fb08ec1488c >>> Fix: Segmentation fault for hist-scenecut option in 16bpp builds. >>> >>> This patch fixes segmentation fault due to bit depth mismatch between >>> source >>> input and x265 builds encountered for hist-scenecut feature. >>> >> >> The patch not just fixes the segfault in high bit-depth encodes, but also >> fixes incorrect edge computation that was happening earlier with 10bit >> source and 8bpp builds. >> Please clarify the same in the commit message. >> >> Also, I would like to understand the reason for computing the edge >> histogram on pixel depth. Why can't we do it on source depth? >> >>> >>> diff -r 30d303b38c7b -r a542cabea72a source/common/common.h >>> --- a/source/common/common.h Wed Jan 29 12:19:07 2020 +0530 >>> +++ b/source/common/common.h Mon Jan 27 14:02:46 2020 +0530 >>> @@ -131,7 +131,6 @@ >>> typedef int64_t ssum2_t; >>> #define SHIFT_TO_BITPLANE 9 >>> #define HISTOGRAM_BINS 1024 >>> -#define SHIFT 1 >>> #else >>> typedef uint8_t pixel; >>> typedef uint16_t sum_t; >>> @@ -140,7 +139,6 @@ >>> typedef int32_t ssum2_t; // Signed sum >>> #define SHIFT_TO_BITPLANE 7 >>> #define HISTOGRAM_BINS 256 >>> -#define SHIFT 0 >>> #endif // if HIGH_BIT_DEPTH >>> >>> #if X265_DEPTH < 10 >>> diff -r 30d303b38c7b -r a542cabea72a source/encoder/encoder.cpp >>> --- a/source/encoder/encoder.cpp Wed Jan 29 12:19:07 2020 +0530 >>> +++ b/source/encoder/encoder.cpp Mon Jan 27 14:02:46 2020 +0530 >>> @@ -220,9 +220,9 @@ >>> { >>> for (int i = 0; i < x265_cli_csps[m_param->internalCsp].planes; >>> i++) >>> { >>> - m_planeSizes[i] = m_param->sourceWidth * >>> m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[i]; >>> - } >>> - uint32_t pixelbytes = m_param->sourceBitDepth > 8 ? 2 : 1; >>> + m_planeSizes[i] = (m_param->sourceWidth >> >>> x265_cli_csps[p->internalCsp].width[i]) * (m_param->sourceHeight >> >>> x265_cli_csps[m_param->internalCsp].height[i]); >>> + } >>> + uint32_t pixelbytes = m_param->internalBitDepth > 8 ? 2 : 1; >>> m_edgePic = X265_MALLOC(pixel, m_planeSizes[0] * pixelbytes); >>> m_edgeHistThreshold = m_param->edgeTransitionThreshold; >>> m_chromaHistThreshold = m_edgeHistThreshold * 10.0; >>> @@ -231,6 +231,23 @@ >>> m_scaledEdgeThreshold = x265_min(m_scaledEdgeThreshold, >>> MAX_SCENECUT_THRESHOLD); >>> m_scaledChromaThreshold = m_chromaHistThreshold * >>> SCENECUT_STRENGTH_FACTOR; >>> m_scaledChromaThreshold = x265_min(m_scaledChromaThreshold, >>> MAX_SCENECUT_THRESHOLD); >>> + if (!(m_param->sourceBitDepth == 8 && m_param->internalBitDepth >>> == 8)) >>> >> >> This extra memory allocation for plane buffers is required only if the >> source picture depth and build depth differs. Why does this check compare >> them to 8-bit always? >> This would allocate memory unnecessarily for 10bpp builds with 10bit >> source. >> >>> + { >>> + int size = m_param->sourceWidth * m_param->sourceHeight; >>> + int hshift = CHROMA_H_SHIFT(m_param->internalCsp); >>> + int vshift = CHROMA_V_SHIFT(m_param->internalCsp); >>> + int widthC = m_param->sourceWidth >> hshift; >>> + int heightC = m_param->sourceHeight >> vshift; >>> + >>> + m_inputPic[0] = X265_MALLOC(pixel, size); >>> + if (m_param->internalCsp != X265_CSP_I400) >>> + { >>> + for (int j = 1; j < 3; j++) >>> + { >>> + m_inputPic[j] = X265_MALLOC(pixel, widthC * >>> heightC); >>> + } >>> + } >>> + } >>> } >>> >>> // Do not allow WPP if only one row or fewer than 3 columns, it is >>> pointless and unstable >>> @@ -874,6 +891,21 @@ >>> { >>> X265_FREE_ZERO(m_edgePic); >>> } >>> + >>> + if (!(m_param->sourceBitDepth == 8 && m_param->internalBitDepth >>> == 8)) >>> + { >>> >> Same here. >> >>> + for (int i = 0; i < 3; i++) >>> + { >>> + if (i == 0) >>> + { >>> + X265_FREE_ZERO(m_inputPic[i]); >>> + } >>> + else if (i >= 1 && m_param->internalCsp != >>> X265_CSP_I400) >>> + { >>> >> This condition needs to be optimized. "i >= 1" is unwanted. >> >>> + X265_FREE_ZERO(m_inputPic[i]); >>> + } >>> + } >>> + } >>> } >>> >>> for (int i = 0; i < m_param->frameNumThreads; i++) >>> @@ -1337,11 +1369,87 @@ >>> >>> bool Encoder::computeHistograms(x265_picture *pic) >>> { >>> - pixel *src = (pixel *) pic->planes[0]; >>> + pixel *src = NULL, *planeV = NULL, *planeU = NULL; >>> + uint32_t widthC, heightC; >>> + int hshift, vshift; >>> + >>> + hshift = CHROMA_H_SHIFT(pic->colorSpace); >>> + vshift = CHROMA_V_SHIFT(pic->colorSpace); >>> + widthC = pic->width >> hshift; >>> + heightC = pic->height >> vshift; >>> + >>> + if (pic->bitDepth == 8 && X265_DEPTH == 8) >>> + { >>> + src = (pixel*)pic->planes[0]; >>> + if (m_param->internalCsp != X265_CSP_I400) >>> + { >>> + planeU = (pixel*)pic->planes[1]; >>> + planeV = (pixel*)pic->planes[2]; >>> + } >>> + } >>> + else if (pic->bitDepth == 8 && X265_DEPTH > 8) >>> + { >>> + int shift = (X265_DEPTH - 8); >>> + uint8_t *yChar, *uChar, *vChar; >>> + >>> + yChar = (uint8_t*)pic->planes[0]; >>> + primitives.planecopy_cp(yChar, pic->stride[0] / sizeof(*yChar), >>> m_inputPic[0], pic->stride[0] / sizeof(*yChar), pic->width, pic->height, >>> shift); >>> + >>> + if (m_param->internalCsp != X265_CSP_I400) >>> + { >>> + uChar = (uint8_t*)pic->planes[1]; >>> + vChar = (uint8_t*)pic->planes[2]; >>> + primitives.planecopy_cp(uChar, pic->stride[1] / >>> sizeof(*uChar), m_inputPic[1], pic->stride[1] / sizeof(*uChar), widthC, >>> heightC, shift); >>> + primitives.planecopy_cp(vChar, pic->stride[2] / >>> sizeof(*vChar), m_inputPic[2], pic->stride[2] / sizeof(*vChar), widthC, >>> heightC, shift); >>> + } >>> + } >>> + else >>> + { >>> + uint16_t *yShort, *uShort, *vShort; >>> + /* mask off bits that are supposed to be zero */ >>> + uint16_t mask = (1 << X265_DEPTH) - 1; >>> + int shift = abs(pic->bitDepth - X265_DEPTH); >>> >> Again, this shifting is unnecessary for 10bit builds + 10bit source. >> >>> + >>> + yShort = (uint16_t*)pic->planes[0]; >>> + if (pic->bitDepth > X265_DEPTH) >>> + { >>> + /* shift right and mask pixels to final size */ >>> + primitives.planecopy_sp(yShort, pic->stride[0] / >>> sizeof(*yShort), m_inputPic[0], pic->stride[0] / sizeof(*yShort), >>> pic->width, pic->height, shift, mask); >>> + } >>> + else /* Case for (pic.bitDepth <= X265_DEPTH) */ >>> + { >>> + /* shift left and mask pixels to final size */ >>> + primitives.planecopy_sp_shl(yShort, pic->stride[0] / >>> sizeof(*yShort), m_inputPic[0], pic->stride[0] / sizeof(*yShort), >>> pic->width, pic->height, shift, mask); >>> + } >>> + >>> + if (m_param->internalCsp != X265_CSP_I400) >>> + { >>> + uShort = (uint16_t*)pic->planes[1]; >>> + vShort = (uint16_t*)pic->planes[2]; >>> + >>> + if (pic->bitDepth > X265_DEPTH) >>> + { >>> + primitives.planecopy_sp(uShort, pic->stride[1] / >>> sizeof(*uShort), m_inputPic[1], pic->stride[1] / sizeof(*uShort), widthC, >>> heightC, shift, mask); >>> + primitives.planecopy_sp(vShort, pic->stride[2] / >>> sizeof(*vShort), m_inputPic[2], pic->stride[2] / sizeof(*vShort), widthC, >>> heightC, shift, mask); >>> + } >>> + else /* Case for (pic.bitDepth <= X265_DEPTH) */ >>> + { >>> + primitives.planecopy_sp_shl(uShort, pic->stride[1] / >>> sizeof(*uShort), m_inputPic[1], pic->stride[1] / sizeof(*uShort), widthC, >>> heightC, shift, mask); >>> + primitives.planecopy_sp_shl(vShort, pic->stride[2] / >>> sizeof(*vShort), m_inputPic[2], pic->stride[2] / sizeof(*vShort), widthC, >>> heightC, shift, mask); >>> + } >>> + } >>> + } >>> + >>> + if (!(pic->bitDepth == 8 && X265_DEPTH == 8)) >>> + { >>> + src = m_inputPic[0]; >>> + planeU = m_inputPic[1]; >>> + planeV = m_inputPic[2]; >>> >> Redundant code. >> >>> + } >>> + >>> size_t bufSize = sizeof(pixel) * m_planeSizes[0]; >>> int32_t planeCount = x265_cli_csps[m_param->internalCsp].planes; >>> - int32_t numBytes = m_param->sourceBitDepth > 8 ? 2 : 1; >>> - memset(m_edgePic, 0, bufSize * numBytes); >>> + memset(m_edgePic, 0, bufSize); >>> >>> if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, >>> pic->width, false, 1)) >>> { >>> @@ -1350,10 +1458,9 @@ >>> } >>> >>> pixel pixelVal; >>> - int64_t size = pic->height * (pic->stride[0] >> SHIFT); >>> int32_t *edgeHist = m_curEdgeHist; >>> memset(edgeHist, 0, 2 * sizeof(int32_t)); >>> - for (int64_t i = 0; i < size; i++) >>> + for (int64_t i = 0; i < m_planeSizes[0]; i++) >>> { >>> if (!m_edgePic[i]) >>> edgeHist[0]++; >>> @@ -1364,16 +1471,12 @@ >>> if (pic->colorSpace != X265_CSP_I400) >>> { >>> /* U Histogram Calculation */ >>> - int32_t HeightL = (pic->height >> >>> x265_cli_csps[pic->colorSpace].height[1]); >>> - size = HeightL * (pic->stride[1] >> SHIFT); >>> int32_t *uHist = m_curUVHist[0]; >>> - pixel *chromaPlane = (pixel *) pic->planes[1]; >>> - >>> memset(uHist, 0, HISTOGRAM_BINS * sizeof(int32_t)); >>> >>> - for (int64_t i = 0; i < size; i++) >>> - { >>> - pixelVal = chromaPlane[i]; >>> + for (int64_t i = 0; i < m_planeSizes[1]; i++) >>> + { >>> + pixelVal = planeU[i]; >>> uHist[pixelVal]++; >>> } >>> >>> @@ -1381,15 +1484,12 @@ >>> if (planeCount == 3) >>> { >>> pixelVal = 0; >>> - int32_t heightV = (pic->height >> >>> x265_cli_csps[pic->colorSpace].height[2]); >>> - size = heightV * (pic->stride[2] >> SHIFT); >>> int32_t *vHist = m_curUVHist[1]; >>> - chromaPlane = (pixel *) pic->planes[2]; >>> - >>> memset(vHist, 0, HISTOGRAM_BINS * sizeof(int32_t)); >>> - for (int64_t i = 0; i < size; i++) >>> + >>> + for (int64_t i = 0; i < m_planeSizes[2]; i++) >>> { >>> - pixelVal = chromaPlane[i]; >>> + pixelVal = planeV[i]; >>> vHist[pixelVal]++; >>> } >>> for (int i = 0; i < HISTOGRAM_BINS; i++) >>> diff -r 30d303b38c7b -r a542cabea72a source/encoder/encoder.h >>> --- a/source/encoder/encoder.h Wed Jan 29 12:19:07 2020 +0530 >>> +++ b/source/encoder/encoder.h Mon Jan 27 14:02:46 2020 +0530 >>> @@ -255,6 +255,7 @@ >>> >>> /* For histogram based scene-cut detection */ >>> pixel* m_edgePic; >>> + pixel* m_inputPic[3]; >>> int32_t m_curUVHist[2][HISTOGRAM_BINS]; >>> int32_t m_curMaxUVHist[HISTOGRAM_BINS]; >>> int32_t m_prevMaxUVHist[HISTOGRAM_BINS]; >>> _______________________________________________ >>> x265-devel mailing list >>> x265-devel@videolan.org >>> https://mailman.videolan.org/listinfo/x265-devel >>> >> >> >> -- >> Regards, >> *Aruna Matheswaran,* >> Video Codec Engineer, >> Media & AI analytics BU, >> >> >> >> _______________________________________________ >> x265-devel mailing list >> x265-devel@videolan.org >> https://mailman.videolan.org/listinfo/x265-devel >> > > > -- > *With Regards,* > *Srikanth Kurapati.* > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel > -- Regards, *Aruna Matheswaran,* Video Codec Engineer, Media & AI analytics BU,
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel