On Nov 7, 2013 11:24 PM, "Steve Borho" <st...@borho.org> wrote: > > > > > On Thu, Nov 7, 2013 at 5:17 AM, Aarthi Thirumalai < aar...@multicorewareinc.com> wrote: >> >> # HG changeset patch >> # User Aarthi Thirumalai >> # Date 1383823017 -19800 >> # Thu Nov 07 16:46:57 2013 +0530 >> # Node ID 0823d243b4e2f9eee6323a9750eeb811bccbad77 >> # Parent 85002898f5b4308547af6ce464bbdff5f360fa13 >> aq: bug fix.extend border of TComPic::m_origPicYuv to a multiple of 16 >> >> diff -r 85002898f5b4 -r 0823d243b4e2 source/Lib/TLibCommon/TComPicYuv.cpp >> --- a/source/Lib/TLibCommon/TComPicYuv.cpp Thu Nov 07 14:31:05 2013 +0530 >> +++ b/source/Lib/TLibCommon/TComPicYuv.cpp Thu Nov 07 16:46:57 2013 +0530 >> @@ -345,6 +345,10 @@ >> /* width and height - without padsize */ >> int width = m_picWidth - padx; >> int height = m_picHeight - pady; >> + uint8_t rem = m_picWidth % 16; >> + padx += rem ? 16 - rem : 0; >> + rem = m_picHeight % 16; >> + pady += rem ? 16 - rem : 0; > > > If the row width was 17, padx would already be 3 in order to pad out to the nearest multiple of 4. You would be doing: padx += 15, which would result in a pad of 18, which is more than what you bargained for. I believe the += should just be =. Also, Gopu just added a new else {} clause that is missed here, so I've hoisted this operation higher in the file. > >> When the row width is 17, padx is already 3 and m_picWidth would be 20 at this stage, so this does padx += 12, not 15. >> >> // Manually copy pixels to up-size them >> for (int r = 0; r < height; r++) >> @@ -475,10 +479,12 @@ >> uint8_t *v = (uint8_t*)pic.planes[2]; >> >> /* width and height - without padsize */ >> - int width = (m_picWidth * (pic.bitDepth > 8 ? 2 : 1)) - padx; >> - int height = m_picHeight - pady; >> - >> - // copy pixels by row into encoder's buffer >> + int width = (m_picWidth * (pic.bitDepth > 8 ? 2 : 1)) - padx; >> + int height = m_picHeight - pady; >> + uint8_t rem = m_picWidth % 16; >> + padx += rem ? 
16 - rem : 0; >> + rem = m_picHeight % 16; >> + pady += rem ? 16 - rem : 0; > > > still white-space alignment problems here, but this code has been removed > >> >> for (int r = 0; r < height; r++) >> { >> memcpy(Y, y, width); >> diff -r 85002898f5b4 -r 0823d243b4e2 source/common/lowres.cpp >> --- a/source/common/lowres.cpp Thu Nov 07 14:31:05 2013 +0530 >> +++ b/source/common/lowres.cpp Thu Nov 07 16:46:57 2013 +0530 >> @@ -150,31 +150,6 @@ >> intraMbs[i] = 0; >> } >> >> - int y, extWidth = (orig->getWidth() + X265_LOWRES_CU_SIZE - 1); >> - int srcStride = orig->getStride(); >> - int srcHeight = orig->getHeight(); >> - int srcWidth = orig->getWidth(); >> - Pel *src; >> - src = orig->getLumaAddr(); >> - >> - /* extending right margin */ >> - if (2 * width > srcWidth) >> - { >> - for (y = 0; y < srcHeight; y++) >> - { >> - ::memset(src + srcWidth, src[srcWidth - 1], sizeof(Pel) * (X265_LOWRES_CU_SIZE - 1)); >> - src += srcStride; >> - } >> - } >> - >> - /* extending bottom margin */ >> - src = orig->getLumaAddr() + (srcHeight - 1) * srcStride; >> - >> - for (y = 1; y <= 2 * lines - srcHeight; y++) >> - { >> - ::memcpy(src + y * srcStride, src, sizeof(Pel) * (extWidth)); >> - } >> - >> /* downscale and generate 4 HPEL planes for lookahead */ >> primitives.frame_init_lowres_core(orig->getLumaAddr(), >> lowresPlane[0], lowresPlane[1], lowresPlane[2], lowresPlane[3], >> diff -r 85002898f5b4 -r 0823d243b4e2 source/encoder/frameencoder.cpp >> --- a/source/encoder/frameencoder.cpp Thu Nov 07 14:31:05 2013 +0530 >> +++ b/source/encoder/frameencoder.cpp Thu Nov 07 16:46:57 2013 +0530 >> @@ -1149,9 +1149,8 @@ >> { >> /* Derive qpOffet for each CU by averaging offsets for all 16x16 blocks in the cu. 
*/ >> double qp_offset = 0; >> - int blockSize = g_maxCUWidth >> 2; >> - int maxBlockCols = (pic->getPicYuvOrg()->getWidth() + (blockSize - 1)) / blockSize; >> - int maxBlockRows = (pic->getPicYuvOrg()->getHeight() + (blockSize - 1)) / blockSize; >> + int maxBlockCols = (pic->getPicYuvOrg()->getWidth() + (16 - 1)) / 16; >> + int maxBlockRows = (pic->getPicYuvOrg()->getHeight() + (16 - 1)) / 16; > > > much better > >> >> int block_y = (cuAddr / pic->getPicSym()->getFrameWidthInCU()) * 4; >> int block_x = (cuAddr * 4) - block_y * pic->getPicSym()->getFrameWidthInCU(); >> int cnt = 0; >> _______________________________________________ >> x265-devel mailing list >> x265-devel@videolan.org >> https://mailman.videolan.org/listinfo/x265-devel > > > > > -- > Steve Borho > > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel >
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel