Ashok is already working on pre-calculating these inside-picture flags, along with further refactoring. Once his refactors are in, we can check whether padding improves performance.
In fact, very likely he already has a local version of the logic in this patch. On Mon, Aug 25, 2014 at 10:46 PM, Steve Borho <st...@borho.org> wrote: > On 08/25, Satoshi Nakagawa wrote: > > # HG changeset patch > > # User Satoshi Nakagawa <nakagawa...@oki.com> > > # Date 1408956792 -32400 > > # Mon Aug 25 17:53:12 2014 +0900 > > # Node ID 7145e57c722a94a06faec33e3041442032a1892f > > # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 > > replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx] > > Queued for default, thanks. > > There seems to be a lot of logic that checks for 'inside picture > bounds'. It seems like we could save a lot of CPU cycles if we padded > input pictures to the max-ctu size instead of the min-ctu size and > adjusted the conformance window accordingly. > > > diff -r 6e6756f94b27 -r 7145e57c722a source/Lib/TLibCommon/TComDataCU.cpp > > --- a/source/Lib/TLibCommon/TComDataCU.cpp Fri Aug 22 15:53:34 2014 > -0500 > > +++ b/source/Lib/TLibCommon/TComDataCU.cpp Mon Aug 25 17:53:12 2014 > +0900 > > @@ -816,12 +816,12 @@ > > > > TComDataCU* TComDataCU::getPUAboveRight(uint32_t& arPartUnitIdx, > uint32_t curPartUnitIdx) > > { > > + if ((m_pic->getCU(m_cuAddr)->getCUPelX() + > g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) >= > m_slice->m_sps->picWidthInLumaSamples) > > + return NULL; > > + > > uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx]; > > uint32_t numPartInCUSize = m_pic->getNumPartInCUSize(); > > > > - if ((m_pic->getCU(m_cuAddr)->getCUPelX() + > g_rasterToPelX[absPartIdxRT] + UNIT_SIZE) >= > m_slice->m_sps->picWidthInLumaSamples) > > - return NULL; > > - > > if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 1, > numPartInCUSize)) > > { > > if (!RasterAddress::isZeroRow(absPartIdxRT, numPartInCUSize)) > > @@ -857,14 +857,11 @@ > > > > TComDataCU* TComDataCU::getPUBelowLeft(uint32_t& blPartUnitIdx, > uint32_t curPartUnitIdx) > > { > > - uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; > > + if 
((m_pic->getCU(m_cuAddr)->getCUPelY() + > g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) >= > m_slice->m_sps->picHeightInLumaSamples) > > + return NULL; > > > > - if ((m_pic->getCU(m_cuAddr)->getCUPelY() + > g_rasterToPelY[absPartIdxLB] + UNIT_SIZE) >= > m_slice->m_sps->picHeightInLumaSamples) > > - { > > - return NULL; > > - } > > - > > - uint32_t numPartInCUSize = m_pic->getNumPartInCUSize(); > > + uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; > > + uint32_t numPartInCUSize = m_pic->getNumPartInCUSize(); > > > > if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - 1, > numPartInCUSize)) > > { > > @@ -895,15 +892,14 @@ > > > > TComDataCU* TComDataCU::getPUBelowLeftAdi(uint32_t& blPartUnitIdx, > uint32_t curPartUnitIdx, uint32_t partUnitOffset) > > { > > - uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; > > - > > - if ((m_pic->getCU(m_cuAddr)->getCUPelY() + > g_rasterToPelY[absPartIdxLB] + (partUnitOffset << LOG2_UNIT_SIZE)) >= > > + if ((m_pic->getCU(m_cuAddr)->getCUPelY() + > g_zscanToPelY[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= > > m_slice->m_sps->picHeightInLumaSamples) > > { > > return NULL; > > } > > > > - uint32_t numPartInCUSize = m_pic->getNumPartInCUSize(); > > + uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; > > + uint32_t numPartInCUSize = m_pic->getNumPartInCUSize(); > > > > if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - > partUnitOffset, numPartInCUSize)) > > { > > @@ -938,14 +934,13 @@ > > > > TComDataCU* TComDataCU::getPUAboveRightAdi(uint32_t& arPartUnitIdx, > uint32_t curPartUnitIdx, uint32_t partUnitOffset) > > { > > - uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx]; > > - > > - if ((m_pic->getCU(m_cuAddr)->getCUPelX() + > g_rasterToPelX[absPartIdxRT] + (partUnitOffset << LOG2_UNIT_SIZE)) >= > > + if ((m_pic->getCU(m_cuAddr)->getCUPelX() + > g_zscanToPelX[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= > > m_slice->m_sps->picWidthInLumaSamples) > > { > > return 
NULL; > > } > > > > + uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx]; > > uint32_t numPartInCUSize = m_pic->getNumPartInCUSize(); > > > > if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - > partUnitOffset, numPartInCUSize)) > > @@ -954,7 +949,7 @@ > > { > > if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - > numPartInCUSize + partUnitOffset]) > > { > > - uint32_t absZorderCUIdx = > g_zscanToRaster[m_absIdxInLCU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) > - 1; > > + uint32_t absZorderCUIdx = > g_zscanToRaster[m_absIdxInLCU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) > - 1; > > arPartUnitIdx = g_rasterToZscan[absPartIdxRT - > numPartInCUSize + partUnitOffset]; > > if (RasterAddress::isEqualRowOrCol(absPartIdxRT, > absZorderCUIdx, numPartInCUSize)) > > { > > @@ -1817,48 +1812,42 @@ > > } > > // TMVP always enabled > > { > > - //>> MTK colocated-RightBottom > > + MV colmv; > > uint32_t partIdxRB; > > > > deriveRightBottomIdx(puIdx, partIdxRB); > > > > - uint32_t absPartIdxTmp = g_zscanToRaster[partIdxRB]; > > - uint32_t numPartInCUSize = m_pic->getNumPartInCUSize(); > > - > > - MV colmv; > > - int refIdx; > > int lcuIdx = -1; > > > > - if ((m_pic->getCU(m_cuAddr)->getCUPelX() + > g_rasterToPelX[absPartIdxTmp] + UNIT_SIZE) >= > m_slice->m_sps->picWidthInLumaSamples) // image boundary check > > + // image boundary check > > + if (m_pic->getCU(m_cuAddr)->getCUPelX() + > g_zscanToPelX[partIdxRB] + UNIT_SIZE < > m_slice->m_sps->picWidthInLumaSamples && > > + m_pic->getCU(m_cuAddr)->getCUPelY() + > g_zscanToPelY[partIdxRB] + UNIT_SIZE < > m_slice->m_sps->picHeightInLumaSamples) > > { > > - } > > - else if ((m_pic->getCU(m_cuAddr)->getCUPelY() + > g_rasterToPelY[absPartIdxTmp] + UNIT_SIZE) >= > m_slice->m_sps->picHeightInLumaSamples) > > - { > > - } > > - else > > - { > > - if ((absPartIdxTmp % numPartInCUSize < numPartInCUSize - 1) > && // is not at the last column of LCU > > - (absPartIdxTmp / numPartInCUSize < numPartInCUSize - > 1)) // is not 
at the last row of LCU > > + uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB]; > > + uint32_t numPartInCUSize = m_pic->getNumPartInCUSize(); > > + bool bNotLastCol = RasterAddress::lessThanCol(absPartIdxRB, > numPartInCUSize - 1, numPartInCUSize); // is not at the last column of LCU > > + bool bNotLastRow = RasterAddress::lessThanRow(absPartIdxRB, > numPartInCUSize - 1, numPartInCUSize); // is not at the last row of LCU > > + > > + if (bNotLastCol && bNotLastRow) > > { > > - absPartAddr = g_rasterToZscan[absPartIdxTmp + > numPartInCUSize + 1]; > > + absPartAddr = g_rasterToZscan[absPartIdxRB + > numPartInCUSize + 1]; > > lcuIdx = getAddr(); > > } > > - else if (absPartIdxTmp % numPartInCUSize < numPartInCUSize > - 1) // is not at the last column of LCU But is last row of LCU > > - absPartAddr = g_rasterToZscan[(absPartIdxTmp + > numPartInCUSize + 1) % m_pic->getNumPartInCU()]; > > - else if (absPartIdxTmp / numPartInCUSize < numPartInCUSize > - 1) // is not at the last row of LCU But is last column of LCU > > + else if (bNotLastCol) > > + absPartAddr = g_rasterToZscan[(absPartIdxRB + > numPartInCUSize + 1) & (numPartInCUSize - 1)]; > > + else if (bNotLastRow) > > { > > - absPartAddr = g_rasterToZscan[absPartIdxTmp + 1]; > > + absPartAddr = g_rasterToZscan[absPartIdxRB + 1]; > > lcuIdx = getAddr() + 1; > > } > > - else //is the right bottom corner of LCU > > + else // is the right bottom corner of LCU > > absPartAddr = 0; > > } > > > > - refIdx = 0; > > + int refIdx = 0; > > uint32_t partIdxCenter; > > uint32_t curLCUIdx = getAddr(); > > int dir = 0; > > - uint32_t arrayAddr = count; > > xDeriveCenterIdx(puIdx, partIdxCenter); > > bool bExistMV = lcuIdx >= 0 && xGetColMVP(REF_PIC_LIST_0, > lcuIdx, absPartAddr, colmv, refIdx); > > if (!bExistMV) > > @@ -1866,7 +1855,7 @@ > > if (bExistMV) > > { > > dir |= 1; > > - mvFieldNeighbours[arrayAddr][0].setMvField(colmv, refIdx); > > + mvFieldNeighbours[count][0].setMvField(colmv, refIdx); > > } > > > > if (isInterB) > > 
@@ -1878,13 +1867,13 @@ > > if (bExistMV) > > { > > dir |= 2; > > - mvFieldNeighbours[arrayAddr][1].setMvField(colmv, > refIdx); > > + mvFieldNeighbours[count][1].setMvField(colmv, refIdx); > > } > > } > > > > if (dir != 0) > > { > > - interDirNeighbours[arrayAddr] = dir; > > + interDirNeighbours[count] = dir; > > > > count++; > > > > @@ -1893,8 +1882,6 @@ > > } > > } > > > > - uint32_t arrayAddr = count; > > - > > if (isInterB) > > { > > const uint32_t cutoff = count * (count - 1); > > @@ -1917,13 +1904,13 @@ > > int refPOCL1 = m_slice->m_refPOCList[1][refIdxL1]; > > if (!(refPOCL0 == refPOCL1 && > mvFieldNeighbours[i][0].mv == mvFieldNeighbours[j][1].mv)) > > { > > - > mvFieldNeighbours[arrayAddr][0].setMvField(mvFieldNeighbours[i][0].mv, > refIdxL0); > > - > mvFieldNeighbours[arrayAddr][1].setMvField(mvFieldNeighbours[j][1].mv, > refIdxL1); > > - interDirNeighbours[arrayAddr] = 3; > > + > mvFieldNeighbours[count][0].setMvField(mvFieldNeighbours[i][0].mv, > refIdxL0); > > + > mvFieldNeighbours[count][1].setMvField(mvFieldNeighbours[j][1].mv, > refIdxL1); > > + interDirNeighbours[count] = 3; > > > > - arrayAddr++; > > + count++; > > > > - if (arrayAddr == maxNumMergeCand) > > + if (count == maxNumMergeCand) > > return; > > } > > } > > @@ -1932,18 +1919,18 @@ > > int numRefIdx = (isInterB) ? 
X265_MIN(m_slice->m_numRefIdx[0], > m_slice->m_numRefIdx[1]) : m_slice->m_numRefIdx[0]; > > int r = 0; > > int refcnt = 0; > > - while (arrayAddr < maxNumMergeCand) > > + while (count < maxNumMergeCand) > > { > > - interDirNeighbours[arrayAddr] = 1; > > - mvFieldNeighbours[arrayAddr][0].setMvField(MV(0, 0), r); > > + interDirNeighbours[count] = 1; > > + mvFieldNeighbours[count][0].setMvField(MV(0, 0), r); > > > > if (isInterB) > > { > > - interDirNeighbours[arrayAddr] = 3; > > - mvFieldNeighbours[arrayAddr][1].setMvField(MV(0, 0), r); > > + interDirNeighbours[count] = 3; > > + mvFieldNeighbours[count][1].setMvField(MV(0, 0), r); > > } > > > > - arrayAddr++; > > + count++; > > > > if (refcnt == numRefIdx - 1) > > r = 0; > > @@ -2078,45 +2065,40 @@ > > > > // TMVP always enabled > > { > > - // Get Temporal Motion Predictor > > - int refIdxCol = refIdx; > > - MV colmv; > > + uint32_t absPartAddr = m_absIdxInLCU + partAddr; > > + MV colmv; > > uint32_t partIdxRB; > > - uint32_t absPartIdx; > > - uint32_t absPartAddr; > > > > deriveRightBottomIdx(partIdx, partIdxRB); > > - absPartAddr = m_absIdxInLCU + partAddr; > > > > //---- co-located RightBottom Temporal Predictor (H) ---// > > - absPartIdx = g_zscanToRaster[partIdxRB]; > > int lcuIdx = -1; > > - if ((m_pic->getCU(m_cuAddr)->getCUPelX() + > g_rasterToPelX[absPartIdx] + UNIT_SIZE) >= > m_slice->m_sps->picWidthInLumaSamples) // image boundary check > > + > > + // image boundary check > > + if (m_pic->getCU(m_cuAddr)->getCUPelX() + > g_zscanToPelX[partIdxRB] + UNIT_SIZE < > m_slice->m_sps->picWidthInLumaSamples && > > + m_pic->getCU(m_cuAddr)->getCUPelY() + > g_zscanToPelY[partIdxRB] + UNIT_SIZE < > m_slice->m_sps->picHeightInLumaSamples) > > { > > - } > > - else if ((m_pic->getCU(m_cuAddr)->getCUPelY() + > g_rasterToPelY[absPartIdx] + UNIT_SIZE) >= > m_slice->m_sps->picHeightInLumaSamples) > > - { > > - } > > - else > > - { > > + uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB]; > > uint32_t numPartInCUSize = 
m_pic->getNumPartInCUSize(); > > - if ((absPartIdx % numPartInCUSize < numPartInCUSize - 1) && > // is not at the last column of LCU > > - (absPartIdx / numPartInCUSize < numPartInCUSize - 1)) > // is not at the last row of LCU > > + bool bNotLastCol = RasterAddress::lessThanCol(absPartIdxRB, > numPartInCUSize - 1, numPartInCUSize); // is not at the last column of LCU > > + bool bNotLastRow = RasterAddress::lessThanRow(absPartIdxRB, > numPartInCUSize - 1, numPartInCUSize); // is not at the last row of LCU > > + > > + if (bNotLastCol && bNotLastRow) > > { > > - absPartAddr = g_rasterToZscan[absPartIdx + > numPartInCUSize + 1]; > > + absPartAddr = g_rasterToZscan[absPartIdxRB + > numPartInCUSize + 1]; > > lcuIdx = getAddr(); > > } > > - else if (absPartIdx % numPartInCUSize < numPartInCUSize - > 1) // is not at the last column of LCU But is last row of LCU > > - absPartAddr = g_rasterToZscan[(absPartIdx + > numPartInCUSize + 1) % m_pic->getNumPartInCU()]; > > - else if (absPartIdx / numPartInCUSize < numPartInCUSize - > 1) // is not at the last row of LCU But is last column of LCU > > + else if (bNotLastCol) > > + absPartAddr = g_rasterToZscan[(absPartIdxRB + > numPartInCUSize + 1) & (numPartInCUSize - 1)]; > > + else if (bNotLastRow) > > { > > - absPartAddr = g_rasterToZscan[absPartIdx + 1]; > > + absPartAddr = g_rasterToZscan[absPartIdxRB + 1]; > > lcuIdx = getAddr() + 1; > > } > > else // is the right bottom corner of LCU > > absPartAddr = 0; > > } > > - if (lcuIdx >= 0 && xGetColMVP(picList, lcuIdx, absPartAddr, > colmv, refIdxCol)) > > + if (lcuIdx >= 0 && xGetColMVP(picList, lcuIdx, absPartAddr, > colmv, refIdx)) > > { > > amvpCand[num++] = colmv; > > mvc[numMvc++] = colmv; > > @@ -2126,7 +2108,7 @@ > > uint32_t partIdxCenter; > > uint32_t curLCUIdx = getAddr(); > > xDeriveCenterIdx(partIdx, partIdxCenter); > > - if (xGetColMVP(picList, curLCUIdx, partIdxCenter, colmv, > refIdxCol)) > > + if (xGetColMVP(picList, curLCUIdx, partIdxCenter, colmv, > refIdx)) > > { 
> > amvpCand[num++] = colmv; > > mvc[numMvc++] = colmv; > > diff -r 6e6756f94b27 -r 7145e57c722a source/Lib/TLibCommon/TComRom.cpp > > --- a/source/Lib/TLibCommon/TComRom.cpp Fri Aug 22 15:53:34 2014 > -0500 > > +++ b/source/Lib/TLibCommon/TComRom.cpp Mon Aug 25 17:53:12 2014 > +0900 > > @@ -117,8 +117,46 @@ > > uint32_t g_maxCUDepth = NUM_CU_DEPTH - 1; > > uint32_t g_zscanToRaster[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, }; > > uint32_t g_rasterToZscan[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, }; > > -uint32_t g_rasterToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, }; > > -uint32_t g_rasterToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = { 0, }; > > + > > +const uint8_t g_zscanToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = > > +{ > > + 0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12, > > + 16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28, > > + 0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12, > > + 16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28, > > + 32, 36, 32, 36, 40, 44, 40, 44, 32, 36, 32, 36, 40, 44, 40, 44, > > + 48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60, > > + 32, 36, 32, 36, 40, 44, 40, 44, 32, 36, 32, 36, 40, 44, 40, 44, > > + 48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60, > > + 0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12, > > + 16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28, > > + 0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12, > > + 16, 20, 16, 20, 24, 28, 24, 28, 16, 20, 16, 20, 24, 28, 24, 28, > > + 32, 36, 32, 36, 40, 44, 40, 44, 32, 36, 32, 36, 40, 44, 40, 44, > > + 48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60, > > + 32, 36, 32, 36, 40, 44, 40, 44, 32, 36, 32, 36, 40, 44, 40, 44, > > + 48, 52, 48, 52, 56, 60, 56, 60, 48, 52, 48, 52, 56, 60, 56, 60 > > +}; > > + > > +const uint8_t g_zscanToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W] = > > +{ > > + 0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12, > > + 0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12, > > + 16, 
16, 20, 20, 16, 16, 20, 20, 24, 24, 28, 28, 24, 24, 28, 28, > > + 16, 16, 20, 20, 16, 16, 20, 20, 24, 24, 28, 28, 24, 24, 28, 28, > > + 0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12, > > + 0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12, > > + 16, 16, 20, 20, 16, 16, 20, 20, 24, 24, 28, 28, 24, 24, 28, 28, > > + 16, 16, 20, 20, 16, 16, 20, 20, 24, 24, 28, 28, 24, 24, 28, 28, > > + 32, 32, 36, 36, 32, 32, 36, 36, 40, 40, 44, 44, 40, 40, 44, 44, > > + 32, 32, 36, 36, 32, 32, 36, 36, 40, 40, 44, 44, 40, 40, 44, 44, > > + 48, 48, 52, 52, 48, 48, 52, 52, 56, 56, 60, 60, 56, 56, 60, 60, > > + 48, 48, 52, 52, 48, 48, 52, 52, 56, 56, 60, 60, 56, 56, 60, 60, > > + 32, 32, 36, 36, 32, 32, 36, 36, 40, 40, 44, 44, 40, 40, 44, 44, > > + 32, 32, 36, 36, 32, 32, 36, 36, 40, 40, 44, 44, 40, 40, 44, 44, > > + 48, 48, 52, 52, 48, 48, 52, 52, 56, 56, 60, 60, 56, 56, 60, 60, > > + 48, 48, 52, 52, 48, 48, 52, 52, 56, 56, 60, 60, 56, 56, 60, 60 > > +}; > > > > const uint32_t g_puOffset[8] = { 0, 8, 4, 4, 2, 10, 1, 5 }; > > > > @@ -151,36 +189,6 @@ > > } > > } > > > > -void initRasterToPelXY(uint32_t maxFullDepth) > > -{ > > - uint32_t i; > > - > > - uint32_t* tempX = &g_rasterToPelX[0]; > > - uint32_t* tempY = &g_rasterToPelY[0]; > > - > > - uint32_t numPartInCUSize = 1 << maxFullDepth; > > - uint32_t numPartitions = 1 << maxFullDepth * 2; > > - > > - tempX[0] = 0; > > - tempX++; > > - for (i = 1; i < numPartInCUSize; i++) > > - { > > - tempX[0] = tempX[-1] + UNIT_SIZE; > > - tempX++; > > - } > > - > > - for (i = 1; i < numPartInCUSize; i++) > > - { > > - memcpy(tempX, tempX - numPartInCUSize, sizeof(uint32_t) * > numPartInCUSize); > > - tempX += numPartInCUSize; > > - } > > - > > - for (i = 1; i < numPartitions; i++) > > - { > > - tempY[i] = (i >> maxFullDepth) * UNIT_SIZE; > > - } > > -} > > - > > const int16_t g_lumaFilter[4][NTAPS_LUMA] = > > { > > { 0, 0, 0, 64, 0, 0, 0, 0 }, > > diff -r 6e6756f94b27 -r 7145e57c722a source/Lib/TLibCommon/TComRom.h > > --- 
a/source/Lib/TLibCommon/TComRom.h Fri Aug 22 15:53:34 2014 -0500 > > +++ b/source/Lib/TLibCommon/TComRom.h Mon Aug 25 17:53:12 2014 +0900 > > @@ -82,10 +82,8 @@ > > void initRasterToZscan(uint32_t maxFullDepth); > > > > // conversion of partition index to picture pel position > > -extern uint32_t g_rasterToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W]; > > -extern uint32_t g_rasterToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W]; > > - > > -void initRasterToPelXY(uint32_t maxFullDepth); > > +extern const uint8_t g_zscanToPelX[MAX_NUM_SPU_W * MAX_NUM_SPU_W]; > > +extern const uint8_t g_zscanToPelY[MAX_NUM_SPU_W * MAX_NUM_SPU_W]; > > > > // global variable (LCU width/height, max. CU depth) > > extern uint32_t g_maxLog2CUSize; > > diff -r 6e6756f94b27 -r 7145e57c722a source/Lib/TLibCommon/TComYuv.h > > --- a/source/Lib/TLibCommon/TComYuv.h Fri Aug 22 15:53:34 2014 -0500 > > +++ b/source/Lib/TLibCommon/TComYuv.h Mon Aug 25 17:53:12 2014 +0900 > > @@ -80,18 +80,18 @@ > > int m_vChromaShift; > > int m_csp; > > > > - int getChromaAddrOffset(uint32_t partUnitIdx, uint32_t width) > > + int getChromaAddrOffset(uint32_t idx, uint32_t width) > > { > > - int blkX = g_rasterToPelX[g_zscanToRaster[partUnitIdx]] >> > m_hChromaShift; > > - int blkY = g_rasterToPelY[g_zscanToRaster[partUnitIdx]] >> > m_vChromaShift; > > + int blkX = g_zscanToPelX[idx] >> m_hChromaShift; > > + int blkY = g_zscanToPelY[idx] >> m_vChromaShift; > > > > return blkX + blkY * width; > > } > > > > - static int getAddrOffset(uint32_t partUnitIdx, uint32_t width) > > + static int getAddrOffset(uint32_t idx, uint32_t width) > > { > > - int blkX = g_rasterToPelX[g_zscanToRaster[partUnitIdx]]; > > - int blkY = g_rasterToPelY[g_zscanToRaster[partUnitIdx]]; > > + int blkX = g_zscanToPelX[idx]; > > + int blkY = g_zscanToPelY[idx]; > > > > return blkX + blkY * width; > > } > > diff -r 6e6756f94b27 -r 7145e57c722a source/common/deblock.cpp > > --- a/source/common/deblock.cpp Fri Aug 22 15:53:34 2014 -0500 > > +++ 
b/source/common/deblock.cpp Mon Aug 25 17:53:12 2014 +0900 > > @@ -49,15 +49,15 @@ > > > > Frame* pic = cu->m_pic; > > uint32_t curNumParts = pic->getNumPartInCU() >> (depth << 1); > > - uint32_t qNumParts = curNumParts >> 2; > > > > if (cu->getDepth(absZOrderIdx) > depth) > > { > > + uint32_t qNumParts = curNumParts >> 2; > > + uint32_t xmax = cu->m_slice->m_sps->picWidthInLumaSamples - > cu->getCUPelX(); > > + uint32_t ymax = cu->m_slice->m_sps->picHeightInLumaSamples - > cu->getCUPelY(); > > for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx > += qNumParts) > > { > > - uint32_t lpelx = cu->getCUPelX() + > g_rasterToPelX[g_zscanToRaster[absZOrderIdx]]; > > - uint32_t tpely = cu->getCUPelY() + > g_rasterToPelY[g_zscanToRaster[absZOrderIdx]]; > > - if ((lpelx < cu->m_slice->m_sps->picWidthInLumaSamples) && > (tpely < cu->m_slice->m_sps->picHeightInLumaSamples)) > > + if (g_zscanToPelX[absZOrderIdx] < xmax && > g_zscanToPelY[absZOrderIdx] < ymax) > > deblockCU(cu, absZOrderIdx, depth + 1, dir, edgeFilter, > blockingStrength); > > } > > return; > > @@ -184,8 +184,8 @@ > > > > void Deblock::setLoopfilterParam(TComDataCU* cu, uint32_t absZOrderIdx, > Param *params) > > { > > - uint32_t x = cu->getCUPelX() + > g_rasterToPelX[g_zscanToRaster[absZOrderIdx]]; > > - uint32_t y = cu->getCUPelY() + > g_rasterToPelY[g_zscanToRaster[absZOrderIdx]]; > > + uint32_t x = cu->getCUPelX() + g_zscanToPelX[absZOrderIdx]; > > + uint32_t y = cu->getCUPelY() + g_zscanToPelY[absZOrderIdx]; > > > > TComDataCU* tempCU; > > uint32_t tempPartIdx; > > diff -r 6e6756f94b27 -r 7145e57c722a source/common/param.cpp > > --- a/source/common/param.cpp Fri Aug 22 15:53:34 2014 -0500 > > +++ b/source/common/param.cpp Mon Aug 25 17:53:12 2014 +0900 > > @@ -1071,9 +1071,6 @@ > > uint32_t* tmp = &g_zscanToRaster[0]; > > initZscanToRaster(g_maxFullDepth, 1, 0, tmp); > > initRasterToZscan(g_maxFullDepth); > > - > > - // initialize conversion matrix from partition index to pel > > - 
initRasterToPelXY(g_maxFullDepth); > > } > > return 0; > > } > > diff -r 6e6756f94b27 -r 7145e57c722a source/common/shortyuv.h > > --- a/source/common/shortyuv.h Fri Aug 22 15:53:34 2014 -0500 > > +++ b/source/common/shortyuv.h Mon Aug 25 17:53:12 2014 +0900 > > @@ -51,18 +51,18 @@ > > ShortYuv(); > > ~ShortYuv(); > > > > - int getChromaAddrOffset(uint32_t partUnitIdx, uint32_t width) > > + int getChromaAddrOffset(uint32_t idx, uint32_t width) > > { > > - int blkX = g_rasterToPelX[g_zscanToRaster[partUnitIdx]] >> > m_hChromaShift; > > - int blkY = g_rasterToPelY[g_zscanToRaster[partUnitIdx]] >> > m_vChromaShift; > > + int blkX = g_zscanToPelX[idx] >> m_hChromaShift; > > + int blkY = g_zscanToPelY[idx] >> m_vChromaShift; > > > > return blkX + blkY * width; > > } > > > > static int getAddrOffset(uint32_t idx, uint32_t width) > > { > > - int blkX = g_rasterToPelX[g_zscanToRaster[idx]]; > > - int blkY = g_rasterToPelY[g_zscanToRaster[idx]]; > > + int blkX = g_zscanToPelX[idx]; > > + int blkY = g_zscanToPelY[idx]; > > > > return blkX + blkY * width; > > } > > diff -r 6e6756f94b27 -r 7145e57c722a source/common/slice.cpp > > --- a/source/common/slice.cpp Fri Aug 22 15:53:34 2014 -0500 > > +++ b/source/common/slice.cpp Mon Aug 25 17:53:12 2014 +0900 > > @@ -178,3 +178,26 @@ > > bUsed[k] = used; > > } > > } > > + > > +uint32_t Slice::realEndAddress(uint32_t endCUAddr) > > +{ > > + // Calculate end address > > + uint32_t internalAddress = (endCUAddr - 1) % > m_pic->getNumPartInCU(); > > + uint32_t externalAddress = (endCUAddr - 1) / > m_pic->getNumPartInCU(); > > + uint32_t xmax = m_sps->picWidthInLumaSamples - (externalAddress % > m_pic->getFrameWidthInCU()) * g_maxCUSize; > > + uint32_t ymax = m_sps->picHeightInLumaSamples - (externalAddress / > m_pic->getFrameWidthInCU()) * g_maxCUSize; > > + > > + while (g_zscanToPelX[internalAddress] >= xmax || > g_zscanToPelY[internalAddress] >= ymax) > > + internalAddress--; > > + > > + internalAddress++; > > + if (internalAddress == 
m_pic->getNumPartInCU()) > > + { > > + internalAddress = 0; > > + externalAddress++; > > + } > > + > > + return externalAddress * m_pic->getNumPartInCU() + internalAddress; > > +} > > + > > + > > diff -r 6e6756f94b27 -r 7145e57c722a source/common/slice.h > > --- a/source/common/slice.h Fri Aug 22 15:53:34 2014 -0500 > > +++ b/source/common/slice.h Mon Aug 25 17:53:12 2014 +0900 > > @@ -335,6 +335,8 @@ > > bool isInterB() const { return m_sliceType == B_SLICE; } > > > > bool isInterP() const { return m_sliceType == P_SLICE; } > > + > > + uint32_t realEndAddress(uint32_t endCUAddr); > > }; > > > > #define IS_REFERENCED(slice) (slice->m_pic->m_lowres.sliceType != > X265_TYPE_B) > > diff -r 6e6756f94b27 -r 7145e57c722a source/encoder/analysis.cpp > > --- a/source/encoder/analysis.cpp Fri Aug 22 15:53:34 2014 -0500 > > +++ b/source/encoder/analysis.cpp Mon Aug 25 17:53:12 2014 +0900 > > @@ -548,7 +548,7 @@ > > Slice* slice = outTempCU->m_slice; > > if (!bInsidePicture) > > { > > - int cuSize = 1 << outTempCU->getLog2CUSize(0); > > + uint32_t cuSize = 1 << outTempCU->getLog2CUSize(0); > > uint32_t lpelx = outTempCU->getCUPelX(); > > uint32_t tpely = outTempCU->getCUPelY(); > > uint32_t rpelx = lpelx + cuSize; > > @@ -1875,15 +1875,14 @@ > > uint32_t nextDepth = depth + 1; > > TComDataCU* subTempPartCU = m_tempCU[nextDepth]; > > uint32_t qNumParts = (pic->getNumPartInCU() >> (depth << 1)) >> > 2; > > + uint32_t xmax = slice->m_sps->picWidthInLumaSamples - > lcu->getCUPelX(); > > + uint32_t ymax = slice->m_sps->picHeightInLumaSamples - > lcu->getCUPelY(); > > for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, > absPartIdx += qNumParts) > > { > > - uint32_t lpelx = lcu->getCUPelX() + > g_rasterToPelX[g_zscanToRaster[absPartIdx]]; > > - uint32_t tpely = lcu->getCUPelY() + > g_rasterToPelY[g_zscanToRaster[absPartIdx]]; > > - if ((lpelx < slice->m_sps->picWidthInLumaSamples) && > > - (tpely < slice->m_sps->picHeightInLumaSamples)) > > + if 
(g_zscanToPelX[absPartIdx] < xmax && > g_zscanToPelY[absPartIdx] < ymax) > > { > > - subTempPartCU->copyToSubCU(cu, partUnitIdx, depth + 1); > > - encodeResidue(lcu, subTempPartCU, absPartIdx, depth + > 1); > > + subTempPartCU->copyToSubCU(cu, partUnitIdx, nextDepth); > > + encodeResidue(lcu, subTempPartCU, absPartIdx, > nextDepth); > > } > > } > > > > diff -r 6e6756f94b27 -r 7145e57c722a source/encoder/encoder.cpp > > --- a/source/encoder/encoder.cpp Fri Aug 22 15:53:34 2014 -0500 > > +++ b/source/encoder/encoder.cpp Mon Aug 25 17:53:12 2014 +0900 > > @@ -441,10 +441,12 @@ > > else > > { > > fenc->allocPicSym(m_param); > > - fenc->m_picSym->m_slice->m_sps = &m_sps; > > - fenc->m_picSym->m_slice->m_pps = &m_pps; > > - fenc->m_picSym->m_slice->m_maxNumMergeCand = > m_param->maxNumMergeCand; > > - fenc->m_picSym->m_slice->m_endCUAddr = > fenc->getNumCUsInFrame() * fenc->getNumPartInCU(); > > + Slice* slice = fenc->m_picSym->m_slice; > > + slice->m_pic = fenc; > > + slice->m_sps = &m_sps; > > + slice->m_pps = &m_pps; > > + slice->m_maxNumMergeCand = m_param->maxNumMergeCand; > > + slice->m_endCUAddr = > slice->realEndAddress(fenc->getNumCUsInFrame() * fenc->getNumPartInCU()); > > } > > curEncoder->m_rce.encodeOrder = m_encodedFrameNum++; > > if (m_bframeDelay) > > diff -r 6e6756f94b27 -r 7145e57c722a source/encoder/entropy.cpp > > --- a/source/encoder/entropy.cpp Fri Aug 22 15:53:34 2014 -0500 > > +++ b/source/encoder/entropy.cpp Mon Aug 25 17:53:12 2014 +0900 > > @@ -493,41 +493,35 @@ > > Frame* pic = cu->m_pic; > > Slice* slice = cu->m_slice; > > > > + if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP) > > + bEncodeDQP = true; > > + > > if (!bInsidePicture) > > { > > - uint32_t lpelx = cu->getCUPelX() + > g_rasterToPelX[g_zscanToRaster[absPartIdx]]; > > - uint32_t tpely = cu->getCUPelY() + > g_rasterToPelY[g_zscanToRaster[absPartIdx]]; > > - uint32_t rpelx = lpelx + (g_maxCUSize >> depth); > > - uint32_t bpely = tpely + (g_maxCUSize >> depth); > > - 
bInsidePicture = (rpelx <= slice->m_sps->picWidthInLumaSamples > && > > - bpely <= > slice->m_sps->picHeightInLumaSamples); > > + uint32_t xmax = slice->m_sps->picWidthInLumaSamples - > cu->getCUPelX(); > > + uint32_t ymax = slice->m_sps->picHeightInLumaSamples - > cu->getCUPelY(); > > + uint32_t cuSize = g_maxCUSize >> depth; > > + > > + bInsidePicture = (g_zscanToPelX[absPartIdx] + cuSize <= xmax && > > + g_zscanToPelY[absPartIdx] + cuSize <= ymax); > > + > > + if (!bInsidePicture) > > + { > > + uint32_t qNumParts = (pic->getNumPartInCU() >> (depth << > 1)) >> 2; > > + for (uint32_t partUnitIdx = 0; partUnitIdx < 4; > partUnitIdx++, absPartIdx += qNumParts) > > + { > > + if (g_zscanToPelX[absPartIdx] < xmax && > g_zscanToPelY[absPartIdx] < ymax) > > + encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, > bEncodeDQP); > > + } > > + > > + return; > > + } > > } > > > > // We need to split, so don't try these modes. > > if (bInsidePicture && depth < g_maxCUDepth) > > codeSplitFlag(cu, absPartIdx, depth); > > > > - if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP) > > - bEncodeDQP = true; > > - > > - if (!bInsidePicture) > > - { > > - uint32_t qNumParts = (pic->getNumPartInCU() >> (depth << 1)) >> > 2; > > - > > - for (uint32_t partUnitIdx = 0; partUnitIdx < 4; partUnitIdx++, > absPartIdx += qNumParts) > > - { > > - uint32_t lpelx = cu->getCUPelX() + > g_rasterToPelX[g_zscanToRaster[absPartIdx]]; > > - uint32_t tpely = cu->getCUPelY() + > g_rasterToPelY[g_zscanToRaster[absPartIdx]]; > > - if ((lpelx < slice->m_sps->picWidthInLumaSamples) && > > - (tpely < slice->m_sps->picHeightInLumaSamples)) > > - { > > - encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, > bEncodeDQP); > > - } > > - } > > - > > - return; > > - } > > - > > if (depth < cu->getDepth(absPartIdx) && depth < g_maxCUDepth) > > { > > uint32_t qNumParts = (pic->getNumPartInCU() >> (depth << 1)) >> > 2; > > @@ -568,45 +562,24 @@ > > /* finish encoding a cu and handle end-of-slice conditions 
*/ > > void Entropy::finishCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t > depth) > > { > > - Frame* pic = cu->m_pic; > > Slice* slice = cu->m_slice; > > > > // Calculate end address > > + X265_CHECK(slice->m_endCUAddr == > slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n"); > > + uint32_t realEndAddress = slice->m_endCUAddr; > > uint32_t cuAddr = cu->getSCUAddr() + absPartIdx; > > > > - uint32_t internalAddress = (slice->m_endCUAddr - 1) % > pic->getNumPartInCU(); > > - uint32_t externalAddress = (slice->m_endCUAddr - 1) / > pic->getNumPartInCU(); > > - uint32_t posx = (externalAddress % pic->getFrameWidthInCU()) * > g_maxCUSize + g_rasterToPelX[g_zscanToRaster[internalAddress]]; > > - uint32_t posy = (externalAddress / pic->getFrameWidthInCU()) * > g_maxCUSize + g_rasterToPelY[g_zscanToRaster[internalAddress]]; > > - uint32_t width = slice->m_sps->picWidthInLumaSamples; > > - uint32_t height = slice->m_sps->picHeightInLumaSamples; > > - uint32_t cuSize = 1 << cu->getLog2CUSize(absPartIdx); > > - > > - while (posx >= width || posy >= height) > > - { > > - internalAddress--; > > - posx = (externalAddress % pic->getFrameWidthInCU()) * > g_maxCUSize + g_rasterToPelX[g_zscanToRaster[internalAddress]]; > > - posy = (externalAddress / pic->getFrameWidthInCU()) * > g_maxCUSize + g_rasterToPelY[g_zscanToRaster[internalAddress]]; > > - } > > - > > - internalAddress++; > > - if (internalAddress == cu->m_pic->getNumPartInCU()) > > - { > > - internalAddress = 0; > > - externalAddress = (externalAddress + 1); > > - } > > - uint32_t realEndAddress = (externalAddress * pic->getNumPartInCU() > + internalAddress); > > - > > // Encode slice finish > > bool bTerminateSlice = false; > > if (cuAddr + (cu->m_pic->getNumPartInCU() >> (depth << 1)) == > realEndAddress) > > bTerminateSlice = true; > > > > - uint32_t granularityWidth = g_maxCUSize; > > - posx = cu->getCUPelX() + > g_rasterToPelX[g_zscanToRaster[absPartIdx]]; > > - posy = cu->getCUPelY() + > 
g_rasterToPelY[g_zscanToRaster[absPartIdx]]; > > - bool granularityBoundary = ((posx + cuSize) % granularityWidth == 0 > || (posx + cuSize == width)) > > - && ((posy + cuSize) % granularityWidth == 0 > || (posy + cuSize == height)); > > + uint32_t granularityMask = g_maxCUSize - 1; > > + uint32_t cuSize = 1 << cu->getLog2CUSize(absPartIdx); > > + uint32_t rpelx = cu->getCUPelX() + g_zscanToPelX[absPartIdx] + > cuSize; > > + uint32_t bpely = cu->getCUPelY() + g_zscanToPelY[absPartIdx] + > cuSize; > > + bool granularityBoundary = (((rpelx & granularityMask) == 0 || > (rpelx == slice->m_sps->picWidthInLumaSamples )) && > > + ((bpely & granularityMask) == 0 || > (bpely == slice->m_sps->picHeightInLumaSamples))); > > > > if (granularityBoundary) > > { > > diff -r 6e6756f94b27 -r 7145e57c722a source/encoder/sao.cpp > > --- a/source/encoder/sao.cpp Fri Aug 22 15:53:34 2014 -0500 > > +++ b/source/encoder/sao.cpp Mon Aug 25 17:53:12 2014 +0900 > > @@ -2535,18 +2535,17 @@ > > /* Original YUV restoration for CU in lossless coding */ > > void origCUSampleRestoration(TComDataCU* cu, uint32_t absZOrderIdx, > uint32_t depth) > > { > > - Frame* pic = cu->m_pic; > > - uint32_t curNumParts = pic->getNumPartInCU() >> (depth << 1); > > - uint32_t qNumParts = curNumParts >> 2; > > - > > // go to sub-CU > > if (cu->getDepth(absZOrderIdx) > depth) > > { > > + Frame* pic = cu->m_pic; > > + uint32_t curNumParts = pic->getNumPartInCU() >> (depth << 1); > > + uint32_t qNumParts = curNumParts >> 2; > > + uint32_t xmax = cu->m_slice->m_sps->picWidthInLumaSamples - > cu->getCUPelX(); > > + uint32_t ymax = cu->m_slice->m_sps->picHeightInLumaSamples - > cu->getCUPelY(); > > for (uint32_t partIdx = 0; partIdx < 4; partIdx++, absZOrderIdx > += qNumParts) > > { > > - uint32_t lpelx = cu->getCUPelX() + > g_rasterToPelX[g_zscanToRaster[absZOrderIdx]]; > > - uint32_t tpely = cu->getCUPelY() + > g_rasterToPelY[g_zscanToRaster[absZOrderIdx]]; > > - if ((lpelx < cu->m_slice->m_sps->picWidthInLumaSamples) 
&& > (tpely < cu->m_slice->m_sps->picHeightInLumaSamples)) > > + if (g_zscanToPelX[absZOrderIdx] < xmax && > g_zscanToPelY[absZOrderIdx] < ymax) > > origCUSampleRestoration(cu, absZOrderIdx, depth + 1); > > } > > > > _______________________________________________ > > x265-devel mailing list > > x265-devel@videolan.org > > https://mailman.videolan.org/listinfo/x265-devel > > -- > Steve Borho > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel > >
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel