Hi, we removed the separate functions for constrained intra prediction (CIP) some time back, because they increased the code size while saving only a few conditional checks. Can you please send a separate patch for the other changes that are not related to CIP?
Thanks Ashok. On Tue, Dec 23, 2014 at 11:23 AM, Satoshi Nakagawa <[email protected]> wrote: > # HG changeset patch > # User Satoshi Nakagawa <[email protected]> > # Date 1419313799 -32400 > # Tue Dec 23 14:49:59 2014 +0900 > # Node ID 6b59452a17d75c42c1750d47e2318c8da80c39fb > # Parent 8d2f418829c894c25da79daa861f16c61e5060d7 > refine intra neighbors > > diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/common.h > --- a/source/common/common.h Sat Dec 20 21:27:14 2014 +0900 > +++ b/source/common/common.h Tue Dec 23 14:49:59 2014 +0900 > @@ -163,6 +163,9 @@ > template<typename T> > inline T x265_max(T a, T b) { return a > b ? a : b; } > > +template<typename T> > +inline T x265_clip3(T minVal, T maxVal, T a) { return > x265_min(x265_max(minVal, a), maxVal); } > + > typedef int16_t coeff_t; // transform coefficient > > #define X265_MIN(a, b) ((a) < (b) ? (a) : (b)) > diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/cudata.cpp > --- a/source/common/cudata.cpp Sat Dec 20 21:27:14 2014 +0900 > +++ b/source/common/cudata.cpp Tue Dec 23 14:49:59 2014 +0900 > @@ -608,7 +608,7 @@ > { > if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - > s_numPartInCUSize + 1]) > { > - uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] > + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; > + uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] > + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; > arPartUnitIdx = g_rasterToZscan[absPartIdxRT - > s_numPartInCUSize + 1]; > if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, > s_numPartInCUSize)) > return m_encData->getPicCTU(m_cuAddr); > @@ -689,8 +689,6 @@ > return NULL; > } > blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (1 + > partUnitOffset) * s_numPartInCUSize - 1]; > - if (!m_cuLeft || !m_cuLeft->m_slice) > - return NULL; > return m_cuLeft; > } > > @@ -723,8 +721,6 @@ > return NULL; > } > arPartUnitIdx = g_rasterToZscan[absPartIdxRT + NUM_CU_PARTITIONS > - s_numPartInCUSize + partUnitOffset]; > - if (!m_cuAbove || 
!m_cuAbove->m_slice) > - return NULL; > return m_cuAbove; > } > > @@ -732,8 +728,6 @@ > return NULL; > > arPartUnitIdx = g_rasterToZscan[NUM_CU_PARTITIONS - s_numPartInCUSize > + partUnitOffset - 1]; > - if ((m_cuAboveRight == NULL || m_cuAboveRight->m_slice == NULL || > (m_cuAboveRight->m_cuAddr) > m_cuAddr)) > - return NULL; > return m_cuAboveRight; > } > > @@ -904,7 +898,7 @@ > tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize; > tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize; > > - tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - > (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag), > tuDepthRange[1])); > + tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], > log2CUSize - (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag)); > } > > void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t > absPartIdx) const > @@ -916,7 +910,7 @@ > tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize; > tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize; > > - tuDepthRange[0] = X265_MAX(tuDepthRange[0], X265_MIN(log2CUSize - > (quadtreeTUMaxDepth - 1 + splitFlag), tuDepthRange[1])); > + tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], > log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag)); > } > > uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const > @@ -1363,14 +1357,6 @@ > return outPartIdxRB; > } > > -void CUData::deriveLeftRightTopIdxAdi(uint32_t& outPartIdxLT, uint32_t& > outPartIdxRT, uint32_t partOffset, uint32_t partDepth) const > -{ > - uint32_t numPartInWidth = 1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - > partDepth); > - > - outPartIdxLT = m_absIdxInCTU + partOffset; > - outPartIdxRT = g_rasterToZscan[g_zscanToRaster[outPartIdxLT] + > numPartInWidth - 1]; > -} > - > bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, > uint32_t candAbsPartIdx) const > { > if (m_interDir[absPartIdx] != candCU.m_interDir[candAbsPartIdx]) > diff -r 8d2f418829c8 -r 
6b59452a17d7 source/common/cudata.h > --- a/source/common/cudata.h Sat Dec 20 21:27:14 2014 +0900 > +++ b/source/common/cudata.h Tue Dec 23 14:49:59 2014 +0900 > @@ -212,7 +212,6 @@ > > void getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) > const; > int getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* > intraDirPred) const; > - void deriveLeftRightTopIdxAdi(uint32_t& partIdxLT, uint32_t& > partIdxRT, uint32_t partOffset, uint32_t partDepth) const; > > uint32_t getSCUAddr() const { return (m_cuAddr << > g_maxFullDepth * 2) + m_absIdxInCTU; } > uint32_t getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const; > diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/predict.cpp > --- a/source/common/predict.cpp Sat Dec 20 21:27:14 2014 +0900 > +++ b/source/common/predict.cpp Tue Dec 23 14:49:59 2014 +0900 > @@ -654,11 +654,8 @@ > } > } > > -void Predict::initAdiPattern(const CUData& cu, const CUGeom& cuGeom, > uint32_t absPartIdx, uint32_t partDepth, int dirMode) > +void Predict::initAdiPattern(const CUData& cu, const CUGeom& cuGeom, > uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, int dirMode) > { > - IntraNeighbors intraNeighbors; > - initIntraNeighbors(cu, absPartIdx, partDepth, true, &intraNeighbors); > - > pixel* adiBuf = m_predBuf; > pixel* refAbove = m_refAbove; > pixel* refLeft = m_refLeft; > @@ -700,12 +697,12 @@ > int refTL = refAbove[0]; > int refTR = refAbove[trSize2]; > bStrongSmoothing = (abs(refBL + refTL - 2 * refLeft[trSize]) > < threshold && > - abs(refTL + refTR - 2 * refAbove[trSize]) < threshold); > + abs(refTL + refTR - 2 * refAbove[trSize]) > < threshold); > > if (bStrongSmoothing) > { > // bilinear interpolation > - const int shift = 5 + 1; // intraNeighbors.log2TrSize + 1; > + const int shift = 5 + 1; // log2TrSize + 1; > int init = (refTL << shift) + tuSize; > int delta; > > @@ -738,10 +735,8 @@ > } > } > > -void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& > cuGeom, uint32_t absPartIdx, 
uint32_t partDepth, uint32_t chromaId) > +void Predict::initAdiPatternChroma(const CUData& cu, const CUGeom& > cuGeom, uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, uint32_t > chromaId) > { > - IntraNeighbors intraNeighbors; > - initIntraNeighbors(cu, absPartIdx, partDepth, false, &intraNeighbors); > uint32_t tuSize = intraNeighbors.tuSize; > > const pixel* adiOrigin = > cu.m_encData->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, > cuGeom.encodeIdx + absPartIdx); > @@ -751,9 +746,9 @@ > fillReferenceSamples(adiOrigin, picStride, adiRef, intraNeighbors); > } > > -void Predict::initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, > uint32_t partDepth, bool isLuma, IntraNeighbors *intraNeighbors) > +void Predict::initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, > uint32_t tuDepth, bool isLuma, IntraNeighbors *intraNeighbors) > { > - uint32_t log2TrSize = cu.m_log2CUSize[0] - partDepth; > + uint32_t log2TrSize = cu.m_log2CUSize[0] - tuDepth; > int log2UnitWidth = LOG2_UNIT_SIZE; > int log2UnitHeight = LOG2_UNIT_SIZE; > > @@ -764,12 +759,12 @@ > log2UnitHeight -= cu.m_vChromaShift; > } > > - int numIntraNeighbor = 0; > + int numIntraNeighbor; > bool* bNeighborFlags = intraNeighbors->bNeighborFlags; > > - uint32_t partIdxLT, partIdxRT, partIdxLB; > - > - cu.deriveLeftRightTopIdxAdi(partIdxLT, partIdxRT, absPartIdx, > partDepth); > + uint32_t numPartInWidth = 1 << (cu.m_log2CUSize[0] - LOG2_UNIT_SIZE - > tuDepth); > + uint32_t partIdxLT = cu.m_absIdxInCTU + absPartIdx; > + uint32_t partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + > numPartInWidth - 1]; > > uint32_t tuSize = 1 << log2TrSize; > int tuWidthInUnits = tuSize >> log2UnitWidth; > @@ -777,14 +772,26 @@ > int aboveUnits = tuWidthInUnits << 1; > int leftUnits = tuHeightInUnits << 1; > int partIdxStride = cu.m_slice->m_sps->numPartInCUSize; > - partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + > ((tuHeightInUnits - 1) * partIdxStride)]; > + uint32_t partIdxLB = 
g_rasterToZscan[g_zscanToRaster[partIdxLT] + > ((tuHeightInUnits - 1) * partIdxStride)]; > > - bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT); > - numIntraNeighbor += (int)(bNeighborFlags[leftUnits]); > - numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, > (bNeighborFlags + leftUnits + 1)); > - numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, > (bNeighborFlags + leftUnits + 1 + tuWidthInUnits)); > - numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, > (bNeighborFlags + leftUnits - 1)); > - numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, > (bNeighborFlags + leftUnits - 1 - tuHeightInUnits)); > + if (cu.m_slice->isIntra() || > !cu.m_slice->m_pps->bConstrainedIntraPred) > + { > + bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT); > + numIntraNeighbor = (int)(bNeighborFlags[leftUnits]); > + numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, > bNeighborFlags + leftUnits + 1); > + numIntraNeighbor += isAboveRightAvailable(cu, partIdxRT, > bNeighborFlags + leftUnits + 1 + tuWidthInUnits, tuWidthInUnits); > + numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, > bNeighborFlags + leftUnits - 1); > + numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLB, > bNeighborFlags + tuHeightInUnits - 1, tuHeightInUnits); > + } > + else > + { > + bNeighborFlags[leftUnits] = isAboveLeftAvailableCIP(cu, > partIdxLT); > + numIntraNeighbor = (int)(bNeighborFlags[leftUnits]); > + numIntraNeighbor += isAboveAvailableCIP(cu, partIdxLT, partIdxRT, > bNeighborFlags + leftUnits + 1); > + numIntraNeighbor += isAboveRightAvailableCIP(cu, partIdxRT, > bNeighborFlags + leftUnits + 1 + tuWidthInUnits, tuWidthInUnits); > + numIntraNeighbor += isLeftAvailableCIP(cu, partIdxLT, partIdxLB, > bNeighborFlags + leftUnits - 1); > + numIntraNeighbor += isBelowLeftAvailableCIP(cu, partIdxLB, > bNeighborFlags + tuHeightInUnits - 1, tuHeightInUnits); > + } > > intraNeighbors->numIntraNeighbor = 
numIntraNeighbor; > intraNeighbors->totalUnits = aboveUnits + leftUnits + 1; > @@ -793,7 +800,6 @@ > intraNeighbors->unitWidth = 1 << log2UnitWidth; > intraNeighbors->unitHeight = 1 << log2UnitHeight; > intraNeighbors->tuSize = tuSize; > - intraNeighbors->log2TrSize = log2TrSize; > } > > void Predict::fillReferenceSamples(const pixel* adiOrigin, intptr_t > picStride, pixel* adiRef, const IntraNeighbors& intraNeighbors) > @@ -953,33 +959,27 @@ > uint32_t partAboveLeft; > const CUData* cuAboveLeft = cu.getPUAboveLeft(partAboveLeft, > partIdxLT); > > - if (!cu.m_slice->m_pps->bConstrainedIntraPred) > - return cuAboveLeft ? true : false; > - else > - return cuAboveLeft && cuAboveLeft->isIntra(partAboveLeft); > + return !!cuAboveLeft; > } > > int Predict::isAboveAvailable(const CUData& cu, uint32_t partIdxLT, > uint32_t partIdxRT, bool* bValidFlags) > { > const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT]; > - const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT] + 1; > + const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT]; > const uint32_t idxStep = 1; > - bool* validFlagPtr = bValidFlags; > int numIntra = 0; > > - for (uint32_t rasterPart = rasterPartBegin; rasterPart < > rasterPartEnd; rasterPart += idxStep) > + for (uint32_t rasterPart = rasterPartBegin; rasterPart <= > rasterPartEnd; rasterPart += idxStep, bValidFlags++) > { > uint32_t partAbove; > const CUData* cuAbove = cu.getPUAbove(partAbove, > g_rasterToZscan[rasterPart]); > - if (cuAbove && (!cu.m_slice->m_pps->bConstrainedIntraPred || > cuAbove->isIntra(partAbove))) > + if (cuAbove) > { > numIntra++; > - *validFlagPtr = true; > + *bValidFlags = true; > } > else > - *validFlagPtr = false; > - > - validFlagPtr++; > + *bValidFlags = false; > } > > return numIntra; > @@ -988,73 +988,156 @@ > int Predict::isLeftAvailable(const CUData& cu, uint32_t partIdxLT, > uint32_t partIdxLB, bool* bValidFlags) > { > const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT]; > - const uint32_t 
rasterPartEnd = g_zscanToRaster[partIdxLB] + 1; > + const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB]; > const uint32_t idxStep = cu.m_slice->m_sps->numPartInCUSize; > - bool* validFlagPtr = bValidFlags; > int numIntra = 0; > > - for (uint32_t rasterPart = rasterPartBegin; rasterPart < > rasterPartEnd; rasterPart += idxStep) > + for (uint32_t rasterPart = rasterPartBegin; rasterPart <= > rasterPartEnd; rasterPart += idxStep, bValidFlags--) // opposite direction > { > uint32_t partLeft; > const CUData* cuLeft = cu.getPULeft(partLeft, > g_rasterToZscan[rasterPart]); > - if (cuLeft && (!cu.m_slice->m_pps->bConstrainedIntraPred || > cuLeft->isIntra(partLeft))) > + if (cuLeft) > { > numIntra++; > - *validFlagPtr = true; > + *bValidFlags = true; > } > else > - *validFlagPtr = false; > - > - validFlagPtr--; // opposite direction > + *bValidFlags = false; > } > > return numIntra; > } > > -int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxLT, > uint32_t partIdxRT, bool* bValidFlags) > +int Predict::isAboveRightAvailable(const CUData& cu, uint32_t partIdxRT, > bool* bValidFlags, uint32_t numUnits) > { > - const uint32_t numUnitsInPU = g_zscanToRaster[partIdxRT] - > g_zscanToRaster[partIdxLT] + 1; > - bool* validFlagPtr = bValidFlags; > int numIntra = 0; > > - for (uint32_t offset = 1; offset <= numUnitsInPU; offset++) > + for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags++) > { > uint32_t partAboveRight; > const CUData* cuAboveRight = > cu.getPUAboveRightAdi(partAboveRight, partIdxRT, offset); > - if (cuAboveRight && (!cu.m_slice->m_pps->bConstrainedIntraPred || > cuAboveRight->isIntra(partAboveRight))) > + if (cuAboveRight) > { > numIntra++; > - *validFlagPtr = true; > + *bValidFlags = true; > } > else > - *validFlagPtr = false; > - > - validFlagPtr++; > + *bValidFlags = false; > } > > return numIntra; > } > > -int Predict::isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLT, > uint32_t partIdxLB, bool* bValidFlags) > +int 
Predict::isBelowLeftAvailable(const CUData& cu, uint32_t partIdxLB, > bool* bValidFlags, uint32_t numUnits) > { > - const uint32_t numUnitsInPU = (g_zscanToRaster[partIdxLB] - > g_zscanToRaster[partIdxLT]) / cu.m_slice->m_sps->numPartInCUSize + 1; > - bool* validFlagPtr = bValidFlags; > int numIntra = 0; > > - for (uint32_t offset = 1; offset <= numUnitsInPU; offset++) > + for (uint32_t offset = 1; offset <= numUnits; offset++, > bValidFlags--) // opposite direction > { > uint32_t partBelowLeft; > const CUData* cuBelowLeft = cu.getPUBelowLeftAdi(partBelowLeft, > partIdxLB, offset); > - if (cuBelowLeft && (!cu.m_slice->m_pps->bConstrainedIntraPred || > cuBelowLeft->isIntra(partBelowLeft))) > + if (cuBelowLeft) > { > numIntra++; > - *validFlagPtr = true; > + *bValidFlags = true; > } > else > - *validFlagPtr = false; > - > - validFlagPtr--; // opposite direction > + *bValidFlags = false; > } > > return numIntra; > } > + > +bool Predict::isAboveLeftAvailableCIP(const CUData& cu, uint32_t > partIdxLT) > +{ > + uint32_t partAboveLeft; > + const CUData* cuAboveLeft = cu.getPUAboveLeft(partAboveLeft, > partIdxLT); > + > + return cuAboveLeft && cuAboveLeft->isIntra(partAboveLeft); > +} > + > +int Predict::isAboveAvailableCIP(const CUData& cu, uint32_t partIdxLT, > uint32_t partIdxRT, bool* bValidFlags) > +{ > + const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT]; > + const uint32_t rasterPartEnd = g_zscanToRaster[partIdxRT]; > + const uint32_t idxStep = 1; > + int numIntra = 0; > + > + for (uint32_t rasterPart = rasterPartBegin; rasterPart <= > rasterPartEnd; rasterPart += idxStep, bValidFlags++) > + { > + uint32_t partAbove; > + const CUData* cuAbove = cu.getPUAbove(partAbove, > g_rasterToZscan[rasterPart]); > + if (cuAbove && cuAbove->isIntra(partAbove)) > + { > + numIntra++; > + *bValidFlags = true; > + } > + else > + *bValidFlags = false; > + } > + > + return numIntra; > +} > + > +int Predict::isLeftAvailableCIP(const CUData& cu, uint32_t partIdxLT, > uint32_t 
partIdxLB, bool* bValidFlags) > +{ > + const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT]; > + const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB]; > + const uint32_t idxStep = cu.m_slice->m_sps->numPartInCUSize; > + int numIntra = 0; > + > + for (uint32_t rasterPart = rasterPartBegin; rasterPart <= > rasterPartEnd; rasterPart += idxStep, bValidFlags--) // opposite direction > + { > + uint32_t partLeft; > + const CUData* cuLeft = cu.getPULeft(partLeft, > g_rasterToZscan[rasterPart]); > + if (cuLeft && cuLeft->isIntra(partLeft)) > + { > + numIntra++; > + *bValidFlags = true; > + } > + else > + *bValidFlags = false; > + } > + > + return numIntra; > +} > + > +int Predict::isAboveRightAvailableCIP(const CUData& cu, uint32_t > partIdxRT, bool* bValidFlags, uint32_t numUnits) > +{ > + int numIntra = 0; > + > + for (uint32_t offset = 1; offset <= numUnits; offset++, bValidFlags++) > + { > + uint32_t partAboveRight; > + const CUData* cuAboveRight = > cu.getPUAboveRightAdi(partAboveRight, partIdxRT, offset); > + if (cuAboveRight && cuAboveRight->isIntra(partAboveRight)) > + { > + numIntra++; > + *bValidFlags = true; > + } > + else > + *bValidFlags = false; > + } > + > + return numIntra; > +} > + > +int Predict::isBelowLeftAvailableCIP(const CUData& cu, uint32_t > partIdxLB, bool* bValidFlags, uint32_t numUnits) > +{ > + int numIntra = 0; > + > + for (uint32_t offset = 1; offset <= numUnits; offset++, > bValidFlags--) // opposite direction > + { > + uint32_t partBelowLeft; > + const CUData* cuBelowLeft = cu.getPUBelowLeftAdi(partBelowLeft, > partIdxLB, offset); > + if (cuBelowLeft && cuBelowLeft->isIntra(partBelowLeft)) > + { > + numIntra++; > + *bValidFlags = true; > + } > + else > + *bValidFlags = false; > + } > + > + return numIntra; > +} > diff -r 8d2f418829c8 -r 6b59452a17d7 source/common/predict.h > --- a/source/common/predict.h Sat Dec 20 21:27:14 2014 +0900 > +++ b/source/common/predict.h Tue Dec 23 14:49:59 2014 +0900 > @@ -57,7 +57,6 @@ > int 
unitWidth; > int unitHeight; > int tuSize; > - uint32_t log2TrSize; > bool bNeighborFlags[4 * MAX_NUM_SPU_W + 1]; > }; > > @@ -105,14 +104,20 @@ > void addWeightUni(Yuv& predYuv, const ShortYuv& srcYuv, const > WeightValues wp[3], bool bLuma, bool bChroma) const; > > /* Intra prediction helper functions */ > - static void initIntraNeighbors(const CUData& cu, uint32_t > zOrderIdxInPart, uint32_t partDepth, bool isLuma, IntraNeighbors > *IntraNeighbors); > + static void initIntraNeighbors(const CUData& cu, uint32_t absPartIdx, > uint32_t tuDepth, bool isLuma, IntraNeighbors *IntraNeighbors); > static void fillReferenceSamples(const pixel* adiOrigin, intptr_t > picStride, pixel* adiRef, const IntraNeighbors& intraNeighbors); > > static bool isAboveLeftAvailable(const CUData& cu, uint32_t > partIdxLT); > static int isAboveAvailable(const CUData& cu, uint32_t partIdxLT, > uint32_t partIdxRT, bool* bValidFlags); > static int isLeftAvailable(const CUData& cu, uint32_t partIdxLT, > uint32_t partIdxLB, bool* bValidFlags); > - static int isAboveRightAvailable(const CUData& cu, uint32_t > partIdxLT, uint32_t partIdxRT, bool* bValidFlags); > - static int isBelowLeftAvailable(const CUData& cu, uint32_t > partIdxLT, uint32_t partIdxLB, bool* bValidFlags); > + static int isAboveRightAvailable(const CUData& cu, uint32_t > partIdxRT, bool* bValidFlags, uint32_t numUnits); > + static int isBelowLeftAvailable(const CUData& cu, uint32_t > partIdxLB, bool* bValidFlags, uint32_t numUnits); > + > + static bool isAboveLeftAvailableCIP(const CUData& cu, uint32_t > partIdxLT); > + static int isAboveAvailableCIP(const CUData& cu, uint32_t partIdxLT, > uint32_t partIdxRT, bool* bValidFlags); > + static int isLeftAvailableCIP(const CUData& cu, uint32_t partIdxLT, > uint32_t partIdxLB, bool* bValidFlags); > + static int isAboveRightAvailableCIP(const CUData& cu, uint32_t > partIdxRT, bool* bValidFlags, uint32_t numUnits); > + static int isBelowLeftAvailableCIP(const CUData& cu, uint32_t > 
partIdxLB, bool* bValidFlags, uint32_t numUnits); > > public: > > @@ -125,8 +130,8 @@ > void predIntraLumaAng(uint32_t dirMode, pixel* pred, intptr_t stride, > uint32_t log2TrSize); > void predIntraChromaAng(pixel* src, uint32_t dirMode, pixel* pred, > intptr_t stride, uint32_t log2TrSizeC, int chFmt); > > - void initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t > absPartIdx, uint32_t partDepth, int dirMode); > - void initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, > uint32_t absPartIdx, uint32_t partDepth, uint32_t chromaId); > + void initAdiPattern(const CUData& cu, const CUGeom& cuGeom, uint32_t > absPartIdx, const IntraNeighbors& intraNeighbors, int dirMode); > + void initAdiPatternChroma(const CUData& cu, const CUGeom& cuGeom, > uint32_t absPartIdx, const IntraNeighbors& intraNeighbors, uint32_t > chromaId); > pixel* getAdiChromaBuf(uint32_t chromaId, int tuSize) > { > return m_predBuf + (chromaId == 1 ? 0 : 2 * ADI_BUF_STRIDE * > (tuSize * 2 + 1)); > diff -r 8d2f418829c8 -r 6b59452a17d7 source/encoder/analysis.cpp > --- a/source/encoder/analysis.cpp Sat Dec 20 21:27:14 2014 +0900 > +++ b/source/encoder/analysis.cpp Tue Dec 23 14:49:59 2014 +0900 > @@ -914,7 +914,7 @@ > cu.getInterTUQtDepthRange(tuDepthRange, 0); > > > m_rqt[cuGeom.depth].tmpResiYuv.subtract(*md.bestMode->fencYuv, > md.bestMode->predYuv, cuGeom.log2CUSize); > - residualTransformQuantInter(*md.bestMode, cuGeom, > 0, cuGeom.depth, tuDepthRange); > + residualTransformQuantInter(*md.bestMode, cuGeom, > 0, 0, tuDepthRange); > if (cu.getQtRootCbf(0)) > > md.bestMode->reconYuv.addClip(md.bestMode->predYuv, > m_rqt[cuGeom.depth].tmpResiYuv, cu.m_log2CUSize[0]); > else > @@ -938,8 +938,7 @@ > uint32_t tuDepthRange[2]; > cu.getIntraTUQtDepthRange(tuDepthRange, 0); > > - uint32_t initTuDepth = cu.m_partSize[0] != > SIZE_2Nx2N; > - residualTransformQuantIntra(*md.bestMode, cuGeom, > initTuDepth, 0, tuDepthRange); > + residualTransformQuantIntra(*md.bestMode, cuGeom, > 0, 0, 
tuDepthRange); > getBestIntraModeChroma(*md.bestMode, cuGeom); > residualQTIntraChroma(*md.bestMode, cuGeom, 0, 0); > > md.bestMode->reconYuv.copyFromPicYuv(*m_frame->m_reconPic, cu.m_cuAddr, > cuGeom.encodeIdx); // TODO: > @@ -1702,8 +1701,7 @@ > uint32_t tuDepthRange[2]; > cu.getIntraTUQtDepthRange(tuDepthRange, 0); > > - uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N; > - residualTransformQuantIntra(*bestMode, cuGeom, initTuDepth, 0, > tuDepthRange); > + residualTransformQuantIntra(*bestMode, cuGeom, 0, 0, > tuDepthRange); > getBestIntraModeChroma(*bestMode, cuGeom); > residualQTIntraChroma(*bestMode, cuGeom, 0, 0); > } > @@ -1736,7 +1734,7 @@ > uint32_t tuDepthRange[2]; > cu.getInterTUQtDepthRange(tuDepthRange, 0); > > - residualTransformQuantInter(*bestMode, cuGeom, 0, cuGeom.depth, > tuDepthRange); > + residualTransformQuantInter(*bestMode, cuGeom, 0, 0, > tuDepthRange); > > if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N && > !cu.getQtRootCbf(0)) > cu.setPredModeSubParts(MODE_SKIP); > diff -r 8d2f418829c8 -r 6b59452a17d7 source/encoder/search.cpp > --- a/source/encoder/search.cpp Sat Dec 20 21:27:14 2014 +0900 > +++ b/source/encoder/search.cpp Tue Dec 23 14:49:59 2014 +0900 > @@ -239,7 +239,8 @@ > > void Search::codeIntraLumaQT(Mode& mode, const CUGeom& cuGeom, uint32_t > tuDepth, uint32_t absPartIdx, bool bAllowSplit, Cost& outCost, const > uint32_t depthRange[2]) > { > - uint32_t fullDepth = mode.cu.m_cuDepth[0] + tuDepth; > + CUData& cu = mode.cu; > + uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth; > uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; > uint32_t qtLayer = log2TrSize - 2; > uint32_t sizeIdx = log2TrSize - 2; > @@ -253,8 +254,6 @@ > mightSplit = true; > } > > - CUData& cu = mode.cu; > - > Cost fullCost; > uint32_t bCBF = 0; > > @@ -273,7 +272,9 @@ > > // init availability pattern > uint32_t lumaPredMode = cu.m_lumaIntraDir[absPartIdx]; > - initAdiPattern(cu, cuGeom, absPartIdx, tuDepth, lumaPredMode); > + IntraNeighbors 
intraNeighbors; > + initIntraNeighbors(cu, absPartIdx, tuDepth, true, > &intraNeighbors); > + initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, > lumaPredMode); > > // get prediction signal > predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize); > @@ -365,7 +366,7 @@ > m_entropyCoder.load(m_rqt[fullDepth].rqtRoot); // prep > state of split encode > } > > - // code split block > + /* code split block */ > uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2; > > int checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && > (log2TrSize - 1) <= MAX_LOG2_TS_SIZE && !cu.m_tqBypass[0]; > @@ -451,11 +452,13 @@ > pixel* pred = predYuv->getLumaAddr(absPartIdx); > int16_t* residual = > m_rqt[cuGeom.depth].tmpResiYuv.getLumaAddr(absPartIdx); > uint32_t stride = fencYuv->m_size; > - int sizeIdx = log2TrSize - 2; > + uint32_t sizeIdx = log2TrSize - 2; > > // init availability pattern > uint32_t lumaPredMode = cu.m_lumaIntraDir[absPartIdx]; > - initAdiPattern(cu, cuGeom, absPartIdx, tuDepth, lumaPredMode); > + IntraNeighbors intraNeighbors; > + initIntraNeighbors(cu, absPartIdx, tuDepth, true, &intraNeighbors); > + initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, lumaPredMode); > > // get prediction signal > predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize); > @@ -597,13 +600,12 @@ > } > > /* fast luma intra residual generation. 
Only perform the minimum number > of TU splits required by the CU size */ > -void Search::residualTransformQuantIntra(Mode& mode, const CUGeom& > cuGeom, uint32_t tuDepth, uint32_t absPartIdx, const uint32_t depthRange[2]) > +void Search::residualTransformQuantIntra(Mode& mode, const CUGeom& > cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2]) > { > CUData& cu = mode.cu; > - > - uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth; > - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; > - bool bCheckFull = log2TrSize <= depthRange[1]; > + uint32_t fullDepth = cu.m_cuDepth[0] + tuDepth; > + uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; > + bool bCheckFull = log2TrSize <= depthRange[1]; > > X265_CHECK(m_slice->m_sliceType != I_SLICE, > "residualTransformQuantIntra not intended for I slices\n"); > > @@ -614,28 +616,36 @@ > > if (bCheckFull) > { > - const pixel* fenc = mode.fencYuv->getLumaAddr(absPartIdx); > - pixel* pred = mode.predYuv.getLumaAddr(absPartIdx); > - int16_t* residual = > m_rqt[cuGeom.depth].tmpResiYuv.getLumaAddr(absPartIdx); > + const pixel* fenc = mode.fencYuv->getLumaAddr(absPartIdx); > + pixel* pred = mode.predYuv.getLumaAddr(absPartIdx); > + int16_t* residual = > m_rqt[cuGeom.depth].tmpResiYuv.getLumaAddr(absPartIdx); > + uint32_t stride = mode.fencYuv->m_size; > + > + // init availability pattern > + uint32_t lumaPredMode = cu.m_lumaIntraDir[absPartIdx]; > + IntraNeighbors intraNeighbors; > + initIntraNeighbors(cu, absPartIdx, tuDepth, true, > &intraNeighbors); > + initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, > lumaPredMode); > + > + // get prediction signal > + predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize); > + > + X265_CHECK(!cu.m_transformSkip[TEXT_LUMA][absPartIdx], > "unexpected tskip flag in residualTransformQuantIntra\n"); > + cu.setTUDepthSubParts(tuDepth, absPartIdx, fullDepth); > + > + uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2); > + coeff_t* coeffY = cu.m_trCoeff[0] + 
coeffOffsetY; > + > + uint32_t sizeIdx = log2TrSize - 2; > + primitives.calcresidual[sizeIdx](fenc, pred, residual, stride); > + > pixel* picReconY = > m_frame->m_reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.encodeIdx + > absPartIdx); > intptr_t picStride = m_frame->m_reconPic->m_stride; > - uint32_t stride = mode.fencYuv->m_size; > - uint32_t sizeIdx = log2TrSize - 2; > - uint32_t lumaPredMode = cu.m_lumaIntraDir[absPartIdx]; > - uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2); > - coeff_t* coeff = cu.m_trCoeff[TEXT_LUMA] + coeffOffsetY; > - > - initAdiPattern(cu, cuGeom, absPartIdx, tuDepth, lumaPredMode); > - predIntraLumaAng(lumaPredMode, pred, stride, log2TrSize); > - > - X265_CHECK(!cu.m_transformSkip[TEXT_LUMA][absPartIdx], > "unexpected tskip flag in residualTransformQuantIntra\n"); > - cu.setTUDepthSubParts(tuDepth, absPartIdx, fullDepth); > - > - primitives.calcresidual[sizeIdx](fenc, pred, residual, stride); > - uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, > residual, stride, coeff, log2TrSize, TEXT_LUMA, absPartIdx, false); > + > + uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, > residual, stride, coeffY, log2TrSize, TEXT_LUMA, absPartIdx, false); > if (numSig) > { > - m_quant.invtransformNxN(cu.m_tqBypass[absPartIdx], residual, > stride, coeff, log2TrSize, TEXT_LUMA, true, false, numSig); > + m_quant.invtransformNxN(cu.m_tqBypass[0], residual, stride, > coeffY, log2TrSize, TEXT_LUMA, true, false, numSig); > primitives.luma_add_ps[sizeIdx](picReconY, picStride, pred, > residual, stride, stride); > cu.setCbfSubParts(1 << tuDepth, TEXT_LUMA, absPartIdx, > fullDepth); > } > @@ -654,11 +664,11 @@ > uint32_t cbf = 0; > for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, > qPartIdx += qNumParts) > { > - residualTransformQuantIntra(mode, cuGeom, tuDepth + 1, > qPartIdx, depthRange); > + residualTransformQuantIntra(mode, cuGeom, qPartIdx, tuDepth + > 1, depthRange); > cbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1); 
> } > for (uint32_t offs = 0; offs < 4 * qNumParts; offs++) > - cu.m_cbf[TEXT_LUMA][absPartIdx + offs] |= (cbf << tuDepth); > + cu.m_cbf[0][absPartIdx + offs] |= (cbf << tuDepth); > } > } > > @@ -739,15 +749,14 @@ > } > for (uint32_t offs = 0; offs < 4 * qNumParts; offs++) > { > - cu.m_cbf[TEXT_CHROMA_U][absPartIdx + offs] |= (splitCbfU << > tuDepth); > - cu.m_cbf[TEXT_CHROMA_V][absPartIdx + offs] |= (splitCbfV << > tuDepth); > + cu.m_cbf[1][absPartIdx + offs] |= (splitCbfU << tuDepth); > + cu.m_cbf[2][absPartIdx + offs] |= (splitCbfV << tuDepth); > } > > return outDist; > } > > uint32_t log2TrSizeC = log2TrSize - m_hChromaShift; > - > uint32_t tuDepthC = tuDepth; > if (log2TrSizeC < 2) > { > @@ -766,46 +775,48 @@ > if (checkTransformSkip) > return codeIntraChromaTSkip(mode, cuGeom, tuDepth, tuDepthC, > absPartIdx, psyEnergy); > > + ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv; > uint32_t qtLayer = log2TrSize - 2; > uint32_t tuSize = 1 << log2TrSizeC; > + uint32_t stride = mode.fencYuv->m_csize; > + const uint32_t sizeIdxC = log2TrSizeC - 2; > uint32_t outDist = 0; > > uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + > tuDepthC) << 1); > const SplitType splitType = (m_csp == X265_CSP_I422) ? 
VERTICAL_SPLIT > : DONT_SPLIT; > > - for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; > chromaId++) > + TURecurse tuIterator(splitType, curPartNum, absPartIdx); > + do > { > - TextType ttype = (TextType)chromaId; > - > - TURecurse tuIterator(splitType, curPartNum, absPartIdx); > - do > + uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU; > + > + IntraNeighbors intraNeighbors; > + initIntraNeighbors(cu, absPartIdxC, tuDepthC, false, > &intraNeighbors); > + > + for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= > TEXT_CHROMA_V; chromaId++) > { > - uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU; > + TextType ttype = (TextType)chromaId; > > const pixel* fenc = mode.fencYuv->getChromaAddr(chromaId, > absPartIdxC); > pixel* pred = mode.predYuv.getChromaAddr(chromaId, > absPartIdxC); > - int16_t* residual = > m_rqt[cuGeom.depth].tmpResiYuv.getChromaAddr(chromaId, absPartIdxC); > - uint32_t stride = mode.fencYuv->m_csize; > - uint32_t sizeIdxC = log2TrSizeC - 2; > - > + int16_t* residual = resiYuv.getChromaAddr(chromaId, > absPartIdxC); > uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - > (m_hChromaShift + m_vChromaShift)); > coeff_t* coeffC = m_rqt[qtLayer].coeffRQT[chromaId] + > coeffOffsetC; > pixel* reconQt = > m_rqt[qtLayer].reconQtYuv.getChromaAddr(chromaId, absPartIdxC); > uint32_t reconQtStride = m_rqt[qtLayer].reconQtYuv.m_csize; > - > pixel* picReconC = > m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx > + absPartIdxC); > intptr_t picStride = m_frame->m_reconPic->m_strideC; > > - // init availability pattern > - initAdiPatternChroma(cu, cuGeom, absPartIdxC, tuDepthC, > chromaId); > - pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize); > - > uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC]; > if (chromaPredMode == DM_CHROMA_IDX) > chromaPredMode = cu.m_lumaIntraDir[(m_csp == > X265_CSP_I444) ? 
absPartIdxC : 0]; > if (m_csp == X265_CSP_I422) > chromaPredMode = > g_chroma422IntraAngleMappingTable[chromaPredMode]; > > + // init availability pattern > + initAdiPatternChroma(cu, cuGeom, absPartIdxC, intraNeighbors, > chromaId); > + pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize); > + > // get prediction signal > predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, > log2TrSizeC, m_csp); > > @@ -813,7 +824,6 @@ > > primitives.calcresidual[sizeIdxC](fenc, pred, residual, > stride); > uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, > residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false); > - uint32_t tmpDist; > if (numSig) > { > m_quant.invtransformNxN(cu.m_tqBypass[0], residual, > stride, coeffC, log2TrSizeC, ttype, true, false, numSig); > @@ -827,7 +837,7 @@ > cu.setCbfPartRange(0, ttype, absPartIdxC, > tuIterator.absPartIdxStep); > } > > - tmpDist = primitives.sse_pp[sizeIdxC](reconQt, reconQtStride, > fenc, stride); > + uint32_t tmpDist = primitives.sse_pp[sizeIdxC](reconQt, > reconQtStride, fenc, stride); > outDist += (ttype == TEXT_CHROMA_U) ? > m_rdCost.scaleChromaDistCb(tmpDist) : m_rdCost.scaleChromaDistCr(tmpDist); > > if (m_rdCost.m_psyRd) > @@ -835,10 +845,13 @@ > > primitives.luma_copy_pp[sizeIdxC](picReconC, picStride, > reconQt, reconQtStride); > } > - while (tuIterator.isNextSection()); > - > - if (splitType == VERTICAL_SPLIT) > - offsetSubTUCBFs(cu, ttype, tuDepth, absPartIdx); > + } > + while (tuIterator.isNextSection()); > + > + if (splitType == VERTICAL_SPLIT) > + { > + offsetSubTUCBFs(cu, TEXT_CHROMA_U, tuDepth, absPartIdx); > + offsetSubTUCBFs(cu, TEXT_CHROMA_V, tuDepth, absPartIdx); > } > > return outDist; > @@ -866,14 +879,17 @@ > uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + > tuDepthC) << 1); > const SplitType splitType = (m_csp == X265_CSP_I422) ? 
VERTICAL_SPLIT > : DONT_SPLIT; > > - for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; > chromaId++) > + TURecurse tuIterator(splitType, curPartNum, absPartIdx); > + do > { > - TextType ttype = (TextType)chromaId; > - > - TURecurse tuIterator(splitType, curPartNum, absPartIdx); > - do > + uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU; > + > + IntraNeighbors intraNeighbors; > + initIntraNeighbors(cu, absPartIdxC, tuDepthC, false, > &intraNeighbors); > + > + for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= > TEXT_CHROMA_V; chromaId++) > { > - uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU; > + TextType ttype = (TextType)chromaId; > > const pixel* fenc = mode.fencYuv->getChromaAddr(chromaId, > absPartIdxC); > pixel* pred = mode.predYuv.getChromaAddr(chromaId, > absPartIdxC); > @@ -887,7 +903,7 @@ > uint32_t reconQtStride = m_rqt[qtLayer].reconQtYuv.m_csize; > > // init availability pattern > - initAdiPatternChroma(cu, cuGeom, absPartIdxC, tuDepthC, > chromaId); > + initAdiPatternChroma(cu, cuGeom, absPartIdxC, intraNeighbors, > chromaId); > pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize); > > uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC]; > @@ -980,10 +996,13 @@ > outDist += bDist; > psyEnergy += bEnergy; > } > - while (tuIterator.isNextSection()); > - > - if (splitType == VERTICAL_SPLIT) > - offsetSubTUCBFs(cu, ttype, tuDepth, absPartIdx); > + } > + while (tuIterator.isNextSection()); > + > + if (splitType == VERTICAL_SPLIT) > + { > + offsetSubTUCBFs(cu, TEXT_CHROMA_U, tuDepth, absPartIdx); > + offsetSubTUCBFs(cu, TEXT_CHROMA_V, tuDepth, absPartIdx); > } > > m_entropyCoder.load(m_rqt[fullDepth].rqtRoot); > @@ -1022,91 +1041,18 @@ > } > } > > -void Search::residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, > uint32_t tuDepth, uint32_t absPartIdx) > +void Search::residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, > uint32_t absPartIdx, uint32_t tuDepth) > { > CUData& cu = mode.cu; > - uint32_t fullDepth = 
cu.m_cuDepth[0] + tuDepth; > - uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; > - > - if (tuDepth == cu.m_tuDepth[absPartIdx]) > - { > - uint32_t log2TrSizeC = log2TrSize - m_hChromaShift; > - uint32_t tuDepthC = tuDepth; > - if (log2TrSizeC < 2) > - { > - X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && > tuDepth, "invalid tuDepth\n"); > - if (absPartIdx & 3) > - return; > - log2TrSizeC = 2; > - tuDepthC--; > - } > - > - ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv; > - uint32_t tuSize = 1 << log2TrSizeC; > - uint32_t stride = mode.fencYuv->m_csize; > - const int sizeIdxC = log2TrSizeC - 2; > - > - uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + > tuDepthC) << 1); > - const SplitType splitType = (m_csp == X265_CSP_I422) ? > VERTICAL_SPLIT : DONT_SPLIT; > - > - for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= > TEXT_CHROMA_V; chromaId++) > - { > - TextType ttype = (TextType)chromaId; > - > - TURecurse tuIterator(splitType, curPartNum, absPartIdx); > - do > - { > - uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU; > - > - const pixel* fenc = > mode.fencYuv->getChromaAddr(chromaId, absPartIdxC); > - pixel* pred = > mode.predYuv.getChromaAddr(chromaId, absPartIdxC); > - int16_t* residual = resiYuv.getChromaAddr(chromaId, > absPartIdxC); > - pixel* recon = > mode.reconYuv.getChromaAddr(chromaId, absPartIdxC); // TODO: needed? > - uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * > 2 - (m_hChromaShift + m_vChromaShift)); > - coeff_t* coeff = cu.m_trCoeff[ttype] + > coeffOffsetC; > - pixel* picReconC = > m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx > + absPartIdxC); > - uint32_t picStride = m_frame->m_reconPic->m_strideC; > - > - uint32_t chromaPredMode = > cu.m_chromaIntraDir[absPartIdxC]; > - if (chromaPredMode == DM_CHROMA_IDX) > - chromaPredMode = cu.m_lumaIntraDir[(m_csp == > X265_CSP_I444) ? absPartIdxC : 0]; > - chromaPredMode = (m_csp == X265_CSP_I422) ? 
> g_chroma422IntraAngleMappingTable[chromaPredMode] : chromaPredMode; > - initAdiPatternChroma(cu, cuGeom, absPartIdxC, tuDepthC, > chromaId); > - pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize); > - > - predIntraChromaAng(chromaPred, chromaPredMode, pred, > stride, log2TrSizeC, m_csp); > - > - X265_CHECK(!cu.m_transformSkip[ttype][0], "transform skip > not supported at low RD levels\n"); > - > - primitives.calcresidual[sizeIdxC](fenc, pred, residual, > stride); > - uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, > residual, stride, coeff, log2TrSizeC, ttype, absPartIdxC, false); > - if (numSig) > - { > - m_quant.invtransformNxN(cu.m_tqBypass[absPartIdxC], > residual, stride, coeff, log2TrSizeC, ttype, true, false, numSig); > - primitives.luma_add_ps[sizeIdxC](recon, stride, pred, > residual, stride, stride); > - primitives.luma_copy_pp[sizeIdxC](picReconC, > picStride, recon, stride); > - cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, > tuIterator.absPartIdxStep); > - } > - else > - { > - primitives.luma_copy_pp[sizeIdxC](recon, stride, > pred, stride); > - primitives.luma_copy_pp[sizeIdxC](picReconC, > picStride, pred, stride); > - cu.setCbfPartRange(0, ttype, absPartIdxC, > tuIterator.absPartIdxStep); > - } > - } > - while (tuIterator.isNextSection()); > - > - if (splitType == VERTICAL_SPLIT) > - offsetSubTUCBFs(cu, (TextType)chromaId, tuDepth, > absPartIdx); > - } > - } > - else > + uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth; > + > + if (tuDepth < cu.m_tuDepth[absPartIdx]) > { > uint32_t qNumParts = 1 << (log2TrSize - 1 - LOG2_UNIT_SIZE) * 2; > uint32_t splitCbfU = 0, splitCbfV = 0; > for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, > qPartIdx += qNumParts) > { > - residualQTIntraChroma(mode, cuGeom, tuDepth + 1, qPartIdx); > + residualQTIntraChroma(mode, cuGeom, qPartIdx, tuDepth + 1); > splitCbfU |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1); > splitCbfV |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 
1); > } > @@ -1115,12 +1061,91 @@ > cu.m_cbf[1][absPartIdx + offs] |= (splitCbfU << tuDepth); > cu.m_cbf[2][absPartIdx + offs] |= (splitCbfV << tuDepth); > } > + > + return; > + } > + > + uint32_t log2TrSizeC = log2TrSize - m_hChromaShift; > + uint32_t tuDepthC = tuDepth; > + if (log2TrSizeC < 2) > + { > + X265_CHECK(log2TrSize == 2 && m_csp != X265_CSP_I444 && tuDepth, > "invalid tuDepth\n"); > + if (absPartIdx & 3) > + return; > + log2TrSizeC = 2; > + tuDepthC--; > + } > + > + ShortYuv& resiYuv = m_rqt[cuGeom.depth].tmpResiYuv; > + uint32_t tuSize = 1 << log2TrSizeC; > + uint32_t stride = mode.fencYuv->m_csize; > + const uint32_t sizeIdxC = log2TrSizeC - 2; > + > + uint32_t curPartNum = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + > tuDepthC) << 1); > + const SplitType splitType = (m_csp == X265_CSP_I422) ? VERTICAL_SPLIT > : DONT_SPLIT; > + > + TURecurse tuIterator(splitType, curPartNum, absPartIdx); > + do > + { > + uint32_t absPartIdxC = tuIterator.absPartIdxTURelCU; > + > + IntraNeighbors intraNeighbors; > + initIntraNeighbors(cu, absPartIdxC, tuDepthC, false, > &intraNeighbors); > + > + for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= > TEXT_CHROMA_V; chromaId++) > + { > + TextType ttype = (TextType)chromaId; > + > + const pixel* fenc = mode.fencYuv->getChromaAddr(chromaId, > absPartIdxC); > + pixel* pred = mode.predYuv.getChromaAddr(chromaId, > absPartIdxC); > + int16_t* residual = resiYuv.getChromaAddr(chromaId, > absPartIdxC); > + uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - > (m_hChromaShift + m_vChromaShift)); > + coeff_t* coeffC = cu.m_trCoeff[ttype] + coeffOffsetC; > + pixel* picReconC = > m_frame->m_reconPic->getChromaAddr(chromaId, cu.m_cuAddr, cuGeom.encodeIdx > + absPartIdxC); > + intptr_t picStride = m_frame->m_reconPic->m_strideC; > + > + uint32_t chromaPredMode = cu.m_chromaIntraDir[absPartIdxC]; > + if (chromaPredMode == DM_CHROMA_IDX) > + chromaPredMode = cu.m_lumaIntraDir[(m_csp == > X265_CSP_I444) ? 
absPartIdxC : 0]; > + if (m_csp == X265_CSP_I422) > + chromaPredMode = > g_chroma422IntraAngleMappingTable[chromaPredMode]; > + > + // init availability pattern > + initAdiPatternChroma(cu, cuGeom, absPartIdxC, intraNeighbors, > chromaId); > + pixel* chromaPred = getAdiChromaBuf(chromaId, tuSize); > + > + // get prediction signal > + predIntraChromaAng(chromaPred, chromaPredMode, pred, stride, > log2TrSizeC, m_csp); > + > + X265_CHECK(!cu.m_transformSkip[ttype][0], "transform skip not > supported at low RD levels\n"); > + > + primitives.calcresidual[sizeIdxC](fenc, pred, residual, > stride); > + uint32_t numSig = m_quant.transformNxN(cu, fenc, stride, > residual, stride, coeffC, log2TrSizeC, ttype, absPartIdxC, false); > + if (numSig) > + { > + m_quant.invtransformNxN(cu.m_tqBypass[0], residual, > stride, coeffC, log2TrSizeC, ttype, true, false, numSig); > + primitives.luma_add_ps[sizeIdxC](picReconC, picStride, > pred, residual, stride, stride); > + cu.setCbfPartRange(1 << tuDepth, ttype, absPartIdxC, > tuIterator.absPartIdxStep); > + } > + else > + { > + // no coded residual, recon = pred > + primitives.luma_copy_pp[sizeIdxC](picReconC, picStride, > pred, stride); > + cu.setCbfPartRange(0, ttype, absPartIdxC, > tuIterator.absPartIdxStep); > + } > + } > + } > + while (tuIterator.isNextSection()); > + > + if (splitType == VERTICAL_SPLIT) > + { > + offsetSubTUCBFs(cu, TEXT_CHROMA_U, tuDepth, absPartIdx); > + offsetSubTUCBFs(cu, TEXT_CHROMA_V, tuDepth, absPartIdx); > } > } > > void Search::checkIntra(Mode& intraMode, const CUGeom& cuGeom, PartSize > partSize, uint8_t* sharedModes) > { > - uint32_t depth = cuGeom.depth; > CUData& cu = intraMode.cu; > > cu.setPartSizeSubParts(partSize); > @@ -1143,7 +1168,7 @@ > m_entropyCoder.codePredMode(cu.m_predMode[0]); > } > > - m_entropyCoder.codePartSize(cu, 0, depth); > + m_entropyCoder.codePartSize(cu, 0, cuGeom.depth); > m_entropyCoder.codePredInfo(cu, 0); > intraMode.mvBits = m_entropyCoder.getNumberOfWrittenBits(); > > @@ 
-1153,7 +1178,10 @@ > intraMode.totalBits = m_entropyCoder.getNumberOfWrittenBits(); > intraMode.coeffBits = intraMode.totalBits - intraMode.mvBits; > if (m_rdCost.m_psyRd) > - intraMode.psyEnergy = m_rdCost.psyCost(cuGeom.log2CUSize - 2, > intraMode.fencYuv->m_buf[0], intraMode.fencYuv->m_size, > intraMode.reconYuv.m_buf[0], intraMode.reconYuv.m_size); > + { > + const Yuv* fencYuv = intraMode.fencYuv; > + intraMode.psyEnergy = m_rdCost.psyCost(cuGeom.log2CUSize - 2, > fencYuv->m_buf[0], fencYuv->m_size, intraMode.reconYuv.m_buf[0], > intraMode.reconYuv.m_size); > + } > > updateModeCost(intraMode); > } > @@ -1174,7 +1202,9 @@ > const uint32_t absPartIdx = 0; > > // Reference sample smoothing > - initAdiPattern(cu, cuGeom, absPartIdx, initTuDepth, ALL_IDX); > + IntraNeighbors intraNeighbors; > + initIntraNeighbors(cu, absPartIdx, initTuDepth, true, > &intraNeighbors); > + initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, ALL_IDX); > > const pixel* fenc = intraMode.fencYuv->m_buf[0]; > uint32_t stride = intraMode.fencYuv->m_size; > @@ -1335,7 +1365,6 @@ > { > CUData& cu = intraMode.cu; > Yuv* reconYuv = &intraMode.reconYuv; > - const Yuv* fencYuv = intraMode.fencYuv; > > X265_CHECK(cu.m_partSize[0] == SIZE_2Nx2N, "encodeIntraInInter does > not expect NxN intra\n"); > X265_CHECK(!m_slice->isIntra(), "encodeIntraInInter does not expect > to be used in I slices\n"); > @@ -1369,7 +1398,10 @@ > intraMode.totalBits = m_entropyCoder.getNumberOfWrittenBits(); > intraMode.coeffBits = intraMode.totalBits - intraMode.mvBits; > if (m_rdCost.m_psyRd) > + { > + const Yuv* fencYuv = intraMode.fencYuv; > intraMode.psyEnergy = m_rdCost.psyCost(cuGeom.log2CUSize - 2, > fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size); > + } > > m_entropyCoder.store(intraMode.contexts); > updateModeCost(intraMode); > @@ -1404,7 +1436,9 @@ > else > { > // Reference sample smoothing > - initAdiPattern(cu, cuGeom, absPartIdx, initTuDepth, ALL_IDX); > + IntraNeighbors 
intraNeighbors; > + initIntraNeighbors(cu, absPartIdx, initTuDepth, true, > &intraNeighbors); > + initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, > ALL_IDX); > > // determine set of modes to be tested (using prediction > signal only) > const pixel* fenc = fencYuv->getLumaAddr(absPartIdx); > @@ -1602,8 +1636,10 @@ > log2TrSizeC = 5; > } > > - Predict::initAdiPatternChroma(cu, cuGeom, 0, tuDepth, 1); > - Predict::initAdiPatternChroma(cu, cuGeom, 0, tuDepth, 2); > + IntraNeighbors intraNeighbors; > + initIntraNeighbors(cu, 0, tuDepth, false, &intraNeighbors); > + Predict::initAdiPatternChroma(cu, cuGeom, 0, intraNeighbors, 1); // U > + Predict::initAdiPatternChroma(cu, cuGeom, 0, intraNeighbors, 2); // V > cu.getAllowedChromaDir(0, modeList); > > // check chroma modes > @@ -2581,16 +2617,16 @@ > updateModeCost(interMode); > } > > -void Search::residualTransformQuantInter(Mode& mode, const CUGeom& > cuGeom, uint32_t absPartIdx, uint32_t depth, const uint32_t depthRange[2]) > +void Search::residualTransformQuantInter(Mode& mode, const CUGeom& > cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t depthRange[2]) > { > + uint32_t depth = cuGeom.depth + tuDepth; > CUData& cu = mode.cu; > X265_CHECK(cu.m_cuDepth[0] == cu.m_cuDepth[absPartIdx], "invalid > depth\n"); > > uint32_t log2TrSize = g_maxLog2CUSize - depth; > - uint32_t tuDepth = depth - cu.m_cuDepth[0]; > > bool bCheckFull = log2TrSize <= depthRange[1]; > - if (cu.m_partSize[0] != SIZE_2Nx2N && depth == > cu.m_cuDepth[absPartIdx] && log2TrSize > depthRange[0]) > + if (cu.m_partSize[0] != SIZE_2Nx2N && !tuDepth && log2TrSize > > depthRange[0]) > bCheckFull = false; > > if (bCheckFull) > @@ -2611,7 +2647,7 @@ > uint32_t setCbf = 1 << tuDepth; > > uint32_t coeffOffsetY = absPartIdx << (LOG2_UNIT_SIZE * 2); > - coeff_t *coeffCurY = cu.m_trCoeff[0] + coeffOffsetY; > + coeff_t* coeffCurY = cu.m_trCoeff[0] + coeffOffsetY; > > uint32_t sizeIdx = log2TrSize - 2; > > @@ -2644,8 +2680,8 @@ > uint32_t 
strideResiC = resiYuv.m_csize; > > uint32_t coeffOffsetC = coeffOffsetY >> (m_hChromaShift + > m_vChromaShift); > - coeff_t *coeffCurU = cu.m_trCoeff[1] + coeffOffsetC; > - coeff_t *coeffCurV = cu.m_trCoeff[2] + coeffOffsetC; > + coeff_t* coeffCurU = cu.m_trCoeff[1] + coeffOffsetC; > + coeff_t* coeffCurV = cu.m_trCoeff[2] + coeffOffsetC; > bool splitIntoSubTUs = (m_csp == X265_CSP_I422); > > TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : > DONT_SPLIT, absPartIdxStep, absPartIdx); > @@ -2702,16 +2738,16 @@ > uint32_t ycbf = 0, ucbf = 0, vcbf = 0; > for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, > qPartIdx += qNumParts) > { > - residualTransformQuantInter(mode, cuGeom, qPartIdx, depth + > 1, depthRange); > - ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1); > + residualTransformQuantInter(mode, cuGeom, qPartIdx, tuDepth + > 1, depthRange); > + ycbf |= cu.getCbf(qPartIdx, TEXT_LUMA, tuDepth + 1); > ucbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_U, tuDepth + 1); > vcbf |= cu.getCbf(qPartIdx, TEXT_CHROMA_V, tuDepth + 1); > } > - for (uint32_t i = 0; i < 4 * qNumParts; i++) > + for (uint32_t i = 0; i < 4 * qNumParts; ++i) > { > - cu.m_cbf[TEXT_LUMA][absPartIdx + i] |= ycbf << tuDepth; > - cu.m_cbf[TEXT_CHROMA_U][absPartIdx + i] |= ucbf << tuDepth; > - cu.m_cbf[TEXT_CHROMA_V][absPartIdx + i] |= vcbf << tuDepth; > + cu.m_cbf[0][absPartIdx + i] |= ycbf << tuDepth; > + cu.m_cbf[1][absPartIdx + i] |= ucbf << tuDepth; > + cu.m_cbf[2][absPartIdx + i] |= vcbf << tuDepth; > } > } > } > @@ -2769,7 +2805,7 @@ > > uint32_t trSize = 1 << log2TrSize; > const bool splitIntoSubTUs = (m_csp == X265_CSP_I422); > - uint32_t absPartIdxStep = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + > tuDepthC) << 1); > + uint32_t absPartIdxStep = NUM_CU_PARTITIONS >> ((cu.m_cuDepth[0] + > tuDepthC) << 1); > const Yuv* fencYuv = mode.fencYuv; > > // code full block > @@ -3127,16 +3163,19 @@ > //Encode cbf flags > if (bCodeChroma) > { > - for (uint32_t chromaId = TEXT_CHROMA_U; chromaId 
<= > TEXT_CHROMA_V; chromaId++) > + if (!splitIntoSubTUs) > { > - if (!splitIntoSubTUs) > - m_entropyCoder.codeQtCbfChroma(cbfFlag[chromaId][0], > tuDepth); > - else > - { > - offsetSubTUCBFs(cu, (TextType)chromaId, tuDepth, > absPartIdx); > - m_entropyCoder.codeQtCbfChroma(cbfFlag[chromaId][0], > tuDepth); > - m_entropyCoder.codeQtCbfChroma(cbfFlag[chromaId][1], > tuDepth); > - } > + m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_U][0], > tuDepth); > + m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_V][0], > tuDepth); > + } > + else > + { > + offsetSubTUCBFs(cu, TEXT_CHROMA_U, tuDepth, absPartIdx); > + offsetSubTUCBFs(cu, TEXT_CHROMA_V, tuDepth, absPartIdx); > + m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_U][0], > tuDepth); > + m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_U][1], > tuDepth); > + m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_V][0], > tuDepth); > + m_entropyCoder.codeQtCbfChroma(cbfFlag[TEXT_CHROMA_V][1], > tuDepth); > } > } > > diff -r 8d2f418829c8 -r 6b59452a17d7 source/encoder/search.h > --- a/source/encoder/search.h Sat Dec 20 21:27:14 2014 +0900 > +++ b/source/encoder/search.h Tue Dec 23 14:49:59 2014 +0900 > @@ -178,9 +178,9 @@ > void encodeResAndCalcRdSkipCU(Mode& interMode); > > // encode residual without rd-cost > - void residualTransformQuantInter(Mode& mode, const CUGeom& > cuGeom, uint32_t absPartIdx, uint32_t depth, const uint32_t depthRange[2]); > - void residualTransformQuantIntra(Mode& mode, const CUGeom& > cuGeom, uint32_t tuDepth, uint32_t absPartIdx, const uint32_t > depthRange[2]); > - void residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, > uint32_t tuDepth, uint32_t absPartIdx); > + void residualTransformQuantInter(Mode& mode, const CUGeom& > cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t > depthRange[2]); > + void residualTransformQuantIntra(Mode& mode, const CUGeom& > cuGeom, uint32_t absPartIdx, uint32_t tuDepth, const uint32_t > depthRange[2]); > + void 
residualQTIntraChroma(Mode& mode, const CUGeom& cuGeom, > uint32_t absPartIdx, uint32_t tuDepth); > > // pick be chroma mode from available using just sa8d costs > void getBestIntraModeChroma(Mode& intraMode, const CUGeom& > cuGeom); > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
