Agreed, and the averaging should be weighted by CU block size.
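
Something along these lines is roughly what I have in mind -- an untested sketch only, which assumes CUData keeps the coded QP per 4x4 partition (which is what setQPSubParts() implies) and that m_numPartitions is accessible here; the helper name is invented:

    /* sketch: update the row QP sum from the QPs actually coded in the CTU,
     * after compressCTU() has run.  m_qp[] holds one entry per 4x4 partition,
     * so a plain mean over the partitions is already weighted by CU block
     * size (a 32x32 CU contributes 64 entries, an 8x8 CU only 4). */
    static int ctuAverageQp(const CUData& ctu)
    {
        int sum = 0;
        for (uint32_t part = 0; part < ctu.m_numPartitions; part++)
            sum += ctu.m_qp[part];
        return (sum + (int)(ctu.m_numPartitions >> 1)) / (int)ctu.m_numPartitions;
    }

The frame encoder could then drop the up-front calcQpForCu() call and do

    curEncData.m_rowStat[row].sumQpAq += ctuAverageQp(ctu);

once the CTU for cuAddr has been compressed.
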
On Sun, Apr 26, 2015 at 11:17 PM, Steve Borho <st...@borho.org> wrote:
> On Sun, Apr 26, 2015 at 12:21 PM, Steve Borho <st...@borho.org> wrote:
> > # HG changeset patch
> > # User Steve Borho <st...@borho.org>
> > # Date 1429943995 18000
> > #      Sat Apr 25 01:39:55 2015 -0500
> > # Node ID 68a13226d586b335c02cade9311e093f0149c42a
> > # Parent  6a0a37c01cff03cadd44691a0fe447d17ec0b14f
> > analysis: always configure quant QP directly after setting RD lambda
> >
> > Basically, everywhere we adjust or assign QP we set quant QP immediately. This
> > removes a great many ad-hoc calls to setQPforQuant() and hopefully makes it
> > impossible to miss quant being configured properly.
> >
> > This patch fixes a layering violation where the frame encoder was setting the
> > RDO lambdas directly, but only when delta-QP was not enabled.
> >
> > diff -r 6a0a37c01cff -r 68a13226d586 source/common/quant.cpp
> > --- a/source/common/quant.cpp  Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/common/quant.cpp  Sat Apr 25 01:39:55 2015 -0500
> > @@ -225,16 +225,15 @@
> >      X265_FREE(m_fencShortBuf);
> >  }
> >
> > -void Quant::setQPforQuant(const CUData& cu)
> > +void Quant::setQPforQuant(const CUData& ctu, int qp)
> >  {
> > -    m_tqBypass = !!cu.m_tqBypass[0];
> > +    m_tqBypass = !!ctu.m_tqBypass[0];
> >      if (m_tqBypass)
> >          return;
> > -    m_nr = m_frameNr ? &m_frameNr[cu.m_encData->m_frameEncoderID] : NULL;
> > -    int qpy = cu.m_qp[0];
> > -    m_qpParam[TEXT_LUMA].setQpParam(qpy + QP_BD_OFFSET);
> > -    setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, cu.m_chromaFormat);
> > -    setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[1], TEXT_CHROMA_V, cu.m_chromaFormat);
> > +    m_nr = m_frameNr ? &m_frameNr[ctu.m_encData->m_frameEncoderID] : NULL;
> > +    m_qpParam[TEXT_LUMA].setQpParam(qp + QP_BD_OFFSET);
> > +    setChromaQP(qp + ctu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, ctu.m_chromaFormat);
> > +    setChromaQP(qp + ctu.m_slice->m_pps->chromaQpOffset[1], TEXT_CHROMA_V, ctu.m_chromaFormat);
> >  }
> >
> >  void Quant::setChromaQP(int qpin, TextType ttype, int chFmt)
> > diff -r 6a0a37c01cff -r 68a13226d586 source/common/quant.h
> > --- a/source/common/quant.h  Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/common/quant.h  Sat Apr 25 01:39:55 2015 -0500
> > @@ -103,7 +103,7 @@
> >      bool allocNoiseReduction(const x265_param& param);
> >
> >      /* CU setup */
> > -    void setQPforQuant(const CUData& cu);
> > +    void setQPforQuant(const CUData& ctu, int qp);
> >
> >      uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t fencStride, const int16_t* residual, uint32_t resiStride, coeff_t* coeff,
> >                            uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx, bool useTransformSkip);
> > diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/analysis.cpp
> > --- a/source/encoder/analysis.cpp  Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/encoder/analysis.cpp  Sat Apr 25 01:39:55 2015 -0500
> > @@ -146,18 +146,16 @@
> >
> >      if (m_slice->m_pps->bUseDQP)
> >      {
> > -        m_aqQP[0] = calculateQpforCuSize(ctu, cuGeom);
> > -        setLambdaFromQP(*m_slice, m_aqQP[0]);
> > -        m_aqQP[0] = x265_clip3(QP_MIN, QP_MAX_SPEC, m_aqQP[0]);
> > -        ctu.setQPSubParts((int8_t)m_aqQP[0], 0, 0);
> > +        m_aqQP[0] = setLambdaFromQP(ctu, calculateQpforCuSize(ctu, cuGeom));
> >
> >          if (m_slice->m_pps->maxCuDQPDepth)
> >              initAqQPs(1, ctu, &cuGeom + 1);
> >      }
> >      else
> > -        m_aqQP[0] = m_slice->m_sliceQp;
> > +        /* adaptive quant disabled, CTU QP is always slice QP, and within spec range */
> > +        m_aqQP[0] = setLambdaFromQP(ctu, m_slice->m_sliceQp);
> >
> > -    m_quant.setQPforQuant(ctu);
> > +    ctu.setQPSubParts((int8_t)m_aqQP[0], 0, 0);
> >      m_rqt[0].cur.load(initialContext);
> >      m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic, ctu.m_cuAddr, 0);
> >
> > @@ -231,20 +229,24 @@
> >          return;
> >      else if (md.bestMode->cu.isIntra(0))
> >      {
> > +        m_quant.m_tqBypass = true;
> >          md.pred[PRED_LOSSLESS].initCosts();
> >          md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
> >          PartSize size = (PartSize)md.pred[PRED_LOSSLESS].cu.m_partSize[0];
> >          uint8_t* modes = md.pred[PRED_LOSSLESS].cu.m_lumaIntraDir;
> >          checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size, modes, NULL);
> >          checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
> > +        m_quant.m_tqBypass = false;
> >      }
> >      else
> >      {
> > +        m_quant.m_tqBypass = true;
> >          md.pred[PRED_LOSSLESS].initCosts();
> >          md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
> >          md.pred[PRED_LOSSLESS].predYuv.copyFromYuv(md.bestMode->predYuv);
> >          encodeResAndCalcRdInterCU(md.pred[PRED_LOSSLESS], cuGeom);
> >          checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
> > +        m_quant.m_tqBypass = false;
> >      }
> >  }
> >
> > @@ -269,7 +271,6 @@
> >              PartSize size = (PartSize)reusePartSizes[zOrder];
> >              Mode& mode = size == SIZE_2Nx2N ? md.pred[PRED_INTRA] : md.pred[PRED_INTRA_NxN];
> >              mode.cu.initSubCU(parentCTU, cuGeom, qp);
> > -            m_quant.setQPforQuant(mode.cu);
> >              checkIntra(mode, cuGeom, size, &reuseModes[zOrder], &reuseChromaModes[zOrder]);
> >              checkBestMode(mode, depth);
> >
> > @@ -287,7 +288,6 @@
> >      else if (mightNotSplit)
> >      {
> >          md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
> > -        m_quant.setQPforQuant(md.pred[PRED_INTRA].cu);
> >          checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
> >          checkBestMode(md.pred[PRED_INTRA], depth);
> >
> > @@ -327,11 +327,7 @@
> >              m_rqt[nextDepth].cur.load(*nextContext);
> >
> >              if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
> > -            {
> > -                nextQP = m_aqQP[childGeom.index];
> > -                setLambdaFromQP(*m_slice, nextQP);
> > -                nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> > -            }
> > +                nextQP = setLambdaFromQP(parentCTU, m_aqQP[childGeom.index]);
> >
> >              compressIntraCU(parentCTU, childGeom, zOrder, nextQP);
> >
> > @@ -401,14 +397,9 @@
> >      {
> >          slave.m_slice = m_slice;
> >          slave.m_frame = m_frame;
> > -        slave.setLambdaFromQP(*m_slice, m_rdCost.m_qp);
> > +        slave.setLambdaFromQP(md.pred[PRED_2Nx2N].cu, m_rdCost.m_qp);
> >          slave.invalidateContexts(0);
> > -
> > -        if (m_param->rdLevel >= 5)
> > -        {
> > -            slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
> > -            slave.m_quant.setQPforQuant(md.pred[PRED_2Nx2N].cu);
> > -        }
> > +        slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
> >      }
> >
> >      /* perform Mode task, repeat until no more work is available */
> > @@ -419,11 +410,6 @@
> >          switch (pmode.modes[task])
> >          {
> >          case PRED_INTRA:
> > -            if (&slave != this)
> > -            {
> > -                slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
> > -                slave.m_quant.setQPforQuant(md.pred[PRED_INTRA].cu);
> > -            }
> >              slave.checkIntraInInter(md.pred[PRED_INTRA], pmode.cuGeom);
> >              if (m_param->rdLevel > 2)
> >                  slave.encodeIntraInInter(md.pred[PRED_INTRA], pmode.cuGeom);
> > @@ -739,11 +725,7 @@
> >              m_rqt[nextDepth].cur.load(*nextContext);
> >
> >              if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
> > -            {
> > -                nextQP = m_aqQP[childGeom.index];
> > -                setLambdaFromQP(*m_slice, nextQP);
> > -                nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> > -            }
> > +                nextQP = setLambdaFromQP(parentCTU, m_aqQP[childGeom.index]);
> >
> >              compressInterCU_dist(parentCTU, childGeom, nextQP);
> >
> > @@ -944,7 +926,6 @@
> >                 {
> >                     /* generate recon pixels with no rate distortion considerations */
> >                     CUData& cu = md.bestMode->cu;
> > -                   m_quant.setQPforQuant(cu);
> >
> >                     uint32_t tuDepthRange[2];
> >                     cu.getInterTUQtDepthRange(tuDepthRange, 0);
> > @@ -969,7 +950,6 @@
> >                 {
> >                     /* generate recon pixels with no rate distortion considerations */
> >                     CUData& cu = md.bestMode->cu;
> > -                   m_quant.setQPforQuant(cu);
> >
> >                     uint32_t tuDepthRange[2];
> >                     cu.getIntraTUQtDepthRange(tuDepthRange, 0);
> > @@ -1020,11 +1000,7 @@
> >              m_rqt[nextDepth].cur.load(*nextContext);
> >
> >              if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
> > -            {
> > -                nextQP = m_aqQP[childGeom.index];
> > -                setLambdaFromQP(*m_slice, nextQP);
> > -                nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> > -            }
> > +                nextQP = setLambdaFromQP(parentCTU, m_aqQP[childGeom.index]);
> >
> >              compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
> >
> > @@ -1228,11 +1204,7 @@
> >              m_rqt[nextDepth].cur.load(*nextContext);
> >
> >              if (m_slice->m_pps->bUseDQP && nextDepth <= m_slice->m_pps->maxCuDQPDepth)
> > -            {
> > -                nextQP = m_aqQP[childGeom.index];
> > -                setLambdaFromQP(*m_slice, nextQP);
> > -                nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
> > -            }
> > +                nextQP = setLambdaFromQP(parentCTU, m_aqQP[childGeom.index]);
> >
> >              compressInterCU_rd5_6(parentCTU, childGeom, zOrder, nextQP);
> >
> > @@ -1758,7 +1730,6 @@
> >      CUData& cu = bestMode->cu;
> >
> >      cu.copyFromPic(ctu, cuGeom);
> > -    m_quant.setQPforQuant(cu);
> >
> >      Yuv& fencYuv = m_modeDepth[cuGeom.depth].fencYuv;
> >      if (cuGeom.depth)
> > diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/frameencoder.cpp
> > --- a/source/encoder/frameencoder.cpp  Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/encoder/frameencoder.cpp  Sat Apr 25 01:39:55 2015 -0500
> > @@ -851,12 +851,11 @@
> >
> >          if (m_param->rc.aqMode || bIsVbv)
> >          {
> > +            X265_CHECK(slice->m_pps->bUseDQP, "adaptive quant in use without DQP\n");
> >              int qp = calcQpForCu(cuAddr, curEncData.m_cuStat[cuAddr].baseQp);
> >              qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
> >              curEncData.m_rowStat[row].sumQpAq += qp;
> >          }
> > -        else
> > -            tld.analysis.setLambdaFromQP(*slice, slice->m_sliceQp);
>
> Note that this change raises the question of whether calcQpForCu() is
> necessary any more in the frame encoder. The returned QP is only used
> to update the row sum QP. compressCTU() is free to use an entirely
> different QP (or QPs per CU).
>
> My guess is that rate control would work better if sumQpAq were
> incremented *after* the CTU was compressed, based on the average of
> the QPs actually used to code the block. And this would also be more
> work efficient.
>
> >          if (m_param->bEnableWavefront && !col && row)
> >          {
> > diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/rdcost.h
> > --- a/source/encoder/rdcost.h  Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/encoder/rdcost.h  Sat Apr 25 01:39:55 2015 -0500
> > @@ -40,12 +40,13 @@
> >      uint32_t  m_chromaDistWeight[2];
> >      uint32_t  m_psyRdBase;
> >      uint32_t  m_psyRd;
> > -    int       m_qp;
> > +    int       m_qp; /* QP used to configure lambda, may be higher than QP_MAX_SPEC but <= QP_MAX_MAX */
> >
> >      void setPsyRdScale(double scale) { m_psyRdBase = (uint32_t)floor(65536.0 * scale * 0.33); }
> >
> >      void setQP(const Slice& slice, int qp)
> >      {
> > +        x265_emms(); /* TODO: if the lambda tables were ints, this would not be necessary */
> >          m_qp = qp;
> >
> >          /* Scale PSY RD factor by a slice type factor */
> > diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/search.cpp
> > --- a/source/encoder/search.cpp  Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/encoder/search.cpp  Sat Apr 25 01:39:55 2015 -0500
> > @@ -163,11 +163,16 @@
> >      X265_FREE(m_tsRecon);
> >  }
> >
> > -void Search::setLambdaFromQP(const Slice& slice, int qp)
> > +int Search::setLambdaFromQP(const CUData& ctu, int qp)
> >  {
> > -    x265_emms(); /* TODO: if the lambda tables were ints, this would not be necessary */
> > +    X265_CHECK(qp >= QP_MIN && qp <= QP_MAX_MAX, "QP used for lambda is out of range\n");
> > +
> >      m_me.setQP(qp);
> > -    m_rdCost.setQP(slice, qp);
> > +    m_rdCost.setQP(*m_slice, qp);
> > +
> > +    int quantQP = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
> > +    m_quant.setQPforQuant(ctu, quantQP);
> > +    return quantQP;
> >  }
> >
> >  #if CHECKED_BUILD || _DEBUG
> > @@ -1364,8 +1369,6 @@
> >      X265_CHECK(cu.m_partSize[0] == SIZE_2Nx2N, "encodeIntraInInter does not expect NxN intra\n");
> >      X265_CHECK(!m_slice->isIntra(), "encodeIntraInInter does not expect to be used in I slices\n");
> >
> > -    m_quant.setQPforQuant(cu);
> > -
> >      uint32_t tuDepthRange[2];
> >      cu.getIntraTUQtDepthRange(tuDepthRange, 0);
> >
> > @@ -1888,10 +1891,9 @@
> >      /* Setup slave Search instance for ME for master's CU */
> >      if (&slave != this)
> >      {
> > -        slave.setLambdaFromQP(*m_slice, m_rdCost.m_qp);
> >          slave.m_slice = m_slice;
> >          slave.m_frame = m_frame;
> > -
> > +        slave.setLambdaFromQP(pme.mode.cu, m_rdCost.m_qp);
> >          slave.m_me.setSourcePU(*pme.mode.fencYuv, pme.pu.ctuAddr, pme.pu.cuAbsPartIdx, pme.pu.puAbsPartIdx, pme.pu.width, pme.pu.height);
> >      }
> >
> > @@ -2523,9 +2525,6 @@
> >      uint32_t log2CUSize = cuGeom.log2CUSize;
> >      int sizeIdx = log2CUSize - 2;
> >
> > -    uint32_t tqBypass = cu.m_tqBypass[0];
> > -    m_quant.setQPforQuant(interMode.cu);
> > -
> >      resiYuv->subtract(*fencYuv, *predYuv, log2CUSize);
> >
> >      uint32_t tuDepthRange[2];
> > @@ -2536,6 +2535,7 @@
> >      Cost costs;
> >      estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
> >
> > +    uint32_t tqBypass = cu.m_tqBypass[0];
> >      if (!tqBypass)
> >      {
> >          uint32_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
> > diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/search.h
> > --- a/source/encoder/search.h  Sat Apr 25 00:39:48 2015 -0500
> > +++ b/source/encoder/search.h  Sat Apr 25 01:39:55 2015 -0500
> > @@ -287,7 +287,7 @@
> >      ~Search();
> >
> >      bool     initSearch(const x265_param& param, ScalingList& scalingList);
> > -    void     setLambdaFromQP(const Slice& slice, int qp);
> > +    int      setLambdaFromQP(const CUData& ctu, int qp); /* returns real quant QP in valid spec range */
> >
> >      // mark temp RD entropy contexts as uninitialized; useful for finding loads without stores
> >      void     invalidateContexts(int fromDepth);
> >
> --
> Steve Borho
> _______________________________________________
> x265-devel mailing list
> x265-devel@videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
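
For anyone skimming the thread, here is the net effect of the patch condensed into one place. This is paraphrased from the hunks above (the X265_CHECK is omitted), not a verbatim copy of the new code:

    /* setLambdaFromQP() is now the single point where RD lambda and quant QP
     * are configured; it returns the spec-legal QP so the caller can record
     * it in the CU. */
    int Search::setLambdaFromQP(const CUData& ctu, int qp)
    {
        /* lambda may be derived from a QP above the spec range (up to QP_MAX_MAX) */
        m_me.setQP(qp);
        m_rdCost.setQP(*m_slice, qp);

        /* quant always receives a QP clipped into [QP_MIN, QP_MAX_SPEC] */
        int quantQP = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
        m_quant.setQPforQuant(ctu, quantQP);
        return quantQP;
    }

    /* typical caller, from compressCTU():
     *   m_aqQP[0] = setLambdaFromQP(ctu, calculateQpforCuSize(ctu, cuGeom));
     *   ctu.setQPSubParts((int8_t)m_aqQP[0], 0, 0); */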