[x265] [PATCH 6 of 6 REV2] analysis: always configure quant QP directly after setting RD lambda

Steve Borho Sun, 26 Apr 2015 10:21:32 -0700

# HG changeset patch
# User Steve Borho <st...@borho.org>
# Date 1429943995 18000
#      Sat Apr 25 01:39:55 2015 -0500
# Node ID 68a13226d586b335c02cade9311e093f0149c42a
# Parent  6a0a37c01cff03cadd44691a0fe447d17ec0b14f
analysis: always configure quant QP directly after setting RD lambda


Basically, everywhere we adjust or assign QP we now configure the quant QP
immediately. This removes a great many ad-hoc calls to setQPforQuant() and
hopefully makes it impossible to forget to configure quant properly.

This patch fixes a layering violation where the frame encoder was setting the
RDO lambdas directly, but only when delta-QP was not enabled.

diff -r 6a0a37c01cff -r 68a13226d586 source/common/quant.cpp
--- a/source/common/quant.cpp   Sat Apr 25 00:39:48 2015 -0500
+++ b/source/common/quant.cpp   Sat Apr 25 01:39:55 2015 -0500
@@ -225,16 +225,15 @@
     X265_FREE(m_fencShortBuf);
 }
 
-void Quant::setQPforQuant(const CUData& cu)
+void Quant::setQPforQuant(const CUData& ctu, int qp)
 {
-    m_tqBypass = !!cu.m_tqBypass[0];
+    m_tqBypass = !!ctu.m_tqBypass[0];
     if (m_tqBypass)
         return;
-    m_nr = m_frameNr ? &m_frameNr[cu.m_encData->m_frameEncoderID] : NULL;
-    int qpy = cu.m_qp[0];
-    m_qpParam[TEXT_LUMA].setQpParam(qpy + QP_BD_OFFSET);
-    setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, 
cu.m_chromaFormat);
-    setChromaQP(qpy + cu.m_slice->m_pps->chromaQpOffset[1], TEXT_CHROMA_V, 
cu.m_chromaFormat);
+    m_nr = m_frameNr ? &m_frameNr[ctu.m_encData->m_frameEncoderID] : NULL;
+    m_qpParam[TEXT_LUMA].setQpParam(qp + QP_BD_OFFSET);
+    setChromaQP(qp + ctu.m_slice->m_pps->chromaQpOffset[0], TEXT_CHROMA_U, 
ctu.m_chromaFormat);
+    setChromaQP(qp + ctu.m_slice->m_pps->chromaQpOffset[1], TEXT_CHROMA_V, 
ctu.m_chromaFormat);
 }
 
 void Quant::setChromaQP(int qpin, TextType ttype, int chFmt)
diff -r 6a0a37c01cff -r 68a13226d586 source/common/quant.h
--- a/source/common/quant.h     Sat Apr 25 00:39:48 2015 -0500
+++ b/source/common/quant.h     Sat Apr 25 01:39:55 2015 -0500
@@ -103,7 +103,7 @@
     bool allocNoiseReduction(const x265_param& param);
 
     /* CU setup */
-    void setQPforQuant(const CUData& cu);
+    void setQPforQuant(const CUData& ctu, int qp);
 
     uint32_t transformNxN(const CUData& cu, const pixel* fenc, uint32_t 
fencStride, const int16_t* residual, uint32_t resiStride, coeff_t* coeff,
                           uint32_t log2TrSize, TextType ttype, uint32_t 
absPartIdx, bool useTransformSkip);
diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp       Sat Apr 25 00:39:48 2015 -0500
+++ b/source/encoder/analysis.cpp       Sat Apr 25 01:39:55 2015 -0500
@@ -146,18 +146,16 @@
 
     if (m_slice->m_pps->bUseDQP)
     {
-        m_aqQP[0] = calculateQpforCuSize(ctu, cuGeom);
-        setLambdaFromQP(*m_slice, m_aqQP[0]);
-        m_aqQP[0] = x265_clip3(QP_MIN, QP_MAX_SPEC, m_aqQP[0]);
-        ctu.setQPSubParts((int8_t)m_aqQP[0], 0, 0);
+        m_aqQP[0] = setLambdaFromQP(ctu, calculateQpforCuSize(ctu, cuGeom));
 
         if (m_slice->m_pps->maxCuDQPDepth)
             initAqQPs(1, ctu, &cuGeom + 1);
     }
     else
-        m_aqQP[0] = m_slice->m_sliceQp;
+        /* adaptive quant disabled, CTU QP is always slice QP, and within spec 
range */
+        m_aqQP[0] = setLambdaFromQP(ctu, m_slice->m_sliceQp);
 
-    m_quant.setQPforQuant(ctu);
+    ctu.setQPSubParts((int8_t)m_aqQP[0], 0, 0);
     m_rqt[0].cur.load(initialContext);
     m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic, ctu.m_cuAddr, 
0);
 
@@ -231,20 +229,24 @@
         return;
     else if (md.bestMode->cu.isIntra(0))
     {
+        m_quant.m_tqBypass = true;
         md.pred[PRED_LOSSLESS].initCosts();
         md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
         PartSize size = (PartSize)md.pred[PRED_LOSSLESS].cu.m_partSize[0];
         uint8_t* modes = md.pred[PRED_LOSSLESS].cu.m_lumaIntraDir;
         checkIntra(md.pred[PRED_LOSSLESS], cuGeom, size, modes, NULL);
         checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
+        m_quant.m_tqBypass = false;
     }
     else
     {
+        m_quant.m_tqBypass = true;
         md.pred[PRED_LOSSLESS].initCosts();
         md.pred[PRED_LOSSLESS].cu.initLosslessCU(md.bestMode->cu, cuGeom);
         md.pred[PRED_LOSSLESS].predYuv.copyFromYuv(md.bestMode->predYuv);
         encodeResAndCalcRdInterCU(md.pred[PRED_LOSSLESS], cuGeom);
         checkBestMode(md.pred[PRED_LOSSLESS], cuGeom.depth);
+        m_quant.m_tqBypass = false;
     }
 }
 
@@ -269,7 +271,6 @@
             PartSize size = (PartSize)reusePartSizes[zOrder];
             Mode& mode = size == SIZE_2Nx2N ? md.pred[PRED_INTRA] : 
md.pred[PRED_INTRA_NxN];
             mode.cu.initSubCU(parentCTU, cuGeom, qp);
-            m_quant.setQPforQuant(mode.cu);
             checkIntra(mode, cuGeom, size, &reuseModes[zOrder], 
&reuseChromaModes[zOrder]);
             checkBestMode(mode, depth);
 
@@ -287,7 +288,6 @@
     else if (mightNotSplit)
     {
         md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom, qp);
-        m_quant.setQPforQuant(md.pred[PRED_INTRA].cu);
         checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL);
         checkBestMode(md.pred[PRED_INTRA], depth);
 
@@ -327,11 +327,7 @@
                 m_rqt[nextDepth].cur.load(*nextContext);
 
                 if (m_slice->m_pps->bUseDQP && nextDepth <= 
m_slice->m_pps->maxCuDQPDepth)
-                {
-                    nextQP = m_aqQP[childGeom.index];
-                    setLambdaFromQP(*m_slice, nextQP);
-                    nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
-                }
+                    nextQP = setLambdaFromQP(parentCTU, 
m_aqQP[childGeom.index]);
 
                 compressIntraCU(parentCTU, childGeom, zOrder, nextQP);
 
@@ -401,14 +397,9 @@
     {
         slave.m_slice = m_slice;
         slave.m_frame = m_frame;
-        slave.setLambdaFromQP(*m_slice, m_rdCost.m_qp);
+        slave.setLambdaFromQP(md.pred[PRED_2Nx2N].cu, m_rdCost.m_qp);
         slave.invalidateContexts(0);
-
-        if (m_param->rdLevel >= 5)
-        {
-            
slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
-            slave.m_quant.setQPforQuant(md.pred[PRED_2Nx2N].cu);
-        }
+        
slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
     }
 
     /* perform Mode task, repeat until no more work is available */
@@ -419,11 +410,6 @@
             switch (pmode.modes[task])
             {
             case PRED_INTRA:
-                if (&slave != this)
-                {
-                    
slave.m_rqt[pmode.cuGeom.depth].cur.load(m_rqt[pmode.cuGeom.depth].cur);
-                    slave.m_quant.setQPforQuant(md.pred[PRED_INTRA].cu);
-                }
                 slave.checkIntraInInter(md.pred[PRED_INTRA], pmode.cuGeom);
                 if (m_param->rdLevel > 2)
                     slave.encodeIntraInInter(md.pred[PRED_INTRA], 
pmode.cuGeom);
@@ -739,11 +725,7 @@
                 m_rqt[nextDepth].cur.load(*nextContext);
 
                 if (m_slice->m_pps->bUseDQP && nextDepth <= 
m_slice->m_pps->maxCuDQPDepth)
-                {
-                    nextQP = m_aqQP[childGeom.index];
-                    setLambdaFromQP(*m_slice, nextQP);
-                    nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
-                }
+                    nextQP = setLambdaFromQP(parentCTU, 
m_aqQP[childGeom.index]);
 
                 compressInterCU_dist(parentCTU, childGeom, nextQP);
 
@@ -944,7 +926,6 @@
                     {
                         /* generate recon pixels with no rate distortion 
considerations */
                         CUData& cu = md.bestMode->cu;
-                        m_quant.setQPforQuant(cu);
 
                         uint32_t tuDepthRange[2];
                         cu.getInterTUQtDepthRange(tuDepthRange, 0);
@@ -969,7 +950,6 @@
                     {
                         /* generate recon pixels with no rate distortion 
considerations */
                         CUData& cu = md.bestMode->cu;
-                        m_quant.setQPforQuant(cu);
 
                         uint32_t tuDepthRange[2];
                         cu.getIntraTUQtDepthRange(tuDepthRange, 0);
@@ -1020,11 +1000,7 @@
                 m_rqt[nextDepth].cur.load(*nextContext);
 
                 if (m_slice->m_pps->bUseDQP && nextDepth <= 
m_slice->m_pps->maxCuDQPDepth)
-                {
-                    nextQP = m_aqQP[childGeom.index];
-                    setLambdaFromQP(*m_slice, nextQP);
-                    nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
-                }
+                    nextQP = setLambdaFromQP(parentCTU, 
m_aqQP[childGeom.index]);
 
                 compressInterCU_rd0_4(parentCTU, childGeom, nextQP);
 
@@ -1228,11 +1204,7 @@
                 m_rqt[nextDepth].cur.load(*nextContext);
 
                 if (m_slice->m_pps->bUseDQP && nextDepth <= 
m_slice->m_pps->maxCuDQPDepth)
-                {
-                    nextQP = m_aqQP[childGeom.index];
-                    setLambdaFromQP(*m_slice, nextQP);
-                    nextQP = x265_clip3(QP_MIN, QP_MAX_SPEC, nextQP);
-                }
+                    nextQP = setLambdaFromQP(parentCTU, 
m_aqQP[childGeom.index]);
 
                 compressInterCU_rd5_6(parentCTU, childGeom, zOrder, nextQP);
 
@@ -1758,7 +1730,6 @@
     CUData& cu = bestMode->cu;
 
     cu.copyFromPic(ctu, cuGeom);
-    m_quant.setQPforQuant(cu);
 
     Yuv& fencYuv = m_modeDepth[cuGeom.depth].fencYuv;
     if (cuGeom.depth)
diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/frameencoder.cpp
--- a/source/encoder/frameencoder.cpp   Sat Apr 25 00:39:48 2015 -0500
+++ b/source/encoder/frameencoder.cpp   Sat Apr 25 01:39:55 2015 -0500
@@ -851,12 +851,11 @@
 
         if (m_param->rc.aqMode || bIsVbv)
         {
+            X265_CHECK(slice->m_pps->bUseDQP, "adaptive quant in use without 
DQP\n");
             int qp = calcQpForCu(cuAddr, curEncData.m_cuStat[cuAddr].baseQp);
             qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
             curEncData.m_rowStat[row].sumQpAq += qp;
         }
-        else
-            tld.analysis.setLambdaFromQP(*slice, slice->m_sliceQp);
 
         if (m_param->bEnableWavefront && !col && row)
         {
diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/rdcost.h
--- a/source/encoder/rdcost.h   Sat Apr 25 00:39:48 2015 -0500
+++ b/source/encoder/rdcost.h   Sat Apr 25 01:39:55 2015 -0500
@@ -40,12 +40,13 @@
     uint32_t  m_chromaDistWeight[2];
     uint32_t  m_psyRdBase;
     uint32_t  m_psyRd;
-    int       m_qp;
+    int       m_qp; /* QP used to configure lambda, may be higher than 
QP_MAX_SPEC but <= QP_MAX_MAX */
 
     void setPsyRdScale(double scale)                { m_psyRdBase = 
(uint32_t)floor(65536.0 * scale * 0.33); }
 
     void setQP(const Slice& slice, int qp)
     {
+        x265_emms(); /* TODO: if the lambda tables were ints, this would not 
be necessary */
         m_qp = qp;
 
         /* Scale PSY RD factor by a slice type factor */
diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/search.cpp
--- a/source/encoder/search.cpp Sat Apr 25 00:39:48 2015 -0500
+++ b/source/encoder/search.cpp Sat Apr 25 01:39:55 2015 -0500
@@ -163,11 +163,16 @@
     X265_FREE(m_tsRecon);
 }
 
-void Search::setLambdaFromQP(const Slice& slice, int qp)
+int Search::setLambdaFromQP(const CUData& ctu, int qp)
 {
-    x265_emms(); /* TODO: if the lambda tables were ints, this would not be 
necessary */
+    X265_CHECK(qp >= QP_MIN && qp <= QP_MAX_MAX, "QP used for lambda is out of 
range\n");
+
     m_me.setQP(qp);
-    m_rdCost.setQP(slice, qp);
+    m_rdCost.setQP(*m_slice, qp);
+
+    int quantQP = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
+    m_quant.setQPforQuant(ctu, quantQP);
+    return quantQP;
 }
 
 #if CHECKED_BUILD || _DEBUG
@@ -1364,8 +1369,6 @@
     X265_CHECK(cu.m_partSize[0] == SIZE_2Nx2N, "encodeIntraInInter does not 
expect NxN intra\n");
     X265_CHECK(!m_slice->isIntra(), "encodeIntraInInter does not expect to be 
used in I slices\n");
 
-    m_quant.setQPforQuant(cu);
-
     uint32_t tuDepthRange[2];
     cu.getIntraTUQtDepthRange(tuDepthRange, 0);
 
@@ -1888,10 +1891,9 @@
     /* Setup slave Search instance for ME for master's CU */
     if (&slave != this)
     {
-        slave.setLambdaFromQP(*m_slice, m_rdCost.m_qp);
         slave.m_slice = m_slice;
         slave.m_frame = m_frame;
-
+        slave.setLambdaFromQP(pme.mode.cu, m_rdCost.m_qp);
         slave.m_me.setSourcePU(*pme.mode.fencYuv, pme.pu.ctuAddr, 
pme.pu.cuAbsPartIdx, pme.pu.puAbsPartIdx, pme.pu.width, pme.pu.height);
     }
 
@@ -2523,9 +2525,6 @@
     uint32_t log2CUSize = cuGeom.log2CUSize;
     int sizeIdx = log2CUSize - 2;
 
-    uint32_t tqBypass = cu.m_tqBypass[0];
-    m_quant.setQPforQuant(interMode.cu);
-
     resiYuv->subtract(*fencYuv, *predYuv, log2CUSize);
 
     uint32_t tuDepthRange[2];
@@ -2536,6 +2535,7 @@
     Cost costs;
     estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
 
+    uint32_t tqBypass = cu.m_tqBypass[0];
     if (!tqBypass)
     {
         uint32_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], 
fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
diff -r 6a0a37c01cff -r 68a13226d586 source/encoder/search.h
--- a/source/encoder/search.h   Sat Apr 25 00:39:48 2015 -0500
+++ b/source/encoder/search.h   Sat Apr 25 01:39:55 2015 -0500
@@ -287,7 +287,7 @@
     ~Search();
 
     bool     initSearch(const x265_param& param, ScalingList& scalingList);
-    void     setLambdaFromQP(const Slice& slice, int qp);
+    int      setLambdaFromQP(const CUData& ctu, int qp); /* returns real quant 
QP in valid spec range */
 
     // mark temp RD entropy contexts as uninitialized; useful for finding 
loads without stores
     void     invalidateContexts(int fromDepth);
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel

[x265] [PATCH 6 of 6 REV2] analysis: always configure quant QP directly after setting RD lambda

Reply via email to