[x265] [PATCH RFC] adapt psy-rd from x264

Steve Borho Thu, 08 May 2014 18:08:16 -0700

# HG changeset patch
# User Sumalatha Polureddy<[email protected]>
# Date 1399547936 -19800
#      Thu May 08 16:48:56 2014 +0530
# Node ID 51e8421442b54929dc6d40f1ae9745faefa1c57c
# Parent  4a36a281e77c479bb9ecff6a09d3b2da5b216668
adapt psy-rd from x264


In this initial implementation, we only use sa8d to estimate the energy of the
source and reconstructed blocks. I'd like feedback specifically on the new
psyCost() and calcPsyRdCost() methods of TComRdCost.

diff -r 4a36a281e77c -r 51e8421442b5 doc/reST/cli.rst
--- a/doc/reST/cli.rst  Thu May 08 17:39:10 2014 -0500
+++ b/doc/reST/cli.rst  Thu May 08 16:48:56 2014 +0530
@@ -617,6 +617,12 @@
 
        **Range of values:** 0: least .. 6: full RDO analysis
 
+.. option:: --psy-rd <float>
+
+       Influence rate distortion optimizations to try to preserve the
+       energy of the source image in the encoded image, at the expense of
+       compression efficiency. Default 1.0
+
 .. option:: --signhide, --no-signhide
 
        Hide sign bit of one coeff per TU (rdo). Default enabled
diff -r 4a36a281e77c -r 51e8421442b5 source/Lib/TLibCommon/TComRdCost.h
--- a/source/Lib/TLibCommon/TComRdCost.h        Thu May 08 17:39:10 2014 -0500
+++ b/source/Lib/TLibCommon/TComRdCost.h        Thu May 08 16:48:56 2014 +0530
@@ -54,16 +54,20 @@
 {
 private:
 
-    uint64_t  m_lambdaMotionSSE;  // m_lambda2 w/ 16 bits of fraction
+    uint64_t  m_lambdaMotionSSE;  // m_lambda2 w/ 8 bits of fraction
 
-    uint64_t  m_lambdaMotionSAD;  // m_lambda w/ 16 bits of fraction
+    uint64_t  m_lambdaMotionSAD;  // m_lambda w/ 8 bits of fraction
 
     uint64_t  m_cbDistortionWeight;
 
     uint64_t  m_crDistortionWeight;
 
+    uint64_t  m_psyRdScale;            // Psy RD strength w/ 8 bits of fraction
+
 public:
 
+    static const pixel zeroPel[MAX_CU_SIZE * MAX_CU_SIZE];
+
     void setLambda(double lambda2, double lambda)
     {
         m_lambdaMotionSSE = (uint64_t)floor(256.0 * lambda2);
@@ -80,6 +84,16 @@
         m_crDistortionWeight = (uint64_t)floor(256.0 * crDistortionWeight);
     }
 
+    void setPsyRdScale(double scale)
+    {
+        m_psyRdScale = (uint64_t)floor(256.0 * scale);
+    }
+
+    inline bool psyRdEnabled() const
+    {
+        return !!m_psyRdScale;
+    }
+
     inline uint64_t calcRdCost(uint32_t distortion, uint32_t bits)
     {
         X265_CHECK(abs(distortion + ((bits * m_lambdaMotionSSE + 128) >> 8)) -
@@ -88,6 +102,23 @@
         return distortion + ((bits * m_lambdaMotionSSE + 128) >> 8);
     }
 
+    /* return the difference in energy between the source block and the recon block */
+    inline uint32_t psyCost(int size, pixel *source, intptr_t sstride, pixel 
*recon, intptr_t rstride)
+    {
+        return abs(primitives.sa8d[size](source, sstride, (pixel*)zeroPel, 
MAX_CU_SIZE) -
+                   primitives.sa8d[size](recon,  rstride, (pixel*)zeroPel, 
MAX_CU_SIZE));
+    }
+
+    /* return the RD cost of this prediction, including the effect of psy-rd */
+    inline uint64_t calcPsyRdCost(uint32_t distortion, uint32_t bits, uint32_t 
psycost)
+    {
+        uint64_t tot = bits + (((psycost * m_psyRdScale) + 128) >> 8);
+        X265_CHECK(abs(distortion + ((tot * m_lambdaMotionSSE + 128) >> 8)) -
+                      (distortion + (float)tot * m_lambdaMotionSSE / 256.0) < 
2,
+                   "calcPsyRdCost wrap detected dist: %d, tot %d, lambda: 
%d\n", distortion, tot, (int)m_lambdaMotionSSE);
+        return distortion + ((tot * m_lambdaMotionSSE + 128) >> 8);
+    }
+
     inline uint64_t calcRdSADCost(uint32_t sadCost, uint32_t bits)
     {
         X265_CHECK(abs(sadCost + ((bits * m_lambdaMotionSAD + 128) >> 8)) -
diff -r 4a36a281e77c -r 51e8421442b5 source/Lib/TLibEncoder/TEncCu.cpp
--- a/source/Lib/TLibEncoder/TEncCu.cpp Thu May 08 17:39:10 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncCu.cpp Thu May 08 16:48:56 2014 +0530
@@ -1394,9 +1394,20 @@
     m_entropyCoder->encodeCoeff(outTempCU, 0, depth, outTempCU->getCUSize(0), 
outTempCU->getCUSize(0), bCodeDQP);
 
     m_rdGoOnSbacCoder->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
+    outTempCU->m_totalBits = m_entropyCoder->getNumberOfWrittenBits();
 
-    outTempCU->m_totalBits = m_entropyCoder->getNumberOfWrittenBits();
-    outTempCU->m_totalCost = 
m_rdCost->calcRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
+    if (m_rdCost->psyRdEnabled())
+    {
+        int part = g_convertToBit[outTempCU->getCUSize(0)];
+        TComPicYuv *recon = outTempCU->getPic()->getPicYuvRec();
+        uint32_t psyRdCost = m_rdCost->psyCost(part, 
m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),
+                                                     
recon->getLumaAddr(outTempCU->getAddr()), recon->getStride());
+        outTempCU->m_totalCost = 
m_rdCost->calcPsyRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits, 
psyRdCost);
+    }
+    else
+    {
+        outTempCU->m_totalCost = 
m_rdCost->calcRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
+    }
 
     xCheckDQP(outTempCU);
     xCheckBestMode(outBestCU, outTempCU, depth);
@@ -1431,12 +1442,21 @@
     // Encode Coefficients
     bool bCodeDQP = getdQPFlag();
     m_entropyCoder->encodeCoeff(outTempCU, 0, depth, outTempCU->getCUSize(0), 
outTempCU->getCUSize(0), bCodeDQP);
+    m_rdGoOnSbacCoder->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
+    outTempCU->m_totalBits = m_entropyCoder->getNumberOfWrittenBits();
 
-    m_rdGoOnSbacCoder->store(m_rdSbacCoders[depth][CI_TEMP_BEST]);
-
-    outTempCU->m_totalBits = m_entropyCoder->getNumberOfWrittenBits();
-    outTempCU->m_totalCost = 
m_rdCost->calcRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
-
+    if (m_rdCost->psyRdEnabled())
+    {
+        int part = g_convertToBit[outTempCU->getCUSize(0)];
+        TComPicYuv *recon = outTempCU->getPic()->getPicYuvRec();
+        uint32_t psyRdCost = m_rdCost->psyCost(part, 
m_origYuv[depth]->getLumaAddr(), m_origYuv[depth]->getStride(),
+            recon->getLumaAddr(outTempCU->getAddr()), recon->getStride());
+        outTempCU->m_totalCost = 
m_rdCost->calcPsyRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits, 
psyRdCost);
+    }
+    else
+    {
+        outTempCU->m_totalCost = 
m_rdCost->calcRdCost(outTempCU->m_totalDistortion, outTempCU->m_totalBits);
+    }
     xCheckDQP(outTempCU);
     xCheckBestMode(outBestCU, outTempCU, depth);
 }
diff -r 4a36a281e77c -r 51e8421442b5 source/Lib/TLibEncoder/TEncSearch.cpp
--- a/source/Lib/TLibEncoder/TEncSearch.cpp     Thu May 08 17:39:10 2014 -0500
+++ b/source/Lib/TLibEncoder/TEncSearch.cpp     Thu May 08 16:48:56 2014 +0530
@@ -47,8 +47,7 @@
 
 using namespace x265;
 
-//! \ingroup TLibEncoder
-//! \{
+ALIGN_VAR_32(const pixel, TComRdCost::zeroPel[MAX_CU_SIZE * MAX_CU_SIZE]) = { 
0 };
 
 TEncSearch::TEncSearch()
 {
@@ -2695,7 +2694,17 @@
 
         cu->m_totalBits       = bits;
         cu->m_totalDistortion = distortion;
-        cu->m_totalCost       = m_rdCost->calcRdCost(distortion, bits);
+        if (m_rdCost->psyRdEnabled())
+        {
+            int part = g_convertToBit[cu->getCUSize(0)];
+            uint32_t psyRdCost = m_rdCost->psyCost(part, 
fencYuv->getLumaAddr(), fencYuv->getStride(),
+                                                         
outReconYuv->getLumaAddr(), outReconYuv->getStride());
+            cu->m_totalCost = m_rdCost->calcPsyRdCost(cu->m_totalDistortion, 
cu->m_totalBits, psyRdCost);
+        }
+        else
+        {
+            cu->m_totalCost = m_rdCost->calcRdCost(cu->m_totalDistortion, 
cu->m_totalBits);
+        }
 
         
m_rdGoOnSbacCoder->store(m_rdSbacCoders[cu->getDepth(0)][CI_TEMP_BEST]);
 
@@ -3170,8 +3179,6 @@
 
         const uint32_t numSamplesLuma = 1 << (trSizeLog2 << 1);
 
-        ALIGN_VAR_32(static const pixel, zeroPel[MAX_CU_SIZE * MAX_CU_SIZE]) = 
{ 0 };
-
         for (uint32_t subTUIndex = 0; subTUIndex < 2; subTUIndex++)
         {
             minCost[TEXT_LUMA][subTUIndex]     = MAX_INT64;
@@ -3180,7 +3187,7 @@
         }
 
         int partSize = partitionFromSizes(trWidth, trHeight);
-        uint32_t distY = 
primitives.sse_sp[partSize](resiYuv->getLumaAddr(absTUPartIdx), 
resiYuv->m_width, (pixel*)zeroPel, trWidth);
+        uint32_t distY = 
primitives.sse_sp[partSize](resiYuv->getLumaAddr(absTUPartIdx), 
resiYuv->m_width, (pixel*)TComRdCost::zeroPel, trWidth);
 
         if (outZeroDist)
         {
@@ -3265,7 +3272,7 @@
             {
                 uint32_t subTUBufferOffset = widthC * heightC * 
tuIterator.m_section;
 
-                distU = 
m_rdCost->scaleChromaDistCb(primitives.sse_sp[partSizeC](resiYuv->getCbAddr(tuIterator.m_absPartIdxTURelCU),
 resiYuv->m_cwidth, (pixel*)zeroPel, widthC));
+                distU = 
m_rdCost->scaleChromaDistCb(primitives.sse_sp[partSizeC](resiYuv->getCbAddr(tuIterator.m_absPartIdxTURelCU),
 resiYuv->m_cwidth, (pixel*)TComRdCost::zeroPel, widthC));
 
                 if (outZeroDist)
                 {
@@ -3335,7 +3342,7 @@
                     primitives.blockfill_s[(int)g_convertToBit[widthC]](ptr, 
stride, 0);
                 }
 
-                distV = 
m_rdCost->scaleChromaDistCr(primitives.sse_sp[partSizeC](resiYuv->getCrAddr(tuIterator.m_absPartIdxTURelCU),
 resiYuv->m_cwidth, (pixel*)zeroPel, widthC));
+                distV = 
m_rdCost->scaleChromaDistCr(primitives.sse_sp[partSizeC](resiYuv->getCrAddr(tuIterator.m_absPartIdxTURelCU),
 resiYuv->m_cwidth, (pixel*)TComRdCost::zeroPel, widthC));
                 if (outZeroDist)
                 {
                     *outZeroDist += distV;
diff -r 4a36a281e77c -r 51e8421442b5 source/common/common.h
--- a/source/common/common.h    Thu May 08 17:39:10 2014 -0500
+++ b/source/common/common.h    Thu May 08 16:48:56 2014 +0530
@@ -165,6 +165,7 @@
 #define X265_LOG2F(x) log2f(x)
 #define X265_LOG2(x)  log2(x)
 #endif
+#define FIX8(f) ((int)(f * (1 << 8) + .5))
 
 /* defined in common.cpp */
 int64_t x265_mdate(void);
diff -r 4a36a281e77c -r 51e8421442b5 source/common/param.cpp
--- a/source/common/param.cpp   Thu May 08 17:39:10 2014 -0500
+++ b/source/common/param.cpp   Thu May 08 16:48:56 2014 +0530
@@ -156,6 +156,7 @@
     param->cbQpOffset = 0;
     param->crQpOffset = 0;
     param->rdPenalty = 0;
+    param->psyRd = 1.0;
 
     /* Rate control options */
     param->rc.vbvMaxBitrate = 0;
@@ -588,6 +589,7 @@
     OPT("cbqpoffs") p->cbQpOffset = atoi(value);
     OPT("crqpoffs") p->crQpOffset = atoi(value);
     OPT("rd") p->rdLevel = atoi(value);
+    OPT("psyrd") p->psyRd = atof(value);
     OPT("signhide") p->bEnableSignHiding = atobool(value);
     OPT("lft") p->bEnableLoopFilter = atobool(value);
     OPT("sao") p->bEnableSAO = atobool(value);
@@ -915,7 +917,7 @@
           "Aq-Mode is out of range");
     CHECK(param->rc.aqStrength < 0 || param->rc.aqStrength > 3,
           "Aq-Strength is out of range");
-
+    CHECK(param->psyRd < 0 || 2.0 < param->psyRd, "Psy-rd strength must be 
between 0 and 2.0");
     CHECK(param->bEnableWavefront < 0, "WaveFrontSynchro cannot be negative");
     CHECK((param->vui.aspectRatioIdc < 0
            || param->vui.aspectRatioIdc > 16)
@@ -1061,6 +1063,7 @@
     x265_log(param, X265_LOG_INFO, "Lookahead / bframes / badapt        : %d / 
%d / %d\n", param->lookaheadDepth, param->bframes, param->bFrameAdaptive);
     x265_log(param, X265_LOG_INFO, "b-pyramid / weightp / weightb / refs: %d / 
%d / %d / %d\n",
              param->bBPyramid, param->bEnableWeightedPred, 
param->bEnableWeightedBiPred, param->maxNumReferences);
+
     switch (param->rc.rateControlMode)
     {
     case X265_RC_ABR:
@@ -1091,6 +1094,7 @@
     TOOLOPT(param->bEnableConstrainedIntra, "cip");
     TOOLOPT(param->bEnableEarlySkip, "esd");
     fprintf(stderr, "rd=%d ", param->rdLevel);
+    fprintf(stderr, "psyrd=%.1lf ", param->psyRd);
 
     TOOLOPT(param->bEnableLoopFilter, "lft");
     if (param->bEnableSAO)
diff -r 4a36a281e77c -r 51e8421442b5 source/encoder/cturow.cpp
--- a/source/encoder/cturow.cpp Thu May 08 17:39:10 2014 -0500
+++ b/source/encoder/cturow.cpp Thu May 08 16:48:56 2014 +0530
@@ -34,7 +34,7 @@
     m_rdGoOnSbacCoder.init(&m_rdGoOnBinCodersCABAC);
     m_sbacCoder.init(&m_binCoderCABAC);
     m_trQuant.init(1 << top->m_quadtreeTULog2MaxSize, top->bEnableRDOQ, 
top->bEnableRDOQTS, top->param->bEnableTSkipFast);
-
+    m_rdCost.setPsyRdScale(top->param->rdLevel >= 4 ? top->param->psyRd : 0);
     m_rdSbacCoders = new TEncSbac * *[g_maxCUDepth + 1];
     m_binCodersCABAC = new TEncBinCABAC * *[g_maxCUDepth + 1];
 
diff -r 4a36a281e77c -r 51e8421442b5 source/x265.cpp
--- a/source/x265.cpp   Thu May 08 17:39:10 2014 -0500
+++ b/source/x265.cpp   Thu May 08 16:48:56 2014 +0530
@@ -140,6 +140,7 @@
     { "cbqpoffs",       required_argument, NULL, 0 },
     { "crqpoffs",       required_argument, NULL, 0 },
     { "rd",             required_argument, NULL, 0 },
+    { "psy-rd",         required_argument, NULL, 0 },
     { "no-signhide",          no_argument, NULL, 0 },
     { "signhide",             no_argument, NULL, 0 },
     { "no-lft",               no_argument, NULL, 0 },
@@ -379,6 +380,7 @@
     H0("   --cbqpoffs <integer>          Chroma Cb QP Offset. Default %d\n", 
param->cbQpOffset);
     H0("   --crqpoffs <integer>          Chroma Cr QP Offset. Default %d\n", 
param->crQpOffset);
     H0("   --rd <0..6>                   Level of RD in mode decision 
0:least....6:full RDO. Default %d\n", param->rdLevel);
+    H0("   --psy-rd <0..2.0>             Strength of psycho-visual 
optimization. Default %f\n", param->psyRd);
     H0("   --[no-]signhide               Hide sign bit of one coeff per TU 
(rdo). Default %s\n", OPT(param->bEnableSignHiding));
     H0("\nLoop filters (deblock and SAO):\n");
     H0("   --[no-]lft                    Enable Deblocking Loop Filter. 
Default %s\n", OPT(param->bEnableLoopFilter));
diff -r 4a36a281e77c -r 51e8421442b5 source/x265.h
--- a/source/x265.h     Thu May 08 17:39:10 2014 -0500
+++ b/source/x265.h     Thu May 08 16:48:56 2014 +0530
@@ -579,6 +579,11 @@
      * efficiency at a major cost of performance. Default is no RDO (0) */
     int       rdLevel;
 
+    /* Psycho-visual rate-distortion strength. Only has an effect when rdLevel
+     * is 4 or higher. It makes mode decision favor options which preserve the
+     * energy of the source at the cost of compression efficiency. Default 1.0 */
+    double     psyRd;
+
     /*== Coding tools ==*/
 
     /* Enable the implicit signaling of the sign bit of the last coefficient of
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel

[x265] [PATCH RFC] adapt psy-rd from x264

Reply via email to