Does it make sense to try this for DC coefficients?
On Tue, Jul 22, 2014 at 9:24 PM, Steve Borho <st...@borho.org> wrote: > On 07/22, sumala...@multicorewareinc.com wrote: > > # HG changeset patch > > # User Sumalatha Polureddy<sumala...@multicorewareinc.com> > > # Date 1406032149 -19800 > > # Node ID 37e03dcd2e4f0b5894880ff8c097bd6e11590459 > > # Parent d303b4d860e9f06396a156726dd518d0f41fe796 > > psyrdoq: implementation of psyrdoq > > > > diff -r d303b4d860e9 -r 37e03dcd2e4f > source/Lib/TLibCommon/TComTrQuant.cpp > > --- a/source/Lib/TLibCommon/TComTrQuant.cpp Mon Jul 21 22:43:38 2014 > -0500 > > +++ b/source/Lib/TLibCommon/TComTrQuant.cpp Tue Jul 22 17:59:09 2014 > +0530 > > @@ -64,6 +64,8 @@ > > return y + ((x - y) & ((x - y) >> (sizeof(int) * CHAR_BIT - 1))); > // min(x, y) > > } > > > > +#define SIGN(x,y) ((x^(y >> 31))-(y >> 31)) > > + > > // > ==================================================================================================================== > > // TComTrQuant class member functions > > // > ==================================================================================================================== > > @@ -307,6 +309,8 @@ > > } > > > > uint32_t TComTrQuant::transformNxN(TComDataCU* cu, > > + pixel* fenc, > > + uint32_t fencStride, > > int16_t* residual, > > uint32_t stride, > > coeff_t* coeff, > > @@ -316,10 +320,10 @@ > > bool useTransformSkip, > > bool curUseRDOQ) > > { > > + int trSize = 1 << log2TrSize; > > if (cu->getCUTransquantBypass(absPartIdx)) > > { > > uint32_t numSig = 0; > > - int trSize = 1 << log2TrSize; > > for (int k = 0; k < trSize; k++) > > { > > for (int j = 0; j < trSize; j++) > > @@ -339,6 +343,12 @@ > > const uint32_t sizeIdx = log2TrSize - 2; > > int useDST = (sizeIdx == 0 && ttype == TEXT_LUMA && > cu->getPredictionMode(absPartIdx) == MODE_INTRA); > > int index = DCT_4x4 + sizeIdx - useDST; > > + if (psyRdoqEnabled()) > > + { > > + // converting pixel to int and putting in separate buffer > to take dct > > + primitives.square_copy_ps[sizeIdx](m_tmpfencBuf, > MAX_CU_SIZE, fenc, fencStride); > > + primitives.dct[index](m_tmpfencBuf, m_tmpfencCoeff, stride); > > + } > > primitives.dct[index](residual, m_tmpCoeff, stride); > > if (m_nr->bNoiseReduction) > > { > > @@ -356,7 +366,7 @@ > > > > if (m_useRDOQ && curUseRDOQ) > > { > > - return xRateDistOptQuant(cu, m_tmpCoeff, coeff, log2TrSize, > ttype, absPartIdx); > > + return xRateDistOptQuant(cu, m_tmpfencCoeff, m_tmpCoeff, coeff, > log2TrSize, ttype, absPartIdx); > > } > > return xQuant(cu, m_tmpCoeff, coeff, log2TrSize, ttype, absPartIdx); > > } > > @@ -505,7 +515,7 @@ > > * Rate distortion optimized quantization for entropy > > * coding engines using probability models like CABAC > > */ > > -uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t* > srcCoeff, coeff_t* dstCoeff, uint32_t log2TrSize, > > +uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t* > fencCoeff, int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t log2TrSize, > > TextType ttype, uint32_t > absPartIdx) > > { > > uint32_t trSize = 1 << log2TrSize; > > @@ -614,7 +624,7 @@ > > { > > level = xGetCodedLevel(costCoeff[scanPos], > curCostSig, costSig[scanPos], > > levelDouble, maxAbsLevel, > baseLevel, greaterOneBits, levelAbsBits, goRiceParam, > > - c1c2Idx, qbits, scaleFactor, > 1); > > + c1c2Idx, qbits, scaleFactor, > 1, srcCoeff[blkPos], fencCoeff[blkPos]); > > sigRateDelta[blkPos] = 0; > > } > > else > > @@ -631,7 +641,7 @@ > > curCostSig = xGetRateSigCoef(1, ctxSig); > > level = xGetCodedLevel(costCoeff[scanPos], > curCostSig, costSig[scanPos], > > levelDouble, > maxAbsLevel, baseLevel, greaterOneBits, levelAbsBits, goRiceParam, > > - c1c2Idx, qbits, > scaleFactor, 0); > > + c1c2Idx, qbits, > scaleFactor, 0, srcCoeff[blkPos], fencCoeff[blkPos]); > > } > > else > > { > > @@ -1126,7 +1136,9 @@ > > uint32_t c1c2Idx, > > int qbits, > > double scaleFactor, > > - bool last) const > > + bool last, > > + int signCoef, > > + int origCoef) const > > { > > uint32_t bestAbsLevel = 0; > > > > @@ -1155,7 +1167,18 @@ > > for (int absLevel = maxAbsLevel; absLevel >= minAbsLevel; > absLevel--) > > { > > X265_CHECK(fabs((double)err2 - double(levelDouble - (absLevel > << qbits)) * double(levelDouble - (absLevel << qbits)) * scaleFactor) < > 1e-5, "err2 check failure\n"); > > - double curCost = err2 + xGetICRateCost(absLevel, diffLevel, > greaterOneBits, levelAbsBits, absGoRice, c1c2Idx); > > + double curCost = 0; > > in x264, psy-trellis is not applied to the DC coeff nor to chroma; we > need to at least try to do that here to measure the effect. > > > + if (psyRdoqEnabled()) > > + { > > + int unquantAbsLevel = (maxAbsLevel << qbits); > > + int predictedCoef = origCoef - signCoef; > > + int psyValue = abs(unquantAbsLevel + SIGN(predictedCoef, > signCoef)); // test with differnt cases to check whether SIGN is required > > + curCost = err2 + xGetICRateCost(absLevel, diffLevel, > greaterOneBits, levelAbsBits, absGoRice, c1c2Idx) - (m_psyRdoqScale * > psyValue * m_lambda); > > + } > > + else > > + { > > + curCost = err2 + xGetICRateCost(absLevel, diffLevel, > greaterOneBits, levelAbsBits, absGoRice, c1c2Idx); > > + } > > style: no braces around single-line expressions > > > curCost += curCostSig; > > > > if (curCost < bestCodedCost) > > diff -r d303b4d860e9 -r 37e03dcd2e4f source/Lib/TLibCommon/TComTrQuant.h > > --- a/source/Lib/TLibCommon/TComTrQuant.h Mon Jul 21 22:43:38 2014 > -0500 > > +++ b/source/Lib/TLibCommon/TComTrQuant.h Tue Jul 22 17:59:09 2014 > +0530 > > @@ -120,8 +120,18 @@ > > // initialize class > > void init(bool useRDOQ); > > > > + void setPsyRdoqScale(double scale) > > + { > > + m_psyRdoqScale = scale; > > + } > > + > > + inline bool psyRdoqEnabled() const > > + { > > + return m_psyRdoqScale != 0; > > + } > > Don't use access methods for an internal member variable; I intend to > remove the similar methods from RDCost. It may be faster to keep a bool > for whether psy-rdoq is enabled, instead of a double comparison > > > + > > // transform & inverse transform functions > > - uint32_t transformNxN(TComDataCU* cu, int16_t* residual, uint32_t > stride, coeff_t* coeff, uint32_t log2TrSize, > > + uint32_t transformNxN(TComDataCU* cu, pixel* fenc, uint32_t > fencStride, int16_t* residual, uint32_t stride, coeff_t* coeff, uint32_t > log2TrSize, > > TextType ttype, uint32_t absPartIdx, bool > useTransformSkip = false, bool curUseRDOQ = true); > > > > void invtransformNxN(bool transQuantBypass, int16_t* residual, > uint32_t stride, coeff_t* coeff, uint32_t log2TrSize, TextType ttype, bool > bIntra, bool useTransformSkip, uint32_t numSig); > > @@ -207,7 +217,11 @@ > > bool m_useRDOQ; > > bool m_scalingListEnabledFlag; > > > > - int32_t* m_tmpCoeff; > > + double m_psyRdoqScale; > > + int32_t* m_tmpCoeff; // tmp buffer which stores the dct of residual > > + int32_t m_tmpfencCoeff[MAX_CU_SIZE * MAX_CU_SIZE]; // tmp buffer > which stores the dct of orig pix buffer > > + int16_t m_tmpfencBuf[MAX_CU_SIZE * MAX_CU_SIZE]; // tmp buffer > which stores short of orig pixel > > These buffers should have better names so they don't require trailing > comments, and use MAX_TS_SIZE. > > coeff_t m_resiDctCoeff[MAX_TS_SIZE * MAX_TS_SIZE]; > coeff_t m_fencDctCoeff[MAX_TS_SIZE * MAX_TS_SIZE]; > int16_t m_fencShortBuf[MAX_TS_SIZE * MAX_TS_SIZE]; > > > int32_t* > m_quantCoef[ScalingList::NUM_SIZES][ScalingList::NUM_LISTS][ScalingList::NUM_REM]; > ///< array of quantization matrix coefficient 4x4 > > int32_t* > m_dequantCoef[ScalingList::NUM_SIZES][ScalingList::NUM_LISTS][ScalingList::NUM_REM]; > ///< array of dequantization matrix coefficient 4x4 > > > > @@ -221,11 +235,11 @@ > > uint32_t xQuant(TComDataCU* cu, int32_t* src, coeff_t* dst, > uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx); > > > > // RDOQ functions > > - uint32_t xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, > coeff_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t > absPartIdx); > > + uint32_t xRateDistOptQuant(TComDataCU* cu, int32_t* fencCoeff, > int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t log2TrSize, TextType ttype, > uint32_t absPartIdx); > > > > inline uint32_t xGetCodedLevel(double& codedCost, const double > curCostSig, double& codedCostSig, int levelDouble, > > uint32_t maxAbsLevel, uint32_t > baseLevel, const int *greaterOneBits, const int *levelAbsBits, uint32_t > absGoRice, > > - uint32_t c1c2Idx, int qbits, double > scale, bool bLast) const; > > + uint32_t c1c2Idx, int qbits, double > scale, bool bLast, int sign_coef, int orig_coef) const; > > > > inline double xGetICRateCost(uint32_t absLevel, int32_t diffLevel, > const int *greaterOneBits, const int *levelAbsBits, uint32_t absGoRice, > uint32_t c1c2Idx) const; > > > > diff -r d303b4d860e9 -r 37e03dcd2e4f > source/Lib/TLibEncoder/TEncSearch.cpp > > --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jul 21 22:43:38 2014 > -0500 > > +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Jul 22 17:59:09 2014 > +0530 > > @@ -438,7 +438,7 @@ > > <snip TEncSearch diffs> > > > diff -r d303b4d860e9 -r 37e03dcd2e4f source/common/param.cpp > > --- a/source/common/param.cpp Mon Jul 21 22:43:38 2014 -0500 > > +++ b/source/common/param.cpp Tue Jul 22 17:59:09 2014 +0530 > > @@ -163,6 +163,7 @@ > > param->crQpOffset = 0; > > param->rdPenalty = 0; > > param->psyRd = 0.0; > > + param->psyRdoq = 0.5; > > The patch which is pushed should have the default be 0.0 - enabing the > psycho-visual optimations by default should be a seperate patch > > > param->bIntraInBFrames = 1; > > param->bLossless = 0; > > param->bCULossless = 0; > > @@ -413,11 +414,13 @@ > > { > > param->rc.aqStrength = 0.0; > > param->psyRd = 0.0; > > + param->psyRdoq = 0.0; > > } > > else if (!strcmp(tune, "ssim")) > > { > > param->rc.aqMode = X265_AQ_AUTO_VARIANCE; > > param->psyRd = 0.0; > > + param->psyRdoq = 0.0; > > } > > else if (!strcmp(tune, "fastdecode") || > > !strcmp(tune, "fast-decode")) > > later in this file are warnings that are emitted if psy-rd is enabled > and SSIM or PSNR are being reported. those checks need to include > psy-rdoq > > also, the psy-rdoq scale factor must be logged, if it is enabled. > > > diff -r d303b4d860e9 -r 37e03dcd2e4f source/encoder/analysis.cpp > > --- a/source/encoder/analysis.cpp Mon Jul 21 22:43:38 2014 -0500 > > +++ b/source/encoder/analysis.cpp Tue Jul 22 17:59:09 2014 +0530 > > @@ -49,6 +49,7 @@ > > { > > m_param = top->m_param; > > m_trQuant.init(top->m_bEnableRDOQ); > > + m_trQuant.setPsyRdoqScale(top->m_param->psyRdoq); > > nit: drop top-> > > > if (!top->m_scalingList.m_bEnabled) > > { > > @@ -1874,7 +1875,7 @@ > > primitives.chroma[m_param->internalCsp].sub_ps[part](dst, > dststride, src1, src2, src1stride, src2stride); > > > > // Residual encoding > > - residualTransformQuantInter(cu, 0, m_tmpResiYuv[depth], > cu->getDepth(0), true); > > + residualTransformQuantInter(cu, 0, m_origYuv[0], > m_tmpResiYuv[depth], cu->getDepth(0), true); > > checkDQP(cu); > > > > if (lcu->getMergeFlag(absPartIdx) && > cu->getPartitionSize(0) == SIZE_2Nx2N && !cu->getQtRootCbf(0)) > > diff -r d303b4d860e9 -r 37e03dcd2e4f source/encoder/encoder.cpp > > --- a/source/encoder/encoder.cpp Mon Jul 21 22:43:38 2014 -0500 > > +++ b/source/encoder/encoder.cpp Tue Jul 22 17:59:09 2014 +0530 > > @@ -1296,8 +1296,17 @@ > > p->crQpOffset += 6; > > } > > > > - // disable RDOQ if psy-rd is enabled; until we make it psy-aware > > - m_bEnableRDOQ = p->psyRd == 0.0 && p->rdLevel >= 4; > > + // disable RDOQ when rdlevel < 4 > > + // disable RDOQ when psyrd is enabled and psyrdoq is disabled > > + if (p->rdLevel >= 4) > > + { > > + if (p->psyRd && !p->psyRdoq) > > + m_bEnableRDOQ = 0; > > + else > > + m_bEnableRDOQ = 1; > > + } > > + else > > + m_bEnableRDOQ = 0; > > conciseness is a virtue: > > // disable RDOQ if rdlevel < 4 or if psy-rd is enabled without psy-rdoq > m_bEnableRDOQ = p->rdLevel >= 4 ? (p->psyRdoq || !p->psyRd) : 0; > > > if (p->bLossless) > > { > > diff -r d303b4d860e9 -r 37e03dcd2e4f source/x265.h > > --- a/source/x265.h Mon Jul 21 22:43:38 2014 -0500 > > +++ b/source/x265.h Tue Jul 22 17:59:09 2014 +0530 > > @@ -613,6 +613,10 @@ > > * energy of the source, at the cost of lost compression. Default > 0.0 */ > > double psyRd; > > > > + /* Psycho-visual rate-distortion opt quantize strength. Only has an > effect in presets > > + * which use RDOQ. Default 0.0 */ > > white-space > > > + double psyRdoq; > > And of course, the final patch must bump X265_BUILD, add the CLI option and > param_parse support, and update doc/reST/cli.txt > > -- > Steve Borho > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel >
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel