Re: [x265] [PATCH 2 of 2 V2] framefilter: comment cleanups, use pixel data type
On Fri, Sep 13, 2013 at 11:11 PM, Steve Borho st...@borho.org wrote: # HG changeset patch # User Steve Borho st...@borho.org # Date 1379053732 18000 # Fri Sep 13 01:28:52 2013 -0500 # Node ID b8bb66cd21bcab6505b7fe321e95875861c84bda # Parent 2614338b90d3533c2760a94fa10ffb5dee57910c framefilter: comment cleanups, use pixel data type diff -r 2614338b90d3 -r b8bb66cd21bc source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Fri Sep 13 10:55:03 2013 -0500 +++ b/source/encoder/frameencoder.cpp Fri Sep 13 01:28:52 2013 -0500 @@ -937,14 +937,13 @@ return; } } +// this row of CTUs has been encoded -// Active Loopfilter +// Run row-wise loop filters if (row = m_filterRowDelay) { m_frameFilter.processRow(row - m_filterRowDelay); } - -// this row of CTUs has been encoded if (row == m_numRows - 1) { for(int i = m_numRows - m_filterRowDelay; i m_numRows; i++) diff -r 2614338b90d3 -r b8bb66cd21bc source/encoder/framefilter.cpp --- a/source/encoder/framefilter.cppFri Sep 13 10:55:03 2013 -0500 +++ b/source/encoder/framefilter.cppFri Sep 13 01:28:52 2013 -0500 @@ -170,7 +170,7 @@ m_sao.processSaoUnitRow(saoParam-saoLcuParam[2], row - 1, 2); } -// TODO: this code is NOT VERIFY because TransformSkip and PCM mode have some bugs, they always not active! +// TODO: this code is NOT VERIFIED because TransformSkip and PCM modes have some bugs, they are never enabled bool bPCMFilter = (m_pic-getSlice()-getSPS()-getUsePCM() m_pic-getSlice()-getSPS()-getPCMFilterDisableFlag()) ? true : false; if (bPCMFilter || m_pic-getSlice()-getPPS()-getTransquantBypassEnableFlag()) { @@ -187,8 +187,6 @@ // this row of CTUs has been encoded -// TODO: extend margins for motion reference - if (row 0) { processRowPost(row - 1); @@ -209,7 +207,7 @@ m_sao.processSaoUnitRow(saoParam-saoLcuParam[2], row, 2); } -// TODO: this code is NOT VERIFY because TransformSkip and PCM mode have some bugs, they always not active! 
+// TODO: this code is NOT VERIFIED because TransformSkip and PCM modes have some bugs, they are never enabled bool bPCMFilter = (m_pic-getSlice()-getSPS()-getUsePCM() m_pic-getSlice()-getSPS()-getPCMFilterDisableFlag()) ? true : false; if (bPCMFilter || m_pic-getSlice()-getPPS()-getTransquantBypassEnableFlag()) { @@ -234,10 +232,6 @@ const int lastH = ((recon-getHeight() % g_maxCUHeight) ? (recon-getHeight() % g_maxCUHeight) : g_maxCUHeight); const int realH = (row != m_numRows - 1) ? g_maxCUHeight : lastH; -// TODO: Remove when we confirm below code is right -//recon-xExtendPicCompBorder(recon-getLumaAddr(), recon-getStride(), recon-getWidth(), recon-getHeight(), recon-m_lumaMarginX, recon-m_lumaMarginY); -//recon-xExtendPicCompBorder(recon-getCbAddr(), recon-getCStride(), recon-getWidth() 1, recon-getHeight() 1, recon-m_chromaMarginX, recon-m_chromaMarginY); -//recon-xExtendPicCompBorder(recon-getCrAddr(), recon-getCStride(), recon-getWidth() 1, recon-getHeight() 1, recon-m_chromaMarginX, recon-m_chromaMarginY); // Border extend Left and Right primitives.extendRowBorder(recon-getLumaAddr(lineStartCUAddr), recon-getStride(), recon-getWidth(), realH, recon-getLumaMarginX()); primitives.extendRowBorder(recon-getCbAddr(lineStartCUAddr), recon-getCStride(), recon-getWidth() 1, realH 1, recon-getChromaMarginX()); @@ -248,9 +242,9 @@ { const intptr_t stride = recon-getStride(); const intptr_t strideC = recon-getCStride(); -Pel *pixY = recon-getLumaAddr(lineStartCUAddr) - recon-getLumaMarginX(); -Pel *pixU = recon-getCbAddr(lineStartCUAddr) - recon-getChromaMarginX(); -Pel *pixV = recon-getCrAddr(lineStartCUAddr) - recon-getChromaMarginX(); +pixel *pixY = recon-getLumaAddr(lineStartCUAddr) - recon-getLumaMarginX(); +pixel *pixU = recon-getCbAddr(lineStartCUAddr) - recon-getChromaMarginX(); +pixel *pixV = recon-getCrAddr(lineStartCUAddr) - recon-getChromaMarginX(); Not sure why Pel has been changed to pixel (dropping 16-bit support altogether ?) 
since getLuma/Cb/CrAddr still return Pel. Anyways, the sizeof operator in the following memcpy's should also be changed to pixel. for (int y = 0; y recon-getLumaMarginY(); y++) { @@ -269,9 +263,9 @@ { const intptr_t stride = recon-getStride(); const intptr_t strideC = recon-getCStride(); -Pel *pixY = recon-getLumaAddr(lineStartCUAddr) - recon-getLumaMarginX() + (realH - 1) * stride; -Pel *pixU = recon-getCbAddr(lineStartCUAddr) - recon-getChromaMarginX() + ((realH 1) - 1) * strideC; -Pel *pixV =
Re: [x265] [PATCH]RDLevel: Disable RDOQTS when RDO and/or TS are disabled.
Agreed. Resending the patch - along with a few cleanups to make it easier to track all elements in the param struct. On Mon, Sep 16, 2013 at 3:27 PM, Derek Buitenhuis derek.buitenh...@gmail.com wrote: On Mon, Sep 16, 2013 at 10:47 AM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: Yes, this particular param flag is initialised to 1 (highest quality setting) in x265_param_default. I'm setting it to zero for a certain set of user-defined parameters. What's the point of the first check ( http://mailman.videolan.org/pipermail/x265-devel/2013-September/000783.html ) which sets it to 1 then? - Derek ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH v2]: CLI: remove CLI option rdoqts; cleanup
Thanks for pointing that out, that was indeed unintentional. Pushed the fix. On Mon, Sep 16, 2013 at 9:11 PM, Derek Buitenhuis derek.buitenh...@gmail.com wrote: On Mon, Sep 16, 2013 at 1:30 PM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: # HG changeset patch # User Deepthi Nandakumar deep...@multicorewareinc.com # Date 1379334518 -19800 # Node ID 46b065f7d676e7ff26c46a40f1790bdae290d7fa # Parent 881444f5910b2b0e0f286a6ca47fcc743515cbb2 CLI options: Eliminate rdoqts option; cleanup 1. Eliminate rdoqts CLI option: enabled when rdoq and ts are both enabled. 2. Rearrange default initialisations in x265_param_ t structure diff -r 881444f5910b -r 46b065f7d676 source/common/common.cpp --- a/source/common/common.cppMon Sep 16 09:41:34 2013 +0530 +++ b/source/common/common.cppMon Sep 16 17:58:38 2013 +0530 @@ -115,45 +115,58 @@ va_end(arg); } -extern C void x265_param_default(x265_param_t *param) This looks incorrect. The function needs to be exported and to be able to be called from C. diff -r 881444f5910b -r 46b065f7d676 source/encoder/encoder.cpp --- a/source/encoder/encoder.cppMon Sep 16 09:41:34 2013 +0530 +++ b/source/encoder/encoder.cppMon Sep 16 17:58:38 2013 +0530 @@ -219,6 +219,11 @@ _param-rc.rateControlMode = X265_RC_ABR; } +if(!(_param-bEnableRDOQ _param-bEnableTransformSkip)) +{ +_param-bEnableRDOQTS = 0; +} Please add a note in the commit message about this. Rest is OK. - Derek ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] do not use std::list for the class AccessUnit
FrameEncoder and compressFrame needs to be refactored for mallocs and failures to be handled correctly. On Thu, Sep 19, 2013 at 1:31 PM, Gopu Govindaswamy g...@multicorewareinc.com wrote: # HG changeset patch # User Gopu Govindaswamy g...@multicorewareinc.com # Date 1379577682 -19800 # Node ID 92e1ac03b081eccd8bc797142cc22033c87d475d # Parent 26d6f155f8df69147f40f4945d99c29a52988c56 do not use std;:list for the class AccessUnit Removed std::list from encoder and nalunits diff -r 26d6f155f8df -r 92e1ac03b081 source/Lib/TLibCommon/NAL.h --- a/source/Lib/TLibCommon/NAL.h Wed Sep 18 16:13:33 2013 -0500 +++ b/source/Lib/TLibCommon/NAL.h Thu Sep 19 13:31:22 2013 +0530 @@ -114,6 +114,7 @@ * emulation_prevention_three_byte symbols. */ NALUnitEBSP(OutputNALUnit nalu); +void init(OutputNALUnit nalu); }; } //! \} diff -r 26d6f155f8df -r 92e1ac03b081 source/Lib/TLibEncoder/NALwrite.h --- a/source/Lib/TLibEncoder/NALwrite.h Wed Sep 18 16:13:33 2013 -0500 +++ b/source/Lib/TLibEncoder/NALwrite.h Thu Sep 19 13:31:22 2013 +0530 @@ -83,6 +83,14 @@ write(m_nalUnitData, nalu, m_packetSize); } +inline void NALUnitEBSP::init(OutputNALUnit nalu) +{ +m_nalUnitType = nalu.m_nalUnitType; +m_temporalId = nalu.m_temporalId; +m_reservedZero6Bits = nalu.m_reservedZero6Bits; +write(m_nalUnitData, nalu, m_packetSize); +} + void copyNaluData(OutputNALUnit naluDest, const OutputNALUnit naluSrc); } diff -r 26d6f155f8df -r 92e1ac03b081 source/Lib/TLibEncoder/TEncTop.cpp --- a/source/Lib/TLibEncoder/TEncTop.cppWed Sep 18 16:13:33 2013 -0500 +++ b/source/Lib/TLibEncoder/TEncTop.cppThu Sep 19 13:31:22 2013 +0530 @@ -114,7 +114,7 @@ for (int i = 0; i param.frameNumThreads; i++) { // Ensure frame encoder is idle before destroying it -AccessUnit tmp; +NALUnitEBSP **tmp = NULL; m_frameEncoder[i].getEncodedPicture(tmp); m_frameEncoder[i].destroy(); } @@ -155,19 +155,19 @@ } } -int TEncTop::getStreamHeaders(AccessUnit accessUnit) +int TEncTop::getStreamHeaders(NALUnitEBSP **nalunits) { -return 
m_frameEncoder-getStreamHeaders(accessUnit); +return m_frameEncoder-getStreamHeaders(nalunits); } /** \param flush force encoder to encode a frame \param pic_in input original YUV picture or NULL \param pic_out pointer to reconstructed picture struct - \param accessUnitsOut output bitstream + \param nalunitsoutput bitstream \retval number of encoded pictures */ -int TEncTop::encode(bool flush, const x265_picture_t* pic_in, x265_picture_t *pic_out, AccessUnit accessUnitOut) +int TEncTop::encode(bool flush, const x265_picture_t* pic_in, x265_picture_t *pic_out, NALUnitEBSP **nalunits) { if (pic_in) { @@ -207,7 +207,7 @@ // getEncodedPicture() should block until the FrameEncoder has completed // encoding the frame. This is how back-pressure through the API is // accomplished when the encoder is full. -TComPic *out = curEncoder-getEncodedPicture(accessUnitOut); +TComPic *out = curEncoder-getEncodedPicture(nalunits); if (!out flush) { @@ -221,7 +221,7 @@ { curEncoder = m_frameEncoder[m_curEncoder]; m_curEncoder = (m_curEncoder + 1) % param.frameNumThreads; -out = curEncoder-getEncodedPicture(accessUnitOut); +out = curEncoder-getEncodedPicture(nalunits); } while (!out flushed != m_curEncoder); } @@ -253,7 +253,7 @@ pic_out-stride[2] = recpic-getCStride(); } -double bits = calculateHashAndPSNR(out, accessUnitOut); +double bits = calculateHashAndPSNR(out, nalunits); // Allow this frame to be recycled if no frame encoders are using it for reference ATOMIC_DEC(out-m_countRefEncoders); @@ -481,7 +481,7 @@ /* Returns Number of bits in current encoded pic */ -double TEncTop::calculateHashAndPSNR(TComPic* pic, AccessUnit accessUnit) +double TEncTop::calculateHashAndPSNR(TComPic* pic, NALUnitEBSP **nalunits) { TComPicYuv* recon = pic-getPicYuvRec(); TComPicYuv* orig = pic-getPicYuvOrg(); @@ -537,8 +537,12 @@ OutputNALUnit onalu(NAL_UNIT_SUFFIX_SEI, 0); m_frameEncoder-m_seiWriter.writeSEImessage(onalu.m_Bitstream, sei_recon_picture_digest, pic-getSlice()-getSPS()); 
writeRBSPTrailingBits(onalu.m_Bitstream); - -accessUnit.insert(accessUnit.end(), new NALUnitEBSP(onalu)); + +int count = 0; +while(nalunits[count] != NULL) +count++; +nalunits[count] = (NALUnitEBSP *)X265_MALLOC(NALUnitEBSP, 1); +nalunits[count]-init(onalu); } /* calculate the size of the access unit, excluding: @@ -546,13
Re: [x265] [PATCH] lookahead: fix crash for I frame cost estimation
Can you try sending this as a fresh patch to the mailing list? Does not apply cleanly to the parent node. On Thu, Sep 19, 2013 at 2:46 PM, Deepthi Devaki Akkoorath deepthidev...@multicorewareinc.com wrote: # HG changeset patch # User Deepthi Devaki deepthidev...@multicorewareinc.com # Date 1379582068 -19800 # Node ID d52de033d7dde00255e9d55ece138c33fd61 # Parent 26d6f155f8df69147f40f4945d99c29a52988c56 lookahead: fix crash for I frame cost estimation diff -r 26d6f155f8df -r d52de033d7dd source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Wed Sep 18 16:13:33 2013 -0500 +++ b/source/encoder/slicetype.cpp Thu Sep 19 14:44:28 2013 +0530 @@ -360,44 +360,46 @@ mvmax.x = (uint16_t)((widthInCU - cux - 1) * cuSize + 8); mvmax.y = (uint16_t)((heightInCU - cuy - 1) * cuSize + 8); -for (int i = 0; i 1 + bBidir; i++) +if (p0 != p1) { -if (!bDoSearch[i]) +for (int i = 0; i 1 + bBidir; i++) { -/* Use previously calculated cost */ +if (!bDoSearch[i]) +{ +/* Use previously calculated cost */ +COPY2_IF_LT(bcost, *fenc_costs[i], listused, i + 1); +continue; +} +int numc = 0; +MV mvc[4], mvp; +MV *fenc_mv = fenc_mvs[i]; + +/* Reverse-order MV prediction. */ +mvc[0] = 0; +mvc[2] = 0; +#define MVC(mv) mvc[numc++] = mv; +if (cux widthInCU - 1) +MVC(fenc_mv[1]); +if (cuy heightInCU - 1) +{ +MVC(fenc_mv[widthInCU]); +if (cux 0) +MVC(fenc_mv[widthInCU - 1]); +if (cux widthInCU - 1) +MVC(fenc_mv[widthInCU + 1]); +} +#undef MVC +if (numc = 1) +mvp = mvc[0]; +else +{ +x265_median_mv(mvp, mvc[0], mvc[1], mvc[2]); +} + +*fenc_costs[i] = me.motionEstimate(i ? fref1 : fref0, mvmin, mvmax, mvp, numc, mvc, merange, *fenc_mvs[i]); COPY2_IF_LT(bcost, *fenc_costs[i], listused, i + 1); -continue; } -int numc = 0; -MV mvc[4], mvp; -MV *fenc_mv = fenc_mvs[i]; - -/* Reverse-order MV prediction. 
*/ -mvc[0] = 0; -mvc[2] = 0; -#define MVC(mv) mvc[numc++] = mv; -if (cux widthInCU - 1) -MVC(fenc_mv[1]); -if (cuy heightInCU - 1) -{ -MVC(fenc_mv[widthInCU]); -if (cux 0) -MVC(fenc_mv[widthInCU - 1]); -if (cux widthInCU - 1) -MVC(fenc_mv[widthInCU + 1]); -} -#undef MVC -if (numc = 1) -mvp = mvc[0]; -else -{ -x265_median_mv(mvp, mvc[0], mvc[1], mvc[2]); -} - -*fenc_costs[i] = me.motionEstimate(i ? fref1 : fref0, mvmin, mvmax, mvp, numc, mvc, merange, *fenc_mvs[i]); -COPY2_IF_LT(bcost, *fenc_costs[i], listused, i + 1); } - if (!fenc-bIntraCalculated) { int nLog2SizeMinus2 = g_convertToBit[cuSize]; // partition size ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] Commit c56e392b2c68 fails to link with MinGW
Thanks, pushed fix. On Tue, Sep 24, 2013 at 2:14 PM, Nikos Barkas nikbar2...@yahoo.com wrote: There is a linking problem with revision c56e392b2c68 on MinGW. The error comes from common.cpp and is our old friend: undefined reference to `__sync_val_compare_and_swap_4' The file common.cpp must be added to the list of files for which -march=i686 is applied in source\common\CMakeLists.txt. Best regards Nikos ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] Fixed the --me 4 option error for Full search
# HG changeset patch # User sai...@multicorewareinc.com # Date 1381818060 -19800 # Tue Oct 15 11:51:00 2013 +0530 # Branch hsa # Node ID 5725e4986fa142c38ed7416f303f93604135c603 # Parent 0004c182f326197019a7426f080dd04ff99f39d3 Fixed the --me 4 cli option error diff -r 0004c182f326 -r 5725e4986fa1 source/encoder/motion.cpp --- a/source/encoder/motion.cpp Thu Oct 10 19:12:06 2013 -0500 +++ b/source/encoder/motion.cpp Tue Oct 15 11:51:00 2013 +0530 @@ -732,8 +732,9 @@ break; } } +break; } -break; + case X265_FULL_SEARCH: { // dead slow exhaustive search, but at least it uses sad_x4() @@ -767,8 +768,9 @@ COST_MV(tmv.x, tmv.y); } } +break; } - + default: assert(0); break; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] Fixed issue with chroma 2xN block asm code
Pushed. On Thu, Oct 17, 2013 at 12:52 PM, prav...@multicorewareinc.com wrote: # HG changeset patch # User Praveen Tiwari # Date 1381994531 -19800 # Node ID 68f97c7f08392d387046736ae3e86095c653fd05 # Parent 5914800260d95a9bac7ce3eec2291e5c781e8422 Fixed issue with chroma 2xN block asm code diff -r 5914800260d9 -r 68f97c7f0839 source/common/x86/ipfilter8.asm --- a/source/common/x86/ipfilter8.asm Wed Oct 16 21:10:13 2013 +0530 +++ b/source/common/x86/ipfilter8.asm Thu Oct 17 12:52:11 2013 +0530 @@ -54,7 +54,7 @@ pmulhrsw%2, %3 packuswb%2, %2 pextrw [dstq], %2, 0 -pextrw [dstq + dststrideq], %2, 1 +pextrw [dstq + dststrideq], %2, 2 %endmacro ;- ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] lowres : removed duplicate code for extending right and bottom margin
Thanks - unintended duplicate copy. Pushed the same fix before I saw this. On Mon, Oct 21, 2013 at 1:16 PM, Gopu Govindaswamy g...@multicorewareinc.com wrote: # HG changeset patch # User Gopu Govindaswamy g...@multicorewareinc.com # Date 1382341604 -19800 # Node ID ff8860e0b3082cbb1848e0f5c89e73dc1d7aa87e # Parent fabb25ae4db4a06073decead3836521a14b5bde9 lowres : removed duplicate code for extending right and bottom margin diff -r fabb25ae4db4 -r ff8860e0b308 source/common/lowres.cpp --- a/source/common/lowres.cpp Sat Oct 19 18:08:07 2013 +0800 +++ b/source/common/lowres.cpp Mon Oct 21 13:16:44 2013 +0530 @@ -159,24 +159,6 @@ ::memcpy(src + y * srcStride, src, sizeof(Pel) * (extWidth)); } -/* extending right margin*/ -if (2 * width orig-getWidth()) -{ -for (y = 0; y srcHeight; y++) -{ -::memset(src + srcWidth, src[srcWidth - 1], sizeof(Pel) * (X265_LOWRES_CU_SIZE - 1)); -src += srcStride; -} -} - -/* extending bottom margin */ -src = orig-getLumaAddr() + (srcHeight - 1) * srcStride; - -for (y = 1; y = 2 * lines - srcHeight; y++) -{ -::memcpy(src + y * srcStride, src, sizeof(Pel) * (extWidth)); -} - /* downscale and generate 4 HPEL planes for lookahead */ primitives.frame_init_lowres_core(orig-getLumaAddr(), lowresPlane[0], lowresPlane[1], lowresPlane[2], lowresPlane[3], ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] ratecontrol: initialize frameThreads
Yes, hg import worked fine. You probably need to configure your username in git settings to firstname lastname. Does this help? https://help.github.com/articles/setting-your-username-in-git On Mon, Oct 21, 2013 at 10:52 PM, Rafaël Carré fun...@videolan.org wrote: https://bitbucket.org/multicoreware/x265/commits/3fe9a9d0a0b6279643c9272805f4d9b46c2cdcd9 =?utf-8?b?UmFmYcOrbCBDYXJyw6kgPGZ1bm1hbkB2aWRlb2xhbi5vcmc+?= committed Indeed :) I am sending these from my git hg clone with git send-email. Any idea how to make these patches more smooth from hg point of view? http://stackoverflow.com/questions/2626898/mercurial-copy-patch-to-repository seems to say hg import should work. How did you apply this one? Thanks, Le 21/10/2013 18:47, Deepthi Nandakumar a écrit : Pushed this - but the user name has not been configured properly. On Mon, Oct 21, 2013 at 6:16 PM, Rafaël Carré fun...@videolan.org wrote: Fix a floating point exceptio --- source/encoder/ratecontrol.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/source/encoder/ratecontrol.cpp b/source/encoder/ratecontrol.cpp index b588bde..9085ec3 100644 --- a/source/encoder/ratecontrol.cpp +++ b/source/encoder/ratecontrol.cpp @@ -129,6 +129,7 @@ void RateControl::calcAdaptiveQuantFrame(TComPic *pic) RateControl::RateControl(TEncCfg * _cfg) { this-cfg = _cfg; +frameThreads = cfg-param.frameNumThreads; bitrate = cfg-param.rc.bitrate * 1000; frameDuration = 1.0 / cfg-param.frameRate; ncu = (int)((cfg-param.sourceHeight * cfg-param.sourceWidth) / pow((int)cfg-param.maxCUSize, 2.0)); -- 1.8.3.2 ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] no-rdo: use bit estimates from ME to calculate RDcost
Steve, This is part of an ongoing change to rd 0/1 where we want to replace cost = distortion + lambda*(coeff + mv bits), as opposed to that derived from the RDO process. Here, the coeff bits have not been added, only me bits are considered. I believe we'll need an exhaustive set of tests for computer-generated video like sintel/bigbuckbunny, and at a later stage, we could add in presets for those (like x264 does). On Thu, Oct 31, 2013 at 1:23 AM, Steve Borho st...@borho.org wrote: On Wed, Oct 30, 2013 at 4:47 AM, deepthidev...@multicorewareinc.comwrote: # HG changeset patch # User Deepthi Devaki deepthidev...@multicorewareinc.com # Date 1383126419 -19800 # Node ID 77db80a67f4e55f22bc02ed02930a269bfac6b50 # Parent 74bf8634037ce3e673b21738a5ffaf1c14381414 no-rdo: use bit estimates from ME to calculate RDcost. bits estimated in ME stored in CU and used for calculating rdcost along with distortion. This results in better bitrate with no-rdo, with small drop in PSNR. I see this has been already pushed, but I'm not certain this is an unambiguously good trade-off: x265 sintel_trailer_2k_480p24.y4m out.hevc --rd 0 --b-adapt 2 -b3 --hash 1 before: encoded 1253 frames in 262.32s (4.78 fps), 143.50 kb/s, Global PSNR: 48.745 after: encoded 1253 frames in 259.50s (4.83 fps), 142.36 kb/s, Global PSNR: 48.655 diff -r 74bf8634037c -r 77db80a67f4e source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Wed Oct 30 13:44:16 2013 +0530 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Wed Oct 30 15:16:59 2013 +0530 @@ -2115,7 +2115,7 @@ * \param bValid * \returns void */ -void TEncSearch::xMergeEstimation(TComDataCU* cu, int puIdx, uint32_t interDir, TComMvField* mvField, uint32_t mergeIndex, uint32_t outCost, TComMvField* mvFieldNeighbours, UChar* interDirNeighbours, int numValidMergeCand) +void TEncSearch::xMergeEstimation(TComDataCU* cu, int puIdx, uint32_t interDir, TComMvField* mvField, uint32_t mergeIndex, uint32_t outCost, uint32_t outbits, TComMvField* 
mvFieldNeighbours, UChar* interDirNeighbours, int numValidMergeCand) { uint32_t absPartIdx = 0; int width = 0; @@ -2144,7 +2144,7 @@ { uint32_t costCand = MAX_UINT; uint32_t bitsCand = 0; - + cu-getCUMvField(REF_PIC_LIST_0)-m_mv[absPartIdx] = mvFieldNeighbours[0 + 2 * mergeCand].mv; cu-getCUMvField(REF_PIC_LIST_0)-m_refIdx[absPartIdx] = mvFieldNeighbours[0 + 2 * mergeCand].refIdx; cu-getCUMvField(REF_PIC_LIST_1)-m_mv[absPartIdx] = mvFieldNeighbours[1 + 2 * mergeCand].mv; @@ -2160,6 +2160,7 @@ if (costCand outCost) { outCost = costCand; +outbits = bitsCand; mvField[0] = mvFieldNeighbours[0 + 2 * mergeCand]; mvField[1] = mvFieldNeighbours[1 + 2 * mergeCand]; interDir = interDirNeighbours[mergeCand]; @@ -2226,6 +2227,8 @@ UChar interDirNeighbours[MRG_MAX_NUM_CANDS]; int numValidMergeCand = 0; +int totalmebits = 0; + for (int partIdx = 0; partIdx numPart; partIdx++) { uint32_t listCost[2] = { MAX_UINT, MAX_UINT }; @@ -2495,7 +2498,8 @@ // find Merge result uint32_t mrgCost = MAX_UINT; -xMergeEstimation(cu, partIdx, mrgInterDir, mrgMvField, mrgIndex, mrgCost, mvFieldNeighbours, interDirNeighbours, numValidMergeCand); +uint32_t mrgBits = 0; +xMergeEstimation(cu, partIdx, mrgInterDir, mrgMvField, mrgIndex, mrgCost, mrgBits, mvFieldNeighbours, interDirNeighbours, numValidMergeCand); if (mrgCost meCost) { // set Merge result @@ -2517,6 +2521,7 @@ #if CU_STAT_LOGFILE meCost += mrgCost; #endif +totalmebits += mrgBits; } else { @@ -2530,11 +2535,18 @@ #if CU_STAT_LOGFILE meCost += meCost; #endif +totalmebits += mebits; } } +else +{ +totalmebits += mebits; +} motionCompensation(cu, predYuv, REF_PIC_LIST_X, partIdx, bLuma, bChroma); } +cu-m_totalBits = totalmebits; + setWpScalingDistParam(cu, -1, REF_PIC_LIST_X); } diff -r 74bf8634037c -r 77db80a67f4e source/Lib/TLibEncoder/TEncSearch.h --- a/source/Lib/TLibEncoder/TEncSearch.h Wed Oct 30 13:44:16 2013 +0530 +++ b/source/Lib/TLibEncoder/TEncSearch.h Wed Oct 30 15:16:59 2013 +0530 @@ -211,7 +211,7 @@ void xGetBlkBits(PartSize 
cuMode, bool bPSlice, int partIdx, uint32_t lastMode, uint32_t blockBit[3]); void xMergeEstimation(TComDataCU* cu, int partIdx, uint32_t uiInterDir, - TComMvField* pacMvField, uint32_t mergeIndex, uint32_t outCost, + TComMvField* pacMvField, uint32_t mergeIndex, uint32_t outCost, uint32_t outbits,
Re: [x265] [PATCH] no-rdo: Use entropy encoder for bit estimation
Posting the efficiency results FYI. The performance drop will be taken care of once the rd 0/1 refactor is complete. Before BasketballPass_416x240_50 (11.04 fps), 328.96 kb/s, Global PSNR: 35.613 After BasketballPass_416x240_50 (10.70 fps), 326.64 kb/s, Global PSNR: 35.609 Before big_buck_bunny_360p24 (9.68 fps), 52.08 kb/s, Global PSNR: 43.549 After big_buck_bunny_360p24 (9.12 fps), 51.80 kb/s, Global PSNR: 43.561 Before FourPeople_1280x720_60 (5.33 fps), 510.23 kb/s, Global PSNR: 39.653 After FourPeople_1280x720_60 (5.11 fps), 505.69 kb/s, Global PSNR: 39.654 Before sintel_trailer_2k_720p24 (4.62 fps), 88.32 kb/s, Global PSNR: 54.869 After sintel_trailer_2k_720p24 (4.36 fps), 87.23 kb/s, Global PSNR: 54.802 Before Johnny_1280x720_60 (6.11 fps), 296.06 kb/s, Global PSNR: 40.525 After Johnny_1280x720_60 (5.96 fps), 294.26 kb/s, Global PSNR: 40.548 Before Kimono1_1920x1080_24 (1.07 fps), 1811.05 kb/s, Global PSNR: 38.624 After Kimono1_1920x1080_24 (1.02 fps), 1798.91 kb/s, Global PSNR: 38.625 Before BasketballDrive_1920x1080 (1.16 fps), 3849.92 kb/s, Global PSNR: 37.146 After BasketballDrive_1920x1080 (1.06 fps), 3820.42 kb/s, Global PSNR: 37.150 On Thu, Oct 31, 2013 at 12:44 PM, deepthidev...@multicorewareinc.com wrote: # HG changeset patch # User Deepthi Devaki deepthidev...@multicorewareinc.com # Date 1383203307 -19800 # Node ID 4b4332d038832ab8812773d618b38329ec75ae4b # Parent ec6b4d35f11053b06d0e1ea46df798ff89a4c127 no-rdo: Use entropy encoder for bit estimation. Instead of me-bit estimation, use entropy encoder. 
diff -r ec6b4d35f110 -r 4b4332d03883 source/Lib/TLibEncoder/TEncSearch.h --- a/source/Lib/TLibEncoder/TEncSearch.h Thu Oct 31 00:09:49 2013 -0500 +++ b/source/Lib/TLibEncoder/TEncSearch.h Thu Oct 31 12:38:27 2013 +0530 @@ -165,6 +165,12 @@ void xSetIntraResultQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, bool bLumaOnly, TComYuv* reconYuv); +// --- +// compute symbol bits +// --- + +uint32_t xSymbolBitsInter(TComDataCU* cu); + protected: // @@ -232,12 +238,6 @@ UInt64 rdCost, uint32_t outBits, uint32_t outDist, uint32_t *puiZeroDist); void xSetResidualQTData(TComDataCU* cu, uint32_t absPartIdx, uint32_t absTUPartIdx, TShortYUV* resiYuv, uint32_t depth, bool bSpatial); -// --- -// compute symbol bits -// --- - -uint32_t xSymbolBitsInter(TComDataCU* cu); - void setWpScalingDistParam(TComDataCU* cu, int refIdx, int picList); }; } diff -r ec6b4d35f110 -r 4b4332d03883 source/encoder/compress.cpp --- a/source/encoder/compress.cpp Thu Oct 31 00:09:49 2013 -0500 +++ b/source/encoder/compress.cpp Thu Oct 31 12:38:27 2013 +0530 @@ -228,6 +228,9 @@ int part = partitionFromSizes(outTempCU-getWidth(0), outTempCU-getHeight(0)); uint32_t distortion = primitives.sse_pp[part](m_origYuv[depth]-getLumaAddr(), m_origYuv[depth]-getStride(), outPredYuv-getLumaAddr(), outPredYuv-getStride()); + m_rdGoOnSbacCoder-load(m_rdSbacCoders[outTempCU-getDepth(0)][CI_CURR_BEST]); +outTempCU-m_totalBits = m_search-xSymbolBitsInter(outTempCU); + outTempCU-m_totalCost = m_rdCost-calcRdCost(distortion, outTempCU-m_totalBits); } ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH RFC] presets: adjust presets to increase spread and align closer with x264 presets
Since the default preset is medium, shouldn't x265_param_default apply medium settings only? So that the if (!strcmp(preset, "medium")) block here is empty? In that case, the preset (if specified) will change only the ones which are different from medium? Also, there are a few params here that 1. aren't used and aren't initialised properly (e.g., constrainedIntra, weightedBipred) - let's remove them and add them back when the feature exists. 2. that are dependent on other param fields, like EnableRdo, EnabledRDOQ depends on RDLevel. This particular case is handled safely inside encoder::configure, but not sure about all such cases. Just thought this is a good time for spring cleaning. ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] no-rdo: giving weightage to the cost of all CU's and neighbour CU's for early exit
I have a few questions. 1. Do we need so many local variables? 2. Why are we adding outTempCU->cost to totalCost and then comparing against outBestCU->cost? That doesn't make much sense to me. AFAIK, outTempCU does not contain any valid data - we should remove this. 3. Should we be adding costCUColocated0 and costCUColocated1 also? Adding up spatial and temporal costs, and then comparing against a threshold derived from spatial costs - umm, no. Let's leave these out. 4. The rest of it looks ok, logically. But now you may need to re-tune this with different weights. Best, Deepthi On Thu, Nov 7, 2013 at 4:59 PM, sumala...@multicorewareinc.com wrote: # HG changeset patch # User Sumalatha Polureddy # Date 1383823751 -19800 # Node ID a54b30b16e83048a7a2ef5584e1e1c9682216075 # Parent 0a1b379be359cbcf76140ac392104c856a037c78 no-rdo: giving weightage to the cost of all CU's and neighbour CU's for early exit Early exit is done when CU cost at depth n is lessthan sum of 60% of avgcost of all CU's and 40% of avgcost of neighbour CU's at same depth. 
diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.cpp --- a/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 18:17:52 2013 +0800 +++ b/source/Lib/TLibCommon/TComPic.cpp Thu Nov 07 16:59:11 2013 +0530 @@ -69,6 +69,14 @@ m_ssimCnt = 0; m_frameTime = 0.0; m_elapsedCompressTime = 0.0; +m_avgCost[0] = 0; +m_avgCost[1] = 0; +m_avgCost[2] = 0; +m_avgCost[3] = 0; +m_count[0] = 0; +m_count[1] = 0; +m_count[2] = 0; +m_count[3] = 0; } TComPic::~TComPic() diff -r 0a1b379be359 -r a54b30b16e83 source/Lib/TLibCommon/TComPic.h --- a/source/Lib/TLibCommon/TComPic.h Thu Nov 07 18:17:52 2013 +0800 +++ b/source/Lib/TLibCommon/TComPic.h Thu Nov 07 16:59:11 2013 +0530 @@ -95,6 +95,8 @@ MD5Contextm_state[3]; uint32_t m_crc[3]; uint32_t m_checksum[3]; +UInt64m_avgCost[4]; +uint32_t m_count[4]; /* SSIM values per frame */ doublem_ssim; diff -r 0a1b379be359 -r a54b30b16e83 source/encoder/compress.cpp --- a/source/encoder/compress.cpp Thu Nov 07 18:17:52 2013 +0800 +++ b/source/encoder/compress.cpp Thu Nov 07 16:59:11 2013 +0530 @@ -567,13 +567,14 @@ if (bSubBranch bTrySplitDQP depth g_maxCUDepth - g_addCUDepth) { #if EARLY_EXIT // turn ON this to enable early exit -// early exit when the RD cost of best mode at depth n is less than the avgerage of RD cost of the -// CU's(above, aboveleft, aboveright, left, colocated) at depth n of previosuly coded CU's +// early exit when the RD cost of best mode at depth n is less than the sum of avgerage of RD cost of the neighbour +// CU's(above, aboveleft, aboveright, left, colocated) and all CU's at depth n with weightage for each quantity if (outBestCU != 0) { -UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0, costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0, costCUColocated1 = 0, totalCost = 0, avgCost = 0; +UInt64 costCU = 0, costCUAbove = 0, costCUAboveLeft = 0, costCUAboveRight = 0, costCULeft = 0, costCUColocated0 = 0, costCUColocated1 = 0, totalCostNeigh = 0, totalCostAll = 0; +double avgCost = 0; UInt64 countCU = 
0, countCUAbove = 0, countCUAboveLeft = 0, countCUAboveRight = 0, countCULeft = 0, countCUColocated0 = 0, countCUColocated1 = 0; -UInt64 totalCount = 0; +UInt64 totalCountNeigh = 0, totalCountAll = 0; TComDataCU* above = outTempCU-getCUAbove(); TComDataCU* aboveLeft = outTempCU-getCUAboveLeft(); TComDataCU* aboveRight = outTempCU-getCUAboveRight(); @@ -614,10 +615,15 @@ countCUColocated1 = colocated1-m_count[depth]; } -totalCost = costCU + costCUAbove + costCUAboveLeft + costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1; -totalCount = countCU + countCUAbove + countCUAboveLeft + countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1; -if (totalCount != 0) -avgCost = totalCost / totalCount; +totalCostNeigh = costCU + costCUAbove + costCUAboveLeft + costCUAboveRight + costCULeft + costCUColocated0 + costCUColocated1; +totalCountNeigh = countCU + countCUAbove + countCUAboveLeft + countCUAboveRight + countCULeft + countCUColocated0 + countCUColocated1; + +totalCostAll = (outTempCU-getPic()-m_avgCost[depth] * outTempCU-getPic()-m_count[depth]) - totalCostNeigh; +totalCountAll = outTempCU-getPic()-m_count[depth] - totalCountNeigh; + +//giving 60% weight to all CU's and 40% weight to neighbour CU's +if (totalCountAll) +avgCost = ((0.6 * totalCostAll) + (0.4 *
Re: [x265] [PATCH] TShortYUV.cpp, added code to use new pixelsub_ps asm primitives
This patch cannot be accepted. 1. For generic 8bpp I420 optimizations, we can avoid the extra call to partitionFromSizes in chroma by passing in part instead of partsize. This will remove the downshift and then upscale by 2 (!!). 2. We will need to handle multiple color spaces separately, as luma and chroma tables are now aligned only for I420. On Wed, Nov 13, 2013 at 12:31 PM, muru...@multicorewareinc.com wrote: # HG changeset patch # User Murugan Vairavel muru...@multicorewareinc.com # Date 1384326072 -19800 # Wed Nov 13 12:31:12 2013 +0530 # Node ID 69d4c1cfc8bed7c63bfdaa1073196e0874d14ebe # Parent c4ca80d19105ccf1ba2ec14dd65915f2820a660d TShortYUV.cpp, added code to use new pixelsub_ps asm primitives diff -r c4ca80d19105 -r 69d4c1cfc8be source/common/TShortYUV.cpp --- a/source/common/TShortYUV.cpp Tue Nov 12 19:10:23 2013 +0530 +++ b/source/common/TShortYUV.cpp Wed Nov 13 12:31:12 2013 +0530 @@ -95,7 +95,8 @@ int src1Stride = srcYuv1->getStride(); int dstStride = m_width; -primitives.pixelsub_ps(x, y, dst, dstStride, src0, src1, src0Stride, src1Stride); +int part = partitionFromSizes(x, y); +primitives.luma_sub_ps[part](dst, dstStride, src0, src1, src0Stride, src1Stride); } void TShortYUV::subtractChroma(TComYuv* srcYuv0, TComYuv* srcYuv1, unsigned int trUnitIdx, unsigned int partSize) @@ -113,8 +114,9 @@ int src1Stride = srcYuv1->getCStride(); int dstStride = m_cwidth; -primitives.pixelsub_ps(x, y, dstU, dstStride, srcU0, srcU1, src0Stride, src1Stride); -primitives.pixelsub_ps(x, y, dstV, dstStride, srcV0, srcV1, src0Stride, src1Stride); +int part = partitionFromSizes(2 * x, 2 * y); +primitives.chroma_sub_ps[part](dstU, dstStride, srcU0, srcU1, src0Stride, src1Stride); +primitives.chroma_sub_ps[part](dstV, dstStride, srcV0, srcV1, src0Stride, src1Stride); } void TShortYUV::addClip(TShortYUV* srcYuv0, TShortYUV* srcYuv1, unsigned int trUnitIdx, unsigned int partSize) ___ x265-devel mailing list x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] pixelsub_ps, Removed unused old code
On hold until earlier patch has been fixed. On Wed, Nov 13, 2013 at 12:46 PM, muru...@multicorewareinc.com wrote: # HG changeset patch # User Murugan Vairavel muru...@multicorewareinc.com # Date 1384326967 -19800 # Wed Nov 13 12:46:07 2013 +0530 # Node ID 481cdfc251de0f99ef0a3c4fd53c786b79b5f182 # Parent 69d4c1cfc8bed7c63bfdaa1073196e0874d14ebe pixelsub_ps, Removed unused old code diff -r 69d4c1cfc8be -r 481cdfc251de source/common/pixel.cpp --- a/source/common/pixel.cpp Wed Nov 13 12:31:12 2013 +0530 +++ b/source/common/pixel.cpp Wed Nov 13 12:46:07 2013 +0530 @@ -971,7 +971,6 @@ p.weightpUniPixel = weightUnidirPix; p.weightpUni = weightUnidir; -p.pixelsub_ps = pixelsub_ps_c; p.pixeladd_ss = pixeladd_ss_c; p.scale1D_128to64 = scale1D_128to64; diff -r 69d4c1cfc8be -r 481cdfc251de source/common/primitives.h --- a/source/common/primitives.hWed Nov 13 12:31:12 2013 +0530 +++ b/source/common/primitives.hWed Nov 13 12:46:07 2013 +0530 @@ -162,7 +162,6 @@ typedef void (*blockcpy_sp_t)(int bx, int by, int16_t *dst, intptr_t dstride, pixel *src, intptr_t sstride); // dst is aligned typedef void (*blockcpy_ps_t)(int bx, int by, pixel *dst, intptr_t dstride, int16_t *src, intptr_t sstride); // dst is aligned typedef void (*blockcpy_sc_t)(int bx, int by, int16_t *dst, intptr_t dstride, uint8_t *src, intptr_t sstride); // dst is aligned -typedef void (*pixelsub_ps_t)(int bx, int by, int16_t *dst, intptr_t dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1); typedef void (*pixeladd_ss_t)(int bx, int by, int16_t *dst, intptr_t dstride, int16_t *src0, int16_t *src1, intptr_t sstride0, intptr_t sstride1); typedef void (*pixelavg_pp_t)(pixel *dst, intptr_t dstride, pixel *src0, intptr_t sstride0, pixel *src1, intptr_t sstride1, int weight); typedef void (*blockfill_s_t)(int16_t *dst, intptr_t dstride, int16_t val); @@ -277,7 +276,6 @@ weightpUni_tweightpUni; weightpUniPixel_t weightpUniPixel; -pixelsub_ps_t pixelsub_ps; pixeladd_ss_t pixeladd_ss; pixelavg_pp_t 
pixelavg_pp[NUM_LUMA_PARTITIONS]; diff -r 69d4c1cfc8be -r 481cdfc251de source/common/vec/blockcopy-sse3.cpp --- a/source/common/vec/blockcopy-sse3.cpp Wed Nov 13 12:31:12 2013 +0530 +++ b/source/common/vec/blockcopy-sse3.cpp Wed Nov 13 12:46:07 2013 +0530 @@ -170,55 +170,6 @@ } } -void pixelsub_ps(int bx, int by, int16_t *dst, intptr_t dstride, uint8_t *src0, uint8_t *src1, intptr_t sstride0, intptr_t sstride1) -{ -size_t aligncheck = (size_t)dst | (size_t)src0 | bx | sstride0 | sstride1 | dstride; - -if (!(aligncheck 15)) -{ -// fast path, multiples of 16 pixel wide blocks -for (int y = 0; y by; y++) -{ -for (int x = 0; x bx; x += 16) -{ -__m128i word0, word1; -__m128i word3, word4; -__m128i mask = _mm_setzero_si128(); - -word0 = _mm_load_si128((__m128i const*)(src0 + x));// load 16 bytes from src1 -word1 = _mm_load_si128((__m128i const*)(src1 + x));// load 16 bytes from src2 - -word3 = _mm_unpacklo_epi8(word0, mask);// interleave with zero extensions -word4 = _mm_unpacklo_epi8(word1, mask); -_mm_store_si128((__m128i*)dst[x], _mm_subs_epi16(word3, word4));// store block into dst - -word3 = _mm_unpackhi_epi8(word0, mask);// interleave with zero extensions -word4 = _mm_unpackhi_epi8(word1, mask); -_mm_store_si128((__m128i*)dst[x + 8], _mm_subs_epi16(word3, word4));// store block into dst -} - -src0 += sstride0; -src1 += sstride1; -dst += dstride; -} -} -else -{ -// slow path, irregular memory alignments or sizes -for (int y = 0; y by; y++) -{ -for (int x = 0; x bx; x++) -{ -dst[x] = (int16_t)(src0[x] - src1[x]); -} - -src0 += sstride0; -src1 += sstride1; -dst += dstride; -} -} -} - void pixeladd_ss(int bx, int by, int16_t *dst, intptr_t dstride, int16_t *src0, int16_t *src1, intptr_t sstride0, intptr_t sstride1) { size_t aligncheck = (size_t)dst | (size_t)src0 | sstride0 | sstride1 | dstride; @@ -315,7 +266,6 @@ p.blockcpy_pp = blockcopy_pp; p.blockcpy_ps = blockcopy_ps; p.blockcpy_sp = blockcopy_sp; -p.pixelsub_ps = pixelsub_ps; p.pixeladd_ss = pixeladd_ss; #endif 
// if HIGH_BIT_DEPTH } diff -r 69d4c1cfc8be -r 481cdfc251de source/test/pixelharness.cpp --- a/source/test/pixelharness.cpp Wed Nov 13 12:31:12 2013 +0530 +++ b/source/test/pixelharness.cpp Wed Nov 13 12:46:07 2013 +0530 @@ -358,29 +358,6 @@ return true;
Re: [x265] [PATCH] TComYuv.cpp, blockcpy_pp asm integration
This can't be applied until the csp changes have been incorporated. On Wed, Nov 13, 2013 at 4:23 PM, prav...@multicorewareinc.com wrote: # HG changeset patch # User Praveen Tiwari # Date 1384339140 -19800 # Node ID c0da70471ba63f052bd0e0cdf81af3d0ca9150a4 # Parent c4ca80d19105ccf1ba2ec14dd65915f2820a660d TComYuv.cpp, blockcpy_pp asm integration diff -r c4ca80d19105 -r c0da70471ba6 source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Tue Nov 12 19:10:23 2013 +0530 +++ b/source/Lib/TLibCommon/TComYuv.cpp Wed Nov 13 16:09:00 2013 +0530 @@ -245,10 +245,12 @@ void TComYuv::copyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) { +int part = partitionFromSizes(width, height); + if (bLuma) -copyPartToPartLuma(dstPicYuv, partIdx, width, height); +copyPartToPartLuma(dstPicYuv, partIdx, part); if (bChroma) -copyPartToPartChroma(dstPicYuv, partIdx, width >> m_hChromaShift, height >> m_vChromaShift); +copyPartToPartChroma(dstPicYuv, partIdx, part); } void TComYuv::copyPartToPartYuv(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) @@ -259,7 +261,7 @@ copyPartToPartChroma(dstPicYuv, partIdx, width >> m_hChromaShift, height >> m_vChromaShift); } -void TComYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height) +void TComYuv::copyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t part) { Pel* src = getLumaAddr(partIdx); Pel* dst = dstPicYuv->getLumaAddr(partIdx); @@ -269,7 +271,6 @@ uint32_t srcstride = getStride(); uint32_t dststride = dstPicYuv->getStride(); -int part = partitionFromSizes(width, height); primitives.luma_copy_pp[part](dst, dststride, src, srcstride); } @@ -285,7 +286,7 @@ primitives.luma_copy_ps[part](dst, dststride, src, srcstride); } -void TComYuv::copyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height) +void TComYuv::copyPartToPartChroma(TComYuv* dstPicYuv, 
uint32_t partIdx, uint32_t part) { Pel* srcU = getCbAddr(partIdx); Pel* srcV = getCrAddr(partIdx); @@ -297,8 +298,8 @@ uint32_t srcstride = getCStride(); uint32_t dststride = dstPicYuv-getCStride(); -primitives.blockcpy_pp(width, height, dstU, dststride, srcU, srcstride); -primitives.blockcpy_pp(width, height, dstV, dststride, srcV, srcstride); +primitives.chroma_copy_pp[part](dstU, dststride, srcU, srcstride); +primitives.chroma_copy_pp[part](dstV, dststride, srcV, srcstride); } void TComYuv::copyPartToPartChroma(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height) diff -r c4ca80d19105 -r c0da70471ba6 source/Lib/TLibCommon/TComYuv.h --- a/source/Lib/TLibCommon/TComYuv.h Tue Nov 12 19:10:23 2013 +0530 +++ b/source/Lib/TLibCommon/TComYuv.h Wed Nov 13 16:09:00 2013 +0530 @@ -136,9 +136,9 @@ // Copy YUV partition buffer to other YUV partition buffer voidcopyPartToPartYuv(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, bool bLuma = true, bool bChroma = true); voidcopyPartToPartYuv(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, bool bLuma = true, bool bChroma = true); -voidcopyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height); +voidcopyPartToPartLuma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t part); voidcopyPartToPartLuma(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height); -voidcopyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height); +voidcopyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t part); voidcopyPartToPartChroma(TShortYUV* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height); voidcopyPartToPartChroma(TComYuv* dstPicYuv, uint32_t partIdx, uint32_t width, uint32_t height, uint32_t chromaId); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org 
https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] TComYuv::addAvg, primitive function for luma and chroma loops
Pushed. But next time, please organize your patches more clearly. 1. Add C primitive, if it does not exist. 2. Add the function pointer declarations and new primitive declarations to EncoderPrimitives struct. 3. Add testbench code for primitives. 4. Add asm code. Once all above patches have been reviewed, pushed and tested on all platforms, then you can integrate it with the actual encoder. On Mon, Nov 18, 2013 at 3:23 PM, dnyanesh...@multicorewareinc.com wrote: # HG changeset patch # User Dnyaneshwar G dnyanesh...@multicorewareinc.com # Date 1384768323 -19800 # Mon Nov 18 15:22:03 2013 +0530 # Node ID cdd54aa200bd635395c01bbb07c156be4edbf7b1 # Parent ac9e64d8a80bffe33fdaa0a9b83fdbe84f39d0b0 TComYuv::addAvg, primitive function for luma and chroma loops diff -r ac9e64d8a80b -r cdd54aa200bd source/Lib/TLibCommon/TComYuv.cpp --- a/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 12:26:44 2013 +0530 +++ b/source/Lib/TLibCommon/TComYuv.cpp Mon Nov 18 15:22:03 2013 +0530 @@ -589,9 +589,7 @@ void TComYuv::addAvg(TShortYUV* srcYuv0, TShortYUV* srcYuv1, uint32_t partUnitIdx, uint32_t width, uint32_t height, bool bLuma, bool bChroma) { -int x, y; uint32_t src0Stride, src1Stride, dststride; -int shiftNum, offset; int16_t* srcY0 = srcYuv0-getLumaAddr(partUnitIdx); int16_t* srcU0 = srcYuv0-getCbAddr(partUnitIdx); @@ -605,61 +603,24 @@ Pel* dstU = getCbAddr(partUnitIdx); Pel* dstV = getCrAddr(partUnitIdx); +int part = partitionFromSizes(width, height); + if (bLuma) { src0Stride = srcYuv0-m_width; src1Stride = srcYuv1-m_width; dststride = getStride(); -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; -offset = (1 (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; -for (y = 0; y height; y++) -{ -for (x = 0; x width; x += 4) -{ -dstY[x + 0] = ClipY((srcY0[x + 0] + srcY1[x + 0] + offset) shiftNum); -dstY[x + 1] = ClipY((srcY0[x + 1] + srcY1[x + 1] + offset) shiftNum); -dstY[x + 2] = ClipY((srcY0[x + 2] + srcY1[x + 2] + offset) shiftNum); -dstY[x + 3] = ClipY((srcY0[x + 3] + srcY1[x + 3] + offset) 
shiftNum); -} - -srcY0 += src0Stride; -srcY1 += src1Stride; -dstY += dststride; -} +primitives.luma_addAvg[part](dstY, dststride, srcY0, src0Stride, srcY1, src1Stride); } if (bChroma) { -shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; -offset = (1 (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; - src0Stride = srcYuv0-m_cwidth; src1Stride = srcYuv1-m_cwidth; dststride = getCStride(); -width = m_hChromaShift; -height = m_vChromaShift; - -for (y = height - 1; y = 0; y--) -{ -for (x = width - 1; x = 0; ) -{ -// note: chroma min width is 2 -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) shiftNum); -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) shiftNum); -x--; -dstU[x] = ClipC((srcU0[x] + srcU1[x] + offset) shiftNum); -dstV[x] = ClipC((srcV0[x] + srcV1[x] + offset) shiftNum); -x--; -} - -srcU0 += src0Stride; -srcU1 += src1Stride; -srcV0 += src0Stride; -srcV1 += src1Stride; -dstU += dststride; -dstV += dststride; -} +primitives.chroma_addAvg[part](dstU, dststride, srcU0, src0Stride, srcU1, src1Stride); +primitives.chroma_addAvg[part](dstV, dststride, srcV0, src0Stride, srcV1, src1Stride); } } diff -r ac9e64d8a80b -r cdd54aa200bd source/common/pixel.cpp --- a/source/common/pixel.cpp Mon Nov 18 12:26:44 2013 +0530 +++ b/source/common/pixel.cpp Mon Nov 18 15:22:03 2013 +0530 @@ -794,6 +794,27 @@ a += dstride; } } + +templateint bx, int by +void addAvg(pixel* dst, intptr_t dstStride, int16_t* src0, intptr_t src0Stride, int16_t* src1, intptr_t src1Stride) +{ +int shiftNum, offset; +shiftNum = IF_INTERNAL_PREC + 1 - X265_DEPTH; +offset = (1 (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; + +for (int y = 0; y by; y++) +{ +for (int x = 0; x bx; x += 2) +{ +dst[x + 0] = ClipY((src0[x + 0] + src1[x + 0] + offset) shiftNum); +dst[x + 1] = ClipY((src0[x + 1] + src1[x + 1] + offset) shiftNum); +} + +src0 += src0Stride; +src1 += src1Stride; +dst += dstStride; +} +} } // end anonymous namespace namespace x265 { @@ -835,12 +856,14 @@ p.satd[LUMA_16x64] = satd816, 64; #define CHROMA(W, H) \ 
+p.chroma_addAvg[CHROMA_ ## W ## x ## H] = addAvgW, H; \ p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ##
Re: [x265] [PATCH] b-pyramid implementation: Allow the use of B-frames as references for non B and B frames
In encoder::configure, there should be some check for --b-adapt 0 --bframes 0, in which case print warning and disable b-pyramid. On Mon, Nov 18, 2013 at 3:40 PM, Gopu Govindaswamy g...@multicorewareinc.com wrote: # HG changeset patch # User Gopu Govindaswamy g...@multicorewareinc.com # Date 1384769433 -19800 # Node ID 1e22b93638072ed805478d7af17f90e285fb4969 # Parent 2321ebe0bf64e5f3c0034076c7edb3ecbcd48039 b-pyramid implementation: Allow the use of B-frames as references for non B and B frames when we enable the b-pyramid the bitrates efficienctly reduced and there is not much diff in the performance and the PSNR 00. increased some of the clips and decreased some of clips Test results for reference when enable and disable the b-pyramid: cli option : -b 10 --hash=1 -f 100 --b-pyramid=1 --ref=1 --b-adapt=2 Enable B-reference : --b-pyramid=1 Disable B-reference : --b-pyramid=0 Results: Enable / Disable clip - FourPeople_1280x720_60.yuv Total time taken - 9.70s (10.31 fps) / 9.93s (10.07 fps) Bitrates - 516.30 kb/s / 544.68 kb/s PSNR - 39.725 / 39.701 clip - BasketballDrive_1920x1080_50.y4m Total time taken - 39.06s (2.51 fps) / 38.98s (2.57 fps) Bitrates - 4166.92 kb/s / 4370.43 kb/s PSNR - 37.261 / 37.268 clip - Johnny_1280x720_60.y4m Total time taken - 8.88s (11.27 fps) / 11.08s (9.03 fps) Bitrates - 304.29 kb/s / 328.84 kb/s PSNR - 40.605 / 40.551 Total time taken - 30.97s (3.23 fps) / 33.65s (2.97 fps) Bitrates - 3496.84 kb/s / 3683.93 kb/s PSNR - 35.645 / 35.660 diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.cpp --- a/source/common/common.cpp Mon Nov 18 11:32:06 2013 +0530 +++ b/source/common/common.cpp Mon Nov 18 15:40:33 2013 +0530 @@ -54,6 +54,7 @@ static int parseCspName(const char *arg, int error); static int parseName(const char *arg, const char * const * names, int error); +static int parse_enum(const char *, const char * const * names, int *dst); using namespace x265; @@ -165,6 +166,7 @@ param-bframes = 3; param-lookaheadDepth = 40; 
param-bFrameAdaptive = X265_B_ADAPT_FAST; +param-bpyramid = 0; param-scenecutThreshold = 40; /* Magic number pulled in from x264*/ /* Intra Coding Tools */ @@ -532,7 +534,7 @@ } CHECK(param-bEnableWavefront 0, WaveFrontSynchro cannot be negative); - +CHECK(param-bpyramid = 2, b-pyramid is 0 or 1); return check_failed; } @@ -620,6 +622,7 @@ x265_log(param, X265_LOG_INFO, RDpenalty: %d\n, param-rdPenalty); } x265_log(param, X265_LOG_INFO, Lookahead / bframes / badapt : %d / %d / %d\n, param-lookaheadDepth, param-bframes, param-bFrameAdaptive); +x265_log(param, X265_LOG_INFO, b-pyramid / weightp / ref: %d / %d / %d\n, param-bpyramid, param-bEnableWeightedPred, param-maxNumReferences); x265_log(param, X265_LOG_INFO, tools: ); #define TOOLOPT(FLAG, STR) if (FLAG) fprintf(stderr, %s , STR) TOOLOPT(param-bEnableRectInter, rect); @@ -628,7 +631,6 @@ TOOLOPT(param-bEnableConstrainedIntra, cip); TOOLOPT(param-bEnableEarlySkip, esd); fprintf(stderr, rd=%d , param-rdLevel); -fprintf(stderr, ref=%d , param-maxNumReferences); TOOLOPT(param-bEnableLoopFilter, lft); if (param-bEnableSAO) @@ -650,7 +652,6 @@ else fprintf(stderr, tskip ); } -TOOLOPT(param-bEnableWeightedPred, weightp); TOOLOPT(param-bEnableWeightedBiPred, weightbp); TOOLOPT(param-rc.aqMode, aq); fprintf(stderr, \n); @@ -747,6 +748,15 @@ } OPT(input-csp) p-sourceCsp = ::parseCspName(value, berror); OPT(me)p-searchMethod = ::parseName(value, x265_motion_est_names, berror); +OPT(b-pyramid) +{ +berror |= parse_enum(value, x265_b_pyramid_names, p-bpyramid); +if (berror) +{ +berror = 0; +p-bpyramid = atoi(value); +} +} else return X265_PARAM_BAD_NAME; #undef OPT @@ -802,6 +812,7 @@ BOOL(p-bEnableSAO, sao); s += sprintf(s, sao-lcu-bounds=%d, p-saoLcuBoundary); s += sprintf(s, sao-lcu-opt=%d, p-saoLcuBasedOptimization); +s += sprintf(s, b-pyramid=%d, p-bpyramid); #undef BOOL return buf; @@ -843,3 +854,13 @@ error = 1; return a; } +static int parse_enum(const char *arg, const char * const * names, int *dst) +{ +for (int i = 
0; names[i]; i++) +if (!strcmp(arg, names[i])) +{ +*dst = i; +return 0; +} +return -1; +} diff -r 2321ebe0bf64 -r 1e22b9363807 source/common/common.h --- a/source/common/common.hMon Nov 18 11:32:06 2013 +0530 +++ b/source/common/common.hMon Nov 18 15:40:33 2013 +0530 @@ -107,6 +107,7 @@ #define X265_LOG2(x) log2(x) #endif +static const char * const x265_b_pyramid_names[] = {none, normal, 0}; /* defined in common.cpp */ int64_t
Re: [x265] [PATCH] cleanup: removed unused code in pixel-a.asm
Does not apply at the tip. On Mon, Nov 25, 2013 at 11:40 AM, yuva...@multicorewareinc.com wrote: # HG changeset patch # User Yuvaraj Venkatesh yuva...@multicorewareinc.com # Date 1385359751 -19800 # Mon Nov 25 11:39:11 2013 +0530 # Node ID 90a80def0f1aabdf29e1f08dd0f2263d8e6af805 # Parent c0c862dc71fbd021efd3922de99da4f2f93e81f4 cleanup: removed unused code in pixel-a.asm diff -r c0c862dc71fb -r 90a80def0f1a source/common/x86/pixel-a.asm --- a/source/common/x86/pixel-a.asm Sun Nov 24 17:34:12 2013 +0800 +++ b/source/common/x86/pixel-a.asm Mon Nov 25 11:39:11 2013 +0530 @@ -7157,173 +7157,6 @@ %endif ; !ARCH_X86_64 %endmacro ; SA8D -;= -; SA8D_SATD -;= - -; %1: vertical/horizontal mode -; %2-%5: sa8d output regs (m0,m1,m2,m3,m4,m5,m8,m9) -; m10: satd result -; m6, m11-15: tmp regs -%macro SA8D_SATD_8x4 5 -%if %1 -LOAD_DIFF_8x4P %2, %3, %4, %5, 6, 11, 7, r0, r2, 1 -HADAMARD 0, sumsub, %2, %3, 6 -HADAMARD 0, sumsub, %4, %5, 6 -SBUTTERFLYwd, %2, %3, 6 -SBUTTERFLYwd, %4, %5, 6 -HADAMARD2_2D %2, %4, %3, %5, 6, dq - -mova m12, m%2 -mova m13, m%3 -mova m14, m%4 -mova m15, m%5 -HADAMARD 0, sumsub, %2, %3, 6 -HADAMARD 0, sumsub, %4, %5, 6 -SBUTTERFLY qdq, 12, 13, 6 -HADAMARD 0, amax, 12, 13, 6 -SBUTTERFLY qdq, 14, 15, 6 -paddw m10, m12 -HADAMARD 0, amax, 14, 15, 6 -paddw m10, m14 -%else -LOAD_SUMSUB_8x4P %2, %3, %4, %5, 6, 11, 7, r0, r2, 1 -HADAMARD4_V %2, %3, %4, %5, 6 - -pabswm12, m%2 ; doing the abs first is a slight advantage -pabswm14, m%4 -pabswm13, m%3 -pabswm15, m%5 -HADAMARD 1, max, 12, 14, 6, 11 -paddwm10, m12 -HADAMARD 1, max, 13, 15, 6, 11 -paddwm10, m13 -%endif -%endmacro ; SA8D_SATD_8x4 - -; %1: add spilled regs? -; %2: spill regs? 
-%macro SA8D_SATD_ACCUM 2 -%if HIGH_BIT_DEPTH -pmaddwd m10, [pw_1] -HADDUWD m0, m1 -%if %1 -paddd m10, temp1 -padddm0, temp0 -%endif -%if %2 -mova temp1, m10 -pxorm10, m10 -%endif -%elif %1 -paddwm0, temp0 -%endif -%if %2 -mova temp0, m0 -%endif -%endmacro - -%macro SA8D_SATD 0 -%define vertical ((notcpuflag(ssse3) || cpuflag(atom)) || HIGH_BIT_DEPTH) -cglobal pixel_sa8d_satd_8x8_internal -SA8D_SATD_8x4 vertical, 0, 1, 2, 3 -SA8D_SATD_8x4 vertical, 4, 5, 8, 9 - -%if vertical ; sse2-style -HADAMARD2_2D 0, 4, 2, 8, 6, qdq, amax -HADAMARD2_2D 1, 5, 3, 9, 6, qdq, amax -%else; complete sa8d -SUMSUB_BADC w, 0, 4, 1, 5, 12 -HADAMARD 2, sumsub, 0, 4, 12, 11 -HADAMARD 2, sumsub, 1, 5, 12, 11 -SUMSUB_BADC w, 2, 8, 3, 9, 12 -HADAMARD 2, sumsub, 2, 8, 12, 11 -HADAMARD 2, sumsub, 3, 9, 12, 11 -HADAMARD 1, amax, 0, 4, 12, 11 -HADAMARD 1, amax, 1, 5, 12, 4 -HADAMARD 1, amax, 2, 8, 12, 4 -HADAMARD 1, amax, 3, 9, 12, 4 -%endif - -; create sa8d sub results -paddwm1, m2 -paddwm0, m3 -paddwm0, m1 - -SAVE_MM_PERMUTATION -ret - -;--- -; uint64_t pixel_sa8d_satd_16x16( pixel *, intptr_t, pixel *, intptr_t ) -;--- -cglobal pixel_sa8d_satd_16x16, 4,8-(mmsize/32),16,SIZEOF_PIXEL*mmsize -%define temp0 [rsp+0*mmsize] -%define temp1 [rsp+1*mmsize] -FIX_STRIDES r1, r3 -%if vertical==0 -mova m7, [hmul_8p] -%endif -lea r4, [3*r1] -lea r5, [3*r3] -pxorm10, m10 - -%if mmsize==32 -call pixel_sa8d_satd_8x8_internal -SA8D_SATD_ACCUM 0, 1 -call pixel_sa8d_satd_8x8_internal -SA8D_SATD_ACCUM 1, 0 -vextracti128 xm1, m0, 1 -vextracti128 xm2, m10, 1 -paddw xm0, xm1 -paddw xm10, xm2 -%else -lea r6, [r2+8*SIZEOF_PIXEL] -lea r7, [r0+8*SIZEOF_PIXEL] - -call pixel_sa8d_satd_8x8_internal -SA8D_SATD_ACCUM 0, 1 -call pixel_sa8d_satd_8x8_internal -SA8D_SATD_ACCUM 1, 1 - -mov r0, r7 -mov r2, r6 - -call pixel_sa8d_satd_8x8_internal -SA8D_SATD_ACCUM 1, 1 -call pixel_sa8d_satd_8x8_internal -SA8D_SATD_ACCUM 1, 0 -%endif - -; xop already has fast horizontal sums -%if cpuflag(sse4) notcpuflag(xop) HIGH_BIT_DEPTH==0 
-pmaddwd xm10, [pw_1] -HADDUWD xm0, xm1 -phaddd xm0, xm10 ; sa8d1 sa8d2 satd1 satd2 -pshufd xm1, xm0, q2301 ; sa8d2 sa8d1 satd2 satd1 -paddd xm0, xm1; sa8d sa8d satd satd -movdr0d, xm0 -pextrd eax, xm0, 2 -%else -%if HIGH_BIT_DEPTH -HADDD xm0, xm1 -HADDD xm10, xm2 -%else -HADDUW xm0, xm1
Re: [x265] [PATCH] RD merge and cost fixes: use sa8d_inter, add early-skip param that was missed in earlier commit
Reverting the sa8d_inter changes. This block always uses square CUs, so sa8d primitives are sufficient. # HG changeset patch # User Deepthi Nandakumar deep...@multicorewareinc.com # Date 1385631244 -19800 # Node ID 2ba6c26c9febdc8c57d3014c0cf98d4897d3992d # Parent ca8c57f0c53248a36db6d04639c39ac0e2829fcd RD merge: add in early-skip param. diff -r ca8c57f0c532 -r 2ba6c26c9feb source/encoder/compress.cpp --- a/source/encoder/compress.cpp Thu Nov 28 13:52:19 2013 +0530 +++ b/source/encoder/compress.cpp Thu Nov 28 15:04:04 2013 +0530 @@ -420,7 +420,7 @@ /* Compute Merge Cost */ xComputeCostMerge2Nx2N(m_bestMergeCU[depth], m_mergeCU[depth], m_modePredYuv[3][depth], m_bestMergeRecoYuv[depth]); -if (!m_bestMergeCU[depth]->isSkipped(0)) +if (!(m_cfg->param.bEnableEarlySkip && m_bestMergeCU[depth]->isSkipped(0))) { /*Compute 2Nx2N mode costs*/ { On Thu, Nov 28, 2013 at 2:54 PM, deep...@multicorewareinc.com wrote: # HG changeset patch # User Deepthi Nandakumar deep...@multicorewareinc.com # Date 1385630646 -19800 # Node ID 4f0b72baee90e9cf63ab2015b65f3d820c87d129 # Parent ca8c57f0c53248a36db6d04639c39ac0e2829fcd RD merge and cost fixes: use sa8d_inter, add early-skip param that was missed in earlier commit. 
diff -r ca8c57f0c532 -r 4f0b72baee90 source/encoder/compress.cpp --- a/source/encoder/compress.cpp Thu Nov 28 13:52:19 2013 +0530 +++ b/source/encoder/compress.cpp Thu Nov 28 14:54:06 2013 +0530 @@ -211,7 +211,7 @@ outTempCU-m_totalBits = 0; m_search-predInterSearch(outTempCU, outPredYuv, bUseMRG, true, false); int part = g_convertToBit[outTempCU-getWidth(0)]; -uint32_t distortion = primitives.sa8d[part](m_origYuv[depth]-getLumaAddr(), m_origYuv[depth]-getStride(), +uint32_t distortion = primitives.sa8d_inter[part](m_origYuv[depth]-getLumaAddr(), m_origYuv[depth]-getStride(), outPredYuv-getLumaAddr(), outPredYuv-getStride()); outTempCU-m_totalCost = m_rdCost-calcRdSADCost(distortion, outTempCU-m_totalBits); } @@ -420,7 +420,7 @@ /* Compute Merge Cost */ xComputeCostMerge2Nx2N(m_bestMergeCU[depth], m_mergeCU[depth], m_modePredYuv[3][depth], m_bestMergeRecoYuv[depth]); -if (!m_bestMergeCU[depth]-isSkipped(0)) +if (!(m_cfg-param.bEnableEarlySkip m_bestMergeCU[depth]-isSkipped(0))) { /*Compute 2Nx2N mode costs*/ { ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] log: output intra type statistics of I frame, bug fix in intra percentage calculation
Build fails. On Thu, Nov 28, 2013 at 11:10 AM, kavi...@multicorewareinc.com wrote: # HG changeset patch # User Kavitha Sampath kavi...@multicorewareinc.com # Date 1385616934 -19800 # Thu Nov 28 11:05:34 2013 +0530 # Branch stable # Node ID 8519dc4a5b9e53f1ed6f2f52294d7caea2803bc3 # Parent f92e0c49a9f0a0e6f6db3eb247bd04431eb75b1e log: output intra type statistics of I frame, bug fix in intra percentage calculation diff -r f92e0c49a9f0 -r 8519dc4a5b9e source/Lib/TLibEncoder/TEncCu.cpp --- a/source/Lib/TLibEncoder/TEncCu.cpp Wed Nov 27 20:50:08 2013 -0600 +++ b/source/Lib/TLibEncoder/TEncCu.cpp Thu Nov 28 11:05:34 2013 +0530 @@ -551,6 +551,11 @@ } outTempCU-initEstData(depth, qp); +if (depth g_maxCUDepth - 2) +{ +memcpy(m_log-tempIntra[depth], m_log-cntIntra, sizeof(m_log-cntIntra)); +memcpy(m_log-tempIntraDist[depth], m_log-cuIntraDistribution, sizeof(m_log-cuIntraDistribution)); +} // further split if (bSubBranch bTrySplitDQP depth g_maxCUDepth - g_addCUDepth) @@ -575,19 +580,9 @@ m_rdSbacCoders[nextDepth][CI_CURR_BEST]-load(m_rdSbacCoders[nextDepth][CI_NEXT_BEST]); } -// The following if condition has to be commented out in case the early Abort based on comparison of parentCu cost, childCU cost is not required. -if (outBestCU-isIntra(0)) -{ -xCompressIntraCU(subBestPartCU[partUnitIdx], subTempPartCU[partUnitIdx], nextDepth); -} -else -{ -xCompressIntraCU(subBestPartCU[partUnitIdx], subTempPartCU[partUnitIdx], nextDepth); -} -{ -outTempCU-copyPartFrom(subBestPartCU[partUnitIdx], partUnitIdx, nextDepth); // Keep best part data to current temporary data. - xCopyYuv2Tmp(subBestPartCU[partUnitIdx]-getTotalNumPart() * partUnitIdx, nextDepth); -} +xCompressIntraCU(subBestPartCU[partUnitIdx], subTempPartCU[partUnitIdx], nextDepth); +outTempCU-copyPartFrom(subBestPartCU[partUnitIdx], partUnitIdx, nextDepth); // Keep best part data to current temporary data. 
+ xCopyYuv2Tmp(subBestPartCU[partUnitIdx]-getTotalNumPart() * partUnitIdx, nextDepth); } else if (bInSlice) { @@ -635,28 +630,39 @@ m_rdSbacCoders[nextDepth][CI_NEXT_BEST]-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); if (outBestCU-m_totalCost outTempCU-m_totalCost) { +if (depth g_maxCUDepth - 2) +{ +memcpy(m_log-cntIntra, m_log-tempIntra[depth], sizeof(m_log-tempIntra[depth])); +memcpy(m_log-cuIntraDistribution, m_log-tempIntraDist[depth], sizeof(m_log-tempIntraDist[depth])); +} I'm uncomfortable seeing memcpy's in the innermost CU-analysis loop. Why is this necessary? m_log-cntIntra[depth]++; -for (int i = 0; i 4; i++) +if (outBestCU-getLumaIntraDir()[0] 1) +m_log-cuIntraDistribution[depth][ANGULAR_MODE_ID]++; +else + m_log-cuIntraDistribution[depth][outBestCU-getLumaIntraDir()[0]]++; +} +else +{ +if (depth == g_maxCUDepth - 2) { -if (outTempCU-getPartitionSize(i) != SIZE_NxN) -m_log-cntIntra[depth + 1]--; -else -m_log-cntIntraNxN--; +for (int i = 0; i 16; i = i + 4) +{ +if (outTempCU-getPartitionSize(i) != SIZE_NxN) +{ +m_log-cntIntra[depth + 1]++; +if (outTempCU-getLumaIntraDir()[i] 1) +m_log-cuIntraDistribution[depth + 1][ANGULAR_MODE_ID]++; +else +m_log-cuIntraDistribution[depth + 1][outTempCU-getLumaIntraDir()[i]]++; +} +else +m_log-cntIntraNxN++; +} } -m_log-cntIntra[depth + 1] += boundaryCu; } xCheckBestMode(outBestCU, outTempCU, depth); // RD compare current prediction with split prediction. } -if (depth == g_maxCUDepth - 1 bSubBranch) -{ -if (outBestCU-getPartitionSize(0) == SIZE_NxN) -{ -m_log-cntIntraNxN++; -} -else -m_log-cntIntra[depth]++; -} outBestCU-copyToPic(depth); // Copy Best data to Picture for next partition prediction. // Copy Yuv data to picture Yuv diff -r f92e0c49a9f0 -r 8519dc4a5b9e source/Lib/TLibEncoder/TEncCu.h --- a/source/Lib/TLibEncoder/TEncCu.h Wed Nov 27 20:50:08 2013 -0600 +++ b/source/Lib/TLibEncoder/TEncCu.h Thu Nov 28 11:05:34 2013 +0530 @@ -66,6 +66,8 @@
Re: [x265] [PATCH] aq: bug fix for hash mismatch between recon with decoded output
Pushed. So what are the latest results for different sequences on enabling aq-mode? On Tue, Dec 3, 2013 at 4:31 PM, Aarthi Thirumalai aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalai # Date 1386068495 -19800 # Tue Dec 03 16:31:35 2013 +0530 # Node ID 660ec2c027982db73366560ca8f600e5d86cc2e3 # Parent 86d23688b0174e06f3949c81ac182ba3e83908d1 aq: bug fix for hash mismatch between recon with decoded output diff -r 86d23688b017 -r 660ec2c02798 source/encoder/compress.cpp --- a/source/encoder/compress.cpp Tue Dec 03 11:24:15 2013 +0530 +++ b/source/encoder/compress.cpp Tue Dec 03 16:31:35 2013 +0530 @@ -74,6 +74,7 @@ cu-m_totalBits = m_entropyCoder-getNumberOfWrittenBits(); cu-m_totalCost = m_rdCost-calcRdCost(cu-m_totalDistortion, cu-m_totalBits); +xCheckDQP(cu); } void TEncCu::xComputeCostIntraInInter(TComDataCU* cu, PartSize partSize) @@ -302,6 +303,7 @@ //No-residue mode m_search-encodeResAndCalcRdInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], true); +xCheckDQP(outTempCU); tmp = outTempCU; outTempCU = outBestCU; @@ -313,6 +315,7 @@ //Encode with residue m_search-estimateRDInterCU(outTempCU, m_origYuv[depth], bestPredYuv, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], false); +xCheckDQP(outTempCU); if (outTempCU-m_totalCost outBestCU-m_totalCost)//Choose best from no-residue mode and residue mode { @@ -485,6 +488,7 @@ m_search-estimateRDInterCU(outBestCU, m_origYuv[depth], m_bestPredYuv[depth], m_tmpResiYuv[depth], m_bestResiYuv[depth], m_bestRecoYuv[depth], false); +xCheckDQP(outBestCU); if (m_bestMergeCU[depth]-m_totalCost outBestCU-m_totalCost) { ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] rd level: use cu coeff array while computing coefficients instead of temp buffer
Build fails. Restore qtlayer. On Mon, Dec 16, 2013 at 3:07 PM, deepthidev...@multicorewareinc.com wrote: # HG changeset patch # User Deepthi Devaki deepthidev...@multicorewareinc.com # Date 1387185624 -19800 # Node ID f20e5ab835cd7071d9ebeabff50f6b9fef4d3e39 # Parent 9bb16a023918c342b907d106b4a3d59ec2473bc1 rd level: use cu coeff array while computing coefficients instead of temp buffer diff -r 9bb16a023918 -r f20e5ab835cd source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Dec 16 14:33:57 2013 +0530 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Dec 16 14:50:24 2013 +0530 @@ -3266,9 +3266,7 @@ if (cu-getPredictionMode(0) == MODE_INTER) { residualTransformQuantInter(cu, 0, 0, resiYuv, cu-getDepth(0), true); -xSetResidualQTData(cu, 0, 0, NULL, cu-getDepth(0), false); uint32_t width = cu-getWidth(0); -xSetResidualQTData(cu, 0, 0, resiYuv, cu-getDepth(0), true); reconYuv-addClip(predYuv, resiYuv, 0, width); if (cu-getMergeFlag(0) cu-getPartitionSize(0) == SIZE_2Nx2N cu-getQtRootCbf(0) == 0) @@ -3322,10 +3320,10 @@ if (bCheckFull) { const uint32_t numCoeffPerAbsPartIdxIncrement = cu-getSlice()-getSPS()-getMaxCUWidth() * cu-getSlice()-getSPS()-getMaxCUHeight() (cu-getSlice()-getSPS()-getMaxCUDepth() 1); -const uint32_t qtlayer = cu-getSlice()-getSPS()-getQuadtreeTULog2MaxSize() - trSizeLog2; -TCoeff *coeffCurY = m_qtTempCoeffY[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx); -TCoeff *coeffCurU = m_qtTempCoeffCb[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx 2); -TCoeff *coeffCurV = m_qtTempCoeffCr[qtlayer] + (numCoeffPerAbsPartIdxIncrement * absPartIdx 2); + +TCoeff *coeffCurY = cu-getCoeffY() + (numCoeffPerAbsPartIdxIncrement * absPartIdx); +TCoeff *coeffCurU = cu-getCoeffCb() + (numCoeffPerAbsPartIdxIncrement * absPartIdx 2); +TCoeff *coeffCurV = cu-getCoeffCr() + (numCoeffPerAbsPartIdxIncrement * absPartIdx 2); int trWidth = 0, trHeight = 0, trWidthC = 0, trHeightC = 0; uint32_t absTUPartIdxC = absPartIdx; @@ 
-3370,64 +3368,55 @@ if (absSumY) { -int16_t *curResiY = m_qtTempTComYuv[qtlayer].getLumaAddr(absTUPartIdx); +int16_t *curResiY = resiYuv-getLumaAddr(absTUPartIdx); m_trQuant-setQPforQuant(cu-getQP(0), TEXT_LUMA, cu-getSlice()-getSPS()-getQpBDOffsetY(), 0); int scalingListType = 3 + g_eTTable[(int)TEXT_LUMA]; assert(scalingListType 6); -assert(m_qtTempTComYuv[qtlayer].m_width == MAX_CU_SIZE); - m_trQuant-invtransformNxN(cu-getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, MAX_CU_SIZE, coeffCurY, trWidth, trHeight, scalingListType, false, lastPosY); //this is for inter mode only + m_trQuant-invtransformNxN(cu-getCUTransquantBypass(absPartIdx), REG_DCT, curResiY, resiYuv-m_width, coeffCurY, trWidth, trHeight, scalingListType, false, lastPosY); //this is for inter mode only } else { -int16_t *ptr = m_qtTempTComYuv[qtlayer].getLumaAddr(absTUPartIdx); -assert(m_qtTempTComYuv[qtlayer].m_width == MAX_CU_SIZE); - +int16_t *ptr = resiYuv-getLumaAddr(absTUPartIdx); assert(trWidth == trHeight); -primitives.blockfill_s[(int)g_convertToBit[trWidth]](ptr, MAX_CU_SIZE, 0); +primitives.blockfill_s[(int)g_convertToBit[trWidth]](ptr, resiYuv-m_width, 0); } if (bCodeChroma) { if (absSumU) { -int16_t *pcResiCurrU = m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC); +int16_t *pcResiCurrU = resiYuv-getCbAddr(absTUPartIdxC); int curChromaQpOffset = cu-getSlice()-getPPS()-getChromaCbQpOffset() + cu-getSlice()-getSliceQpDeltaCb(); m_trQuant-setQPforQuant(cu-getQP(0), TEXT_CHROMA, cu-getSlice()-getSPS()-getQpBDOffsetC(), curChromaQpOffset); int scalingListType = 3 + g_eTTable[(int)TEXT_CHROMA_U]; assert(scalingListType 6); -assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2); - m_trQuant-invtransformNxN(cu-getCUTransquantBypass(absPartIdx), REG_DCT, pcResiCurrU, MAX_CU_SIZE / 2, coeffCurU, trWidthC, trHeightC, scalingListType, false, lastPosU); + m_trQuant-invtransformNxN(cu-getCUTransquantBypass(absPartIdx), REG_DCT, pcResiCurrU, resiYuv-m_cwidth, coeffCurU, trWidthC, 
trHeightC, scalingListType, false, lastPosU); } else { -int16_t *ptr = m_qtTempTComYuv[qtlayer].getCbAddr(absTUPartIdxC); -assert(m_qtTempTComYuv[qtlayer].m_cwidth == MAX_CU_SIZE / 2); - +int16_t *ptr = resiYuv-getCbAddr(absTUPartIdxC);
Re: [x265] [PATCH] rc: Calculate the variance data for weighted prediction if aq-mode is disable
Am I missing something here? You have added code in the else part - if(AQ is disabled), we are already calculating variance for weightP. On Mon, Dec 16, 2013 at 5:43 PM, Gopu Govindaswamy g...@multicorewareinc.com wrote: # HG changeset patch # User Gopu Govindaswamy g...@multicorewareinc.com # Date 1387195927 -19800 # Node ID bdc7afa5ba5afee7113ac0d427fbfc660359716d # Parent 3dae450a06a4b390ce6fd947d5095b739b01c6d8 rc: Calculate the variance data for weighted prediction if aq-mode is disable diff -r 3dae450a06a4 -r bdc7afa5ba5a source/encoder/ratecontrol.cpp --- a/source/encoder/ratecontrol.cppMon Dec 16 13:57:58 2013 +0530 +++ b/source/encoder/ratecontrol.cppMon Dec 16 17:42:07 2013 +0530 @@ -116,6 +116,15 @@ for (int cuxy = 0; cuxy cuCount; cuxy++ ) pic-m_lowres.invQscaleFactor[cuxy] = 256; } + + /* Need variance data for weighted prediction */ +if (cfg-param.bEnableWeightedPred) +{ +for (int cuy = 0; cuy maxRow; cuy++ ) +for (int cux = 0; cux maxCol; cux++ ) +acEnergyCu(pic, cux, cuy); +} + } else { ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] rc: Calculate the variance data for weighted prediction if aq-mode is disable
Ughh - there's something wrong with my hg import. All clear now. On Mon, Dec 16, 2013 at 10:34 PM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: Am I missing something here? You have added code in the else part - if(AQ is disabled), we are already calculating variance for weightP. On Mon, Dec 16, 2013 at 5:43 PM, Gopu Govindaswamy g...@multicorewareinc.com wrote: # HG changeset patch # User Gopu Govindaswamy g...@multicorewareinc.com # Date 1387195927 -19800 # Node ID bdc7afa5ba5afee7113ac0d427fbfc660359716d # Parent 3dae450a06a4b390ce6fd947d5095b739b01c6d8 rc: Calculate the variance data for weighted prediction if aq-mode is disable diff -r 3dae450a06a4 -r bdc7afa5ba5a source/encoder/ratecontrol.cpp --- a/source/encoder/ratecontrol.cppMon Dec 16 13:57:58 2013 +0530 +++ b/source/encoder/ratecontrol.cppMon Dec 16 17:42:07 2013 +0530 @@ -116,6 +116,15 @@ for (int cuxy = 0; cuxy cuCount; cuxy++ ) pic-m_lowres.invQscaleFactor[cuxy] = 256; } + + /* Need variance data for weighted prediction */ +if (cfg-param.bEnableWeightedPred) +{ +for (int cuy = 0; cuy maxRow; cuy++ ) +for (int cux = 0; cux maxCol; cux++ ) +acEnergyCu(pic, cux, cuy); +} + } else { ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] rc: Calculate the variance data for weighted prediction if aq-mode is disable
What about the variance value returned by acEnergyCu? On Mon, Dec 16, 2013 at 10:43 PM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: Ughh - there's something wrong with my hg import. All clear now. On Mon, Dec 16, 2013 at 10:34 PM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: Am I missing something here? You have added code in the else part - if(AQ is disabled), we are already calculating variance for weightP. On Mon, Dec 16, 2013 at 5:43 PM, Gopu Govindaswamy g...@multicorewareinc.com wrote: # HG changeset patch # User Gopu Govindaswamy g...@multicorewareinc.com # Date 1387195927 -19800 # Node ID bdc7afa5ba5afee7113ac0d427fbfc660359716d # Parent 3dae450a06a4b390ce6fd947d5095b739b01c6d8 rc: Calculate the variance data for weighted prediction if aq-mode is disable diff -r 3dae450a06a4 -r bdc7afa5ba5a source/encoder/ratecontrol.cpp --- a/source/encoder/ratecontrol.cppMon Dec 16 13:57:58 2013 +0530 +++ b/source/encoder/ratecontrol.cppMon Dec 16 17:42:07 2013 +0530 @@ -116,6 +116,15 @@ for (int cuxy = 0; cuxy cuCount; cuxy++ ) pic-m_lowres.invQscaleFactor[cuxy] = 256; } + + /* Need variance data for weighted prediction */ +if (cfg-param.bEnableWeightedPred) +{ +for (int cuy = 0; cuy maxRow; cuy++ ) +for (int cux = 0; cux maxCol; cux++ ) +acEnergyCu(pic, cux, cuy); +} + } else { ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] Warnings during build - on Mac OS X 10.6.8
Thanks, should be fixed now. On Tue, Dec 17, 2013 at 4:08 AM, Selur hyb...@selur.de wrote: -BEGIN PGP SIGNED MESSAGE- Hash: SHA256 Nothing seriously, just wanted to note that building on Mac OS throws some warnings during building. /Users/selur/x265/source/Lib/TLibCommon/TComSlice.h: In constructor ‘x265::TComSlice::TComSlice()’: /Users/selur/x265/source/Lib/TLibCommon/TComSlice.h:1359: warning: ‘x265::TComSlice::m_avgQpRc’ will be initialized after /Users/selur/x265/source/Lib/TLibCommon/TComSlice.h:1326: warning: ‘x265::TComSPS* x265::TComSlice::m_sps’ /Users/selur/x265/source/Lib/TLibCommon/TComSlice.cpp:49: warning: when initialized here /Users/selur/x265/source/common/vec/vec-primitives.cpp:66: warning: unused parameter ‘p’ /Users/selur/x265/source/common/vec/vec-primitives.cpp:66: warning: unused parameter ‘cpuMask’ Cu Selur -BEGIN PGP SIGNATURE- Version: GnuPG v2.0.22 (MingW32) iQIcBAEBCAAGBQJSr4DhAAoJEJA5OiupSg93vskP/RUJZ22YyAJ3rN2G2cfz5KBc YQwo4aQQH10VtNOKriXMaD5U5ty7DBNJCKY5D1OPv7xpGQdTJVW5D6ClxDhYVTM2 WbvzONjPfLf8Fq+tKfS1pSnOtYUKeanpjl/GZh6I39XMRYC4XwK3c5AYSBB6c1Nx oRx+X3tPKXRYB3g5l988Lt8oQjD+fNpcvHhqOWn2GzRAD7n3zwp5ekZTAsz1DjPP EF9xMIngLLhyY/hiEALs661FirZSAqRHKtGWlcriovX2lXC1cFdI1r2LS428e2ND B9V56w0pFEnWpD1n2N7sM4p6keFfb9isSxZ3hB/DvN1qOM3cMx0UXuzCXa7fNKNN 1BTyopaZwRpWZDSHouItiPUTpkPLBpHYqifbhRD5XmFuOX+gkO47z5VL+9hKnhWQ YMRhuU5zsmb7epTJaVsf+MQWd2R2UWuBKfgx799AVVU94ls3rHd1cPzP5KJZTW1m 4KkNJ1Pcl+smzUaPL/GtFKyw4uNP8B8MFYSXV5T4E6RqZay4NYeDk312k4xdGg/Q zKzUXBeQrYdyogspbWt3x1xlgox9aP6pWOMkIxchJETDzYt2ll5z9q14SMi88tm7 bafsCOK1moxrJkBRz6zutJ97AJGtDaEt7qu0Pu1bfFsUIMkhf/eq2pOR3zoKzaBU oeVWgXy6N0bxfcnzVvGG =aX90 -END PGP SIGNATURE- ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] cutree: bug fixes. correct the timescale used in getQScale()
On Fri, Dec 27, 2013 at 11:07 PM, Aarthi Thirumalai aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalai # Date 1388165826 -19800 # Fri Dec 27 23:07:06 2013 +0530 # Node ID 964e5bc90ad2a0f80980409046a13b4cbaf11a03 # Parent 8b5c5fe7fbc923684af45e5ff7a0ed5ed6e83db9 cutree: bug fixes. correct the timescale used in getQScale() diff -r 8b5c5fe7fbc9 -r 964e5bc90ad2 source/encoder/ratecontrol.cpp --- a/source/encoder/ratecontrol.cppFri Dec 27 15:30:41 2013 +0530 +++ b/source/encoder/ratecontrol.cppFri Dec 27 23:07:06 2013 +0530 @@ -633,10 +633,11 @@ if (cfg-param.rc.cuTree) { -double scale = curSlice-getSPS()-getVuiParameters()-getTimingInfo()-getTimeScale(); -double units = curSlice-getSPS()-getVuiParameters()-getTimingInfo()-getNumUnitsInTick(); -double timescale = units / scale; -q = pow(BASE_FRAME_DURATION / CLIP_DURATION(frameDuration * timescale), 1 - cfg-param.rc.qCompress); +// Scale and units are obtained from rateNum and rateDenom for videos with fixed frame rates. +double scale = cfg-param.frameRate * 2; +double numTicks = 1; +double timescale = numTicks / scale; +q = pow(BASE_FRAME_DURATION / CLIP_DURATION(2 * timescale), 1 - cfg-param.rc.qCompress); Good catch. Currently, these SPS/VUI parameters arent even set. 
} else q = pow(rce-blurredComplexity, 1 - cfg-param.rc.qCompress); diff -r 8b5c5fe7fbc9 -r 964e5bc90ad2 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Fri Dec 27 15:30:41 2013 +0530 +++ b/source/encoder/slicetype.cpp Fri Dec 27 23:07:06 2013 +0530 @@ -1394,7 +1394,7 @@ memset(Frames[b]-propagateCost, 0, widthInCU * sizeof(uint16_t)); uint16_t StrideInCU = (uint16_t)widthInCU; -for (uint16_t block_y = 0; block_y heightInCU; block_y += 16) +for (uint16_t block_y = 0; block_y heightInCU; block_y++) { int cuIndex = block_y * StrideInCU; /* TODO This function go into ASM */ @@ -1404,7 +1404,7 @@ if (referenced) propagate_cost += widthInCU; -for (uint16_t block_x = 0; block_x widthInCU; block_x += 16, cuIndex++) +for (uint16_t block_x = 0; block_x widthInCU; block_x++, cuIndex++) { int propagate_amount = scratch[block_x]; /* Don't propagate for an intra block. */ ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] Modifications to address review comments to support multiple color space format
This is a CMake + VS configuration issue. A quick Google search - please read the following: http://stackoverflow.com/questions/2849517/linking-problem-fatal-error-lnk1112-module-machine-type-x64-conflicts-with-t On Sat, Jan 11, 2014 at 12:28 AM, Purvin Pandit purv...@hotmail.com wrote: I tried to compile x265 under Windows for VS. I get the following errors; any suggestions? Error 5 error LNK1112: module machine type 'X86' conflicts with target machine type 'x64' X:\x265\multicoreware-x265-b970ffbdd696\build\vc10-x86_64\pixel-a.asm.obj 1 x265-shared Error 6 error LNK1112: module machine type 'x64' conflicts with target machine type 'X86' X:\x265\multicoreware-x265-b970ffbdd696\build\vc10-x86_64\encoder\encoder.dir\Release\NALwrite.obj 1 x265-static Error 7 error LNK1181: cannot open input file 'Release\x265-static.lib' X:\x265\multicoreware-x265-b970ffbdd696\build\vc10-x86_64\LINK cli Thanks, -Purvin ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] rd Level: improve bitrate and psnr in rd level 2
On Fri, Jan 10, 2014 at 2:10 PM, sumala...@multicorewareinc.com wrote: # HG changeset patch # User Sumalatha Polureddy # Date 1389343212 -19800 # Node ID 104fb077a9813f0f3bb94c370e134d0d6d180809 # Parent 80b63c3ee144e6edbafbbe281ad3d1d8505be1f6 rd Level: improve bitrate and psnr in rd level 2 a. Always allow intra mode for mode decision b. increase more merge skips at each depth Results are below CLI: input.y4m -o bitstream.hevc -r recon.y4m --preset veryfast --rd 2 rd3 / previous rd2 / latest rd2 BasketballDrive_1920x1080_50 fps: 4.24/5.615.7 psnr: 33.67/33.95/33.677 birate: 2211/2272/2188 Kimono1_1920x1080_24 fps: 5.45/6.11/6.1 psnr: 35.004/35.144/34.835 birate: 713/740/692 FourPeople_1280x720_60 fps: 7.99/10.86/11.46 psnr: 34.93/35.17/34.931 birate: 320/341/341 big_buck_bunny_360p24 fps: 38.98/49.32/44.46 psnr: 33.21/33.36/33.167 birate: 56/60/56 Johnny_1280x720_60 fps: 8.21/9.58/11.32 psnr: 36.74/37.238/37.01 birate: 203/204/206 diff -r 80b63c3ee144 -r 104fb077a981 source/encoder/compress.cpp --- a/source/encoder/compress.cpp Thu Jan 09 12:50:16 2014 +0550 +++ b/source/encoder/compress.cpp Fri Jan 10 14:10:12 2014 +0530 @@ -517,12 +517,6 @@ bdoIntra = (outBestCU-getCbf(0, TEXT_LUMA) || outBestCU-getCbf(0, TEXT_CHROMA_U) || outBestCU-getCbf(0, TEXT_CHROMA_V)); } -else -{ -uint32_t threshold[4] = { 2, 6000, 1600, 500 }; -int index = 4 - g_convertToBit[outBestCU-getWidth(0)]; -bdoIntra = (outBestCU-m_totalDistortion threshold[index]); -} This change is valid. if (bdoIntra) { xComputeCostIntraInInter(m_intraInInterCU[depth], SIZE_2Nx2N); @@ -587,8 +581,12 @@ } m_search-encodeResAndCalcRdInterCU(m_mergeCU[depth], m_origYuv[depth], bestMergePred, m_tmpResiYuv[depth], m_bestResiYuv[depth], m_tmpRecoYuv[depth], true); - -if (m_mergeCU[depth]-m_totalCost outBestCU-m_totalCost) +double lambda[4]; +lambda[0] = 1.06; +lambda[1] = 1.5; +lambda[2] = 1.1; +lambda[3] = 1.0; This part looks like an artificial way of forcing more skips. 
I'd like to understand why merge-skips have reduced so much in the first place in rd 2, and then fix the root cause. +if (m_mergeCU[depth]-m_totalCost lambda[depth] * outBestCU-m_totalCost) { outBestCU = m_mergeCU[depth]; tempYuv = m_bestRecoYuv[depth]; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] Re:x265-devel Digest, Vol 8, Issue 1
Hello, When open-GOP is disabled, ie closed GOP, then IDR frames are used. Deepthi On Thu, Jan 16, 2014 at 7:44 AM, i...@sina.com wrote: Hi Deepthi, Can openGOP of x265 support random access of video streaming like IDR? 1. Re: [PATCH] slicetype: remove --refresh and use --open-gop(default: enable) (Deepthi Nandakumar) -- Message: 1 Date: Fri, 3 Jan 2014 16:19:29 +0530 From: Deepthi Nandakumar deep...@multicorewareinc.com To: Development for x265 x265-devel@videolan.org, Tom Vaughan tom.vaug...@multicorewareinc.com Subject: Re: [x265] [PATCH] slicetype: remove --refresh and use --open-gop(default: enable) Message-ID: caaeo3ugez1jaq2ta0hj4ycstrtoamw5zxc3tokv3bb3vtjy...@mail.gmail.com Content-Type: text/plain; charset=iso-8859-1 We'd like to request any relevant feedback on this patch. Essentially, we're removing the refresh parameter, since it conflicts with the openGOP parameter. By default, an openGOP will be enabled, which means we could have both RADL/RASL following CRA pictures. The encoder could control the references, and thus ensure that all leading pictures are decodable (ie that they are always RADL). But IMO, this destroys the purpose of openGOP (?). Thoughts/opinions welcome. Thanks, Deepthi ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] asm : saturation bug fix for luma_vss asm routine
This patch is pending, right Nabajit? I haven't pushed the luma_vss/chroma_vss assembly patches or the testbench edits to luma_vss. On Wed, Jan 29, 2014 at 1:56 PM, chen chenm...@163.com wrote: @@ -5105,8 +5108,9 @@ pmaddwd m5, [r6 + 3 * 16] paddd m1, m5 ;m1=[1+2+3+4+5+6+7+8] Row2 end psrad m1, 6 - -packssdw m0, m1 +pand m1, m7 + +packusdw m0, m1 movlps [r2], m0 movhps [r2 + r3], m0 PAND + PACKUSDW may avoid the overflow problem, but it is the wrong way here; as you said, you got a result value 0x8D84, which overflows 16 bits, so we need to find the real reason. I checked the HM code, it uses Short, so I suggest you capture the input data and put it into HM to check HM's output. ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] testbench: stress test support for all pixelharness functions
Does not apply at the tip. Please pull and resend. On Wed, Feb 5, 2014 at 12:31 PM, muru...@multicorewareinc.com wrote: # HG changeset patch # User Murugan Vairavel muru...@multicorewareinc.com # Date 1391582491 -19800 # Wed Feb 05 12:11:31 2014 +0530 # Node ID b690a5b68676e2b4800d5e0a490d04ee05b9435f # Parent 711827aaab063bb0e02aa6ae52cdd9e7b8b9fef4 testbench: stress test support for all pixelharness functions diff -r 711827aaab06 -r b690a5b68676 source/test/pixelharness.cpp --- a/source/test/pixelharness.cpp Tue Feb 04 13:00:44 2014 +0530 +++ b/source/test/pixelharness.cpp Wed Feb 05 12:11:31 2014 +0530 @@ -33,39 +33,84 @@ #define INCR 32 #define STRIDE 64 #define ITERS 100 +#define MAX_HEIGHT 64 +#define PAD_ROWS 64 +#define BUFFSIZE STRIDE * (MAX_HEIGHT + PAD_ROWS) + INCR * ITERS +#define TEST_CASES 3 +#define SMAX (1 12) +#define SMIN (-1 12) PixelHarness::PixelHarness() { -int maxheight = 64; -int padrows = 64; -int bufsize = STRIDE * (maxheight + padrows) + INCR * ITERS; +int bufsize = STRIDE * (MAX_HEIGHT + PAD_ROWS) + INCR * ITERS; /* 64 pixels wide, 2k deep */ -pbuf1 = X265_MALLOC(pixel, bufsize); -pbuf2 = X265_MALLOC(pixel, bufsize); -pbuf3 = X265_MALLOC(pixel, bufsize); -pbuf4 = X265_MALLOC(pixel, bufsize); +pbuf1 = (pixel*)X265_MALLOC(pixel, bufsize); +pbuf2 = (pixel*)X265_MALLOC(pixel, bufsize); +pbuf3 = (pixel*)X265_MALLOC(pixel, bufsize); +pbuf4 = (pixel*)X265_MALLOC(pixel, bufsize); -ibuf1 = X265_MALLOC(int, bufsize); +ibuf1 = (int*)X265_MALLOC(int, bufsize); -sbuf1 = X265_MALLOC(int16_t, bufsize); -sbuf2 = X265_MALLOC(int16_t, bufsize); -sbuf3 = X265_MALLOC(int16_t, bufsize); +sbuf1 = (int16_t*)X265_MALLOC(int16_t, bufsize); +sbuf2 = (int16_t*)X265_MALLOC(int16_t, bufsize); +sbuf3 = (int16_t*)X265_MALLOC(int16_t, bufsize); -if (!pbuf1 || !pbuf2 || !pbuf3 || !pbuf4 || !sbuf1 || !sbuf2 || !sbuf3 || !ibuf1) +/*Test Case buffer array */ +pixel_test_buff = (pixel**)X265_MALLOC(pixel*, TEST_CASES); +short_test_buff = (int16_t**)X265_MALLOC(int16_t*, 
TEST_CASES); +short_test_buff1 = (int16_t**)X265_MALLOC(int16_t*, TEST_CASES); +int_test_buff= (int**)X265_MALLOC(int*, TEST_CASES); + +if (!pbuf1 || !pbuf2 || !pbuf3 || !pbuf4 || !sbuf1 || !sbuf2 || !sbuf3 || !ibuf1 || +!pixel_test_buff || !short_test_buff || !int_test_buff || !short_test_buff1) { fprintf(stderr, malloc failed, unable to initiate tests!\n); exit(1); } +for (int i = 0; i TEST_CASES; i++) +{ +pixel_test_buff[i] = (pixel*)X265_MALLOC(pixel, BUFFSIZE); +short_test_buff[i] = (int16_t*)X265_MALLOC(int16_t, BUFFSIZE); +short_test_buff1[i] = (int16_t*)X265_MALLOC(int16_t, BUFFSIZE); +int_test_buff[i]= (int*)X265_MALLOC(int, BUFFSIZE); +if (!pixel_test_buff[i] || !short_test_buff[i] || !int_test_buff[i] || !short_test_buff1[i]) +{ +fprintf(stderr, Init_Test_Case_buffers: malloc failed, unable to initiate tests!\n); +exit(-1); +} +} + +/*[0] --- Random values */ +/*[1] --- Minimum*/ +/*[2] --- Maximum*/ + +for (int i = 0; i BUFFSIZE; i++) +{ +pixel_test_buff[0][i] = rand() % PIXEL_MAX; +short_test_buff[0][i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; //max(SHORT_MIN, min(rand(), SMAX)); +short_test_buff1[0][i] = rand() PIXEL_MAX; //For block copy only +int_test_buff[0][i] = rand() % INT32_MAX; + +pixel_test_buff[1][i] = PIXEL_MIN; +short_test_buff[1][i] = SMIN; +short_test_buff1[1][i] = PIXEL_MIN; +int_test_buff[1][i] = SHORT_MIN; + +pixel_test_buff[2][i] = PIXEL_MAX; +short_test_buff[2][i] = SMAX; +short_test_buff1[2][i] = PIXEL_MAX; +int_test_buff[2][i] = SHORT_MAX; +} + + for (int i = 0; i bufsize; i++) { pbuf1[i] = rand() PIXEL_MAX; pbuf2[i] = rand() PIXEL_MAX; pbuf3[i] = rand() PIXEL_MAX; pbuf4[i] = rand() PIXEL_MAX; - -#define SMAX (1 12) sbuf1[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; //max(SHORT_MIN, min(rand(), SMAX)); sbuf2[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; //max(SHORT_MIN, min(rand(), SMAX)); ibuf1[i] = (rand() % (2 * SMAX + 1)) - SMAX - 1; @@ -83,6 +128,17 @@ X265_FREE(sbuf1); X265_FREE(sbuf2); X265_FREE(sbuf3); +for (int i = 0; i 
TEST_CASES; i++) +{ +X265_FREE(pixel_test_buff[i]); +X265_FREE(short_test_buff[i]); +X265_FREE(short_test_buff1[i]); +X265_FREE(int_test_buff[i]); +} +X265_FREE(pixel_test_buff); +X265_FREE(short_test_buff); +X265_FREE(short_test_buff1); +X265_FREE(int_test_buff); }
Re: [x265] [PATCH] slicetype: bug fix for cuTree, use type int32_t for listamount and propagate_amount to calculate valid propagate_cost
On Fri, Feb 7, 2014 at 5:48 AM, g...@multicorewareinc.com wrote: # HG changeset patch # User Gopu Govindaswamy # Date 1391732264 28800 # Thu Feb 06 16:17:44 2014 -0800 # Node ID 0198815523c1e653fee59f8b6ee58bffbfb12131 # Parent 634bc0b1c24653dd254df77cd80f96f81e71e888 slicetype: bug fix for cuTree, use type int32_t for listamount and propagate_amount to calculate valid propagate_cost diff -r 634bc0b1c246 -r 0198815523c1 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Wed Feb 05 23:10:22 2014 -0600 +++ b/source/encoder/slicetype.cpp Thu Feb 06 16:17:44 2014 -0800 @@ -824,10 +824,10 @@ void Lookahead::estimateCUPropagate(Lowres **frames, double averageDuration, int p0, int p1, int b, int referenced) { uint16_t *refCosts[2] = { frames[p0]-propagateCost, frames[p1]-propagateCost }; -int distScaleFactor = (((b - p0) 8) + ((p1 - p0) 1)) / (p1 - p0); -int bipredWeight = cfg-param.bEnableWeightedBiPred ? 64 - (distScaleFactor 2) : 32; +int32_t distScaleFactor = (((b - p0) 8) + ((p1 - p0) 1)) / (p1 - p0); +int32_t bipredWeight = cfg-param.bEnableWeightedBiPred ? 64 - (distScaleFactor 2) : 32; MV *mvs[2] = { frames[b]-lowresMvs[0][b - p0 - 1], frames[b]-lowresMvs[1][p1 - b - 1] }; -int bipredWeights[2] = { bipredWeight, 64 - bipredWeight }; +int32_t bipredWeights[2] = { bipredWeight, 64 - bipredWeight }; memset(scratch, 0, widthInCU * sizeof(int)); @@ -840,8 +840,8 @@ if (!referenced) memset(frames[b]-propagateCost, 0, widthInCU * sizeof(uint16_t)); -uint16_t StrideInCU = (uint16_t)widthInCU; -for (uint16_t blocky = 0; blocky heightInCU; blocky++) +int32_t StrideInCU = widthInCU; +for (int32_t blocky = 0; blocky heightInCU; blocky++) Why have these unsigned loop indices been changed to signed? rest looks valid. 
{ int cuIndex = blocky * StrideInCU; /* TODO This function go into ASM */ @@ -851,24 +851,24 @@ if (referenced) propagateCost += widthInCU; -for (uint16_t blockx = 0; blockx widthInCU; blockx++, cuIndex++) +for (int32_t blockx = 0; blockx widthInCU; blockx++, cuIndex++) { -int propagate_amount = scratch[blockx]; +int32_t propagate_amount = scratch[blockx]; /* Don't propagate for an intra block. */ if (propagate_amount 0) { /* Access width-2 bitfield. */ -int lists_used = frames[b]-lowresCosts[b - p0][p1 - b][cuIndex] LOWRES_COST_SHIFT; +int32_t lists_used = frames[b]-lowresCosts[b - p0][p1 - b][cuIndex] LOWRES_COST_SHIFT; /* Follow the MVs to the previous frame(s). */ -for (uint16_t list = 0; list 2; list++) +for (int32_t list = 0; list 2; list++) { if ((lists_used list) 1) { -#define CLIP_ADD(s, x) (s) = X265_MIN((s) + (x), (1 16) - 1) -uint16_t listamount = (uint16_t)propagate_amount; +#define CLIP_ADD(s, x) (s) = (uint16_t) X265_MIN((s) + (x), (1 16) - 1) +int32_t listamount = propagate_amount; /* Apply bipred weighting. */ if (lists_used == 3) -listamount = (uint16_t)(listamount * bipredWeights[list] + 32) 6; +listamount = (listamount * bipredWeights[list] + 32) 6; /* Early termination for simple case of mv0. */ if (!mvs[list][cuIndex].word) @@ -877,20 +877,20 @@ continue; } -uint16_t x = mvs[list][cuIndex].x; -uint16_t y = mvs[list][cuIndex].y; -int cux = (x 5) + blockx; -int cuy = (y 5) + blocky; -int idx0 = cux + cuy * StrideInCU; -int idx1 = idx0 + 1; -int idx2 = idx0 + StrideInCU; -int idx3 = idx0 + StrideInCU + 1; +int32_t x = mvs[list][cuIndex].x; +int32_t y = mvs[list][cuIndex].y; +int32_t cux = (x 5) + blockx; +int32_t cuy = (y 5) + blocky; +int32_t idx0 = cux + cuy * StrideInCU; +int32_t idx1 = idx0 + 1; +int32_t idx2 = idx0 + StrideInCU; +int32_t idx3 = idx0 + StrideInCU + 1; x = 31; y = 31; -uint16_t idx0weight = (uint16_t)(32 - y) * (32 - x); -uint16_t idx1weight = (uint16_t)(32 - y) * x; -uint16_t idx2weight = (uint16_t)y *
Re: [x265] I would like to contribute to x265
Hi, Can you take a look at what the following does? Does the decoder actually detect HRD parameters? # HG changeset patch # User Deepthi Nandakumar deep...@multicorewareinc.com # Date 1387524067 -19800 # Node ID 3e794e059f7ffe0edaaf5432df5297631a0f44f6 # Parent 8133378e225020dbdd747d42a021588bef679ec3 Enable VUI diff -r 8133378e2250 -r 3e794e059f7f source/encoder/encoder.cpp --- a/source/encoder/encoder.cppThu Dec 19 17:47:16 2013 +0530 +++ b/source/encoder/encoder.cppFri Dec 20 12:51:07 2013 +0530 @@ -1367,13 +1367,13 @@ m_bUseASR = false; // adapt search range based on temporal distances m_recoveryPointSEIEnabled = 0; m_bufferingPeriodSEIEnabled = 0; -m_pictureTimingSEIEnabled = 0; +m_pictureTimingSEIEnabled = 1; m_displayOrientationSEIAngle = 0; m_gradualDecodingRefreshInfoEnabled = 0; m_decodingUnitInfoSEIEnabled = 0; m_useScalingListId = 0; m_activeParameterSetsSEIEnabled = 0; -m_vuiParametersPresentFlag = false; +m_vuiParametersPresentFlag = true; m_minSpatialSegmentationIdc = 0; m_aspectRatioIdc = 0; m_sarWidth = 0; diff -r 8133378e2250 -r 3e794e059f7f source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cppThu Dec 19 17:47:16 2013 +0530 +++ b/source/encoder/frameencoder.cppFri Dec 20 12:51:07 2013 +0530 @@ -136,7 +136,7 @@ m_sps.setNumLongTermRefPicSPS(0); if (m_cfg-getPictureTimingSEIEnabled() || m_cfg-getDecodingUnitInfoSEIEnabled()) { -m_sps.setHrdParameters(m_cfg-param.frameRate, 0, m_cfg-param.rc.bitrate, m_cfg-param.bframes 0); +m_sps.setHrdParameters(m_cfg-param.frameRate, 1, m_cfg-param.rc.bitrate, m_cfg-param.bframes 0); } if (m_cfg-getBufferingPeriodSEIEnabled() || m_cfg-getPictureTimingSEIEnabled() || m_cfg-getDecodingUnitInfoSEIEnabled()) { Thanks, Deepthi On Tue, Feb 11, 2014 at 7:14 AM, dave dtyx...@gmail.com wrote: On 02/10/2014 01:41 PM, Steve Borho wrote: On Mon, Feb 10, 2014 at 1:46 PM, dave dtyx...@gmail.com wrote: On 02/10/2014 10:41 AM, Steve Borho wrote: On Thu, Jan 30, 2014 at 12:31 PM, Steve Borho st...@borho.org wrote: 
On Wed, Jan 29, 2014 at 5:13 PM, dave dtyx...@gmail.com wrote: Hi All, I would like to offer my services and contribute to x265 development. From the wiki it looks like there are plenty things to do but I don't want to duplicate or interfere with the work of anyone else so if someone can give me something to do I would appreciate it. I am open to anything needed by x265, both c/c++ and assembly work though I don't mind being given something simple just to get started. You can find me in the x265 irc channel as dtyx265. Hi Dave. I've been collecting the more pressing TODO items in the bitbucket repository's issue tracker: https://bitbucket.org/multicoreware/x265/issues?status=newstatus=open #21 (enabling the VUI message) is the most pressing of the simple problems. That would be a great place to start. Hi Dave, How are things going on this front? -- Steve Borho ___ x265-devel mailing listx265-devel@videolan.orghttps://mailman.videolan.org/listinfo/x265-devel I studied the VUI in the h265 spec, appendix E and have been studying the x265 code from your suggested starting point, setVuiParametersPresentFlag(). It looks like most fields are set to spec defaults. Some look like values that can be options specified by the user, others look like values that are calculated from encoding a video. Can you tell me more about just what pts and dts are? I understand generally what they are but it seems like there are a few places in the VUI where they might play a role in calculating values. I haven't had a chance yet to compare to x264 code yet so if it all becomes obvious there then I will get it. pts is the presentation time stamp of a frame, the point at which it is supposed to be displayed by the decoder. dts is the decode time stamp of a frame, the point when the decoder is supposed to begin decoding it. Both are usually specified in units of the frame rate. 
Since the pts and dts are frame parameters and the VUI is a stream parameter, I don't think they are directly related, except that the denominator is likely signaled in some way. I tried to create a user account on bitbucket so I could have issue 21 assigned to me but I keep getting BB might not allow issues to be assigned to users who don't have push access anyway, so don't be too concerned about this. You can add a comment to the issue stating you are working on it. Patches should go through this mailing list anyway. -- Steve ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel I think the denominator that you are looking for is already set in class TimingInfo. vui_num_units_in_tick(confusingly named, if I
Re: [x265] [Bug]reconstruction yuv picture diff with HM decoder out
Hi, On Mon, Feb 17, 2014 at 12:38 PM, z...@rock-chips.com z...@rock-chips.com wrote: hi, x265 members 1. [Bug report] We found that the x265 (version 0.7+2-4b8901ae94ece1ac ) recon yuv data differs from the HM decoder output when configured in CQP mode with QP=34 and rd set to 0, 1 or 2. I think it's a serious bug! with command like this --input F:\yuv\Samsung_1080p_25.yuv --input-res 1920x1080 --fps 24 -q 34 -o E:\out1.bin -r E:\rec1.yuv --psnr --rd 0 --input F:\yuv\Samsung_1080p_25.yuv --input-res 1920x1080 --fps 24 -q 34 -o E:\out1.bin -r E:\rec1.yuv --psnr --rd 1 --input F:\yuv\Samsung_1080p_25.yuv --input-res 1920x1080 --fps 24 -q 34 -o E:\out1.bin -r E:\rec1.yuv --psnr --rd 2 rd 0 and 1 are not finalised yet. We are investigating the hash mismatch in rd 2 clip with our clips; would be good if you can share the source. 2. [Proposal] The x265 codec encodes video content using a fixed quantization step, thus leading to a variable bitrate stream which may not be suitable for the many multimedia applications where a constant bandwidth is required. Therefore, an adaptive quantization step may be better. Adaptive Quantization is already implemented. You may use --aq-mode 1 (enabled by default in the medium preset). I'm looking forward to your reply, thanks z...@rock-chips.com ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] asm: added 16bpp support for dct[4x4, 8x8], idct4x4, dst4x4 and idst4x4 primitives
Has this been fixed? Murugan - have you reproduced/fixed this issue? On Sat, Feb 15, 2014 at 12:13 AM, Steve Borho st...@borho.org wrote: On Fri, Feb 14, 2014 at 12:39 PM, Steve Borho st...@borho.org wrote: On Fri, Feb 14, 2014 at 4:41 AM, dnyanesh...@multicorewareinc.comwrote: # HG changeset patch # User Dnyaneshwar G dnyanesh...@multicorewareinc.com # Date 1392374441 -19800 # Fri Feb 14 16:10:41 2014 +0530 # Node ID 831536babdc08f1553a10754bf2a4f4af6aa1695 # Parent ed310b17ff6681f191c85341cf6efe7a50770143 asm: added 16bpp support for dct[4x4, 8x8], idct4x4, dst4x4 and idst4x4 primitives with this patch applied, if I fixup the elif problems, I get occasional dequant test failures on 8bpp mac. steve@zeppelin ./test/TestBench Using random seed 52FE6216 8bpp Testing primitives: SSE2 Testing primitives: SSE3 Testing primitives: SSSE3 Testing primitives: SSE4 dequant: Failed! Sorry, the dequant test failures appear to be caused by Murugan's testbench changes. I'm dequeuing those as well until we understand why the test is failing. -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [Bug]reconstruction yuv picture diff with HM decoder out
Hi, We've checked for hash mismatches on all our hash clips with --rd 2. Can you share the source that caused a mismatch? That will help us identify the issue. Thanks, Deepthi On Mon, Feb 17, 2014 at 12:38 PM, z...@rock-chips.com z...@rock-chips.com wrote: hi, x265 members 1. [Bug report] We found that the x265 (version 0.7+2-4b8901ae94ece1ac) recon YUV data differs from the HM decoder output when CQP mode is configured with QP=34 and rd is set to 0, 1 or 2. I think it's a serious bug! The command lines look like this: --input F:\yuv\Samsung_1080p_25.yuv --input-res 1920x1080 --fps 24 -q 34 -o E:\out1.bin -r E:\rec1.yuv --psnr --rd 0 --input F:\yuv\Samsung_1080p_25.yuv --input-res 1920x1080 --fps 24 -q 34 -o E:\out1.bin -r E:\rec1.yuv --psnr --rd 1 --input F:\yuv\Samsung_1080p_25.yuv --input-res 1920x1080 --fps 24 -q 34 -o E:\out1.bin -r E:\rec1.yuv --psnr --rd 2 2. [Proposal] The x265 codec encodes video content using a fixed quantization step, thus leading to a variable bitrate stream which may not be suitable for the many multimedia applications where a constant bandwidth is required. Therefore, an adaptive quantization step may be better. I'm looking forward to your reply, thanks. z...@rock-chips.com ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] encoder: enable VUI; set HRD parameters in SPS
This patch has been superseded by the one that enables it from the CLI. I removed this one from the patch list on Patchwork. On Fri, Feb 21, 2014 at 1:14 AM, Steve Borho st...@borho.org wrote: On Thu, Feb 20, 2014 at 12:17 PM, dave dtyx...@gmail.com wrote: # HG changeset patch # User Deepthi Nandakumar deep...@multicorewareinc.com # Date 1392883371 -19800 # Node ID 3934859d310bcc3f54ad1855dd94bd71eb0e7457 # Parent 3389061b75a486e004409ab628c46fed39d03b72 encoder: enable VUI; set HRD parameters in SPS. You can now add a VUI on the CLI. Use --vui to get a VUI with all default values, or any VUI-specific option, including --nal-hrd to generate a VUI with an HRD, though currently the HRD will only have default values. Agreed, we should try to follow x264's CLI and defaults as much as possible for new features that come online. -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] Add x265 API defination to api.cpp
Ok, looks good except x265_ssim should not be extern, and should be a part of common.cpp and not api.cpp. On Tue, Feb 25, 2014 at 2:33 PM, sa...@multicorewareinc.com wrote: # HG changeset patch # User Sagar Kotecha sa...@multicorewareinc.com # Date 1393318766 -19800 # Tue Feb 25 14:29:26 2014 +0530 # Node ID 504c2a959e5815cb3020033289137f64cb458aee # Parent a36a669d09e89332dd91817afdf139853ba3ad03 Add x265 API defination to api.cpp diff -r a36a669d09e8 -r 504c2a959e58 source/common/common.cpp --- a/source/common/common.cpp Tue Feb 25 02:22:06 2014 -0600 +++ b/source/common/common.cpp Tue Feb 25 14:29:26 2014 +0530 @@ -134,23 +134,3 @@ va_end(arg); } -extern C -x265_picture *x265_picture_alloc() -{ -return (x265_picture*)x265_malloc(sizeof(x265_picture)); -} - -extern C -void x265_picture_init(x265_param *param, x265_picture *pic) -{ -memset(pic, 0, sizeof(x265_picture)); - -pic-bitDepth = param-internalBitDepth; -pic-colorSpace = param-internalCsp; -} - -extern C -void x265_picture_free(x265_picture *p) -{ -return x265_free(p); -} diff -r a36a669d09e8 -r 504c2a959e58 source/encoder/CMakeLists.txt --- a/source/encoder/CMakeLists.txt Tue Feb 25 02:22:06 2014 -0600 +++ b/source/encoder/CMakeLists.txt Tue Feb 25 14:29:26 2014 +0530 @@ -58,4 +58,5 @@ compress.cpp reference.cpp reference.h encoder.cpp encoder.h + api.cpp weightPrediction.cpp) diff -r a36a669d09e8 -r 504c2a959e58 source/encoder/api.cpp --- /dev/null Thu Jan 01 00:00:00 1970 + +++ b/source/encoder/api.cppTue Feb 25 14:29:26 2014 +0530 @@ -0,0 +1,199 @@ +/* + * Copyright (C) 2013 x265 project + * + * Authors: Steve Borho st...@borho.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licens...@multicorewareinc.com. + */ + +#include TLibCommon/CommonDef.h +#include param.h +#include encoder.h +#include frameencoder.h + +using namespace x265; + +extern C +x265_encoder *x265_encoder_open(x265_param *param) +{ +x265_setup_primitives(param, -1); // -1 means auto-detect if uninitialized + +if (x265_check_params(param)) +return NULL; + +if (x265_set_globals(param)) +return NULL; + +Encoder *encoder = new Encoder; +if (encoder) +{ +// these may change params for auto-detect, etc +encoder-determineLevelAndProfile(param); +encoder-configure(param); + +// save a copy of final parameters in TEncCfg +memcpy(encoder-param, param, sizeof(*param)); + +x265_print_params(param); +encoder-create(); +encoder-init(); +} + +return encoder; +} + +extern C +int x265_encoder_headers(x265_encoder *enc, x265_nal **pp_nal, uint32_t *pi_nal) +{ +if (!pp_nal || !enc) +return 0; + +Encoder *encoder = static_castEncoder*(enc); + +int ret = 0; +NALUnitEBSP *nalunits[MAX_NAL_UNITS] = { 0, 0, 0, 0, 0 }; +if (!encoder-getStreamHeaders(nalunits)) +{ +int nalcount = encoder-extractNalData(nalunits); +*pp_nal = encoder-m_nals[0]; +if (pi_nal) *pi_nal = nalcount; +} +else if (pi_nal) +{ +*pi_nal = 0; +ret = -1; +} + +for (int i = 0; i MAX_NAL_UNITS; i++) +{ +if (nalunits[i]) +{ +free(nalunits[i]-m_nalUnitData); +X265_FREE(nalunits[i]); +} +} + +return ret; +} + +extern C +int 
x265_encoder_encode(x265_encoder *enc, x265_nal **pp_nal, uint32_t *pi_nal, x265_picture *pic_in, x265_picture *pic_out) +{ +if (!enc) +return -1; + +Encoder *encoder = static_castEncoder*(enc); +NALUnitEBSP *nalunits[MAX_NAL_UNITS] = { 0, 0, 0, 0, 0 }; +int numEncoded = encoder-encode(!pic_in, pic_in, pic_out, nalunits); + +if (pp_nal numEncoded 0) +{ +int nalcount = encoder-extractNalData(nalunits); +*pp_nal = encoder-m_nals[0];
Re: [x265] [PATCH] all_angs_pred_32x32, asm code improvement
The earlier patch was pushed, Praveen. Can you send a new patch which just removes the unused statements? ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH 2/2] Fix x86_64 build
Can you convert this to an hg patch? Git patches don't apply cleanly On Wed, Mar 5, 2014 at 4:08 PM, Rafaël Carré fun...@videolan.org wrote: --- source/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index 2febfaa..d2fea1a 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -21,7 +21,7 @@ configure_file(${PROJECT_SOURCE_DIR}/x265_config.h.in SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH}) -if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL i386 OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL x86) +if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL i386 OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL x86 OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL x86_64) set(X86 1) add_definitions(-DX265_ARCH_X86=1) if(${CMAKE_SIZEOF_VOID_P} MATCHES 8) -- 1.9.0 ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] fix chroma lambda weighting
Thanks, very interesting fix. Can you give us some background here? Where did the divide by 3.0 come from? On Tue, Mar 25, 2014 at 9:01 AM, Satoshi Nakagawa nakagawa...@oki.com wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1395672158 -32400 # Mon Mar 24 23:42:38 2014 +0900 # Node ID 08584b5913bce6a5f9d2f0d408fcdace6aa83a65 # Parent fdd7c6168cf42a11240ff1c7fc7b401605524db2 fix chroma lambda weighting diff -r fdd7c6168cf4 -r 08584b5913bc source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Fri Mar 21 14:44:35 2014 -0500 +++ b/source/encoder/frameencoder.cpp Mon Mar 24 23:42:38 2014 +0900 @@ -335,11 +335,10 @@ // instead we weight the distortion of chroma. int chromaQPOffset = slice-getPPS()-getChromaCbQpOffset() + slice-getSliceQpDeltaCb(); int qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset); -double cbWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc])); // takes into account of the chroma qp mapping and chroma qp Offset - +double cbWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0); // takes into account of the chroma qp mapping and chroma qp Offset chromaQPOffset = slice-getPPS()-getChromaCrQpOffset() + slice-getSliceQpDeltaCr(); qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset); -double crWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc])); // takes into account of the chroma qp mapping and chroma qp Offset +double crWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0); // takes into account of the chroma qp mapping and chroma qp Offset double chromaLambda = lambda / crWeight; m_rows[row].m_search.setQPLambda(qp, lambda, chromaLambda); @@ -376,10 +375,10 @@ int qpc; int chromaQPOffset = slice-getPPS()-getChromaCbQpOffset() + slice-getSliceQpDeltaCb(); qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset); -double cbWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc])); // takes into account of the chroma qp mapping and chroma qp Offset +double cbWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0); //
takes into account of the chroma qp mapping and chroma qp Offset chromaQPOffset = slice-getPPS()-getChromaCrQpOffset() + slice-getSliceQpDeltaCr(); qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset); -double crWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc])); // takes into account of the chroma qp mapping and chroma qp Offset +double crWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0); // takes into account of the chroma qp mapping and chroma qp Offset double chromaLambda = lambda / crWeight; // NOTE: set SAO lambda every Frame ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] fix: calcrecon asm code for HIGH_BIT_DEPTH
Hi, The dependent patch on calcRecon has been backed out. This does not apply anymore. Deepthi On Wed, Apr 2, 2014 at 12:45 PM, Satoshi Nakagawa nakagawa...@oki.comwrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1396422746 -32400 # Wed Apr 02 16:12:26 2014 +0900 # Node ID a7e96e9909068758588832e47637c7f8e30e5228 # Parent 03bad90e94adce6fb89c6d5edb86919a1e783402 fix: calcrecon asm code for HIGH_BIT_DEPTH diff -r 03bad90e94ad -r a7e96e990906 source/common/x86/pixel-util8.asm --- a/source/common/x86/pixel-util8.asm Wed Apr 02 06:51:35 2014 +0530 +++ b/source/common/x86/pixel-util8.asm Wed Apr 02 16:12:26 2014 +0900 @@ -91,16 +91,16 @@ ; store recipred[] movh[r3], m0 -movhps [r3 + r5], m0 +movhps [r3 + r6], m0 ; store recqt[] movh[r2], m0 -movhps [r2 + r6], m0 +movhps [r2 + r5], m0 lea r0, [r0 + r4 * 2] lea r1, [r1 + r4 * 2] -lea r2, [r2 + r6 * 2] -lea r3, [r3 + r5 * 2] +lea r2, [r2 + r5 * 2] +lea r3, [r3 + r6 * 2] dec t7b jnz.loop @@ -291,7 +291,7 @@ ; store recqt[] movu[r2], m0 movu[r2 + 16], m1 -add r2, r6 +add r2, r5 lea r0, [r0 + r4 * 2] lea r1, [r1 + r4 * 2] ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] cli: set aq strength to 0 in CQP
On Wed, Apr 2, 2014 at 5:20 PM, Aarthi Priya Thirumalai aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalai # Date 1396439378 -19800 # Wed Apr 02 17:19:38 2014 +0530 # Node ID 32ba982c5279dce5abd718453b3c1a6affb51ce5 # Parent 03bad90e94adce6fb89c6d5edb86919a1e783402 param: set aq strength to 0 in CQP diff -r 03bad90e94ad -r 32ba982c5279 source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Wed Apr 02 06:51:35 2014 +0530 +++ b/source/encoder/encoder.cpp Wed Apr 02 17:19:38 2014 +0530 @@ -1415,6 +1415,7 @@ p-rc.aqMode = X265_AQ_NONE; p-rc.bitrate = 0; p-rc.cuTree = 0; +p-rc.aqStrength = 0; } if (p-rc.aqMode == 0 p-rc.cuTree) @@ -1434,6 +1435,11 @@ p-rc.aqMode = X265_AQ_NONE; } +if (p-rc.aqMode = X265_AQ_NONE p-rc.cuTree == 0) typo above (==), please send a new patch. +{ +p-rc.aqStrength = 0; +} + if (p-internalCsp != X265_CSP_I420) { x265_log(p, X265_LOG_WARNING, !! HEVC Range Extension specifications are not finalized !!\n); On Wed, Apr 2, 2014 at 4:46 PM, Tim Walker tdskywal...@gmail.com wrote: On 02 Apr 2014, at 08:11, aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalai # Date 1396419089 -19800 # Wed Apr 02 11:41:29 2014 +0530 # Node ID 4d614af01c0c101b7862cd9aa5675457b14af1d4 # Parent 03bad90e94adce6fb89c6d5edb86919a1e783402 cli: set aq strength to 0 in CQP The commit message is wrong. This seems to affect libx265, not the command-line interface. Tim ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] cli: set aq strength to 0 in CQP
Never mind, fixed. On Wed, Apr 2, 2014 at 5:37 PM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: On Wed, Apr 2, 2014 at 5:20 PM, Aarthi Priya Thirumalai aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalai # Date 1396439378 -19800 # Wed Apr 02 17:19:38 2014 +0530 # Node ID 32ba982c5279dce5abd718453b3c1a6affb51ce5 # Parent 03bad90e94adce6fb89c6d5edb86919a1e783402 param: set aq strength to 0 in CQP diff -r 03bad90e94ad -r 32ba982c5279 source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Wed Apr 02 06:51:35 2014 +0530 +++ b/source/encoder/encoder.cpp Wed Apr 02 17:19:38 2014 +0530 @@ -1415,6 +1415,7 @@ p-rc.aqMode = X265_AQ_NONE; p-rc.bitrate = 0; p-rc.cuTree = 0; +p-rc.aqStrength = 0; } if (p-rc.aqMode == 0 p-rc.cuTree) @@ -1434,6 +1435,11 @@ p-rc.aqMode = X265_AQ_NONE; } +if (p-rc.aqMode = X265_AQ_NONE p-rc.cuTree == 0) typo above (==), please send a new patch. +{ +p-rc.aqStrength = 0; +} + if (p-internalCsp != X265_CSP_I420) { x265_log(p, X265_LOG_WARNING, !! HEVC Range Extension specifications are not finalized !!\n); On Wed, Apr 2, 2014 at 4:46 PM, Tim Walker tdskywal...@gmail.com wrote: On 02 Apr 2014, at 08:11, aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalai # Date 1396419089 -19800 # Wed Apr 02 11:41:29 2014 +0530 # Node ID 4d614af01c0c101b7862cd9aa5675457b14af1d4 # Parent 03bad90e94adce6fb89c6d5edb86919a1e783402 cli: set aq strength to 0 in CQP The commit message is wrong. This seems to affect libx265, not the command-line interface. Tim ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] vbv: log frame-average QPs for VBV; even when AQ is disabled
Sure. On Fri, Apr 11, 2014 at 11:41 AM, Aarthi Priya Thirumalai aar...@multicorewareinc.com wrote: Hello, reminder: this bug fix is still pending. Can we have this patch pushed in to fix the bug, if there are no problems with it? On Tue, Apr 8, 2014 at 8:33 PM, aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalai # Date 1396969086 -19800 # Tue Apr 08 20:28:06 2014 +0530 # Node ID cac0dcd5a5c2470194d58057d9decd38da3e4405 # Parent b5caca9954f36fc8e1cfb9e25f96288bf3aa18e2 vbv: log frame-average QPs for VBV; even when AQ is disabled. diff -r b5caca9954f3 -r cac0dcd5a5c2 source/encoder/ratecontrol.cpp --- a/source/encoder/ratecontrol.cppTue Apr 08 16:13:11 2014 +0530 +++ b/source/encoder/ratecontrol.cppTue Apr 08 20:28:06 2014 +0530 @@ -1027,6 +1027,8 @@ pic-m_avgQpRc /= (pic-getFrameHeightInCU() * pic-getFrameWidthInCU()); rce-qpaRc = pic-m_avgQpRc; +// copy avg RC qp to m_avgQpAq. To print out the correct qp when aq/cutree is disabled. +pic-m_avgQpAq = pic-m_avgQpRc; } if (pic-m_qpaAq) ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] fix: constrained intra
Thanks, verified and pushed. Do you mind including some more information on this option to the docs? This only affects intra decisions in inter-slices, I think? Deepthi On Tue, Apr 15, 2014 at 10:16 AM, Satoshi Nakagawa nakagawa...@oki.comwrote: Was it generating hash mistakes without these fixes? Yes, when encoding with --constraind-intra option. -Original Message- From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of Steve Borho Sent: Tuesday, April 15, 2014 1:29 PM To: Development for x265 Subject: Re: [x265] fix: constrained intra On Mon, Apr 14, 2014 at 10:36 PM, Satoshi Nakagawa nakagawa...@oki.com wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1397532876 -32400 # Tue Apr 15 12:34:36 2014 +0900 # Node ID 5dde9f4817813e96116df5b86925fa3fc5eff2a8 # Parent 08d64a70594ed31cd80046bd4a7e9fa52119be47 fix: constrained intra Was it generating hash mistakes without these fixes? diff -r 08d64a70594e -r 5dde9f481781 source/Lib/TLibCommon/TComPattern.cpp --- a/source/Lib/TLibCommon/TComPattern.cpp Mon Apr 14 13:18:18 2014 -0500 +++ b/source/Lib/TLibCommon/TComPattern.cpp Tue Apr 15 12:34:36 2014 +0900 @@ -79,12 +79,24 @@ int leftUnits = cuHeightInUnits 1; partIdxLB= g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((cuHeightInUnits - 1) * partIdxStride)]; -bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT); -numIntraNeighbor += (int)(bNeighborFlags[leftUnits]); -numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1)); -numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits)); -numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1)); -numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - cuHeightInUnits)); +if (!cu-getSlice()-getPPS()-getConstrainedIntraPred()) +{ +bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT); 
+numIntraNeighbor += (int)(bNeighborFlags[leftUnits]); +numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1)); +numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits)); +numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1)); +numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - cuHeightInUnits)); +} +else +{ +bNeighborFlags[leftUnits] = isAboveLeftAvailableCIP(cu, partIdxLT); +numIntraNeighbor += (int)(bNeighborFlags[leftUnits]); +numIntraNeighbor += isAboveAvailableCIP(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1)); +numIntraNeighbor += isAboveRightAvailableCIP(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits)); +numIntraNeighbor += isLeftAvailableCIP(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1)); +numIntraNeighbor += isBelowLeftAvailableCIP(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - cuHeightInUnits)); +} width = cuWidth2 + 1; height = cuHeight2 + 1; @@ -238,12 +250,24 @@ int leftUnits = cuHeightInUnits 1; partIdxLB= g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((cuHeightInUnits - 1) * partIdxStride)]; -bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT); -numIntraNeighbor += (int)(bNeighborFlags[leftUnits]); -numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1)); -numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits)); -numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1)); -numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - cuHeightInUnits)); +if (!cu-getSlice()-getPPS()-getConstrainedIntraPred()) +{ +bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT); +numIntraNeighbor += 
(int)(bNeighborFlags[leftUnits]); +numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1)); +numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits)); +numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1)); +numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT,
Re: [x265] [PATCH] vbv:reinit cu data when row vbv re-rencode is triggered
I see what you're trying to do here. m_baseQP is always zero in normal vbv operation, but contains previous QP values during re-encode. A better solution would be to find out which field in TComDataCU actually contains uncleared values, and clear them in TComDataCU::initCU. On Sat, Apr 19, 2014 at 1:07 AM, Steve Borho st...@borho.org wrote: On Fri, Apr 18, 2014 at 7:31 AM, santhosh...@multicorewareinc.com wrote: # HG changeset patch # User Santhoshini Sekar santhosh...@multicorewareinc.com # Date 1397824258 -19800 # Fri Apr 18 18:00:58 2014 +0530 # Node ID 3d680a36ea9acc45f3db128a5efe557a79a3026b # Parent 03aa222ff8eb5ffbe65bd5bf522cad6561210024 vbv:reinit cu data when row vbv re-rencode is triggered. space after colon no need for end punctuation on summary line diff -r 03aa222ff8eb -r 3d680a36ea9a source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Fri Apr 18 17:32:14 2014 +0530 +++ b/source/encoder/frameencoder.cpp Fri Apr 18 18:00:58 2014 +0530 @@ -1100,6 +1100,8 @@ { if (!row) m_pic-m_rowDiagQp[row] = m_pic-m_avgQpRc; +if (cu-m_baseQp != 0) +cu-initEstData(0); this doesn't look to be the appropriate place for this sort of reset. If it needs to happen after a row restart shouldn't it be done in the restart loop below? Here it looks like it could be calling initEstData() many times, redundantly. if (row = col row m_vbvResetTriggerRow != row) cu-m_baseQp = m_pic-getCU(cuAddr - numCols + 1)-m_baseQp; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] cutree: adjust the rowSatd with qpoffset only for reference frames
Thanks a lot, yes, it is a typo. On May 16, 2014 6:20 AM, den c dnc...@gmail.com wrote: On 5/15/14, g...@multicorewareinc.com g...@multicorewareinc.com wrote: # HG changeset patch # User Gopu Govindaswamy # Date 1400146948 -19800 # Thu May 15 15:12:28 2014 +0530 # Node ID 794b7d744a14c653f76748b4ec3033b040c3e783 # Parent 6ca880d7e68361e5b9e2353f05369d9e5c40a29f cutree: adjust the rowSatd with qpoffset only for reference frames diff -r 6ca880d7e683 -r 794b7d744a14 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cppThu May 15 12:31:06 2014 +0530 +++ b/source/encoder/slicetype.cppThu May 15 15:12:28 2014 +0530 @@ -1148,7 +1148,7 @@ { int64_t score = 0; int *rowSatd = frames[b]-rowSatds[b - p0][p1 - b]; -double *qp_offset = IS_X265_TYPE_B(frames[b]-sliceType) ? frames[b]-qpAqOffset : frames[b]-qpOffset; +double *qp_offset = (frames[b]-sliceType = X265_TYPE_B) ? I think this is a typo and what is needed is: (frames[b]-sliceType == X265_TYPE_B)). Correct me if I am wrong. ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] fix : square chroma transform expected error message
Can we have more detail on this, Ashok? Both trWidthC, trHeightC and widthC and heightC should have the same values. Why exactly was this assert being triggered in the regression tests? On Mon, May 19, 2014 at 7:19 PM, as...@multicorewareinc.com wrote: # HG changeset patch # User Ashok Kumar Mishraas...@multicorewareinc.com # Date 1400507347 -19800 # Mon May 19 19:19:07 2014 +0530 # Node ID 8647c7861144eee4a0f96687794607b3e98d7b9f # Parent ba2a9f61ea06f0ac799d8c0247eec770065465bb fix : square chroma transform expected error message diff -r ba2a9f61ea06 -r 8647c7861144 source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Fri May 16 19:20:46 2014 +0900 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon May 19 19:19:07 2014 +0530 @@ -2975,7 +2975,7 @@ else { int16_t *ptr = resiYuv-getCbAddr(absTUPartIdxC); -X265_CHECK(trWidthC == trHeightC, square chroma transform expected\n); +X265_CHECK(widthC == heightC, square chroma transform expected\n); primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, resiYuv-m_cwidth, 0); } if (absSumV) @@ -2991,7 +2991,7 @@ else { int16_t *ptr = resiYuv-getCrAddr(absTUPartIdxC); -X265_CHECK(trWidthC == trHeightC, square chroma transform expected\n); +X265_CHECK(widthC == heightC, square chroma transform expected\n); primitives.blockfill_s[(int)g_convertToBit[trWidthC]](ptr, resiYuv-m_cwidth, 0); } cu-setCbfPartRange(absSumU ? 
setCbf : 0, TEXT_CHROMA_U, absTUPartIdxC, tuIterator.m_absPartIdxStep); @@ -3348,7 +3348,7 @@ { int16_t *ptr = m_qtTempShortYuv[qtlayer].getCbAddr(tuIterator.m_absPartIdxTURelCU); const uint32_t stride = m_qtTempShortYuv[qtlayer].m_cwidth; -X265_CHECK(trWidthC == trHeightC, square chroma transform expected\n); +X265_CHECK(widthC == heightC, square chroma transform expected\n); primitives.blockfill_s[(int)g_convertToBit[widthC]](ptr, stride, 0); } @@ -3416,7 +3416,7 @@ { int16_t *ptr = m_qtTempShortYuv[qtlayer].getCrAddr(tuIterator.m_absPartIdxTURelCU); const uint32_t stride = m_qtTempShortYuv[qtlayer].m_cwidth; -X265_CHECK(trWidthC == trHeightC, square chroma transform expected\n); +X265_CHECK(widthC == heightC, square chroma transform expected\n); primitives.blockfill_s[(int)g_convertToBit[widthC]](ptr, stride, 0); } ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] CLI: Default rate control mode in help output - CQ or CRF?
On Tue, May 20, 2014 at 1:30 PM, Mario *LigH* Rohkrämer cont...@ligh.de wrote: In the help output of the x265 CLI, I see both default values for CRF (28.0) and CQ (32). Are they valid at the same time? Or is the default QP outdated since CRF is the default rate control mode? Yes, CRF is the default mode. With nothing specified, --crf 32 is the default for rate control. I'm removing the default QP value to avoid confusion. I believe if I explicitly define either a CRF or CQ value, this will set the rate control mode to either CRF or CQ depending on which comes last in the command line? Correct. -- Fun and success! Mario *LigH* Rohkrämer mailto:cont...@ligh.de ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] CLI: Default rate control mode in help output - CQ or CRF?
Sorry, yes, typo: --crf 28.0 is the default. On Tue, May 20, 2014 at 3:14 PM, Mario *LigH* Rohkrämer cont...@ligh.de wrote: Am 20.05.2014, 11:30 Uhr, schrieb Deepthi Nandakumar deep...@multicorewareinc.com: With nothing specified --crf 32 is the default for ratecontrol. No, actually --crf 28.0, I believe? -- Fun and success! Mario *LigH* Rohkrämer mailto:cont...@ligh.de ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] psyrd: use psyrdcost for intra for rdLevels 5
psy-rd is enabled only in full-rdo decisions (rdLevel = 5). Your patch calculates intra-psy rd cost for rd=4 and lower, which is where xEncodeIntraInInter is called. On Thu, May 22, 2014 at 3:31 PM, sumala...@multicorewareinc.com wrote: # HG changeset patch # User Sumalatha Polureddy sumala...@multicorewareinc.com # Date 1400752845 -19800 # Node ID fc400f71de9dcbaeda5c0669edabd27e288cdf2c # Parent f39484bb3eecc8cfca0448c63f16fe8dacc54d7f psyrd: use psyrdcost for intra for rdLevels 5 diff -r f39484bb3eec -r fc400f71de9d source/encoder/compress.cpp --- a/source/encoder/compress.cpp Tue May 20 22:02:00 2014 -0500 +++ b/source/encoder/compress.cpp Thu May 22 15:30:45 2014 +0530 @@ -70,7 +70,18 @@ m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); cu-m_totalBits = m_entropyCoder-getNumberOfWrittenBits(); -cu-m_totalCost = m_rdCost-calcRdCost(cu-m_totalDistortion, cu-m_totalBits); +if (m_rdCost-psyRdEnabled()) +{ +int part = g_convertToBit[cu-getCUSize(0)]; +uint32_t psyRdCost = m_rdCost-psyCost(part, m_origYuv[depth]-getLumaAddr(), m_origYuv[depth]-getStride(), +m_tmpRecoYuv[depth]-getLumaAddr(), m_tmpRecoYuv[depth]-getStride()); +cu-m_totalCost = m_rdCost-calcPsyRdCost(cu-m_totalDistortion, cu-m_totalBits, psyRdCost); + +} +else +{ +cu-m_totalCost = m_rdCost-calcRdCost(cu-m_totalDistortion, cu-m_totalBits); +} } void TEncCu::xComputeCostIntraInInter(TComDataCU* cu, PartSize partSize) ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] psyrd: use psyrdcost for intra for rdLevels 5
On Thu, May 22, 2014 at 4:23 PM, Sumalatha Polureddy sumala...@multicorewareinc.com wrote: psy-rd is enabled for full-rdo decision(rd=5) and for rd= 4 This is a bug - psy-rd should be enabled only for rd=5. Fixing now. m_rdCost.setPsyRdScale(top-param-rdLevel = 4 ? top-param-psyRd : 0); so this patch is required for rd=4 where inter uses psyrdcost and intra was using rdcost. For other lower rd levels 4, m_psyRdScale = 0, so psycost will not have much effect on RDcost On Thu, May 22, 2014 at 3:45 PM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: psy-rd is enabled only in full-rdo decisions (rdLevel = 5). Your patch calculates intra-psy rd cost for rd=4 and lower, which is where xEncodeIntrainInter is called. On Thu, May 22, 2014 at 3:31 PM, sumala...@multicorewareinc.com wrote: # HG changeset patch # User Sumalatha Polureddysumala...@multicorewareinc.com # Date 1400752845 -19800 # Node ID fc400f71de9dcbaeda5c0669edabd27e288cdf2c # Parent f39484bb3eecc8cfca0448c63f16fe8dacc54d7f psyrd: use psyrdcost for intra for rdLevels 5 diff -r f39484bb3eec -r fc400f71de9d source/encoder/compress.cpp --- a/source/encoder/compress.cpp Tue May 20 22:02:00 2014 -0500 +++ b/source/encoder/compress.cpp Thu May 22 15:30:45 2014 +0530 @@ -70,7 +70,18 @@ m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); cu-m_totalBits = m_entropyCoder-getNumberOfWrittenBits(); -cu-m_totalCost = m_rdCost-calcRdCost(cu-m_totalDistortion, cu-m_totalBits); +if (m_rdCost-psyRdEnabled()) +{ +int part = g_convertToBit[cu-getCUSize(0)]; +uint32_t psyRdCost = m_rdCost-psyCost(part, m_origYuv[depth]-getLumaAddr(), m_origYuv[depth]-getStride(), +m_tmpRecoYuv[depth]-getLumaAddr(), m_tmpRecoYuv[depth]-getStride()); +cu-m_totalCost = m_rdCost-calcPsyRdCost(cu-m_totalDistortion, cu-m_totalBits, psyRdCost); + +} +else +{ +cu-m_totalCost = m_rdCost-calcRdCost(cu-m_totalDistortion, cu-m_totalBits); +} } void TEncCu::xComputeCostIntraInInter(TComDataCU* cu, PartSize partSize) ___ x265-devel mailing 
list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH RFC] psyrd: use psyrdcost for selecting best mode across depth
On Mon, May 26, 2014 at 3:02 PM, sumala...@multicorewareinc.com wrote: # HG changeset patch # User Sumalatha Polureddysumala...@multicorewareinc.com # Date 1401087565 -19800 # Node ID cbe5cc0e48b4122518ca732a1b32de16ba25c467 # Parent 5134e76aa729b6fece18701fdc00390c2f2ffb32 psyrd: use psyrdcost for selecting best mode across depth diff -r 5134e76aa729 -r cbe5cc0e48b4 source/Lib/TLibCommon/TComDataCU.cpp --- a/source/Lib/TLibCommon/TComDataCU.cpp Thu May 22 21:46:21 2014 -0500 +++ b/source/Lib/TLibCommon/TComDataCU.cpp Mon May 26 12:29:25 2014 +0530 @@ -218,6 +218,7 @@ m_sa8dCost = MAX_INT64; m_totalDistortion = 0; m_totalBits= 0; +m_psyCost = 0; m_numPartitions= pic-getNumPartInCU(); char* qp = pic-getCU(getAddr())-getQP(); m_baseQp = pic-getCU(getAddr())-m_baseQp; @@ -309,6 +310,7 @@ m_sa8dCost = MAX_INT64; m_totalDistortion = 0; m_totalBits= 0; +m_psyCost = 0; uint8_t cuSize = g_maxCUSize depth; @@ -345,6 +347,7 @@ m_sa8dCost = MAX_INT64; m_totalDistortion = 0; m_totalBits= 0; +m_psyCost = 0; uint8_t cuSize = g_maxCUSize depth; @@ -393,6 +396,7 @@ m_sa8dCost = MAX_INT64; m_totalDistortion = 0; m_totalBits= 0; +m_psyCost = 0; m_numPartitions= cu-getTotalNumPart() 2; for (int i = 0; i 4; i++) @@ -457,6 +461,7 @@ m_sa8dCost = MAX_INT64; m_totalDistortion = 0; m_totalBits= 0; +m_psyCost = 0; m_numPartitions= cu-getTotalNumPart() 2; for (int i = 0; i 4; i++) @@ -520,6 +525,7 @@ m_sa8dCost = MAX_INT64; m_totalDistortion = 0; m_totalBits= 0; +m_psyCost = 0; m_numPartitions= cu-getTotalNumPart() 2; TComDataCU* rpcCU = m_pic-getCU(m_cuAddr); @@ -550,6 +556,7 @@ m_totalDistortion += cu-m_totalDistortion; m_totalBits+= cu-m_totalBits; +m_psyCost += cu-m_psyCost; uint32_t offset = cu-getTotalNumPart() * partUnitIdx; uint32_t numPartition = cu-getTotalNumPart(); @@ -610,6 +617,7 @@ rpcCU-m_totalCost = m_totalCost; rpcCU-m_totalDistortion = m_totalDistortion; rpcCU-m_totalBits = m_totalBits; +rpcCU-m_psyCost = m_psyCost; int sizeInBool = sizeof(bool) * m_numPartitions; int 
sizeInChar = sizeof(char) * m_numPartitions; @@ -695,6 +703,7 @@ cu-m_totalCost = m_totalCost; cu-m_totalDistortion = m_totalDistortion; cu-m_totalBits = m_totalBits; +cu-m_psyCost = m_psyCost; int sizeInBool = sizeof(bool) * qNumPart; int sizeInChar = sizeof(char) * qNumPart; diff -r 5134e76aa729 -r cbe5cc0e48b4 source/Lib/TLibCommon/TComDataCU.h --- a/source/Lib/TLibCommon/TComDataCU.hThu May 22 21:46:21 2014 -0500 +++ b/source/Lib/TLibCommon/TComDataCU.hMon May 26 12:29:25 2014 +0530 @@ -168,6 +168,7 @@ uint64_t m_totalCost; /// sum of partition RD costs uint32_t m_totalDistortion; /// sum of partition distortion uint32_t m_totalBits; /// sum of partition signal bits +uint32_t m_psyCost; uint64_t m_avgCost[4]; // stores the avg cost of CU's in frame for each depth uint32_t m_count[4]; uint64_t m_sa8dCost; diff -r 5134e76aa729 -r cbe5cc0e48b4 source/Lib/TLibEncoder/TEncCu.cpp --- a/source/Lib/TLibEncoder/TEncCu.cpp Thu May 22 21:46:21 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncCu.cpp Mon May 26 12:29:25 2014 +0530 @@ -601,7 +601,14 @@ m_entropyCoder-resetBits(); m_entropyCoder-encodeSplitFlag(outBestCU, 0, depth); outBestCU-m_totalBits += m_entropyCoder-getNumberOfWrittenBits(); // split bits -outBestCU-m_totalCost = m_rdCost-calcRdCost(outBestCU-m_totalDistortion, outBestCU-m_totalBits); +if (m_rdCost-psyRdEnabled()) +{ +outBestCU-m_totalCost = m_rdCost-calcPsyRdCost(outBestCU-m_totalDistortion, outBestCU-m_totalBits, outBestCU-m_psyCost); If our objective is to catch unintended comparisons between psyCost and regular RD cost, you should save the above cost (with psyRd enabled) into m_psyCost. 
+} +else +{ +outBestCU-m_totalCost = m_rdCost-calcRdCost(outBestCU-m_totalDistortion, outBestCU-m_totalBits); +} } outTempCU-initEstData(depth); @@ -648,7 +655,14 @@ m_entropyCoder-encodeSplitFlag(outTempCU, 0, depth); outTempCU-m_totalBits += m_entropyCoder-getNumberOfWrittenBits(); // split bits } -outTempCU-m_totalCost = m_rdCost-calcRdCost(outTempCU-m_totalDistortion, outTempCU-m_totalBits); +if (m_rdCost-psyRdEnabled()) +{ +
Re: [x265] [PATCH] param: log CBR if vbvMaxrate, abr bitrate and vbv bufsize are equal
CBR is defined as vbvMaxRate == bitrate; the buffer size does not necessarily have to be 1 second. On Thu, Jun 5, 2014 at 12:14 PM, aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalai # Date 1401950608 -19800 # Thu Jun 05 12:13:28 2014 +0530 # Node ID 21c0d24fe9f6b8c20115b64f36c95e184b7aa78a # Parent 817c25f5ace136c60100dabb128dda6056c86bfb param: log CBR if vbvMaxrate, abr bitrate and vbv bufsize are equal diff -r 817c25f5ace1 -r 21c0d24fe9f6 source/common/param.cpp --- a/source/common/param.cpp Thu Jun 05 11:43:17 2014 +0530 +++ b/source/common/param.cpp Thu Jun 05 12:13:28 2014 +0530 @@ -1130,7 +1130,11 @@ else switch (param->rc.rateControlMode) { case X265_RC_ABR: -x265_log(param, X265_LOG_INFO, "Rate Control / AQ-Strength / CUTree : ABR-%d kbps / %0.1f / %d\n", param->rc.bitrate, +if (param->rc.vbvMaxBitrate == param->rc.bitrate && param->rc.vbvMaxBitrate == param->rc.vbvBufferSize) +x265_log(param, X265_LOG_INFO, "Rate Control / AQ-Strength / CUTree : CBR-%d kbps / %0.1f / %d\n", param->rc.bitrate, + param->rc.aqStrength, param->rc.cuTree); +else +x265_log(param, X265_LOG_INFO, "Rate Control / AQ-Strength / CUTree : ABR-%d kbps / %0.1f / %d\n", param->rc.bitrate, param->rc.aqStrength, param->rc.cuTree); break; case X265_RC_CQP: ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH 2 of 6] rc: define default setting and validations for 2 pass states
On Mon, Jun 16, 2014 at 12:20 AM, aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalaiaar...@multicorewareinc.com # Date 1402857077 -19800 # Mon Jun 16 00:01:17 2014 +0530 # Node ID d1f636a6dc0a11657bb25be650de60345c2952c8 # Parent 438a03ff94830fbc17457b6f324397e643c17cba rc: define default setting and validations for 2 pass states diff -r 438a03ff9483 -r d1f636a6dc0a source/common/param.cpp --- a/source/common/param.cpp Sun Jun 15 23:59:14 2014 +0530 +++ b/source/common/param.cpp Mon Jun 16 00:01:17 2014 +0530 @@ -180,7 +180,12 @@ param-rc.cuTree = 1; param-rc.rfConstantMax = 0; param-rc.rfConstantMin = 0; - +param-rc.pszStatIn = x265_2pass.log; +param-rc.pszStatOut = x264_2pass.log; +param-rc.complexityBlur = 20; +param-rc.qblur = 0.5; +param-rc.statRead = false; +param-rc.statWrite = false; /* Quality Measurement Metrics */ param-bEnablePsnr = 0; param-bEnableSsim = 0; @@ -414,6 +419,18 @@ return 0; } +extern C +void x265_param_apply_fastfirstpass(x265_param *param) +{ +/* Set faster options in case of turbo firstpass. */ +if( param-rc.statWrite !param-rc.statRead ) +{ +param-maxNumReferences = 1; +param-searchMethod = X265_DIA_SEARCH; +param-bEnableEarlySkip = 1; +} / Hmm, making 2-pass work correctly is priority. We can focus on these turbo first-pass enhancements later? This could affect bitrate predictions. 
+} + static int x265_atobool(const char *str, bool bError) { @@ -911,6 +928,8 @@ if (s) x265_log(param, X265_LOG_WARNING, --tune %s should be used if attempting to benchmark %s!\n, s, s); } +if (param-bOpenGOP param-rc.statRead) +param-lookaheadDepth = 0; CHECK(param-rc.qp -6 * (param-internalBitDepth - 8) || param-rc.qp 51, QP exceeds supported range (-QpBDOffsety to 51)); @@ -966,7 +985,7 @@ Rate control mode is out of range); CHECK(param-rdLevel 0 || param-rdLevel 6, RD Level is out of range); -CHECK(param-bframes param-lookaheadDepth, +CHECK(param-bframes param-lookaheadDepth !param-rc.statRead, Lookahead depth must be greater than the max consecutive bframe count); CHECK(param-bframes 0, bframe count should be greater than zero); @@ -1045,6 +1064,9 @@ Target bitrate can not be less than zero); if (param-noiseReduction) CHECK(100 param-noiseReduction || param-noiseReduction 1000, Valid noise reduction range 100 - 1000); +CHECK(param-rc.rateControlMode == X265_RC_CRF param-rc.statRead, + Constant rate-factor is incompatible with 2pass); + And CQP also? return check_failed; } diff -r 438a03ff9483 -r d1f636a6dc0a source/x265.cpp --- a/source/x265.cpp Sun Jun 15 23:59:14 2014 +0530 +++ b/source/x265.cpp Mon Jun 16 00:01:17 2014 +0530 @@ -591,6 +591,7 @@ } #endif // if HIGH_BIT_DEPTH +x265_param_apply_fastfirstpass(param); InputFileInfo info; info.filename = inputfn; info.depth = inputBitDepth; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH 2 of 3] lambda: change chroma lambda distortion weighting to resemble x264
Agreed, thats what I thought too. But the g_chromatable is not in the HEVC spec at all, it's just carried over from HM. On Jun 16, 2014 5:38 PM, Aarthi Priya Thirumalai aar...@multicorewareinc.com wrote: On Mon, Jun 16, 2014 at 4:49 PM, deep...@multicorewareinc.com wrote: # HG changeset patch # User Deepthi Nandakumar deep...@multicorewareinc.com # Date 1402916716 -19800 # Mon Jun 16 16:35:16 2014 +0530 # Node ID 4d76a9c8b5abbf143e5869d55cf80a8816d99a68 # Parent ff3a85f715d43e2c21aec295426ae9dbe7c03d75 lambda: change chroma lambda distortion weighting to resemble x264. 1. x264 scales the chroma distortion by a factor derived from a lambda offset table when psyRd is enabled. 2. This patch also removes the separate Cb and Cr distortion weights that were carried over from HM, and replaces it with 256 when psy-rd is disabled, and the above-mentioned lambda offset when it is enabled. diff -r ff3a85f715d4 -r 4d76a9c8b5ab source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 16 16:12:00 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 16 16:35:16 2014 +0530 @@ -143,16 +143,22 @@ return false; } -void TEncSearch::setQP(int qp, double crWeight, double cbWeight) +void TEncSearch::setQP(int qp) { -double lambda2 = x265_lambda2_tab[qp]; -double chromaLambda = lambda2 / crWeight; +double lambda2 = x265_lambda2_tab[qp]; + +#define SPEC_QP(x) X265_MIN(x, QP_MAX_SPEC) +int effective_chroma_qp = chroma_qp_table[SPEC_QP(qp)] + X265_MAX(qp - QP_MAX_SPEC, 0); +double chromaLambda = x265_lambda2_tab[effective_chroma_qp]; +int chroma_offset_idx = X265_MIN (qp - effective_chroma_qp + 12, MAX_CHROMA_LAMBDA_OFFSET); +uint64_t chromaWeight = m_rdCost-psyRdEnabled() ? x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256; +#undef SPEC_QP Luma to chroma qp mapping should be done as mentioned in the HEVC standards. the array chroma_qp_table[] is written according to H.264 spec. We cant be using that for HEVC. 
g_chromaScale[chFmt][qpc]) - gives the correct mapping of chroma qp as per HEVC spec. chroma qp needs to be obtained from luma qp (0-69) before it can be clipped to QP_MAX_SPEC for HEVC. m_me.setQP(qp); m_trQuant-setLambda(lambda2, chromaLambda); m_rdCost-setLambda(lambda2, x265_lambda_tab[qp]); -m_rdCost-setCbDistortionWeight(cbWeight); -m_rdCost-setCrDistortionWeight(crWeight); +m_rdCost-setCbDistortionWeight(chromaWeight); +m_rdCost-setCrDistortionWeight(chromaWeight); } void TEncSearch::xEncSubdivCbfQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height, bool bLuma, bool bChroma) diff -r ff3a85f715d4 -r 4d76a9c8b5ab source/Lib/TLibEncoder/TEncSearch.h --- a/source/Lib/TLibEncoder/TEncSearch.h Mon Jun 16 16:12:00 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncSearch.h Mon Jun 16 16:35:16 2014 +0530 @@ -142,7 +142,7 @@ void setRDGoOnSbacCoder(TEncSbac* rdGoOnSbacCoder) { m_rdGoOnSbacCoder = rdGoOnSbacCoder; } -void setQP(int QP, double crWeight, double cbWeight); +void setQP(int QP); TEncSearch(); virtual ~TEncSearch(); diff -r ff3a85f715d4 -r 4d76a9c8b5ab source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Mon Jun 16 16:12:00 2014 +0530 +++ b/source/encoder/frameencoder.cpp Mon Jun 16 16:35:16 2014 +0530 @@ -363,22 +363,8 @@ } void FrameEncoder::setLambda(int qp, int row) -{ -TComSlice* slice = m_pic-getSlice(); -int chFmt = slice-getSPS()-getChromaFormatIdc(); - -// for RDO -// in RdCost there is only one lambda because the luma and chroma bits are not separated, -// instead we weight the distortion of chroma. 
-int chromaQPOffset = slice-getPPS()-getChromaCbQpOffset() + slice-getSliceQpDeltaCb(); -int qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset); -double cbWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0); // takes into account of the chroma qp mapping and chroma qp Offset - -chromaQPOffset = slice-getPPS()-getChromaCrQpOffset() + slice-getSliceQpDeltaCr(); -qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset); -double crWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0); // takes into account of the chroma qp mapping and chroma qp Offset - -m_rows[row].m_search.setQP(qp, crWeight, cbWeight); +{ +m_rows[row].m_search.setQP(qp); } void FrameEncoder::compressFrame() @@ -387,7 +373,6 @@ int64_t startCompressTime = x265_mdate(); TEncEntropy* entropyCoder = getEntropyCoder(0); TComSlice* slice = m_pic-getSlice(); -int chFmt = slice-getSPS()-getChromaFormatIdc(); int totalCoded= (int)m_top-m_encodedFrameNum - 1; m_nalCount = 0; @@ -515,21 +500,13 @@ } int qp = slice-getSliceQp
Re: [x265] [PATCH 2 of 3] lambda: change chroma lambda distortion weighting to resemble x264
Ok, thanks. The g_chromaScale table is likely constructed from the H.265 range extension spec, so I was looking in the wrong place. So, the QPs will be reverted but the chroma lambda changes will still hold. New patch in the works. On Mon, Jun 16, 2014 at 8:10 PM, Aarthi Priya Thirumalai aar...@multicorewareinc.com wrote: pg 155 of HEVC standard: Table 8-9 – Specification of QpC as a function of qPi. qPi: < 30 | 30 31 32 33 34 35 36 37 38 39 40 41 42 43 | > 43. QpC: = qPi | 29 30 31 32 33 33 34 34 35 35 36 36 37 37 | = qPi − 6. AFAIK, the values in g_chromaScale[CHROMA_420] follow the spec exactly. Not sure how the values for other chroma formats were derived. On Mon, Jun 16, 2014 at 7:42 PM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: Agreed, that's what I thought too. But the g_chromatable is not in the HEVC spec at all, it's just carried over from HM. On Jun 16, 2014 5:38 PM, Aarthi Priya Thirumalai aar...@multicorewareinc.com wrote: On Mon, Jun 16, 2014 at 4:49 PM, deep...@multicorewareinc.com wrote: # HG changeset patch # User Deepthi Nandakumar deep...@multicorewareinc.com # Date 1402916716 -19800 # Mon Jun 16 16:35:16 2014 +0530 # Node ID 4d76a9c8b5abbf143e5869d55cf80a8816d99a68 # Parent ff3a85f715d43e2c21aec295426ae9dbe7c03d75 lambda: change chroma lambda distortion weighting to resemble x264. 1. x264 scales the chroma distortion by a factor derived from a lambda offset table when psyRd is enabled. 2. This patch also removes the separate Cb and Cr distortion weights that were carried over from HM, and replaces them with 256 when psy-rd is disabled, and the above-mentioned lambda offset when it is enabled.
diff -r ff3a85f715d4 -r 4d76a9c8b5ab source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 16 16:12:00 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Jun 16 16:35:16 2014 +0530 @@ -143,16 +143,22 @@ return false; } -void TEncSearch::setQP(int qp, double crWeight, double cbWeight) +void TEncSearch::setQP(int qp) { -double lambda2 = x265_lambda2_tab[qp]; -double chromaLambda = lambda2 / crWeight; +double lambda2 = x265_lambda2_tab[qp]; + +#define SPEC_QP(x) X265_MIN(x, QP_MAX_SPEC) +int effective_chroma_qp = chroma_qp_table[SPEC_QP(qp)] + X265_MAX(qp - QP_MAX_SPEC, 0); +double chromaLambda = x265_lambda2_tab[effective_chroma_qp]; +int chroma_offset_idx = X265_MIN (qp - effective_chroma_qp + 12, MAX_CHROMA_LAMBDA_OFFSET); +uint64_t chromaWeight = m_rdCost-psyRdEnabled() ? x265_chroma_lambda2_offset_tab[chroma_offset_idx] : 256; +#undef SPEC_QP Luma to chroma qp mapping should be done as mentioned in the HEVC standards. the array chroma_qp_table[] is written according to H.264 spec. We cant be using that for HEVC. g_chromaScale[chFmt][qpc]) - gives the correct mapping of chroma qp as per HEVC spec. chroma qp needs to be obtained from luma qp (0-69) before it can be clipped to QP_MAX_SPEC for HEVC. 
m_me.setQP(qp); m_trQuant-setLambda(lambda2, chromaLambda); m_rdCost-setLambda(lambda2, x265_lambda_tab[qp]); -m_rdCost-setCbDistortionWeight(cbWeight); -m_rdCost-setCrDistortionWeight(crWeight); +m_rdCost-setCbDistortionWeight(chromaWeight); +m_rdCost-setCrDistortionWeight(chromaWeight); } void TEncSearch::xEncSubdivCbfQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height, bool bLuma, bool bChroma) diff -r ff3a85f715d4 -r 4d76a9c8b5ab source/Lib/TLibEncoder/TEncSearch.h --- a/source/Lib/TLibEncoder/TEncSearch.h Mon Jun 16 16:12:00 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncSearch.h Mon Jun 16 16:35:16 2014 +0530 @@ -142,7 +142,7 @@ void setRDGoOnSbacCoder(TEncSbac* rdGoOnSbacCoder) { m_rdGoOnSbacCoder = rdGoOnSbacCoder; } -void setQP(int QP, double crWeight, double cbWeight); +void setQP(int QP); TEncSearch(); virtual ~TEncSearch(); diff -r ff3a85f715d4 -r 4d76a9c8b5ab source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Mon Jun 16 16:12:00 2014 +0530 +++ b/source/encoder/frameencoder.cpp Mon Jun 16 16:35:16 2014 +0530 @@ -363,22 +363,8 @@ } void FrameEncoder::setLambda(int qp, int row) -{ -TComSlice* slice = m_pic-getSlice(); -int chFmt = slice-getSPS()-getChromaFormatIdc(); - -// for RDO -// in RdCost there is only one lambda because the luma and chroma bits are not separated, -// instead we weight the distortion of chroma. -int chromaQPOffset = slice-getPPS()-getChromaCbQpOffset() + slice-getSliceQpDeltaCb(); -int qpc = Clip3(0, MAX_MAX_QP, qp + chromaQPOffset); -double cbWeight = pow(2.0, (qp - g_chromaScale[chFmt][qpc]) / 3.0); // takes into account of the chroma qp mapping and chroma qp Offset
Re: [x265] [PATCH] rc: introduce param variables for 2 pass
Need to bump up X265_BUILD. On Tue, Jun 17, 2014 at 8:06 PM, aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalai aar...@multicorewareinc.com # Date 1402997162 -19800 # Tue Jun 17 14:56:02 2014 +0530 # Node ID eb3b6d39b83102dfc7c665e3055c1ffda3322e3e # Parent 3a19a9fdb103979e65a9daf15c46c0735e8d743e rc: introduce param variables for 2 pass diff -r 3a19a9fdb103 -r eb3b6d39b831 source/x265.h --- a/source/x265.h Tue Jun 17 14:07:26 2014 +0530 +++ b/source/x265.h Tue Jun 17 14:56:02 2014 +0530 @@ -742,11 +742,25 @@ /* In CRF mode, maximum CRF as caused by VBV. 0 implies no limit */ double rfConstantMax; - /* In CRF mode, minimum CRF as caused by VBV */ double rfConstantMin; + +/* 2pass */ + +/* Filename of the 2pass output stats file. It's null during the final pass when + * stats file isn't written.*/ +char* statOutFileName; + +/* Filename of the 2pass input stats file that is used to load the data for subsequent passes. + * It's null during the first pass of a multipass encode. */ +char* statInFileName; + +/* temporally blur quants */ +double qblur; + +/* temporally blur complexity */ +float complexityBlur; } rc; - /*== Video Usability Information ==*/ struct { ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH 1 of 3] Chroma QP Offset: increase chroma QP when psy-rd is enabled
This is primarily a visual quality improvement/psy-rd hack. In 444, since chroma resolution is on par with luma, and our eyes aren't very sensitive to chroma, we increase the chroma QP so that those bits can be used up in luma. On Wed, Jun 25, 2014 at 4:35 PM, Derek Buitenhuis derek.buitenh...@gmail.com wrote: On 6/25/2014 1:22 AM, deep...@multicorewareinc.com wrote: +/* In 444, chroma gets twice as much resolution, so halve quality when psy-rd is enabled */ +if (p->internalCsp == X265_CSP_I444 && p->psyRd) +{ +p->cbQpOffset += 6; +p->crQpOffset += 6; +} I don't really understand what the reasoning is for this? Is it just to make it fit with the model psy-rd is currently using? - Derek ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH 1 of 3] Chroma QP Offset: increase chroma QP when psy-rd is enabled
In a sense, psy-rd encapsulates all those r-d algorithms/tweaks/hacks that improve visual quality but may hurt objective metrics like PSNR/SSIM. In 444, this QP hack is likely to hurt objective metrics, hence it's turned on only if psychovisual improvement is desired. On Jun 25, 2014 7:02 PM, Derek Buitenhuis derek.buitenh...@gmail.com wrote: On 6/25/2014 12:10 PM, Deepthi Nandakumar wrote: This is primarily a visual quality improvement/psy-rd hack. In 444, since chroma resolution is on par with luma, and our eyes aren't very sensitive to chroma, we increase the chroma QP so that those bits can be used up in luma. Yah I get the idea of a chroma qp offset, I'm just wondering why it is specific to psy-rd? Cheers, - Derek ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH 2 of 2] pass TLD into class FrameFilter
On Tue, Jun 24, 2014 at 5:36 AM, Min Chen chenm...@163.com wrote: # HG changeset patch # User Min Chen chenm...@163.com # Date 1403568362 25200 # Node ID efa48bc0245bded1418db3c42b042acb9969146c # Parent 12c1d8aaa8f56a8f2de74c8ff1451d99d04c817d pass TLD into class FrameFilter diff -r 12c1d8aaa8f5 -r efa48bc0245b source/encoder/cturow.h --- a/source/encoder/cturow.h Mon Jun 23 17:03:49 2014 -0700 +++ b/source/encoder/cturow.h Mon Jun 23 17:06:02 2014 -0700 @@ -47,6 +47,10 @@ RDCost m_rdCost; TComTrQuant m_trQuant; +// NOTE: the maximum LCU 64x64 have 256 partitions +boolm_edgeFilter[256]; +uint8_t m_blockingStrength[256]; + void init(Encoder); ~ThreadLocalData(); }; diff -r 12c1d8aaa8f5 -r efa48bc0245b source/encoder/encoder.cpp --- a/source/encoder/encoder.cppMon Jun 23 17:03:49 2014 -0700 +++ b/source/encoder/encoder.cppMon Jun 23 17:06:02 2014 -0700 @@ -42,6 +42,7 @@ #include x265.h using namespace x265; +ThreadLocalData* Encoder::m_threadLocalData; Encoder::Encoder() { @@ -194,9 +195,10 @@ if (m_frameEncoder) { int numRows = (m_param-sourceHeight + g_maxCUSize - 1) / g_maxCUSize; +int numCols = (m_param-sourceWidth + g_maxCUSize - 1) / g_maxCUSize; for (int i = 0; i m_param-frameNumThreads; i++) { -if (!m_frameEncoder[i].init(this, numRows)) +if (!m_frameEncoder[i].init(this, numRows, numCols)) { x265_log(m_param, X265_LOG_ERROR, Unable to initialize frame encoder, aborting\n); m_aborted = true; diff -r 12c1d8aaa8f5 -r efa48bc0245b source/encoder/encoder.h --- a/source/encoder/encoder.h Mon Jun 23 17:03:49 2014 -0700 +++ b/source/encoder/encoder.h Mon Jun 23 17:06:02 2014 -0700 @@ -175,7 +175,7 @@ x265_param*m_param; RateControl* m_rateControl; -ThreadLocalData* m_threadLocalData; +static ThreadLocalData* m_threadLocalData; bool m_bEnableRDOQ; diff -r 12c1d8aaa8f5 -r efa48bc0245b source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Mon Jun 23 17:03:49 2014 -0700 +++ b/source/encoder/frameencoder.cpp Mon Jun 23 17:06:02 2014 -0700 @@ -80,15 
+80,17 @@ stop(); } -bool FrameEncoder::init(Encoder *top, int numRows) +bool FrameEncoder::init(Encoder *top, int numRows, int numCols) { bool ok = true; m_top = top; m_param = top-m_param; m_numRows = numRows; +m_numCols = numCols; m_filterRowDelay = (m_param-saoLcuBasedOptimization m_param-saoLcuBoundary) ? 2 : (m_param-bEnableSAO || m_param-bEnableLoopFilter ? 1 : 0); +m_filterRowDelayCus = m_filterRowDelay * numCols; m_rows = new CTURow[m_numRows]; for (int i = 0; i m_numRows; ++i) @@ -505,7 +507,7 @@ // Extend border after whole-frame SAO is finished for (int row = 0; row m_numRows; row++) { -m_frameFilter.processRowPost(row); +m_frameFilter.processRowPost(row, 0); } } @@ -845,7 +847,7 @@ } // setup thread-local data -ThreadLocalData tld = threadId = 0 ? m_top-m_threadLocalData[threadId] : m_tld; +ThreadLocalData tld = threadId = 0 ? Encoder::m_threadLocalData[threadId] : m_tld; tld.m_trQuant.m_nr = m_nr; tld.m_search.m_mref = m_mref; codeRow.setThreadLocalData(tld); @@ -856,7 +858,8 @@ tld.m_cuCoder.m_log = tld.m_cuCoder.m_sliceTypeLog[m_frame-getSlice()-getSliceType()]; int64_t startTime = x265_mdate(); -const uint32_t numCols = m_frame-getPicSym()-getFrameWidthInCU(); +assert(m_frame-getPicSym()-getFrameWidthInCU() == m_numCols); +const uint32_t numCols = m_numCols; const uint32_t lineStartCUAddr = row * numCols; bool bIsVbv = m_param-rc.vbvBufferSize 0 m_param-rc.vbvMaxBitrate 0; diff -r 12c1d8aaa8f5 -r efa48bc0245b source/encoder/frameencoder.h --- a/source/encoder/frameencoder.h Mon Jun 23 17:03:49 2014 -0700 +++ b/source/encoder/frameencoder.h Mon Jun 23 17:06:02 2014 -0700 @@ -65,15 +65,15 @@ void setThreadPool(ThreadPool *p); -bool init(Encoder *top, int numRows); +bool init(Encoder *top, int numRows, int numCols); void destroy(); void processRowEncoder(int row, const int threadId); -void processRowFilter(int row) +void processRowFilter(int row, const int threadId) { -m_frameFilter.processRow(row); +m_frameFilter.processRow(row, threadId); } void 
enqueueRowEncoder(int row) @@ -108,7 +108,7 @@ } else { -processRowFilter(realRow); +processRowFilter(realRow, threadId); // NOTE: Active next row if (realRow != m_numRows - 1) @@ -154,6 +154,7 @@ bool
Re: [x265] [PATCH 2 of 3] rc: accumulate mv bits, coeff bits per frame
Pls fix extra newlines and whitespace nits. On Wed, Jun 25, 2014 at 10:54 PM, aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalaiaar...@multicorewareinc.com # Date 1403716735 -19800 # Wed Jun 25 22:48:55 2014 +0530 # Node ID 0995efabd44470c1192994e1aceeb40ae606467f # Parent e71e34d02de228eab43edf1910a71a44417d rc: accumulate mv bits, coeff bits per frame. diff -r e71e34d02d77 -r 0995efabd444 source/Lib/TLibEncoder/TEncCu.cpp --- a/source/Lib/TLibEncoder/TEncCu.cpp Wed Jun 25 22:46:45 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncCu.cpp Wed Jun 25 22:48:55 2014 +0530 @@ -555,6 +555,7 @@ m_entropyCoder-resetBits(); m_entropyCoder-encodeSplitFlag(outBestCU, 0, depth); outBestCU-m_totalBits += m_entropyCoder-getNumberOfWrittenBits(); // split bits + if (m_rdCost-psyRdEnabled()) { outBestCU-m_totalPsyCost = m_rdCost-calcPsyRdCost(outBestCU-m_totalDistortion, outBestCU-m_totalBits, @@ -616,7 +617,7 @@ m_entropyCoder-encodeSplitFlag(outTempCU, 0, depth); outTempCU-m_totalBits += m_entropyCoder-getNumberOfWrittenBits(); // split bits } - + if (m_rdCost-psyRdEnabled()) { outTempCU-m_totalPsyCost = m_rdCost-calcPsyRdCost(outTempCU-m_totalDistortion, outTempCU-m_totalBits, @@ -907,6 +908,7 @@ m_entropyCoder-resetBits(); m_entropyCoder-encodeSplitFlag(outBestCU, 0, depth); outBestCU-m_totalBits += m_entropyCoder-getNumberOfWrittenBits(); // split bits + if (m_rdCost-psyRdEnabled()) { outBestCU-m_totalPsyCost = m_rdCost-calcPsyRdCost(outBestCU-m_totalDistortion, outBestCU-m_totalBits, @@ -1168,6 +1170,7 @@ return; } + if (slice-getPPS()-getTransquantBypassEnableFlag()) { m_entropyCoder-encodeCUTransquantBypassFlag(cu, absPartIdx); @@ -1390,12 +1393,14 @@ m_entropyCoder-encodePartSize(outTempCU, 0, depth); m_entropyCoder-encodePredInfo(outTempCU, 0); m_entropyCoder-encodeIPCMInfo(outTempCU, 0); +outTempCU-m_mvBits = m_entropyCoder-getNumberOfWrittenBits(); // Encode Coefficients bool bCodeDQP = getdQPFlag(); m_entropyCoder-encodeCoeff(outTempCU, 0, 
depth, outTempCU-getCUSize(0), bCodeDQP); m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); outTempCU-m_totalBits = m_entropyCoder-getNumberOfWrittenBits(); +outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits; if (m_rdCost-psyRdEnabled()) { @@ -1437,12 +1442,13 @@ m_entropyCoder-encodePartSize(outTempCU, 0, depth); m_entropyCoder-encodePredInfo(outTempCU, 0); m_entropyCoder-encodeIPCMInfo(outTempCU, 0); - +outTempCU-m_mvBits = m_entropyCoder-getNumberOfWrittenBits(); // Encode Coefficients bool bCodeDQP = getdQPFlag(); m_entropyCoder-encodeCoeff(outTempCU, 0, depth, outTempCU-getCUSize(0), bCodeDQP); m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); outTempCU-m_totalBits = m_entropyCoder-getNumberOfWrittenBits(); +outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits; if (m_rdCost-psyRdEnabled()) { @@ -1492,12 +1498,14 @@ m_entropyCoder-encodeSkipFlag(outTempCU, 0); m_entropyCoder-encodePredMode(outTempCU, 0); m_entropyCoder-encodePartSize(outTempCU, 0, depth); +outTempCU-m_mvBits = m_entropyCoder-getNumberOfWrittenBits(); m_entropyCoder-encodeIPCMInfo(outTempCU, 0); m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); outTempCU-m_totalBits = m_entropyCoder-getNumberOfWrittenBits(); outTempCU-m_totalRDCost = m_rdCost-calcRdCost(outTempCU-m_totalDistortion, outTempCU-m_totalBits); +outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits; xCheckDQP(outTempCU); xCheckBestMode(outBestCU, outTempCU, depth); diff -r e71e34d02d77 -r 0995efabd444 source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Wed Jun 25 22:46:45 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Wed Jun 25 22:48:55 2014 +0530 @@ -4211,6 +4211,7 @@ } m_entropyCoder-encodeSkipFlag(cu, 0); m_entropyCoder-encodeMergeIndex(cu, 0); +cu-m_mvBits = m_entropyCoder-getNumberOfWrittenBits(); return m_entropyCoder-getNumberOfWrittenBits(); } else @@ -4225,8 +4226,11 @@ m_entropyCoder-encodePartSize(cu, 0, 
cu-getDepth(0)); m_entropyCoder-encodePredInfo(cu, 0); bool bDummy = false; +cu-m_mvBits = m_entropyCoder-getNumberOfWrittenBits(); m_entropyCoder-encodeCoeff(cu, 0, cu-getDepth(0), cu-getCUSize(0), bDummy); -return m_entropyCoder-getNumberOfWrittenBits(); +int totalBits = m_entropyCoder-getNumberOfWrittenBits(); +cu-m_coeffBits = totalBits -
Re: [x265] [PATCH 1 of 3] Chroma QP Offset: increase chroma QP when psy-rd is enabled
OK, we don't have a separate psy option yet. On Jun 26, 2014 6:00 PM, Derek Buitenhuis derek.buitenh...@gmail.com wrote: On 6/26/2014 6:35 AM, BugMaster wrote: That is separate --psy (--no-psy) option in x264 and not --psy-rd Yeah, that was my point. :) - Derek ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] RDOQ enabled rdLevel changed
Yes, the rest of the patch added support for lower RD levels. On Fri, Jun 27, 2014 at 6:17 AM, Satoshi Nakagawa nakagawa...@oki.com wrote: Is this change intended? # HG changeset patch # User Sumalatha Polureddy sumala...@multicorewareinc.com # Date 1403689018 -19800 # Wed Jun 25 15:06:58 2014 +0530 # Node ID e2ed009d296af39926d79f1a245974f158d6861a # Parent 5797d6a8197c96b68752705167ced6cb63194013 psy-rd: implement psy-rd in rdlevel=4,3 and 2 diff -r 5797d6a8197c -r e2ed009d296a source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Wed Jun 25 18:21:34 2014 +0530 +++ b/source/encoder/encoder.cpp Wed Jun 25 15:06:58 2014 +0530 @@ -1267,7 +1267,7 @@ } // disable RDOQ if psy-rd is enabled; until we make it psy-aware -m_bEnableRDOQ = p->psyRd == 0.0 && p->rdLevel >= 4; +m_bEnableRDOQ = p->psyRd == 0.0 && p->rdLevel >= 1; if (p->bLossless) { ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH 2 of 3] rc: accumulate mv bits, coeff bits per frame
Thanks, can you merge with the latest tip? On Fri, Jun 27, 2014 at 12:13 AM, Aarthi Priya Thirumalai aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalaiaar...@multicorewareinc.com # Date 1403808050 -19800 # Fri Jun 27 00:10:50 2014 +0530 # Node ID 11ddf73017d44933090a8943f4fc5098b231b56d # Parent 1b669c33ff3a8d8f6c9bd1e18979c009baed2433 rc: accumulate mv bits, coeff bits per frame diff -r 1b669c33ff3a -r 11ddf73017d4 source/Lib/TLibEncoder/TEncCu.cpp --- a/source/Lib/TLibEncoder/TEncCu.cpp Wed Jun 25 22:46:45 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncCu.cpp Fri Jun 27 00:10:50 2014 +0530 @@ -1365,12 +1365,14 @@ m_entropyCoder-encodePredMode(outTempCU, 0); m_entropyCoder-encodePartSize(outTempCU, 0, depth); m_entropyCoder-encodePredInfo(outTempCU, 0); +outTempCU-m_mvBits = m_entropyCoder-getNumberOfWrittenBits(); // Encode Coefficients bool bCodeDQP = getdQPFlag(); m_entropyCoder-encodeCoeff(outTempCU, 0, depth, outTempCU-getCUSize(0), bCodeDQP); m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); outTempCU-m_totalBits = m_entropyCoder-getNumberOfWrittenBits(); +outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits; if (m_rdCost-psyRdEnabled()) { @@ -1411,12 +1413,14 @@ m_entropyCoder-encodePredMode(outTempCU, 0); m_entropyCoder-encodePartSize(outTempCU, 0, depth); m_entropyCoder-encodePredInfo(outTempCU, 0); +outTempCU-m_mvBits = m_entropyCoder-getNumberOfWrittenBits(); // Encode Coefficients bool bCodeDQP = getdQPFlag(); m_entropyCoder-encodeCoeff(outTempCU, 0, depth, outTempCU-getCUSize(0), bCodeDQP); m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); outTempCU-m_totalBits = m_entropyCoder-getNumberOfWrittenBits(); +outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits; if (m_rdCost-psyRdEnabled()) { diff -r 1b669c33ff3a -r 11ddf73017d4 source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Wed Jun 25 22:46:45 2014 +0530 +++ 
b/source/Lib/TLibEncoder/TEncSearch.cpp Fri Jun 27 00:10:50 2014 +0530 @@ -4059,6 +4059,7 @@ } m_entropyCoder-encodeSkipFlag(cu, 0); m_entropyCoder-encodeMergeIndex(cu, 0); +cu-m_mvBits = m_entropyCoder-getNumberOfWrittenBits(); return m_entropyCoder-getNumberOfWrittenBits(); } else @@ -4073,8 +4074,11 @@ m_entropyCoder-encodePartSize(cu, 0, cu-getDepth(0)); m_entropyCoder-encodePredInfo(cu, 0); bool bDummy = false; +cu-m_mvBits = m_entropyCoder-getNumberOfWrittenBits(); m_entropyCoder-encodeCoeff(cu, 0, cu-getDepth(0), cu-getCUSize(0), bDummy); -return m_entropyCoder-getNumberOfWrittenBits(); +int totalBits = m_entropyCoder-getNumberOfWrittenBits(); +cu-m_coeffBits = totalBits - cu-m_mvBits; +return totalBits; } } diff -r 1b669c33ff3a -r 11ddf73017d4 source/encoder/compress.cpp --- a/source/encoder/compress.cpp Wed Jun 25 22:46:45 2014 +0530 +++ b/source/encoder/compress.cpp Fri Jun 27 00:10:50 2014 +0530 @@ -63,6 +63,7 @@ m_entropyCoder-encodePredMode(cu, 0); m_entropyCoder-encodePartSize(cu, 0, depth); m_entropyCoder-encodePredInfo(cu, 0); +cu-m_mvBits += m_entropyCoder-getNumberOfWrittenBits(); // Encode Coefficients bool bCodeDQP = getdQPFlag(); @@ -71,6 +72,7 @@ m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); cu-m_totalBits = m_entropyCoder-getNumberOfWrittenBits(); +cu-m_coeffBits = cu-m_totalBits - cu-m_mvBits; if (m_rdCost-psyRdEnabled()) { int part = g_convertToBit[cu-getCUSize(0)]; diff -r 1b669c33ff3a -r 11ddf73017d4 source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Wed Jun 25 22:46:45 2014 +0530 +++ b/source/encoder/frameencoder.cpp Fri Jun 27 00:10:50 2014 +0530 @@ -694,6 +694,11 @@ // Store probabilities of second LCU in line into buffer if (col == 1 m_param-bEnableWavefront) getBufferSBac(lin)-loadContexts(getSbacCoder(subStrm)); + +// Collect Frame Stats for 2 pass +m_frame-m_stats.mvBits += cu-m_mvBits; +m_frame-m_stats.coeffBits += cu-m_coeffBits; +m_frame-m_stats.miscBits += cu-m_totalBits - (cu-m_mvBits + 
cu-m_coeffBits); } if (slice-getPPS()-getCabacInitPresentFlag()) On Thu, Jun 26, 2014 at 4:25 PM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: Pls fix extra newlines and whitespace nits. On Wed, Jun 25, 2014 at 10:54 PM, aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalaiaar...@multicorewareinc.com # Date 1403716735 -19800 # Wed Jun 25 22:48:55 2014 +0530 # Node ID 0995efabd44470c1192994e1aceeb40ae606467f # Parent e71e34d02de228eab43edf1910a71a44417d rc
Re: [x265] [PATCH] framefilter: remove heap corruption in tld
The framefilter structure needs ThreadLocalData m_tld, that has to be initialised, and then used if wpp is not enabled. Not sure what you're trying to do here? On Wed, Jul 2, 2014 at 2:20 PM, Aarthi Priya Thirumalai aar...@multicorewareinc.com wrote: sry, Thg client didnt sync the changes properly for the previous commit. here is the right one # HG changeset patch # User Aarthi Thirumalai # Date 1404290172 -19800 # Wed Jul 02 14:06:12 2014 +0530 # Node ID a5a439242bbf367f5d76356b841cfa1ee9e119e4 # Parent a18972fd05b1d6242a881bef979b9e1ff17543d9 framefilter: remove heap corruption in tld diff -r a18972fd05b1 -r a5a439242bbf source/encoder/frameencoder.h --- a/source/encoder/frameencoder.h Tue Jul 01 14:58:35 2014 -0500 +++ b/source/encoder/frameencoder.h Wed Jul 02 14:06:12 2014 +0530 @@ -171,7 +171,7 @@ uint32_t m_checksum[3]; double m_elapsedCompressTime; // elapsed time spent in worker threads double m_frameTime; // wall time from frame start to finish - +ThreadLocalData m_tld; volatile boolm_bAllRowsStop; volatile int m_vbvResetTriggerRow; @@ -191,7 +191,6 @@ Bitstream* m_outStreams; NoiseReduction m_nr; NALList m_nalList; -ThreadLocalData m_tld; Frame* m_frame; diff -r a18972fd05b1 -r a5a439242bbf source/encoder/framefilter.cpp --- a/source/encoder/framefilter.cpp Tue Jul 01 14:58:35 2014 -0500 +++ b/source/encoder/framefilter.cpp Wed Jul 02 14:06:12 2014 +0530 @@ -124,8 +124,7 @@ void FrameFilter::processRow(int row, const int threadId) { PPAScopeEvent(Thread_filterCU); -assert(threadId = 0); -ThreadLocalData tld = Encoder::m_threadLocalData[threadId]; +ThreadLocalData tld = threadId = 0 ? 
Encoder::m_threadLocalData[threadId] : m_frame-m_tld; if (!m_param-bEnableLoopFilter !m_param-bEnableSAO) { On Wed, Jul 2, 2014 at 2:15 PM, aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalai # Date 1404290172 -19800 # Wed Jul 02 14:06:12 2014 +0530 # Node ID 69d9bd3eb5bd015d2e0c90d51eec0d7f8a4747d0 # Parent a18972fd05b1d6242a881bef979b9e1ff17543d9 framefilter: remove heap corruption in tld diff -r a18972fd05b1 -r 69d9bd3eb5bd source/encoder/frameencoder.h --- a/source/encoder/frameencoder.h Tue Jul 01 14:58:35 2014 -0500 +++ b/source/encoder/frameencoder.h Wed Jul 02 14:06:12 2014 +0530 @@ -191,7 +191,6 @@ Bitstream* m_outStreams; NoiseReduction m_nr; NALList m_nalList; -ThreadLocalData m_tld; Frame* m_frame; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] framefilter: remove heap corruption in tld
Hmm, I'm wondering doesnt m_tld rightfully belong to Encoder? On Wed, Jul 2, 2014 at 3:39 PM, Aarthi Priya Thirumalai aar...@multicorewareinc.com wrote: Exactly that! make FrameEncoder::m_tld a public member for framefilter to access, use it in when Wpp is disabled. On Wed, Jul 2, 2014 at 2:25 PM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: The framefilter structure needs ThreadLocalData m_tld, that has to be initialised, and then used if wpp is not enabled. Not sure what you're trying to do here? On Wed, Jul 2, 2014 at 2:20 PM, Aarthi Priya Thirumalai aar...@multicorewareinc.com wrote: sry, Thg client didnt sync the changes properly for the previous commit. here is the right one # HG changeset patch # User Aarthi Thirumalai # Date 1404290172 -19800 # Wed Jul 02 14:06:12 2014 +0530 # Node ID a5a439242bbf367f5d76356b841cfa1ee9e119e4 # Parent a18972fd05b1d6242a881bef979b9e1ff17543d9 framefilter: remove heap corruption in tld diff -r a18972fd05b1 -r a5a439242bbf source/encoder/frameencoder.h --- a/source/encoder/frameencoder.h Tue Jul 01 14:58:35 2014 -0500 +++ b/source/encoder/frameencoder.h Wed Jul 02 14:06:12 2014 +0530 @@ -171,7 +171,7 @@ uint32_t m_checksum[3]; double m_elapsedCompressTime; // elapsed time spent in worker threads double m_frameTime; // wall time from frame start to finish - +ThreadLocalData m_tld; volatile boolm_bAllRowsStop; volatile int m_vbvResetTriggerRow; @@ -191,7 +191,6 @@ Bitstream* m_outStreams; NoiseReduction m_nr; NALList m_nalList; -ThreadLocalData m_tld; Frame* m_frame; diff -r a18972fd05b1 -r a5a439242bbf source/encoder/framefilter.cpp --- a/source/encoder/framefilter.cpp Tue Jul 01 14:58:35 2014 -0500 +++ b/source/encoder/framefilter.cpp Wed Jul 02 14:06:12 2014 +0530 @@ -124,8 +124,7 @@ void FrameFilter::processRow(int row, const int threadId) { PPAScopeEvent(Thread_filterCU); -assert(threadId = 0); -ThreadLocalData tld = Encoder::m_threadLocalData[threadId]; +ThreadLocalData tld = threadId = 0 ? 
Encoder::m_threadLocalData[threadId] : m_frame-m_tld; if (!m_param-bEnableLoopFilter !m_param-bEnableSAO) { On Wed, Jul 2, 2014 at 2:15 PM, aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalai # Date 1404290172 -19800 # Wed Jul 02 14:06:12 2014 +0530 # Node ID 69d9bd3eb5bd015d2e0c90d51eec0d7f8a4747d0 # Parent a18972fd05b1d6242a881bef979b9e1ff17543d9 framefilter: remove heap corruption in tld diff -r a18972fd05b1 -r 69d9bd3eb5bd source/encoder/frameencoder.h --- a/source/encoder/frameencoder.h Tue Jul 01 14:58:35 2014 -0500 +++ b/source/encoder/frameencoder.h Wed Jul 02 14:06:12 2014 +0530 @@ -191,7 +191,6 @@ Bitstream* m_outStreams; NoiseReduction m_nr; NALList m_nalList; -ThreadLocalData m_tld; Frame* m_frame; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] A compiler warning and a question
Thanks, that was a bug, fixed. On Wed, Jul 9, 2014 at 1:34 PM, Mario *LigH* Rohkrämer cont...@ligh.de wrote: v1.1+260-a1e46d813642 + h:/MSYS/home/Entwicklung/x265/source/Lib/TLibEncoder/TEncCu.cpp:1192:6: warning: unused parameter 'outBestPredYuv' [-Wunused-parameter] void TEncCu::xCheckRDCostMerge2Nx2N(TComDataCU* outBestCU, TComDataCU* outTempCU, bool *earlyDetectionSkipMode, TComYuv* outBestPredYuv, TComYuv* rpcYuvReconBest) ^ h:/MSYS/home/Entwicklung/x265/source/Lib/TLibEncoder/TEncCu.cpp:1192:6: warning: unused parameter 'rpcYuvReconBest' [-Wunused-parameter] + I made a trivial shell script to compile each combination of architecture (32 + 64 bit) and precision (8 + 16 bpc) in sequence. I noticed that the compiling order can be different for different runs. Sometimes the target "common" is handled first, sometimes "encoder". Probably nothing to worry about, just curious why it can be random. -- Fun and success! Mario *LigH* Rohkrämer mailto:cont...@ligh.de ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] use std::swap() for readability
Hello, We spent a bunch of effort last year to remove STL dependencies, since they cause serious trouble between different compilers (even between different compiler versions). This is especially since a lot of users will use x265 as a static library. Thanks, Deepthi On Wed, Jul 9, 2014 at 3:00 PM, Satoshi Nakagawa nakagawa...@oki.com wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1404898046 -32400 # Wed Jul 09 18:27:26 2014 +0900 # Node ID a3f4317f4acd89b7ef9bb8616068f9e4ff24328c # Parent 644773b8532929a30f910fd269f521e44621f2f7 use std::swap() for readability diff -r 644773b85329 -r a3f4317f4acd source/Lib/TLibCommon/TComSampleAdaptiveOffset.cpp --- a/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cppWed Jul 09 13:55:42 2014 +0530 +++ b/source/Lib/TLibCommon/TComSampleAdaptiveOffset.cppWed Jul 09 18:27:26 2014 +0900 @@ -535,12 +535,10 @@ int isChroma = (yCbCr != 0) ? 1 : 0; int shift; int cuHeightTmp; -pixel* tmpLSwap; pixel* tmpL; pixel* tmpU; pixel* clipTbl = NULL; int32_t *offsetBo = NULL; -int32_t *tmp_swap; picWidthTmp = (isChroma == 0) ? m_picWidth : m_picWidth m_hChromaShift; picHeightTmp = (isChroma == 0) ? m_picHeight : m_picHeight m_vChromaShift; @@ -707,9 +705,7 @@ m_upBufft[startX] = signDown2; -tmp_swap = m_upBuff1; -m_upBuff1 = m_upBufft; -m_upBufft = tmp_swap; +std::swap(m_upBuff1, m_upBufft); rec += stride; } @@ -775,9 +771,7 @@ // if (iSaoType!=SAO_BO_0 || iSaoType!=SAO_BO_1) { -tmpLSwap = m_tmpL1; -m_tmpL1 = m_tmpL2; -m_tmpL2 = tmpLSwap; +std::swap(m_tmpL1, m_tmpL2); } } @@ -864,7 +858,6 @@ int frameWidthInCU = m_pic-getFrameWidthInCU(); int frameHeightInCU = m_pic-getFrameHeightInCU(); int stride; -pixel *tmpUSwap; int sChroma = (yCbCr == 0) ? 0 : 1; bool mergeLeftFlag; int saoBitIncrease = (yCbCr == 0) ? 
m_saoBitIncreaseY : m_saoBitIncreaseC; @@ -976,9 +969,7 @@ } } -tmpUSwap = m_tmpU1[yCbCr]; -m_tmpU1[yCbCr] = m_tmpU2[yCbCr]; -m_tmpU2[yCbCr] = tmpUSwap; +std::swap(m_tmpU1[yCbCr], m_tmpU2[yCbCr]); } } @@ -1018,7 +1009,6 @@ int addr; int frameWidthInCU = m_pic-getFrameWidthInCU(); int stride; -pixel *tmpUSwap; int sChroma = (yCbCr == 0) ? 0 : 1; bool mergeLeftFlag; int saoBitIncrease = (yCbCr == 0) ? m_saoBitIncreaseY : m_saoBitIncreaseC; @@ -1122,9 +1112,7 @@ } } -tmpUSwap = m_tmpU1[yCbCr]; -m_tmpU1[yCbCr] = m_tmpU2[yCbCr]; -m_tmpU2[yCbCr] = tmpUSwap; +std::swap(m_tmpU1[yCbCr], m_tmpU2[yCbCr]); } } diff -r 644773b85329 -r a3f4317f4acd source/Lib/TLibEncoder/TEncCu.cpp --- a/source/Lib/TLibEncoder/TEncCu.cpp Wed Jul 09 13:55:42 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncCu.cpp Wed Jul 09 18:27:26 2014 +0900 @@ -1258,22 +1258,11 @@ uint64_t bestCost = m_rdCost-psyRdEnabled() ? outBestCU-m_totalPsyCost : outBestCU-m_totalRDCost; if (tempCost bestCost) { -TComDataCU* tmp = outTempCU; -outTempCU = outBestCU; -outBestCU = tmp; - -// Change Prediction data -TComYuv* yuv = NULL; -yuv = outBestPredYuv; -outBestPredYuv = m_tmpPredYuv[depth]; -m_tmpPredYuv[depth] = yuv; - -yuv = rpcYuvReconBest; -rpcYuvReconBest = m_tmpRecoYuv[depth]; -m_tmpRecoYuv[depth] = yuv; - +std::swap(outBestCU, outTempCU); +std::swap(outBestPredYuv, m_tmpPredYuv[depth]); +std::swap(rpcYuvReconBest, m_tmpRecoYuv[depth]); m_rdSbacCoders[depth][CI_TEMP_BEST]-store(m_rdSbacCoders[depth][CI_NEXT_BEST]); -} +} outTempCU-setQPSubParts(origQP, 0, depth); outTempCU-setSkipFlagSubParts(false, 0, depth); if (!bestIsSkip) @@ -1446,21 +1435,14 @@ if (tempCost bestCost) { -TComYuv* yuv; // Change Information data -TComDataCU* cu = outBestCU; -outBestCU = outTempCU; -outTempCU = cu; +std::swap(outBestCU, outTempCU); // Change Prediction data -yuv = m_bestPredYuv[depth]; -m_bestPredYuv[depth] = m_tmpPredYuv[depth]; -m_tmpPredYuv[depth] = yuv; +std::swap(m_bestPredYuv[depth], m_tmpPredYuv[depth]); // Change 
Reconstruction data -yuv =
Re: [x265] [PATCH 1 of 4] cu, search: remove redundant m_rdGoOnSbacCoder pointer
This series is awesome... On Fri, Jul 11, 2014 at 6:00 AM, Steve Borho st...@borho.org wrote: # HG changeset patch # User Steve Borho st...@borho.org # Date 1405031594 18000 # Thu Jul 10 17:33:14 2014 -0500 # Node ID d6c423c66e4d5a06dc4dccbd9eade7a21f9c8adf # Parent 57a4c1c2274ec6b40f4a138523c6b67ffa853e09 cu, search: remove redundant m_rdGoOnSbacCoder pointer m_rdGoOnSbacCoder was always either pointing to the same SBac as m_sbacCoder (for calls to compressCU()) or it was supposed to be unused (for calls to encodeCU) diff -r 57a4c1c2274e -r d6c423c66e4d source/Lib/TLibEncoder/TEncCu.cpp --- a/source/Lib/TLibEncoder/TEncCu.cpp Thu Jul 10 14:13:30 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncCu.cpp Thu Jul 10 17:33:14 2014 -0500 @@ -75,7 +75,6 @@ m_rdCost = NULL; m_sbacCoder = NULL; m_rdSbacCoders= NULL; -m_rdGoOnSbacCoder = NULL; m_bBitCounting= false; } @@ -1348,7 +1347,7 @@ // Encode Coefficients bool bEncodeDQP = m_bEncodeDQP; m_sbacCoder-codeCoeff(outTempCU, 0, depth, outTempCU-getCUSize(0), bEncodeDQP); -m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); +m_sbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); outTempCU-m_totalBits = m_sbacCoder-getNumberOfWrittenBits(); outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits; @@ -1397,7 +1396,7 @@ // Encode Coefficients bool bCodeDQP = m_bEncodeDQP; m_sbacCoder-codeCoeff(outTempCU, 0, depth, outTempCU-getCUSize(0), bCodeDQP); -m_rdGoOnSbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); +m_sbacCoder-store(m_rdSbacCoders[depth][CI_TEMP_BEST]); outTempCU-m_totalBits = m_sbacCoder-getNumberOfWrittenBits(); outTempCU-m_coeffBits = outTempCU-m_totalBits - outTempCU-m_mvBits; diff -r 57a4c1c2274e -r d6c423c66e4d source/Lib/TLibEncoder/TEncCu.h --- a/source/Lib/TLibEncoder/TEncCu.h Thu Jul 10 14:13:30 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncCu.h Thu Jul 10 17:33:14 2014 -0500 @@ -124,7 +124,6 @@ // RD SBac pointers SBac (*m_rdSbacCoders)[CI_NUM]; -SBac*m_rdGoOnSbacCoder; uint8_t 
m_totalDepth; diff -r 57a4c1c2274e -r d6c423c66e4d source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Thu Jul 10 14:13:30 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Thu Jul 10 17:33:14 2014 -0500 @@ -68,7 +68,6 @@ m_trQuant = NULL; m_sbacCoder = NULL; m_rdSbacCoders = NULL; -m_rdGoOnSbacCoder = NULL; m_numLayers = 0; } @@ -636,7 +635,7 @@ if (checkTransformSkip || checkTQbypass) { //- store original entropy coding status - - m_rdGoOnSbacCoder-store(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]); + m_sbacCoder-store(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]); uint32_t singleDistYTmp = 0; uint32_t singlePsyEnergyYTmp = 0; @@ -694,10 +693,10 @@ bestTQbypass = singleTQbypass; bestModeId = modeId; if (bestModeId == firstCheckId) - m_rdGoOnSbacCoder-store(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]); + m_sbacCoder-store(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]); } if (modeId == firstCheckId) - m_rdGoOnSbacCoder-load(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]); + m_sbacCoder-load(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]); } cu-setTransformSkipSubParts(checkTransformSkip ? 
bestModeId : 0, TEXT_LUMA, absPartIdx, fullDepth); @@ -708,7 +707,7 @@ { xLoadIntraResultQT(cu, absPartIdx, log2TrSize, reconQt, reconQtStride); cu-setCbfSubParts(singleCbfY trDepth, TEXT_LUMA, absPartIdx, fullDepth); - m_rdGoOnSbacCoder-load(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]); + m_sbacCoder-load(m_rdSbacCoders[fullDepth][CI_TEMP_BEST]); } else { @@ -719,7 +718,7 @@ } else { - m_rdGoOnSbacCoder-store(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]); + m_sbacCoder-store(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]); //- code luma block with given intra prediction mode and store Cbf- cu-setTransformSkipSubParts(0, TEXT_LUMA, absPartIdx, fullDepth); @@ -748,11 +747,11 @@ //- store full entropy coding status, load original entropy coding status - if (bCheckFull) { - m_rdGoOnSbacCoder-store(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_TEST]); - m_rdGoOnSbacCoder-load(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]); + m_sbacCoder-store(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_TEST]); + m_sbacCoder-load(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]); } else - m_rdGoOnSbacCoder-store(m_rdSbacCoders[fullDepth][CI_QT_TRAFO_ROOT]); +
Re: [x265] Many more warnings by GCC 4.8.2
Thanks, Mario. Except for the first one, most are harmless. But, we'll be fixing those right away. On Mon, Jul 21, 2014 at 9:59 AM, Mario Rohkrämer cont...@ligh.de wrote: No panic; I know that many reasons for warnings are less than serious. Just reporting. __ h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp: In member function 'void x265::TComWeightPrediction:: getWpScaling(x265::TComDataCU*, int, int, x265::WeightParam*, x265::WeightParam*)': h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:518:62: warning: suggest braces around empty body in an 'else' statement [-Wempty-body] X265_CHECK(0, unexpected wpScaling configuration\n); ^ h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp: In member function 'void x265::TComWeightPrediction:: xWeightedPredictionBi(x265::TComDataCU*, x265::TComYuv*, x265::TComYuv*, int, int, uint32_t, int, int, x265::TComYuv*, bool, bool)': h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:537:45: warning: 'pwp1' may be used uninitialized in this function [-Wmaybe-uninitialized] wp1[yuv].round = wp0[yuv].round; ^ h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:567:26: note: 'pwp1' was declared here WeightParam *pwp0, *pwp1; ^ h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:530:51: warning: 'pwp0' may be used uninitialized in this function [-Wmaybe-uninitialized] wp0[yuv].w = wp0[yuv].inputWeight; ^ h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:567:19: note: 'pwp0' was declared here WeightParam *pwp0, *pwp1; ^ h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp: In member function 'void x265::TComWeightPrediction:: xWeightedPredictionBi(x265::TComDataCU*, x265::ShortYuv*, x265::ShortYuv*, int, int, uint32_t, int, int, x265::TComYuv*, bool, bool)': h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:537:45: warning: 'pwp1' may be used uninitialized in this 
function [-Wmaybe-uninitialized] wp1[yuv].round = wp0[yuv].round; ^ h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:603:26: note: 'pwp1' was declared here WeightParam *pwp0, *pwp1; ^ h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:530:51: warning: 'pwp0' may be used uninitialized in this function [-Wmaybe-uninitialized] wp0[yuv].w = wp0[yuv].inputWeight; ^ h:/MSYS/home/LigH/x265/source/Lib/TLibCommon/TComWeightPrediction.cpp:603:19: note: 'pwp0' was declared here WeightParam *pwp0, *pwp1; ^ __ h:/MSYS/home/LigH/x265/source/encoder/level.cpp: In function 'void x265::determineLevel(const x265_param, x265::Profile::Name, x265::Level::Name, x265::Level::Tier)': h:/MSYS/home/LigH/x265/source/encoder/level.cpp:143:24: warning: array subscript is above array bounds [-Warray-bounds] while (levels[i].levelIdc param.levelIdc levels[i].levelIdc) ^ h:/MSYS/home/LigH/x265/source/encoder/level.cpp:143:24: warning: array subscript is above array bounds [-Warray-bounds] __ h:/MSYS/home/LigH/x265/source/encoder/ratecontrol.cpp: In member function 'bool x265::RateControl::initPass2()': h:/MSYS/home/LigH/x265/source/encoder/ratecontrol.cpp:728:6: warning: 'minVal' may be used uninitialized in this function [-Wmaybe-uninitialized] bool RateControl::initPass2() ^ h:/MSYS/home/LigH/x265/source/encoder/ratecontrol.cpp:728:6: warning: 'maxVal' may be used uninitialized in this function [-Wmaybe-uninitialized] h:/MSYS/home/LigH/x265/source/encoder/ratecontrol.cpp:728:6: warning: 'a' may be used uninitialized in this function [-Wmaybe-uninitialized] h:/MSYS/home/LigH/x265/source/encoder/ratecontrol.cpp:728:6: warning: 'minVal' may be used uninitialized in this function [-Wmaybe-uninitialized] h:/MSYS/home/LigH/x265/source/encoder/ratecontrol.cpp:728:6: warning: 'maxVal' may be used uninitialized in this function [-Wmaybe-uninitialized] h:/MSYS/home/LigH/x265/source/encoder/ratecontrol.cpp:728:6: warning: 'a' may be used uninitialized in this 
function [-Wmaybe-uninitialized] -- Fun and success! Mario *LigH* Rohkrämer mailto:cont...@ligh.de ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] Custom LowRes scale
Thanks, this is certainly an enhancement to x265 lookahead. We would be interested in this - especially if you can also include some efficiency (bitrate vs SSIM) metrics that describe the penalty moving from X265_LOWRES_SCALE of 4 to higher scales. On Mon, Jul 21, 2014 at 8:49 PM, Nicolas Morey-Chaisemartin nmo...@kalray.eu wrote: Hi, We recently profiled x265 pre-analysis to estimate what performance we could reach using our accelerator and I was quite disappointed by the performance. When running on a Core-i7 with AVX at roughly 2.7GHz, we barely reached the 30fps mark using ultrafast preset on a 4K video. After a little bit of browsing I realized that work in LowRes is always done at 1/4th of the final resolution which seems fair but requires a huge amount of work for 4K. It seemed straightforward enough to change the divider at LowRes initialization but it seems there are a lot of hard-coded values that depend both on the LowRes divider and the LowRes CU Size. Here's a patch (definitely not applicable like this but just to give an idea of where I'm going) that seems to fix most of the hard-coded values. It still works with a X265_LOWRES_SCALE of 4 and the perf is definitely improving (29fps => 40fps on a 2048x1024 medium preset on an E5504). Would you be interested in a clean version of this? At least the hard-coded CU_SIZE part? IMHO it would be better to have a dynamic value for LowRes depending on preset (or equivalent) and the input resolution... 1/4th is fast enough in HD not to be an issue but for RT streams in 4K or more, 1/16 will be compulsory. 
Nicolas --- x265/source/common/common.h | 1 + x265/source/common/lowres.cpp| 4 ++-- x265/source/encoder/frameencoder.cpp | 7 --- x265/source/encoder/ratecontrol.cpp | 16 x265/source/encoder/slicetype.cpp| 8 5 files changed, 19 insertions(+), 17 deletions(-) diff --git a/x265/source/common/common.h b/x265/source/common/common.h index 06f60e7..00e73fc 100644 --- a/x265/source/common/common.h +++ b/x265/source/common/common.h @@ -156,6 +156,7 @@ typedef int32_t coeff_t; // transform coefficient // high cost estimates (intra and inter both suffer) #define X265_LOWRES_CU_SIZE 8 #define X265_LOWRES_CU_BITS 3 +#define X265_LOWRES_SCALE 2 #define X265_MALLOC(type, count)(type*)x265_malloc(sizeof(type) * (count)) #define X265_FREE(ptr) x265_free(ptr) diff --git a/x265/source/common/lowres.cpp b/x265/source/common/lowres.cpp index 5fc2f6b..6138023 100644 --- a/x265/source/common/lowres.cpp +++ b/x265/source/common/lowres.cpp @@ -31,8 +31,8 @@ bool Lowres::create(TComPicYuv *orig, int _bframes, bool bAQEnabled) { isLowres = true; bframes = _bframes; -width = orig-getWidth() / 2; -lines = orig-getHeight() / 2; +width = orig-getWidth() / X265_LOWRES_SCALE; +lines = orig-getHeight() / X265_LOWRES_SCALE; lumaStride = width + 2 * orig-getLumaMarginX(); if (lumaStride 31) lumaStride += 32 - (lumaStride 31); diff --git a/x265/source/encoder/frameencoder.cpp b/x265/source/encoder/ frameencoder.cpp index 8c3ee26..7213f60 100644 --- a/x265/source/encoder/frameencoder.cpp +++ b/x265/source/encoder/frameencoder.cpp @@ -1300,9 +1300,10 @@ int FrameEncoder::calcQpForCu(uint32_t cuAddr, double baseQp) /* Derive qpOffet for each CU by averaging offsets for all 16x16 blocks in the cu. 
*/ double qp_offset = 0; -int maxBlockCols = (m_frame-getPicYuvOrg()-getWidth() + (16 - 1)) / 16; -int maxBlockRows = (m_frame-getPicYuvOrg()-getHeight() + (16 - 1)) / 16; -int noOfBlocks = g_maxCUSize / 16; +int lowResCu = (X265_LOWRES_CU_SIZE * X265_LOWRES_SCALE); +int maxBlockCols = (m_frame-getPicYuvOrg()-getWidth() + (lowResCu - 1)) / lowResCu; +int maxBlockRows = (m_frame-getPicYuvOrg()-getHeight() + (lowResCu - 1)) / lowResCu; +int noOfBlocks = g_maxCUSize / lowResCu; int block_y = (cuAddr / m_frame-getPicSym()-getFrameWidthInCU()) * noOfBlocks; int block_x = (cuAddr * noOfBlocks) - block_y * m_frame-getPicSym()- getFrameWidthInCU(); diff --git a/x265/source/encoder/ratecontrol.cpp b/x265/source/encoder/ ratecontrol.cpp index 4358994..5fcc27a 100644 --- a/x265/source/encoder/ratecontrol.cpp +++ b/x265/source/encoder/ratecontrol.cpp @@ -161,8 +161,8 @@ void RateControl::calcAdaptiveQuantFrame(Frame *pic) if (m_param-rc.aqMode == X265_AQ_NONE || m_param-rc.aqStrength == 0) { /* Need to init it anyways for CU tree */ -int cuWidth = ((maxCol / 2) + X265_LOWRES_CU_SIZE - 1) X265_LOWRES_CU_BITS; -int cuHeight = ((maxRow / 2) + X265_LOWRES_CU_SIZE - 1) X265_LOWRES_CU_BITS; +int cuWidth = ((maxCol / X265_LOWRES_SCALE) + X265_LOWRES_CU_SIZE - 1) X265_LOWRES_CU_BITS; +int cuHeight = ((maxRow / X265_LOWRES_SCALE) + X265_LOWRES_CU_SIZE - 1)
Re: [x265] [PATCH] psyrdoq: implementation of psyrdoq
Does it make sense to try this for DC coefficients? On Tue, Jul 22, 2014 at 9:24 PM, Steve Borho st...@borho.org wrote: On 07/22, sumala...@multicorewareinc.com wrote: # HG changeset patch # User Sumalatha Polureddysumala...@multicorewareinc.com # Date 1406032149 -19800 # Node ID 37e03dcd2e4f0b5894880ff8c097bd6e11590459 # Parent d303b4d860e9f06396a156726dd518d0f41fe796 psyrdoq: implementation of psyrdoq diff -r d303b4d860e9 -r 37e03dcd2e4f source/Lib/TLibCommon/TComTrQuant.cpp --- a/source/Lib/TLibCommon/TComTrQuant.cpp Mon Jul 21 22:43:38 2014 -0500 +++ b/source/Lib/TLibCommon/TComTrQuant.cpp Tue Jul 22 17:59:09 2014 +0530 @@ -64,6 +64,8 @@ return y + ((x - y) ((x - y) (sizeof(int) * CHAR_BIT - 1))); // min(x, y) } +#define SIGN(x,y) ((x^(y 31))-(y 31)) + // // TComTrQuant class member functions // @@ -307,6 +309,8 @@ } uint32_t TComTrQuant::transformNxN(TComDataCU* cu, + pixel* fenc, + uint32_tfencStride, int16_t*residual, uint32_tstride, coeff_t*coeff, @@ -316,10 +320,10 @@ booluseTransformSkip, boolcurUseRDOQ) { +int trSize = 1 log2TrSize; if (cu-getCUTransquantBypass(absPartIdx)) { uint32_t numSig = 0; -int trSize = 1 log2TrSize; for (int k = 0; k trSize; k++) { for (int j = 0; j trSize; j++) @@ -339,6 +343,12 @@ const uint32_t sizeIdx = log2TrSize - 2; int useDST = (sizeIdx == 0 ttype == TEXT_LUMA cu-getPredictionMode(absPartIdx) == MODE_INTRA); int index = DCT_4x4 + sizeIdx - useDST; +if (psyRdoqEnabled()) +{ +// converting pixel to int and putting in separate buffer to take dct +primitives.square_copy_ps[sizeIdx](m_tmpfencBuf, MAX_CU_SIZE, fenc, fencStride); +primitives.dct[index](m_tmpfencBuf, m_tmpfencCoeff, stride); +} primitives.dct[index](residual, m_tmpCoeff, stride); if (m_nr-bNoiseReduction) { @@ -356,7 +366,7 @@ if (m_useRDOQ curUseRDOQ) { -return xRateDistOptQuant(cu, m_tmpCoeff, coeff, log2TrSize, ttype, absPartIdx); +return xRateDistOptQuant(cu, m_tmpfencCoeff, m_tmpCoeff, coeff, log2TrSize, ttype, absPartIdx); } return xQuant(cu, m_tmpCoeff, 
coeff, log2TrSize, ttype, absPartIdx); } @@ -505,7 +515,7 @@ * Rate distortion optimized quantization for entropy * coding engines using probability models like CABAC */ -uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t log2TrSize, +uint32_t TComTrQuant::xRateDistOptQuant(TComDataCU* cu, int32_t* fencCoeff, int32_t* srcCoeff, coeff_t* dstCoeff, uint32_t log2TrSize, TextType ttype, uint32_t absPartIdx) { uint32_t trSize = 1 log2TrSize; @@ -614,7 +624,7 @@ { level = xGetCodedLevel(costCoeff[scanPos], curCostSig, costSig[scanPos], levelDouble, maxAbsLevel, baseLevel, greaterOneBits, levelAbsBits, goRiceParam, - c1c2Idx, qbits, scaleFactor, 1); + c1c2Idx, qbits, scaleFactor, 1, srcCoeff[blkPos], fencCoeff[blkPos]); sigRateDelta[blkPos] = 0; } else @@ -631,7 +641,7 @@ curCostSig = xGetRateSigCoef(1, ctxSig); level = xGetCodedLevel(costCoeff[scanPos], curCostSig, costSig[scanPos], levelDouble, maxAbsLevel, baseLevel, greaterOneBits, levelAbsBits, goRiceParam, - c1c2Idx, qbits, scaleFactor, 0); + c1c2Idx, qbits, scaleFactor, 0, srcCoeff[blkPos], fencCoeff[blkPos]); } else { @@ -1126,7 +1136,9 @@ uint32_t c1c2Idx, int qbits, double scaleFactor, -bool last) const +bool last,
Re: [x265] [PATCH] psyrdoq: implementation of psyrdoq
OK, that's what I thought too. On Jul 23, 2014 8:55 AM, Steve Borho st...@borho.org wrote: On 07/23, Deepthi Nandakumar wrote: Does it make sense to try this for DC coefficients? My understanding is that it is not helpful, and possibly harmful. We don't want to bias the DC coefficient in any way. snipped -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] Added fast intra search option
There are a couple of warnings our regression tests caught with this. Can you take a look? source\encoder\predict.cpp(78): warning C4800: 'const unsigned char' : forcing value to bool 'true' or 'false' (performance warning) (IntraFilterType can be bool, I think?). C:\users\deepthi\code\x265\source\encoder\slicetype.cpp(1714): warning C4701: potentially uninitialized local variable 'lowmode' used Thanks, Deepthi On Wed, Aug 13, 2014 at 4:07 AM, dtyx...@gmail.com wrote: # HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1407882999 25200 # Node ID 75e4ad481b3668b1e420ede300287aa3ea3fb8d5 # Parent 8a7f4bb1d1be32fe668d410450c2e320ccae6098 Added fast intra search option This version calls intra_pred_allangs to create the predictions then the faster search with satd diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/common/param.cpp --- a/source/common/param.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/param.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -132,6 +132,7 @@ /* Intra Coding Tools */ param-bEnableConstrainedIntra = 0; param-bEnableStrongIntraSmoothing = 1; +param-bEnableFastIntra = 0; /* Inter Coding tools */ param-searchMethod = X265_HEX_SEARCH; @@ -560,6 +561,7 @@ OPT(lossless) p-bLossless = atobool(value); OPT(cu-lossless) p-bCULossless = atobool(value); OPT(constrained-intra) p-bEnableConstrainedIntra = atobool(value); +OPT(fast-intra) p-bEnableFastIntra = atobool(value); OPT(open-gop) p-bOpenGOP = atobool(value); OPT(scenecut) { @@ -1211,6 +1213,7 @@ BOOL(p-bLossless, lossless); BOOL(p-bCULossless, cu-lossless); BOOL(p-bEnableConstrainedIntra, constrained-intra); +BOOL(p-bEnableFastIntra, fast-intra); BOOL(p-bOpenGOP, open-gop); s += sprintf(s, interlace=%d, p-interlaceMode); s += sprintf(s, keyint=%d, p-keyframeMax); diff -r 8a7f4bb1d1be -r 75e4ad481b36 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/encoder/slicetype.cpp Tue Aug 12 15:36:39 2014 -0700 @@ -1242,6 +1242,7 @@ { 
m_rows[i].m_widthInCU = m_widthInCU; m_rows[i].m_heightInCU = m_heightInCU; +m_rows[i].m_param = m_param; } if (!WaveFront::init(m_heightInCU)) @@ -1676,26 +1677,86 @@ int predsize = cuSize * cuSize; -// generate 35 intra predictions into tmp +// generate 35 intra predictions into m_predictions +pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; +int icost = m_me.COST_MAX, cost, highcost, lowcost, acost = m_me.COST_MAX; +uint32_t lowmode, mode; primitives.intra_pred[sizeIdx][DC_IDX](m_predictions, cuSize, left0, above0, 0, (cuSize = 16)); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; pixel *above = (cuSize = 8) ? above1 : above0; pixel *left = (cuSize = 8) ? left1 : left0; -primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions + predsize, cuSize, left, above, 0, 0); +primitives.intra_pred[sizeIdx][PLANAR_IDX](m_predictions, cuSize, left, above, 0, 0); +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions, cuSize); +if (cost icost) +icost = cost; primitives.intra_pred_allangs[sizeIdx](m_predictions + 2 * predsize, above0, left0, above1, left1, (cuSize = 16)); -// calculate 35 satd costs, keep least cost +// calculate satd costs, keep least cost ALIGN_VAR_32(pixel, buf_trans[32 * 32]); primitives.transpose[sizeIdx](buf_trans, m_me.fenc, FENC_STRIDE); -pixelcmp_t satd = primitives.satd[partitionFromLog2Size(X265_LOWRES_CU_BITS)]; -int icost = m_me.COST_MAX, cost; -for (uint32_t mode = 0; mode 35; mode++) +// fast-intra angle search +if (m_param-bEnableFastIntra) { -if ((mode = 2) (mode 18)) +for (mode = 4;mode 35; mode += 5) +{ +if (mode 18) +cost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); +if (cost acost) +{ +lowmode = mode; +acost = cost; +} +} +mode = lowmode - 2; +if (mode 18) +lowcost = satd(buf_trans, cuSize, m_predictions[mode * predsize], cuSize); +else +lowcost = satd(m_me.fenc, 
FENC_STRIDE, m_predictions[mode * predsize], cuSize); +highcost = m_me.COST_MAX; +if (lowmode 34) +{ +mode = lowmode + 2; +if (mode 18)
Re: [x265] [PATCH] count_nonzero primitive, downscaling quantCoeff from int32_t* to int16_t*
Praveen, Can you build a mercurial queue for these quant patches - so they can be reviewed and pushed in once quant is 16-bit everywhere? Thanks, Deepthi On Thu, Aug 14, 2014 at 2:01 AM, Steve Borho st...@borho.org wrote: On 08/12, prav...@multicorewareinc.com wrote: # HG changeset patch # User Praveen Tiwari # Date 1407834530 -19800 # Node ID bb4d44663964237e4b66af6d92b2f13dbcf4f9b9 # Parent 8a7f4bb1d1be32fe668d410450c2e320ccae6098 count_nonzero primitive, downscaling quantCoeff from int32_t* to int16_t* There's not much point in applying these patches until all of the quant primitives are using short ints for coefficients. As-is this will just be a slow-down. diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/dct.cpp --- a/source/common/dct.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/dct.cpp Tue Aug 12 14:38:50 2014 +0530 @@ -815,7 +815,7 @@ return numSig; } -int count_nonzero_c(const int32_t *quantCoeff, int numCoeff) +int count_nonzero_c(const int16_t *quantCoeff, int numCoeff) { X265_CHECK(((intptr_t)quantCoeff 15) == 0, quant buffer not aligned\n); X265_CHECK(numCoeff 0 (numCoeff 15) == 0, numCoeff invalid %d\n, numCoeff); diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/primitives.h --- a/source/common/primitives.h Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/primitives.h Tue Aug 12 14:38:50 2014 +0530 @@ -163,7 +163,7 @@ typedef uint32_t (*nquant_t)(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff); typedef void (*dequant_scaling_t)(const int32_t* src, const int32_t *dequantCoef, int32_t* dst, int num, int mcqp_miper, int shift); typedef void (*dequant_normal_t)(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift); -typedef int (*count_nonzero_t)(const int32_t *quantCoeff, int numCoeff); +typedef int (*count_nonzero_t)(const int16_t *quantCoeff, int numCoeff); typedef void (*weightp_pp_t)(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, 
int round, int shift, int offset); typedef void (*weightp_sp_t)(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset); diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/quant.cpp --- a/source/common/quant.cpp Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/quant.cpp Tue Aug 12 14:38:50 2014 +0530 @@ -2,6 +2,7 @@ * Copyright (C) 2014 x265 project * * Authors: Steve Borho st...@borho.org + * Praveen Kumar Tiwari prav...@multicorewareinc.com * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -463,7 +464,17 @@ const uint32_t sizeIdx = log2TrSize - 2; int useDST = !sizeIdx ttype == TEXT_LUMA bIntra; -X265_CHECK((int)numSig == primitives.count_nonzero(coeff, 1 log2TrSize * 2), numSig differ\n); +/* This section of code is to safely convert int32_t coefficients to int16_t, once the caller function is + * optimize to take coefficients as int16_t*, it will be cleanse.*/ +int numCoeff = (1 (log2TrSize * 2)); +assert(numCoeff = 1024); +ALIGN_VAR_16(int16_t, qCoeff[32 * 32]); +for (int i = 0; i numCoeff; i++) +{ +qCoeff[i] = (coeff[i] 0x); +} + +X265_CHECK((int)numSig == primitives.count_nonzero(qCoeff, 1 log2TrSize * 2), numSig differ\n); // DC only if (numSig == 1 coeff[0] != 0 !useDST) @@ -501,7 +512,16 @@ int numCoeff = 1 log2TrSize * 2; uint32_t numSig = primitives.nquant(m_resiDctCoeff, qCoef, dstCoeff, qbits, add, numCoeff); These two loops are only here for an X265_CHECK statement that is usually compiled out. 
All of this code should have been wrapped within #if CHECKED_BUILD || _DEBUG -X265_CHECK((int)numSig == primitives.count_nonzero(dstCoeff, numCoeff), numSig differ\n); +/* This section of code is to safely convert int32_t coefficients to int16_t, once the caller function is + * optimize to take coefficients as int16_t*, it will be cleanse.*/ +assert(numCoeff = 1024); +ALIGN_VAR_16(int16_t, qCoeff[32 * 32]); +for (int i = 0; i numCoeff; i++) +{ +qCoeff[i] = (dstCoeff[i] 0x); +} + +X265_CHECK((int)numSig == primitives.count_nonzero(qCoeff, numCoeff), numSig differ\n); if (!numSig) return 0; diff -r 8a7f4bb1d1be -r bb4d44663964 source/common/x86/pixel-util.h --- a/source/common/x86/pixel-util.h Tue Aug 12 01:11:39 2014 -0500 +++ b/source/common/x86/pixel-util.h Tue Aug 12 14:38:50 2014 +0530 @@ -2,6 +2,7 @@ * Copyright (C) 2013 x265 project * * Authors: Steve Borho st...@borho.org + *
Re: [x265] psycho-visual feature tuning, feature freeze
I have removed the temporary restriction that disabled rdoq whenever psy-rd was enabled; it was in place because rdoq was not psy-aware at that point. RDOQ is turned on in rdLevels 4 and above. Psy-rdoq can be turned on if rdoq is enabled. Psy-rd can be turned on with or without rdoq/psy-rdoq. On Sat, Aug 16, 2014 at 11:24 PM, Steve Borho st...@borho.org wrote: Hello, I've pushed some (what I hope to be) final tunings of the two new psycho-visual optimization features. The features are still disabled by default, but the recommended values are now 1.0 for both. http://x265.readthedocs.org/en/stable/cli.html#psycho-visual-options Please try them out. The stable branch has been merged with default in preparation for a 1.3 tag, which should happen early this week. -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] psycho-visual feature tuning, feature freeze
Our general suggestion would be to use psy-rd and psy-rdoq together for best visual quality. On Tue, Aug 19, 2014 at 2:18 PM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: I have removed the temporary restriction that disabled rdoq whenever psy-rd was enabled; it was in place because rdoq was not psy-aware at that point. RDOQ is turned on in rdLevels 4 and above. Psy-rdoq can be turned on if rdoq is enabled. Psy-rd can be turned on with or without rdoq/psy-rdoq. On Sat, Aug 16, 2014 at 11:24 PM, Steve Borho st...@borho.org wrote: Hello, I've pushed some (what I hope to be) final tunings of the two new psycho-visual optimization features. The features are still disabled by default, but the recommended values are now 1.0 for both. http://x265.readthedocs.org/en/stable/cli.html#psycho-visual-options Please try them out. The stable branch has been merged with default in preparation for a 1.3 tag, which should happen early this week. -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx]
Ashok is already working on pre-calculating these inside-picture flags along with more refactors. After his refactors are in, we can check whether padding will improve performance. In fact, very likely he already has a local version of the logic in this patch. On Mon, Aug 25, 2014 at 10:46 PM, Steve Borho st...@borho.org wrote: On 08/25, Satoshi Nakagawa wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1408956792 -32400 # Mon Aug 25 17:53:12 2014 +0900 # Node ID 7145e57c722a94a06faec33e3041442032a1892f # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx] Queued for default, thanks. There seems to be a lot of logic that checks for 'inside picture bounds'. It seems like we could save a lot of CPU cycles if we padded input pictures to the max-ctu size instead of the min-ctu size and adjusted the conformance window accordingly. diff -r 6e6756f94b27 -r 7145e57c722a source/Lib/TLibCommon/TComDataCU.cpp --- a/source/Lib/TLibCommon/TComDataCU.cppFri Aug 22 15:53:34 2014 -0500 +++ b/source/Lib/TLibCommon/TComDataCU.cppMon Aug 25 17:53:12 2014 +0900 @@ -816,12 +816,12 @@ TComDataCU* TComDataCU::getPUAboveRight(uint32_t arPartUnitIdx, uint32_t curPartUnitIdx) { +if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples) +return NULL; + uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx]; uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); -if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] + UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples) -return NULL; - if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 1, numPartInCUSize)) { if (!RasterAddress::isZeroRow(absPartIdxRT, numPartInCUSize)) @@ -857,14 +857,11 @@ TComDataCU* TComDataCU::getPUBelowLeft(uint32_t blPartUnitIdx, uint32_t curPartUnitIdx) { -uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; +if 
((m_pic-getCU(m_cuAddr)-getCUPelY() + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) = m_slice-m_sps-picHeightInLumaSamples) +return NULL; -if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_rasterToPelY[absPartIdxLB] + UNIT_SIZE) = m_slice-m_sps-picHeightInLumaSamples) -{ -return NULL; -} - -uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); +uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx]; +uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - 1, numPartInCUSize)) { @@ -895,15 +892,14 @@ TComDataCU* TComDataCU::getPUBelowLeftAdi(uint32_t blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) { -uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; - -if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_rasterToPelY[absPartIdxLB] + (partUnitOffset LOG2_UNIT_SIZE)) = +if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset LOG2_UNIT_SIZE)) = m_slice-m_sps-picHeightInLumaSamples) { return NULL; } -uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); +uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx]; +uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - partUnitOffset, numPartInCUSize)) { @@ -938,14 +934,13 @@ TComDataCU* TComDataCU::getPUAboveRightAdi(uint32_t arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) { -uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx]; - -if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] + (partUnitOffset LOG2_UNIT_SIZE)) = +if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset LOG2_UNIT_SIZE)) = m_slice-m_sps-picWidthInLumaSamples) { return NULL; } +uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx]; uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - partUnitOffset, numPartInCUSize)) @@ -954,7 +949,7 @@ { if (curPartUnitIdx 
g_rasterToZscan[absPartIdxRT - numPartInCUSize + partUnitOffset]) { -uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; +uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; arPartUnitIdx = g_rasterToZscan[absPartIdxRT - numPartInCUSize + partUnitOffset]; if (RasterAddress::isEqualRowOrCol(absPartIdxRT,
Re: [x265] [PATCH 1 of 3] analysis: fix inter hash mistake with --cu-lossless
Thanks, Min. This is a solution but will affect performance with an extra TComDataCU*. I have sent another patch where I'm just re-encoding the CU if lossless is chosen as the best mode. This will not affect normal analysis. Can you review that? Deepthi On Tue, Aug 26, 2014 at 3:47 AM, Steve Borho st...@borho.org wrote: # HG changeset patch # User Min Chen chenm...@163.com # Date 1409002891 18000 # Mon Aug 25 16:41:31 2014 -0500 # Node ID 0bf2756898bc78e5660a6b607b2f3cda97834264 # Parent 5acfb12ec5d17cc700e313fc99248e2408e5967b analysis: fix inter hash mistake with --cu-lossless diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 17:53:12 2014 +0900 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 16:41:31 2014 -0500 @@ -2293,7 +2293,7 @@ * \returns void */ void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, - ShortYuv* outBestResiYuv, TComYuv* outReconYuv) + ShortYuv* outBestResiYuv, TComYuv* outReconYuv, TComDataCU* tmpCu) { X265_CHECK(!cu-isIntra(0), intra CU not expected\n); @@ -2321,6 +2321,7 @@ } uint64_t bestCost = MAX_INT64; +bool bestTransquantBypassFlag = bIsTQBypassEnable; for (uint32_t modeId = 0; modeId numModes; modeId++) { @@ -2388,15 +2389,29 @@ if (cu-getQtRootCbf(0)) xSetResidualQTData(cu, 0, outBestResiYuv, depth, true); +bestTransquantBypassFlag = bIsLosslessMode; bestBits = bits; bestCost = cost; bestCoeffBits = cu-m_coeffBits; m_entropyCoder-store(m_rdEntropyCoders[depth][CI_TEMP_BEST]); } + +// Save lossless mode coeff +if (bIsLosslessMode) +{ +tmpCu-copyPartFrom(cu, 0, depth, false); +} } X265_CHECK(bestCost != MAX_INT64, no best cost\n); +if (bestTransquantBypassFlag !m_param-bLossless) +{ +assert(log2CUSize 2); +cu-setCUTransquantBypassSubParts(true, 0, depth); +cu-copyPartFrom(tmpCu, 0, depth, false); +} + if (cu-getQtRootCbf(0)) outReconYuv-addClip(predYuv, outBestResiYuv, log2CUSize); else 
diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.h --- a/source/Lib/TLibEncoder/TEncSearch.h Mon Aug 25 17:53:12 2014 +0900 +++ b/source/Lib/TLibEncoder/TEncSearch.h Mon Aug 25 16:41:31 2014 -0500 @@ -147,7 +147,7 @@ /// encode residual and compute rd-cost for inter mode void encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv, - TComYuv* reconYuv); + TComYuv* reconYuv, TComDataCU* tmpCu); void encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TComYuv* reconYuv); void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv, diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Mon Aug 25 17:53:12 2014 +0900 +++ b/source/encoder/analysis.cpp Mon Aug 25 16:41:31 2014 -0500 @@ -82,7 +82,7 @@ uint32_t sizeL = cuSize * cuSize; uint32_t sizeC = sizeL (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp)); -ok = m_memPool[i].initialize(numPartitions, sizeL, sizeC, 8, tqBypass); +ok = m_memPool[i].initialize(numPartitions, sizeL, sizeC, 9, tqBypass); m_interCU_2Nx2N[i] = new TComDataCU; m_interCU_2Nx2N[i]-create(m_memPool[i], numPartitions, cuSize, csp, 0, tqBypass); @@ -108,6 +108,9 @@ m_tempCU[i] = new TComDataCU; m_tempCU[i]-create(m_memPool[i], numPartitions, cuSize, csp, 7, tqBypass); +m_tempLosslessCU[i] = new TComDataCU; +m_tempLosslessCU[i]-create(m_memPool[i], numPartitions, cuSize, csp, 8, tqBypass); + m_bestPredYuv[i] = new TComYuv; ok = m_bestPredYuv[i]-create(cuSize, cuSize, csp); @@ -158,6 +161,7 @@ delete m_bestMergeCU[i]; delete m_bestCU[i]; delete m_tempCU[i]; +delete m_tempLosslessCU[i]; if (m_bestPredYuv m_bestPredYuv[i]) { @@ -240,6 +244,7 @@ // initialize CU data m_bestCU[0]-initCU(cu-m_pic, cu-getAddr()); m_tempCU[0]-initCU(cu-m_pic, cu-getAddr()); +m_tempLosslessCU[0]-initCU(cu-m_pic, cu-getAddr()); // analysis of CU uint32_t numPartition = cu-getTotalNumPart(); @@ 
-394,6 +399,7 @@ uint32_tnextDepth = depth + 1; TComDataCU* subBestPartCU = m_bestCU[nextDepth]; TComDataCU* subTempPartCU = m_tempCU[nextDepth]; +TComDataCU*
Re: [x265] fix m_initSliceContext (uninitialised m_sliceQp)
Thanks, queued for default (does not apply on stable due to the SAO refactor). On Tue, Aug 26, 2014 at 1:55 PM, Satoshi Nakagawa nakagawa...@oki.com wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1409041357 -32400 # Tue Aug 26 17:22:37 2014 +0900 # Node ID c18255467f12da1a780340ade55292c32d95bfdd # Parent 5acfb12ec5d17cc700e313fc99248e2408e5967b fix m_initSliceContext (uninitialised m_sliceQp) diff -r 5acfb12ec5d1 -r c18255467f12 source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Mon Aug 25 17:53:12 2014 +0900 +++ b/source/encoder/frameencoder.cpp Tue Aug 26 17:22:37 2014 +0900 @@ -158,8 +158,6 @@ int64_t startCompressTime = x265_mdate(); Slice* slice = m_frame-m_picSym-m_slice; -m_initSliceContext.resetEntropy(slice); - /* Emit access unit delimiter unless this is the first frame and the user is * not repeating headers (since AUD is supposed to be the first NAL in the access * unit) */ @@ -225,12 +223,15 @@ m_frameFilter.m_sao.m_refDepth = 2 + !IS_REFERENCED(slice); break; } -m_frameFilter.start(m_frame); // Clip slice QP to 0-51 spec range before encoding qp = Clip3(-QP_BD_OFFSET, MAX_QP, qp); slice-m_sliceQp = qp; +m_initSliceContext.resetEntropy(slice); + +m_frameFilter.start(m_frame); + if (m_frame-m_lowres.bKeyframe) { if (m_param-bEmitHRDSEI) ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] Entropy: Replaced getCtxQtCbf() with table
Is it also possible to change codeQtCbf so that only uint32_t cbf needs to be passed in, and not entire TComDataCU*? This will help since our coming refactors will get rid of TComDataCU. On Mon, Sep 1, 2014 at 3:17 PM, Steve Borho st...@borho.org wrote: On 09/01, as...@multicorewareinc.com wrote: # HG changeset patch # User Ashok Kumar Mishraas...@multicorewareinc.com # Date 1409562155 -19800 # Mon Sep 01 14:32:35 2014 +0530 # Node ID e7f58267b15c2d6d477bd370f936d00377d2bbc3 # Parent 4d96eb40f4d6e5cd0883a0a61f20bf00c07ed8f0 Entropy: Replaced getCtxQtCbf() with table nice! but a nit diff -r 4d96eb40f4d6 -r e7f58267b15c source/Lib/TLibCommon/ContextTables.h --- a/source/Lib/TLibCommon/ContextTables.h Thu Aug 28 13:14:34 2014 +0530 +++ b/source/Lib/TLibCommon/ContextTables.h Mon Sep 01 14:32:35 2014 +0530 @@ -144,6 +144,7 @@ #define NEIGHBOURHOOD_00_CONTEXT_1_THRESHOLD_4x4 3 #define NEIGHBOURHOOD_00_CONTEXT_2_THRESHOLD_4x4 1 +static const uint32_t CtxCbf[3][3] = { { 1, 0, 0 }, { 2, 3, 4 }, { 2, 3, 4} }; upper-case names are reserved for classes/structs. static const uint32_t ctxCbf[MAX_NUM_CHANNEL_TYPE][3] = ... static const uint32_t significanceMapContextSetStart[MAX_NUM_CHANNEL_TYPE][3] = { { 0, 9, 21 }, { 0, 9, 12 } }; static const uint32_t significanceMapContextSetSize[MAX_NUM_CHANNEL_TYPE][3] = { { 9, 12, 6 }, { 9, 3, 3 } }; static const uint32_t nonDiagonalScan8x8ContextOffset[MAX_NUM_CHANNEL_TYPE] = { 6, 0 }; diff -r 4d96eb40f4d6 -r e7f58267b15c source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Thu Aug 28 13:14:34 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Sep 01 14:32:35 2014 +0530 @@ -2798,7 +2798,7 @@ else singleCostY = m_rdCost.calcRdCost(nonZeroDistY, singleBitsComp[TEXT_LUMA][0]); m_entropyCoder-resetBits(); -m_entropyCoder-codeQtCbfZero(cu, TEXT_LUMA, trMode); +m_entropyCoder-codeQtCbfZero(TEXT_LUMA, trMode); unrelated to this patch, but why is depth here stored in trMode/trModeC? 
const uint32_t nullBitsY = m_entropyCoder-getNumberOfWrittenBits(); uint64_t nullCostY = 0; if (m_rdCost.m_psyRd) @@ -2826,7 +2826,7 @@ else if (checkTransformSkipY) { m_entropyCoder-resetBits(); -m_entropyCoder-codeQtCbfZero(cu, TEXT_LUMA, trMode); +m_entropyCoder-codeQtCbfZero(TEXT_LUMA, trMode); const uint32_t nullBitsY = m_entropyCoder-getNumberOfWrittenBits(); if (m_rdCost.m_psyRd) minCost[TEXT_LUMA][0] = m_rdCost.calcPsyRdCost(distY, nullBitsY, psyEnergyY); @@ -2898,7 +2898,7 @@ else singleCostU = m_rdCost.calcRdCost(nonZeroDistU, singleBitsComp[TEXT_CHROMA_U][tuIterator.section]); m_entropyCoder-resetBits(); -m_entropyCoder-codeQtCbfZero(cu, TEXT_CHROMA_U, trMode); +m_entropyCoder-codeQtCbfZero(TEXT_CHROMA_U, trMode); const uint32_t nullBitsU = m_entropyCoder-getNumberOfWrittenBits(); uint64_t nullCostU = 0; if (m_rdCost.m_psyRd) @@ -2926,7 +2926,7 @@ else if (checkTransformSkipUV) { m_entropyCoder-resetBits(); -m_entropyCoder-codeQtCbfZero(cu, TEXT_CHROMA_U, trModeC); +m_entropyCoder-codeQtCbfZero(TEXT_CHROMA_U, trModeC); const uint32_t nullBitsU = m_entropyCoder-getNumberOfWrittenBits(); if (m_rdCost.m_psyRd) minCost[TEXT_CHROMA_U][tuIterator.section] = m_rdCost.calcPsyRdCost(distU, nullBitsU, psyEnergyU); @@ -2980,7 +2980,7 @@ else singleCostV = m_rdCost.calcRdCost(nonZeroDistV, singleBitsComp[TEXT_CHROMA_V][tuIterator.section]); m_entropyCoder-resetBits(); -m_entropyCoder-codeQtCbfZero(cu, TEXT_CHROMA_V, trMode); +m_entropyCoder-codeQtCbfZero(TEXT_CHROMA_V, trMode); const uint32_t nullBitsV = m_entropyCoder-getNumberOfWrittenBits(); uint64_t nullCostV = 0; if (m_rdCost.m_psyRd) @@ -3008,7 +3008,7 @@ else if (checkTransformSkipUV) { m_entropyCoder-resetBits(); -m_entropyCoder-codeQtCbfZero(cu, TEXT_CHROMA_V, trModeC); +m_entropyCoder-codeQtCbfZero(TEXT_CHROMA_V, trModeC); const uint32_t nullBitsV = m_entropyCoder-getNumberOfWrittenBits();
Re: [x265] [PATCH] fix: hash/binary mismatch for new CU structure holds CU-specific info
Thanks, Ashok. Queued. On Wed, Sep 3, 2014 at 8:20 PM, as...@multicorewareinc.com wrote: # HG changeset patch # User Ashok Kumar Mishraas...@multicorewareinc.com # Date 1409753842 -19800 # Wed Sep 03 19:47:22 2014 +0530 # Node ID 00c381bf615840180bb6ce924dc9be84c060938f # Parent 62c4779fb0bb35d5d8a69678e9e8aa81272f0115 fix: hash/binary mismatch for new CU structure holds CU-specific info diff -r 62c4779fb0bb -r 00c381bf6158 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Thu Aug 28 13:14:34 2014 +0530 +++ b/source/encoder/analysis.cpp Wed Sep 03 19:47:22 2014 +0530 @@ -420,13 +420,13 @@ checkIntra(outBestCU, outTempCU, SIZE_2Nx2N, cu); if (depth == g_maxCUDepth) { -checkIntra(outBestCU, outTempCU, SIZE_NxN, cu); +checkIntra(outBestCU, outTempCU, SIZE_NxN, cu); } else { -m_entropyCoder-resetBits(); -m_entropyCoder-codeSplitFlag(outBestCU, 0, depth); -outBestCU-m_totalBits += m_entropyCoder-getNumberOfWrittenBits(); // split bits +m_entropyCoder-resetBits(); +m_entropyCoder-codeSplitFlag(outBestCU, 0, depth); +outBestCU-m_totalBits += m_entropyCoder-getNumberOfWrittenBits(); // split bits } if (m_rdCost.m_psyRd) outBestCU-m_totalPsyCost = m_rdCost.calcPsyRdCost(outBestCU-m_totalDistortion, outBestCU-m_totalBits, outBestCU-m_psyEnergy); @@ -437,6 +437,7 @@ // copy original YUV samples in lossless mode if (outBestCU-isLosslessCoded(0)) fillOrigYUVBuffer(outBestCU, m_origYuv[depth]); + // further split if (cu_split_flag) { @@ -445,18 +446,17 @@ TComDataCU* subTempPartCU = m_tempCU[nextDepth]; for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++) { -int qp = outTempCU-getQP(0); -subBestPartCU-initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init. -if (cu-flags CU::PRESENT) +CU *child_cu = cuPicsym-m_CULocalData + cu-childIdx + partUnitIdx; + +if (child_cu-flags CU::PRESENT) { +int qp = outTempCU-getQP(0); +subBestPartCU-initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init. 
subTempPartCU-initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init. if (0 == partUnitIdx) //initialize RD with previous depth buffer m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[depth][CI_CURR_BEST]); else m_rdEntropyCoders[nextDepth][CI_CURR_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]); -CU *child_cu = cuPicsym-m_CULocalData + cu-childIdx + partUnitIdx; -if (!(child_cu-flags CU::PRESENT)) -continue; compressIntraCU(subBestPartCU, subTempPartCU, nextDepth, bInsidePicture, cuPicsym, child_cu); outTempCU-copyPartFrom(subBestPartCU, partUnitIdx, nextDepth); // Keep best part data to current temporary data. ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] Resolve gcc warnings
Thanks, pushed. On Thu, Sep 4, 2014 at 4:06 AM, dtyx...@gmail.com wrote: # HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1409783716 25200 # Node ID fed3ddf3f84f3c4e4c72bcc0818a07a99ec7312e # Parent 62c4779fb0bb35d5d8a69678e9e8aa81272f0115 Resolve gcc warnings * more parenthesis for macro * changed signed to unsigned int diff -r 62c4779fb0bb -r fed3ddf3f84f source/common/common.h --- a/source/common/common.hThu Aug 28 13:14:34 2014 +0530 +++ b/source/common/common.hWed Sep 03 15:35:16 2014 -0700 @@ -291,7 +291,7 @@ } }; -#define CU_SET_FLAG(bitfield, flag, value) (bitfield) = (bitfield) (~(flag)) | ((~((value) - 1)) (flag)) +#define CU_SET_FLAG(bitfield, flag, value) (bitfield) = ((bitfield) (~(flag))) | ((~((value) - 1)) (flag)) #define CU_GET_FLAG(bitfield, flag) (!!((bitfield) (flag))) } /* defined in common.cpp */ diff -r 62c4779fb0bb -r fed3ddf3f84f source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Thu Aug 28 13:14:34 2014 +0530 +++ b/source/encoder/analysis.cpp Wed Sep 03 15:35:16 2014 -0700 @@ -258,7 +258,7 @@ void Analysis::loadCTUData(TComDataCU* parentCU) { -int8_t cuRange[2]= {MIN_LOG2_CU_SIZE, g_log2Size[m_param-maxCUSize]}; +uint8_t cuRange[2]= {MIN_LOG2_CU_SIZE, g_log2Size[m_param-maxCUSize]}; // Initialize the coding blocks inside the CTB for (int rangeIdx = cuRange[1], rangeCUIdx = 0; rangeIdx = cuRange[0]; rangeIdx--) ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH 2 of 3] asm: optimize nquant by PSIGND, improve 13k cycles - 11k cycles
Min, Praveen has sent a number of patches on changing the entire interface for quant such that the coefficients are now 16-bit instead of 32-bit. Your patches still assume they are 32-bit? Can you review all his patches (8-10 patches) and see if we're moving in the right direction? Thanks, Deepthi On Thu, Sep 4, 2014 at 5:07 AM, Min Chen chenm...@163.com wrote: # HG changeset patch # User Min Chen chenm...@163.com # Date 1409787419 25200 # Node ID 4ca9e972f48cb4530ca7181ad7cec351568a99b3 # Parent 94bd00d1af5d8c5f6f26f97c50a727588a860714 asm: optimize nquant by PSIGND, improve 13k cycles - 11k cycles diff -r 94bd00d1af5d -r 4ca9e972f48c source/common/dct.cpp --- a/source/common/dct.cpp Wed Sep 03 16:36:44 2014 -0700 +++ b/source/common/dct.cpp Wed Sep 03 16:36:59 2014 -0700 @@ -801,6 +801,10 @@ { uint32_t numSig = 0; +X265_CHECK((numCoeff % 16) == 0, number of quant coeff is not multiple of 4x4\n); +X265_CHECK((uint32_t)add ((uint32_t)1 qBits), 2 ^ qBits less than add\n); +X265_CHECK(((intptr_t)quantCoeff 15) == 0, quantCoeff buffer not aligned\n); + for (int blockpos = 0; blockpos numCoeff; blockpos++) { int level = coef[blockpos]; diff -r 94bd00d1af5d -r 4ca9e972f48c source/common/x86/pixel-util8.asm --- a/source/common/x86/pixel-util8.asm Wed Sep 03 16:36:44 2014 -0700 +++ b/source/common/x86/pixel-util8.asm Wed Sep 03 16:36:59 2014 -0700 @@ -941,55 +941,47 @@ ; uint32_t nquant(int32_t *coef, int32_t *quantCoeff, int32_t *qCoef, int qBits, int add, int numCoeff); ;- INIT_XMM sse4 -cglobal nquant, 4,5,8 +cglobal nquant, 3,5,8 movdm6, r4m mov r4d, r5m pxorm7, m7 ; m7 = numZero -movdm5, r3d ; m5 = qbits +movdm5, r3m ; m5 = qbits pshufd m6, m6, 0 ; m6 = add mov r3d, r4d; r3 = numCoeff shr r4d, 3 + .loop: movum0, [r0]; m0 = level movum1, [r0 + 16] ; m1 = level -movum2, [r1]; m2 = qcoeff -movum3, [r1 + 16] ; m3 = qcoeff + +pabsd m2, m0 +pmulld m2, [r1]; m4 = tmpLevel1 +paddd m2, m6 +psrad m2, m5 ; m4 = level1 +psignd m2, m0 ; restore sign + +pabsd m3, m1 +pmulld m3, 
[r1 + 16] ; m4 = tmpLevel1 +paddd m3, m6 +psrad m3, m5 ; m4 = level1 +psignd m3, m1 ; restore sign add r0, 32 add r1, 32 -pxorm4, m4 -pcmpgtd m4, m0 ; m4 = sign -pabsd m0, m0 -pmulld m0, m2 ; m0 = tmpLevel1 -paddd m0, m6 -psrad m0, m5 ; m0 = level1 -pxorm0, m4 -psubd m0, m4 - -pxorm4, m4 -pcmpgtd m4, m1 ; m4 = sign -pabsd m1, m1 -pmulld m1, m3 ; m1 = tmpLevel1 -paddd m1, m6 -psrad m1, m5 ; m1 = level1 -pxorm1, m4 -psubd m1, m4 - -packssdwm0, m0 -packssdwm1, m1 -pmovsxwdm0, m0 +packssdwm2, m3 +pmovsxwdm0, m2 +movhlps m1, m2 pmovsxwdm1, m1 -movu[r2], m0 +movu[r2 ], m0 movu[r2 + 16], m1 add r2, 32 + +pxorm4, m4 +pcmpeqw m2, m4 +psubw m7, m2 + dec r4d - -packssdwm0, m1 -pxorm4, m4 -pcmpeqw m0, m4 -psubw m7, m0 - jnz .loop packuswbm7, m7 @@ -997,10 +989,8 @@ mov eax, r3d movdr4d, m7 sub eax, r4d; numSig - RET - ;- ; void dequant_normal(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift) ;- ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] frameencoder: remove second encodeCU() pass over CTUs when SAO is disabled
On Sat, Sep 6, 2014 at 10:08 PM, Steve Borho st...@borho.org wrote: # HG changeset patch # User Steve Borho st...@borho.org # Date 1409932577 -7200 # Fri Sep 05 17:56:17 2014 +0200 # Node ID 07d69bce1760a28be1b1ee1821dfeb3335602422 # Parent 795878af39730deb24e2ee0e585c625084bb031b frameencoder: remove second encodeCU() pass over CTUs when SAO is disabled This is a performance optimization, it allows the encoder to generate the final bitstream of each CTU as it is compressed and cache hot. When SAO is enabled, SAO analysis must be performed and coded at the start of the CTU but SAO analysis currently requires surrounding CTUs to be encoded making the second pass unavoidable. diff -r 795878af3973 -r 07d69bce1760 source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Fri Sep 05 16:03:44 2014 +0200 +++ b/source/encoder/frameencoder.cpp Fri Sep 05 17:56:17 2014 +0200 @@ -192,16 +192,6 @@ } } -uint32_t numSubstreams = m_param-bEnableWavefront ? m_frame-getPicSym()-getFrameHeightInCU() : 1; -if (!m_outStreams) -{ -m_outStreams = new Bitstream[numSubstreams]; -m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams); -} -else -for (uint32_t i = 0; i numSubstreams; i++) -m_outStreams[i].resetBits(); - /* Get the QP for this frame from rate control. This call may block until * frames ahead of it in encode order have called rateControlEnd() */ int qp = m_top-m_rateControl-rateControlStart(m_frame, m_rce, m_top); @@ -214,6 +204,24 @@ m_frameFilter.start(m_frame, m_initSliceContext, qp); +// reset entropy coders +m_entropyCoder.load(m_initSliceContext); +for (int i = 0; i m_numRows; i++) +m_rows[i].init(m_initSliceContext); + +uint32_t numSubstreams = m_param-bEnableWavefront ? 
m_frame-getPicSym()-getFrameHeightInCU() : 1; +if (!m_outStreams) +{ +m_outStreams = new Bitstream[numSubstreams]; +m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams); +if (!m_param-bEnableSAO) +for (uint32_t i = 0; i numSubstreams; i++) + m_rows[i].rdEntropyCoders[0][CI_CURR_BEST].setBitstream(m_outStreams[i]); +} +else +for (uint32_t i = 0; i numSubstreams; i++) +m_outStreams[i].resetBits(); + if (m_frame-m_lowres.bKeyframe) { if (m_param-bEmitHRDSEI) @@ -328,7 +336,7 @@ m_entropyCoder.setBitstream(m_bs); m_entropyCoder.codeSliceHeader(slice); -// re-encode each row of CUs for the final time (TODO: get rid of this second pass) +// finish encode of each CTU row encodeSlice(); // serialize each row, record final lengths in slice header @@ -409,8 +417,40 @@ const uint32_t widthInLCUs = m_frame-getPicSym()-getFrameWidthInCU(); const uint32_t lastCUAddr = (slice-m_endCUAddr + m_frame-getNumPartInCU() - 1) / m_frame-getNumPartInCU(); const int numSubstreams = m_param-bEnableWavefront ? 
m_frame-getPicSym()-getFrameHeightInCU() : 1; + +if (!m_param-bEnableSAO) +{ +/* terminate each row and collect stats */ +for (uint32_t cuAddr = 0; cuAddr lastCUAddr; cuAddr++) +{ +uint32_t col = cuAddr % widthInLCUs; + +if (m_param-bEnableWavefront col == widthInLCUs - 1) +{ +uint32_t lin = cuAddr / widthInLCUs; +uint32_t subStrm = lin % numSubstreams; + m_rows[subStrm].rdEntropyCoders[0][CI_CURR_BEST].codeTerminatingBit(1); + m_rows[subStrm].rdEntropyCoders[0][CI_CURR_BEST].codeSliceFinish(); +m_outStreams[subStrm].writeByteAlignment(); +} + +// Collect Frame Stats for 2 pass +TComDataCU* cu = m_frame-getCU(cuAddr); +m_frameStats.mvBits += cu-m_mvBits; +m_frameStats.coeffBits += cu-m_coeffBits; +m_frameStats.miscBits += cu-m_totalBits - (cu-m_mvBits + cu-m_coeffBits); +} +if (!m_param-bEnableWavefront) +{ + m_rows[0].rdEntropyCoders[0][CI_CURR_BEST].codeTerminatingBit(1); +m_rows[0].rdEntropyCoders[0][CI_CURR_BEST].codeSliceFinish(); +m_outStreams[0].writeByteAlignment(); +} + +return; +} + SAOParam *saoParam = slice-m_pic-getPicSym()-m_saoParam; - for (uint32_t cuAddr = 0; cuAddr lastCUAddr; cuAddr++) { uint32_t col = cuAddr % widthInLCUs; @@ -487,11 +527,6 @@ PPAScopeEvent(FrameEncoder_compressRows); Slice* slice = m_frame-m_picSym-m_slice; -// reset entropy coders -m_entropyCoder.load(m_initSliceContext); -for (int i = 0; i m_numRows; i++) -m_rows[i].init(m_initSliceContext); - m_bAllRowsStop = false; m_vbvResetTriggerRow = -1; @@ -672,15 +707,17 @@ } if (m_param-bEnableWavefront col == 0 row 0) +{
Re: [x265] [PATCH] search.cpp: fixed type conversion warning
Thanks, Praveen - an alternative patch has been pushed to take care of this (silly) warning. On Tue, Sep 9, 2014 at 11:29 AM, prav...@multicorewareinc.com wrote: # HG changeset patch # User Praveen Tiwari # Date 1410242347 -19800 # Node ID 5026f08bd7d64ab0ee22dcc98dd034030aa65db9 # Parent 7510b362ca969d850d33b10acb37f3e5a6d6b7dc search.cpp: fixed type conversion warning diff -r 7510b362ca96 -r 5026f08bd7d6 source/encoder/search.cpp --- a/source/encoder/search.cpp Tue Sep 09 11:07:59 2014 +0530 +++ b/source/encoder/search.cpp Tue Sep 09 11:29:07 2014 +0530 @@ -1443,7 +1443,8 @@ m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]); // determine residual for partition -puDistY = puCost = 0; +puDistY = 0; +puCost = 0; xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, puDistY, true, puCost, depthRange); // check r-d cost @@ -1462,7 +1463,8 @@ m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]); // determine residual for partition -puDistY = puCost = 0; +puDistY = 0; +puCost = 0; xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, puDistY, false, puCost, depthRange); overallDistY += (puCost >= bestPUCost) ? bestPUDistY : puDistY; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] copy_cnt: enable avx2 version of asm code
Would be better to combine this asm enable with the corresponding asm patch itself. I have pushed copy_cnt8, and enabled only that for now. On Wed, Sep 10, 2014 at 3:28 PM, prav...@multicorewareinc.com wrote: # HG changeset patch # User Praveen Tiwari # Date 1410343073 -19800 # Node ID 2cd4a13086740728559fde3a176953e9aa4c0782 # Parent 7bc4db02ccc728f6e2ddedd036c96e3d37b90f22 copy_cnt: enable avx2 version of asm code diff -r 7bc4db02ccc7 -r 2cd4a1308674 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Wed Sep 10 14:45:33 2014 +0530 +++ b/source/common/x86/asm-primitives.cpp Wed Sep 10 15:27:53 2014 +0530 @@ -1724,14 +1724,10 @@ p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx2; p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_avx2; -/* Need to update assembly code as per changed interface of the copy_cnt primitive, once - * code is updated, avx2 version will be enabled */ -/* p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_avx2; p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_avx2; p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_avx2; p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_avx2; -*/ p.cvt32to16_shl[BLOCK_4x4] = x265_cvt32to16_shl_4_avx2; p.cvt32to16_shl[BLOCK_8x8] = x265_cvt32to16_shl_8_avx2; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] copy_cnt: enable avx2 version of asm code
Ok, thanks. On Thu, Sep 11, 2014 at 11:47 AM, Praveen Tiwari prav...@multicorewareinc.com wrote: You can push 16x16 and 32x32 also they are good in performance but they need a bit more improvement, I will be sending improvement patch soon. Regards, Praveen Tiwari On Thu, Sep 11, 2014 at 11:29 AM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: Would be better to combine this asm enable with the corresponding asm patch itself. I have pushed copy_cnt8, and enabled only that for now. On Wed, Sep 10, 2014 at 3:28 PM, prav...@multicorewareinc.com wrote: # HG changeset patch # User Praveen Tiwari # Date 1410343073 -19800 # Node ID 2cd4a13086740728559fde3a176953e9aa4c0782 # Parent 7bc4db02ccc728f6e2ddedd036c96e3d37b90f22 copy_cnt: enable avx2 version of asm code diff -r 7bc4db02ccc7 -r 2cd4a1308674 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Wed Sep 10 14:45:33 2014 +0530 +++ b/source/common/x86/asm-primitives.cpp Wed Sep 10 15:27:53 2014 +0530 @@ -1724,14 +1724,10 @@ p.sad_x4[LUMA_16x32] = x265_pixel_sad_x4_16x32_avx2; p.ssd_s[BLOCK_32x32] = x265_pixel_ssd_s_32_avx2; -/* Need to update assembly code as per changed interface of the copy_cnt primitive, once - * code is updated, avx2 version will be enabled */ -/* p.copy_cnt[BLOCK_4x4] = x265_copy_cnt_4_avx2; p.copy_cnt[BLOCK_8x8] = x265_copy_cnt_8_avx2; p.copy_cnt[BLOCK_16x16] = x265_copy_cnt_16_avx2; p.copy_cnt[BLOCK_32x32] = x265_copy_cnt_32_avx2; -*/ p.cvt32to16_shl[BLOCK_4x4] = x265_cvt32to16_shl_4_avx2; p.cvt32to16_shl[BLOCK_8x8] = x265_cvt32to16_shl_8_avx2; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] sao: some cleanups
Thanks, queued for testing. On Fri, Sep 12, 2014 at 7:34 AM, Satoshi Nakagawa nakagawa...@oki.com wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1410487314 -32400 # Fri Sep 12 11:01:54 2014 +0900 # Node ID 8a2312df90f99b8b479940141c6dafa4b96581cf # Parent 7e29b10982d2eb7fd79f581d6f04184522ba sao: some cleanups diff -r 7e29b10982d2 -r 8a2312df90f9 source/common/common.h --- a/source/common/common.hThu Sep 11 19:24:28 2014 +0530 +++ b/source/common/common.hFri Sep 12 11:01:54 2014 +0900 @@ -200,6 +200,8 @@ namespace x265 { +enum { SAO_NUM_OFFSET = 4 }; + // NOTE: MUST be alignment to 16 or 32 bytes for asm code struct NoiseReduction { @@ -215,9 +217,8 @@ enum { NUM_DOWN_PART = 4 }; int bestType; -int length; int subTypeIdx; // indicates EO class or BO band position -int offset[4]; +int offset[SAO_NUM_OFFSET]; int startCUX; int startCUY; int endCUX; @@ -245,10 +246,9 @@ bool mergeLeftFlag; int typeIdx; int subTypeIdx;// indicates EO class or BO band position -int offset[4]; +int offset[SAO_NUM_OFFSET]; int partIdx; int partIdxTmp; -int length; void reset() { diff -r 7e29b10982d2 -r 8a2312df90f9 source/common/x86/loopfilter.asm --- a/source/common/x86/loopfilter.asm Thu Sep 11 19:24:28 2014 +0530 +++ b/source/common/x86/loopfilter.asm Fri Sep 12 11:01:54 2014 +0900 @@ -44,7 +44,7 @@ pslldq m0,15 ; m0 = [iSignLeft x .. 
x] pcmpeqb m4,m4 ; m4 = [pb -1] pxorm5,m5 ; m5 = 0 -movum6,[r1]; m6 = m_iOffsetEo +movhm6,[r1]; m6 = m_offsetEo .loop: movum7,[r0]; m1 = pRec[x] diff -r 7e29b10982d2 -r 8a2312df90f9 source/encoder/entropy.cpp --- a/source/encoder/entropy.cppThu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/entropy.cppFri Sep 12 11:01:54 2014 +0900 @@ -879,19 +879,19 @@ if (symbol) { -if (saoLcuParam-typeIdx 4 compIdx != 2) +if (saoLcuParam-typeIdx SAO_BO compIdx != 2) saoLcuParam-subTypeIdx = saoLcuParam-typeIdx; int offsetTh = 1 X265_MIN(X265_DEPTH - 5, 5); if (saoLcuParam-typeIdx == SAO_BO) { -for (i = 0; i saoLcuParam-length; i++) +for (i = 0; i SAO_BO_LEN; i++) { uint32_t absOffset = ((saoLcuParam-offset[i] 0) ? -saoLcuParam-offset[i] : saoLcuParam-offset[i]); codeSaoMaxUvlc(absOffset, offsetTh - 1); } -for (i = 0; i saoLcuParam-length; i++) +for (i = 0; i SAO_BO_LEN; i++) { if (saoLcuParam-offset[i] != 0) { @@ -903,7 +903,7 @@ symbol = (uint32_t)(saoLcuParam-subTypeIdx); codeSaoUflc(5, symbol); } -else if (saoLcuParam-typeIdx 4) +else // if (saoLcuParam-typeIdx SAO_BO) { codeSaoMaxUvlc(saoLcuParam-offset[0], offsetTh - 1); codeSaoMaxUvlc(saoLcuParam-offset[1], offsetTh - 1); diff -r 7e29b10982d2 -r 8a2312df90f9 source/encoder/sao.cpp --- a/source/encoder/sao.cppThu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/sao.cppFri Sep 12 11:01:54 2014 +0900 @@ -79,26 +79,13 @@ 341, // level 4 }; -const uint32_t SAO::s_eoTable[9] = +const uint32_t SAO::s_eoTable[NUM_EDGETYPE] = { 1, // 0 2, // 1 0, // 2 3, // 3 -4, // 4 -0, // 5 -0, // 6 -0, // 7 -0 -}; - -const int SAO::s_numClass[MAX_NUM_SAO_TYPE] = -{ -SAO_EO_LEN, -SAO_EO_LEN, -SAO_EO_LEN, -SAO_EO_LEN, -SAO_BO_LEN +4 // 4 }; SAO::SAO() @@ -122,8 +109,6 @@ m_clipTable = NULL; m_clipTableBase = NULL; m_offsetBo = NULL; -m_chromaOffsetBo = NULL; -m_tableBo = NULL; m_tmpU1[0] = NULL; m_tmpU1[1] = NULL; m_tmpU1[2] = NULL; @@ -162,18 +147,12 @@ * m_numTotalParts must allow for sufficient storage in any allocated arrays */ 
m_numTotalParts = X265_MAX(3, s_numCulPartsLevel[m_maxSplitLevel]); -int pixelRange = 1 X265_DEPTH; -int boRangeShift = X265_DEPTH - SAO_BO_BITS; -pixel maxY = (1 X265_DEPTH) - 1; -pixel minY = 0; -pixel rangeExt = maxY 1; +const pixel maxY = (1 X265_DEPTH) - 1; +const pixel rangeExt = maxY 1; int numLcu = m_numCuInWidth * m_numCuInHeight; -CHECKED_MALLOC(m_tableBo, pixel, pixelRange); - -CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt); -CHECKED_MALLOC(m_offsetBo,int, maxY + 2 * rangeExt); -CHECKED_MALLOC(m_chromaOffsetBo , int, maxY + 2 * rangeExt); +CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt); +
Re: [x265] [PATCH] Resolved gcc compiler error of mismatched type
Thanks, pushed. On Fri, Sep 12, 2014 at 5:57 AM, dtyx...@gmail.com wrote: # HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1410481540 25200 # Node ID 37b199156dfc27874205d6d7fadb71a00e3257d9 # Parent 7e29b10982d2eb7fd79f581d6f04184522ba Resolved gcc compiler error of mismatched type diff -r 7e29b10982d2 -r 37b199156dfc source/x265.cpp --- a/source/x265.cpp Thu Sep 11 19:24:28 2014 +0530 +++ b/source/x265.cpp Thu Sep 11 17:25:40 2014 -0700 @@ -788,7 +788,7 @@ void CLIOptions::writeAnalysisFile(x265_picture* pic, x265_param *p) { -fpos_t seekTo = pic->poc * this->analysisRecordSize; +uint64_t seekTo = pic->poc * this->analysisRecordSize; fseeko(this->analysisFile, seekTo, SEEK_SET); fwrite(&p->sourceWidth, sizeof(int), 1, this->analysisFile); fwrite(&p->sourceHeight, sizeof(int), 1, this->analysisFile); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH RFC] analysis: add CU specific details to encodeCU()
On Fri, Sep 12, 2014 at 6:05 PM, santhosh...@multicorewareinc.com wrote: # HG changeset patch # User Santhoshini Sekar santhosh...@multicorewareinc.com # Date 1410525310 -19800 # Fri Sep 12 18:05:10 2014 +0530 # Node ID bf4ebe5df0cab013e4462597b55bd505b2a6a71a # Parent 7e29b10982d2eb7fd79f581d6f04184522ba analysis: add CU specific details to encodeCU() diff -r 7e29b10982d2 -r bf4ebe5df0ca source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Thu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/analysis.cpp Fri Sep 12 18:05:10 2014 +0530 @@ -301,7 +301,6 @@ { if (cu-m_slice-m_pps-bUseDQP) m_bEncodeDQP = true; -loadCTUData(cu); // initialize CU data m_bestCU[0]-initCU(cu-m_pic, cu-getAddr()); diff -r 7e29b10982d2 -r bf4ebe5df0ca source/encoder/entropy.cpp --- a/source/encoder/entropy.cppThu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/entropy.cppFri Sep 12 18:05:10 2014 +0530 @@ -481,14 +481,14 @@ } } -void Entropy::encodeCTU(TComDataCU* cu) +void Entropy::encodeCTU(TComDataCU* cu, CU* cuData) { bool bEncodeDQP = cu-m_slice-m_pps-bUseDQP; -encodeCU(cu, 0, 0, false, bEncodeDQP); +encodeCU(cu, 0, 0, bEncodeDQP, cuData); } /* encode a CU block recursively */ -void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bInsidePicture, bool bEncodeDQP) +void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bEncodeDQP, CU* cuData) { Frame* pic = cu-m_pic; Slice* slice = cu-m_slice; @@ -496,30 +496,26 @@ if (depth = slice-m_pps-maxCuDQPDepth slice-m_pps-bUseDQP) bEncodeDQP = true; -if (!bInsidePicture) +int cu_split_flag = !(cuData-flags CU::LEAF); +int cu_unsplit_flag = !(cuData-flags CU::SPLIT_MANDATORY); + +uint32_t xmax = slice-m_sps-picWidthInLumaSamples - cu-getCUPelX(); +uint32_t ymax = slice-m_sps-picHeightInLumaSamples - cu-getCUPelY(); + +if (!cu_unsplit_flag) { -uint32_t xmax = slice-m_sps-picWidthInLumaSamples - cu-getCUPelX(); -uint32_t ymax = slice-m_sps-picHeightInLumaSamples - cu-getCUPelY(); 
-uint32_t cuSize = g_maxCUSize depth; - -bInsidePicture = (g_zscanToPelX[absPartIdx] + cuSize = xmax - g_zscanToPelY[absPartIdx] + cuSize = ymax); - -if (!bInsidePicture) +uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; +for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) { -uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; -for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) -{ -if (g_zscanToPelX[absPartIdx] xmax g_zscanToPelY[absPartIdx] ymax) -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, bEncodeDQP); -} - -return; +CU *childCU = cu-m_CULocalData + cuData-childIdx + partUnitIdx; +if (g_zscanToPelX[absPartIdx] xmax g_zscanToPelY[absPartIdx] ymax) Most of the patch looks correct, but can't the above if-check also replaced by one of the childCU flags? +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU); } +return; } // We need to split, so don't try these modes. -if (bInsidePicture depth g_maxCUDepth) +if (cu_split_flag) codeSplitFlag(cu, absPartIdx, depth); if (depth cu-getDepth(absPartIdx) depth g_maxCUDepth) @@ -527,7 +523,10 @@ uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, bEncodeDQP); +{ +CU *childCU = cu-m_CULocalData + cuData-childIdx + partUnitIdx; +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU); +} return; } diff -r 7e29b10982d2 -r bf4ebe5df0ca source/encoder/entropy.h --- a/source/encoder/entropy.h Thu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/entropy.h Fri Sep 12 18:05:10 2014 +0530 @@ -148,7 +148,7 @@ void codeShortTermRefPicSet(RPS* rps); void finishSlice() { encodeBinTrm(1); finish(); dynamic_castBitstream*(m_bitIf)-writeByteAlignment(); } -void encodeCTU(TComDataCU* cu); +void encodeCTU(TComDataCU* cu, CU *cuData); void codeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx); void 
codeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int allowMergeLeft, int allowMergeUp); void codeSaoMerge(uint32_t code) { encodeBin(code, m_contextState[OFF_SAO_MERGE_FLAG_CTX]); } @@ -193,7 +193,7 @@ void
Re: [x265] [PATCH] Search: remove redundant encode coefficients in intra for performance
This significantly changes outputs for P and B frames. Higher bitrates and higher SSIM. Lets do full regression testing on this - and compare the bitrate/ssim for all combinations to be reasonably sure there are no bugs. On Fri, Sep 12, 2014 at 7:47 PM, as...@multicorewareinc.com wrote: # HG changeset patch # User Ashok Kumar Mishraas...@multicorewareinc.com # Date 1410341620 -19800 # Wed Sep 10 15:03:40 2014 +0530 # Node ID d8be3c38915d4a628b804522da8946a152041203 # Parent cd8fd0afd4e873fc940ae3384fac4deed3ec7b4f Search: remove redundant encode coefficients in intra for performance diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Thu Sep 11 17:25:40 2014 -0700 +++ b/source/encoder/analysis.cpp Wed Sep 10 15:03:40 2014 +0530 @@ -1840,6 +1840,7 @@ void Analysis::encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv) { uint64_t puCost = 0; +uint32_t puBits = 0; uint32_t depth = cu-getDepth(0); uint32_t initTrDepth = cu-getPartitionSize(0) == SIZE_2Nx2N ? 0 : 1; @@ -1851,7 +1852,7 @@ uint32_t tuDepthRange[2]; cu-getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0); -uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, predYuv, outResiYuv, false, puCost, tuDepthRange); +uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, predYuv, outResiYuv, false, puCost, puBits, tuDepthRange); xSetIntraResultQT(cu, initTrDepth, 0, outReconYuv); //=== update PU data diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/search.cpp --- a/source/encoder/search.cpp Thu Sep 11 17:25:40 2014 -0700 +++ b/source/encoder/search.cpp Wed Sep 10 15:03:40 2014 +0530 @@ -111,47 +111,6 @@ return false; } -void Search::xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2]) -{ -uint32_t fullDepth = cu-getDepth(0) + trDepth; -uint32_t trMode = cu-getTransformIdx(absPartIdx); -uint32_t subdiv = (trMode trDepth ? 
1 : 0); -uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; - -if (cu-getPredictionMode(0) == MODE_INTRA cu-getPartitionSize(0) == SIZE_NxN trDepth == 0) -{ -X265_CHECK(subdiv, subdivision not present\n); -} -else if (log2TrSize *(depthRange + 1)) -{ -X265_CHECK(subdiv, subdivision not present\n); -} -else if (log2TrSize == cu-m_slice-m_sps-quadtreeTULog2MinSize) -{ -X265_CHECK(!subdiv, subdivision present\n); -} -else if (log2TrSize == *depthRange) -{ -X265_CHECK(!subdiv, subdivision present\n); -} -else -{ -X265_CHECK(log2TrSize *depthRange, transform size too small\n); -m_entropyCoder-codeTransformSubdivFlag(subdiv, 5 - log2TrSize); -} - -if (subdiv) -{ -uint32_t qtPartNum = cu-m_pic-getNumPartInCU() ((fullDepth + 1) 1); -for (uint32_t part = 0; part 4; part++) -xEncSubdivCbfQTLuma(cu, trDepth + 1, absPartIdx + part * qtPartNum, depthRange); - -return; -} - -m_entropyCoder-codeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode); -} - void Search::xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height) { uint32_t fullDepth = cu-getDepth(0) + trDepth; @@ -183,32 +142,6 @@ } } -void Search::xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx) -{ -const TextType ttype = TEXT_LUMA; - -if (!cu-getCbf(absPartIdx, ttype, trDepth)) -return; - -uint32_t fullDepth = cu-getDepth(0) + trDepth; -uint32_t trMode= cu-getTransformIdx(absPartIdx); - -if (trMode trDepth) -{ -uint32_t qtPartNum = cu-m_pic-getNumPartInCU() ((fullDepth + 1) 1); -for (uint32_t part = 0; part 4; part++) -xEncCoeffQTLuma(cu, trDepth + 1, absPartIdx + part * qtPartNum); - -return; -} - -uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; -uint32_t qtLayer= log2TrSize - 2; -uint32_t coeffOffset = absPartIdx LOG2_UNIT_SIZE * 2; -coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset; -m_entropyCoder-codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, ttype); -} - void Search::xEncCoeffQTChroma(TComDataCU* cu, uint32_t 
trDepth, uint32_t absPartIdx, TextType ttype) { if (!cu-getCbf(absPartIdx, ttype, trDepth)) @@ -316,15 +249,6 @@ } } -uint32_t Search::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2]) -{ -m_entropyCoder-resetBits(); -xEncIntraHeaderLuma(cu, trDepth, absPartIdx); -xEncSubdivCbfQTLuma(cu, trDepth, absPartIdx, depthRange); -xEncCoeffQTLuma(cu, trDepth, absPartIdx); -return
Re: [x265] [PATCH] Search: remove redundant encode coefficients in intra for performance
Sorry, the output mismatch was due to asm. Pushed. On Sun, Sep 14, 2014 at 4:35 PM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: This significantly changes outputs for P and B frames. Higher bitrates and higher SSIM. Lets do full regression testing on this - and compare the bitrate/ssim for all combinations to be reasonably sure there are no bugs. On Fri, Sep 12, 2014 at 7:47 PM, as...@multicorewareinc.com wrote: # HG changeset patch # User Ashok Kumar Mishraas...@multicorewareinc.com # Date 1410341620 -19800 # Wed Sep 10 15:03:40 2014 +0530 # Node ID d8be3c38915d4a628b804522da8946a152041203 # Parent cd8fd0afd4e873fc940ae3384fac4deed3ec7b4f Search: remove redundant encode coefficients in intra for performance diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Thu Sep 11 17:25:40 2014 -0700 +++ b/source/encoder/analysis.cpp Wed Sep 10 15:03:40 2014 +0530 @@ -1840,6 +1840,7 @@ void Analysis::encodeIntraInInter(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, TComYuv* outReconYuv) { uint64_t puCost = 0; +uint32_t puBits = 0; uint32_t depth = cu-getDepth(0); uint32_t initTrDepth = cu-getPartitionSize(0) == SIZE_2Nx2N ? 
0 : 1; @@ -1851,7 +1852,7 @@ uint32_t tuDepthRange[2]; cu-getQuadtreeTULog2MinSizeInCU(tuDepthRange, 0); -uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, predYuv, outResiYuv, false, puCost, tuDepthRange); +uint32_t puDistY = xRecurIntraCodingQT(cu, initTrDepth, 0, fencYuv, predYuv, outResiYuv, false, puCost, puBits, tuDepthRange); xSetIntraResultQT(cu, initTrDepth, 0, outReconYuv); //=== update PU data diff -r cd8fd0afd4e8 -r d8be3c38915d source/encoder/search.cpp --- a/source/encoder/search.cpp Thu Sep 11 17:25:40 2014 -0700 +++ b/source/encoder/search.cpp Wed Sep 10 15:03:40 2014 +0530 @@ -111,47 +111,6 @@ return false; } -void Search::xEncSubdivCbfQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2]) -{ -uint32_t fullDepth = cu-getDepth(0) + trDepth; -uint32_t trMode = cu-getTransformIdx(absPartIdx); -uint32_t subdiv = (trMode trDepth ? 1 : 0); -uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; - -if (cu-getPredictionMode(0) == MODE_INTRA cu-getPartitionSize(0) == SIZE_NxN trDepth == 0) -{ -X265_CHECK(subdiv, subdivision not present\n); -} -else if (log2TrSize *(depthRange + 1)) -{ -X265_CHECK(subdiv, subdivision not present\n); -} -else if (log2TrSize == cu-m_slice-m_sps-quadtreeTULog2MinSize) -{ -X265_CHECK(!subdiv, subdivision present\n); -} -else if (log2TrSize == *depthRange) -{ -X265_CHECK(!subdiv, subdivision present\n); -} -else -{ -X265_CHECK(log2TrSize *depthRange, transform size too small\n); -m_entropyCoder-codeTransformSubdivFlag(subdiv, 5 - log2TrSize); -} - -if (subdiv) -{ -uint32_t qtPartNum = cu-m_pic-getNumPartInCU() ((fullDepth + 1) 1); -for (uint32_t part = 0; part 4; part++) -xEncSubdivCbfQTLuma(cu, trDepth + 1, absPartIdx + part * qtPartNum, depthRange); - -return; -} - -m_entropyCoder-codeQtCbf(cu, absPartIdx, TEXT_LUMA, trMode); -} - void Search::xEncSubdivCbfQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t absPartIdxStep, uint32_t width, uint32_t height) { 
uint32_t fullDepth = cu-getDepth(0) + trDepth; @@ -183,32 +142,6 @@ } } -void Search::xEncCoeffQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx) -{ -const TextType ttype = TEXT_LUMA; - -if (!cu-getCbf(absPartIdx, ttype, trDepth)) -return; - -uint32_t fullDepth = cu-getDepth(0) + trDepth; -uint32_t trMode= cu-getTransformIdx(absPartIdx); - -if (trMode trDepth) -{ -uint32_t qtPartNum = cu-m_pic-getNumPartInCU() ((fullDepth + 1) 1); -for (uint32_t part = 0; part 4; part++) -xEncCoeffQTLuma(cu, trDepth + 1, absPartIdx + part * qtPartNum); - -return; -} - -uint32_t log2TrSize = g_maxLog2CUSize - fullDepth; -uint32_t qtLayer= log2TrSize - 2; -uint32_t coeffOffset = absPartIdx LOG2_UNIT_SIZE * 2; -coeff_t* coeff = m_qtTempCoeff[ttype][qtLayer] + coeffOffset; -m_entropyCoder-codeCoeffNxN(cu, coeff, absPartIdx, log2TrSize, ttype); -} - void Search::xEncCoeffQTChroma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TextType ttype) { if (!cu-getCbf(absPartIdx, ttype, trDepth)) @@ -316,15 +249,6 @@ } } -uint32_t Search::xGetIntraBitsQTLuma(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, uint32_t depthRange[2]) -{ -m_entropyCoder-resetBits(); -xEncIntraHeaderLuma(cu, trDepth, absPartIdx
Re: [x265] [PATCH] analysis: add CU specific details to encodeCU()
Thanks, looks good. On Tue, Sep 16, 2014 at 9:45 AM, santhosh...@multicorewareinc.com wrote: # HG changeset patch # User Santhoshini Sekar santhosh...@multicorewareinc.com # Date 1410840429 -19800 # Tue Sep 16 09:37:09 2014 +0530 # Node ID 50505472d3e33b775c70f2f373e1c15d17e47e66 # Parent 7e29b10982d2eb7fd79f581d6f04184522ba analysis: add CU specific details to encodeCU() diff -r 7e29b10982d2 -r 50505472d3e3 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Thu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/analysis.cpp Tue Sep 16 09:37:09 2014 +0530 @@ -301,7 +301,6 @@ { if (cu-m_slice-m_pps-bUseDQP) m_bEncodeDQP = true; -loadCTUData(cu); // initialize CU data m_bestCU[0]-initCU(cu-m_pic, cu-getAddr()); diff -r 7e29b10982d2 -r 50505472d3e3 source/encoder/entropy.cpp --- a/source/encoder/entropy.cppThu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/entropy.cppTue Sep 16 09:37:09 2014 +0530 @@ -481,14 +481,14 @@ } } -void Entropy::encodeCTU(TComDataCU* cu) +void Entropy::encodeCTU(TComDataCU* cu, CU* cuData) { bool bEncodeDQP = cu-m_slice-m_pps-bUseDQP; -encodeCU(cu, 0, 0, false, bEncodeDQP); +encodeCU(cu, 0, 0, bEncodeDQP, cuData); } /* encode a CU block recursively */ -void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bInsidePicture, bool bEncodeDQP) +void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bEncodeDQP, CU* cuData) { Frame* pic = cu-m_pic; Slice* slice = cu-m_slice; @@ -496,30 +496,24 @@ if (depth = slice-m_pps-maxCuDQPDepth slice-m_pps-bUseDQP) bEncodeDQP = true; -if (!bInsidePicture) +int cuSplitFlag = !(cuData-flags CU::LEAF); +int cuUnsplitFlag = !(cuData-flags CU::SPLIT_MANDATORY); + +if (!cuUnsplitFlag) { -uint32_t xmax = slice-m_sps-picWidthInLumaSamples - cu-getCUPelX(); -uint32_t ymax = slice-m_sps-picHeightInLumaSamples - cu-getCUPelY(); -uint32_t cuSize = g_maxCUSize depth; - -bInsidePicture = (g_zscanToPelX[absPartIdx] + cuSize = xmax - g_zscanToPelY[absPartIdx] + 
cuSize = ymax); - -if (!bInsidePicture) +uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; +for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) { -uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; -for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) -{ -if (g_zscanToPelX[absPartIdx] xmax g_zscanToPelY[absPartIdx] ymax) -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, bEncodeDQP); -} - -return; +CU *childCU = cu-m_CULocalData + cuData-childIdx + partUnitIdx; +int cuPresentFlagChild = !(childCU-flags CU::PRESENT); +if (!cuPresentFlagChild) +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU); } +return; } // We need to split, so don't try these modes. -if (bInsidePicture depth g_maxCUDepth) +if (cuSplitFlag) codeSplitFlag(cu, absPartIdx, depth); if (depth cu-getDepth(absPartIdx) depth g_maxCUDepth) @@ -527,7 +521,10 @@ uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, bEncodeDQP); +{ +CU *childCU = cu-m_CULocalData + cuData-childIdx + partUnitIdx; +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU); +} return; } diff -r 7e29b10982d2 -r 50505472d3e3 source/encoder/entropy.h --- a/source/encoder/entropy.h Thu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/entropy.h Tue Sep 16 09:37:09 2014 +0530 @@ -148,7 +148,7 @@ void codeShortTermRefPicSet(RPS* rps); void finishSlice() { encodeBinTrm(1); finish(); dynamic_castBitstream*(m_bitIf)-writeByteAlignment(); } -void encodeCTU(TComDataCU* cu); +void encodeCTU(TComDataCU* cu, CU *cuData); void codeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx); void codeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int allowMergeLeft, int allowMergeUp); void codeSaoMerge(uint32_t code) { encodeBin(code, 
m_contextState[OFF_SAO_MERGE_FLAG_CTX]); } @@ -193,7 +193,7 @@ void encodeBinsEP(uint32_t binValues, int numBins); void encodeBinTrm(uint32_t binValue); -void encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bInsidePicture, bool bEncodeDQP); +void
Re: [x265] [PATCH] analysis: add CU specific details to encodeCU()
OK, thanks. Please send a follow-on patch cleaning up both the encodeCU and compressCU functions. On Tue, Sep 16, 2014 at 11:32 AM, Santhoshini Sekar santhosh...@multicorewareinc.com wrote: On Tue, Sep 16, 2014 at 10:56 AM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: On Tue, Sep 16, 2014 at 9:45 AM, santhosh...@multicorewareinc.com wrote: # HG changeset patch # User Santhoshini Sekar santhosh...@multicorewareinc.com # Date 1410840429 -19800 # Tue Sep 16 09:37:09 2014 +0530 # Node ID 50505472d3e33b775c70f2f373e1c15d17e47e66 # Parent 7e29b10982d2eb7fd79f581d6f04184522ba analysis: add CU specific details to encodeCU() diff -r 7e29b10982d2 -r 50505472d3e3 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Thu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/analysis.cpp Tue Sep 16 09:37:09 2014 +0530 @@ -301,7 +301,6 @@ { if (cu-m_slice-m_pps-bUseDQP) m_bEncodeDQP = true; -loadCTUData(cu); // initialize CU data m_bestCU[0]-initCU(cu-m_pic, cu-getAddr()); diff -r 7e29b10982d2 -r 50505472d3e3 source/encoder/entropy.cpp --- a/source/encoder/entropy.cppThu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/entropy.cppTue Sep 16 09:37:09 2014 +0530 @@ -481,14 +481,14 @@ } } -void Entropy::encodeCTU(TComDataCU* cu) +void Entropy::encodeCTU(TComDataCU* cu, CU* cuData) { bool bEncodeDQP = cu-m_slice-m_pps-bUseDQP; -encodeCU(cu, 0, 0, false, bEncodeDQP); +encodeCU(cu, 0, 0, bEncodeDQP, cuData); } /* encode a CU block recursively */ -void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bInsidePicture, bool bEncodeDQP) +void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bEncodeDQP, CU* cuData) { Frame* pic = cu-m_pic; Slice* slice = cu-m_slice; @@ -496,30 +496,24 @@ if (depth = slice-m_pps-maxCuDQPDepth slice-m_pps-bUseDQP) bEncodeDQP = true; -if (!bInsidePicture) +int cuSplitFlag = !(cuData-flags CU::LEAF); +int cuUnsplitFlag = !(cuData-flags CU::SPLIT_MANDATORY); + +if (!cuUnsplitFlag) { -uint32_t xmax = 
slice-m_sps-picWidthInLumaSamples - cu-getCUPelX(); -uint32_t ymax = slice-m_sps-picHeightInLumaSamples - cu-getCUPelY(); -uint32_t cuSize = g_maxCUSize depth; - -bInsidePicture = (g_zscanToPelX[absPartIdx] + cuSize = xmax - g_zscanToPelY[absPartIdx] + cuSize = ymax); - -if (!bInsidePicture) +uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; +for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) { -uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; -for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) -{ -if (g_zscanToPelX[absPartIdx] xmax g_zscanToPelY[absPartIdx] ymax) -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, bEncodeDQP); -} - -return; +CU *childCU = cu-m_CULocalData + cuData-childIdx + partUnitIdx; +int cuPresentFlagChild = !(childCU-flags CU::PRESENT); +if (!cuPresentFlagChild) +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU); } +return; } // We need to split, so don't try these modes. 
-if (bInsidePicture depth g_maxCUDepth) +if (cuSplitFlag) codeSplitFlag(cu, absPartIdx, depth); if (depth cu-getDepth(absPartIdx) depth g_maxCUDepth) @@ -527,7 +521,10 @@ uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, bEncodeDQP); +{ +CU *childCU = cu-m_CULocalData + cuData-childIdx + partUnitIdx; +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU); +} return; } diff -r 7e29b10982d2 -r 50505472d3e3 source/encoder/entropy.h --- a/source/encoder/entropy.h Thu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/entropy.h Tue Sep 16 09:37:09 2014 +0530 @@ -148,7 +148,7 @@ void codeShortTermRefPicSet(RPS* rps); void finishSlice() { encodeBinTrm(1); finish(); dynamic_castBitstream*(m_bitIf)-writeByteAlignment(); } -void encodeCTU(TComDataCU* cu); +void encodeCTU(TComDataCU* cu, CU *cuData); void codeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx); void codeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int allowMergeLeft, int allowMergeUp); void codeSaoMerge(uint32_t code) { encodeBin(code, m_contextState