On Mon, Sep 30, 2013 at 12:35 PM, <[email protected]> wrote:
> # HG changeset patch > # User Deepthi Devaki <[email protected]> > # Date 1380523193 -19800 > # Node ID bb238e8b36007aad896884009c720d26df8775c5 > # Parent 55edc34e253c14d3eccb83a7d1db43774349ff9a > Bidir ME: new logic adapted from x264 > > L0 and L1 MVs from unidir ME used for bidir MV. bidir cost is calculated > from the average of references. Performance/PSNR with new bidir with > commandline > x265.exe FourPeople_1280x720_60.y4m -f 100 --b-adapt 0 -b 3 --ref 1 > --hash 1 -o four.hevc -r recon.yuv > New: (2.24 fps), 515.16 kb/s, Global PSNR: 39.704 > Orig: (2.05 fps), 519.47 kb/s, Global PSNR: 39.711 > > diff -r 55edc34e253c -r bb238e8b3600 source/Lib/TLibEncoder/TEncSearch.cpp > --- a/source/Lib/TLibEncoder/TEncSearch.cpp Sat Sep 28 22:54:44 2013 > -0500 > +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Sep 30 12:09:53 2013 > +0530 > @@ -2289,7 +2289,7 @@ > * \param bUseRes > * \returns void > */ > -void TEncSearch::predInterSearch(TComDataCU* cu, TComYuv* fencYuv, > TComYuv* predYuv, bool bUseMRG) > +void TEncSearch::predInterSearch(TComDataCU* cu, TComYuv* /*fencYuv*/, > TComYuv* predYuv, bool bUseMRG) > { > m_predYuv[0].clear(); > m_predYuv[1].clear(); > @@ -2317,7 +2317,6 @@ > > UInt partAddr; > int roiWidth, roiHeight; > - int refStart, refEnd; > > PartSize partSize = cu->getPartitionSize(0); > int bestBiPRefIdxL1 = 0; > @@ -2483,8 +2482,6 @@ > // Bi-directional prediction > if ((cu->getSlice()->isInterB()) && > (cu->isBipredRestriction(partIdx) == false)) > { > - UInt motBits[2]; > - > mvBidir[0] = mv[0]; > mvBidir[1] = mv[1]; > refIdxBidir[0] = refIdx[0]; > @@ -2492,112 +2489,17 @@ > > ::memcpy(mvPredBi, mvPred, sizeof(mvPred)); > ::memcpy(mvpIdxBi, mvpIdx, sizeof(mvpIdx)); > - > - if (cu->getSlice()->getMvdL1ZeroFlag()) > - { > - xCopyAMVPInfo(&amvpInfo[1][bestBiPRefIdxL1], > cu->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo()); > - cu->setMVPIdxSubParts(bestBiPMvpL1, REF_PIC_LIST_1, > partAddr, partIdx, cu->getDepth(partAddr)); > - 
mvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1; > - mvPredBi[1][bestBiPRefIdxL1] = > cu->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo()->m_mvCand[bestBiPMvpL1]; > - > - mvBidir[1] = mvPredBi[1][bestBiPRefIdxL1]; > - refIdxBidir[1] = bestBiPRefIdxL1; > - > cu->getCUMvField(REF_PIC_LIST_1)->setAllMv(mvBidir[1], partSize, partAddr, > 0, partIdx); > - > cu->getCUMvField(REF_PIC_LIST_1)->setAllRefIdx(refIdxBidir[1], partSize, > partAddr, 0, partIdx); > - motionCompensation(cu, &m_predYuv[1], REF_PIC_LIST_1, > partIdx); > - > - motBits[0] = bits[0] - mbBits[0]; > - motBits[1] = mbBits[1]; > - > - if (cu->getSlice()->getNumRefIdx(REF_PIC_LIST_1) > 1) > - { > - motBits[1] += bestBiPRefIdxL1 + 1; > - if (bestBiPRefIdxL1 == > cu->getSlice()->getNumRefIdx(REF_PIC_LIST_1) - 1) motBits[1]--; > - } > - > - motBits[1] += > m_mvpIdxCost[mvpIdxBi[1][bestBiPRefIdxL1]][AMVP_MAX_NUM_CANDS]; > - > - bits[2] = mbBits[2] + motBits[0] + motBits[1]; > - > - mvTemp[1][bestBiPRefIdxL1] = mvBidir[1]; > - } > - else > - { > - motBits[0] = bits[0] - mbBits[0]; > - motBits[1] = bits[1] - mbBits[1]; > - bits[2] = mbBits[2] + motBits[0] + motBits[1]; > - } > - > - int refList = 0; > - if (listCost[0] <= listCost[1]) > - { > - refList = 1; > - } > - else > - { > - refList = 0; > - } > - if (!cu->getSlice()->getMvdL1ZeroFlag()) > - { > - cu->getCUMvField(RefPicList(1 - > refList))->setAllMv(mv[1 - refList], partSize, partAddr, 0, partIdx); > - cu->getCUMvField(RefPicList(1 - > refList))->setAllRefIdx(refIdx[1 - refList], partSize, partAddr, 0, > partIdx); > - motionCompensation(cu, &m_predYuv[1 - refList], > RefPicList(1 - refList), partIdx); > - } > - RefPicList picList = (refList ? 
REF_PIC_LIST_1 : > REF_PIC_LIST_0); > - > - if (cu->getSlice()->getMvdL1ZeroFlag()) > - { > - refList = 0; > - picList = REF_PIC_LIST_0; > - } > - > - bool bChanged = false; > - > - refStart = 0; > - refEnd = cu->getSlice()->getNumRefIdx(picList) - 1; > - > - for (int refIdxTmp = refStart; refIdxTmp <= refEnd; > refIdxTmp++) > - { > - bitsTemp = mbBits[2] + motBits[1 - refList]; > - if (cu->getSlice()->getNumRefIdx(picList) > 1) > - { > - bitsTemp += refIdxTmp + 1; > - if (refIdxTmp == > cu->getSlice()->getNumRefIdx(picList) - 1) bitsTemp--; > - } > - bitsTemp += > m_mvpIdxCost[mvpIdxBi[refList][refIdxTmp]][AMVP_MAX_NUM_CANDS]; > - // call bidir ME > - xMotionEstimation(cu, fencYuv, partIdx, picList, > &mvPredBi[refList][refIdxTmp], refIdxTmp, mvTemp[refList][refIdxTmp], > - bitsTemp, costTemp); > - xCopyAMVPInfo(&amvpInfo[refList][refIdxTmp], > cu->getCUMvField(picList)->getAMVPInfo()); > - xCheckBestMVP(cu, picList, > mvTemp[refList][refIdxTmp], mvPredBi[refList][refIdxTmp], > mvpIdxBi[refList][refIdxTmp], > - bitsTemp, costTemp); > - > - if (costTemp < costbi) > - { > - bChanged = true; > - > - mvBidir[refList] = mvTemp[refList][refIdxTmp]; > - refIdxBidir[refList] = refIdxTmp; > - > - costbi = costTemp; > - motBits[refList] = bitsTemp - mbBits[2] - > motBits[1 - refList]; > - bits[2] = bitsTemp; > - } > - } // for loop-refIdxTmp > - > - if (!bChanged) > - { > - if (costbi <= listCost[0] && costbi <= listCost[1]) > - { > - xCopyAMVPInfo(&amvpInfo[0][refIdxBidir[0]], > cu->getCUMvField(REF_PIC_LIST_0)->getAMVPInfo()); > - xCheckBestMVP(cu, REF_PIC_LIST_0, mvBidir[0], > mvPredBi[0][refIdxBidir[0]], mvpIdxBi[0][refIdxBidir[0]], bits[2], costbi); > - if (!cu->getSlice()->getMvdL1ZeroFlag()) > - { > - xCopyAMVPInfo(&amvpInfo[1][refIdxBidir[1]], > cu->getCUMvField(REF_PIC_LIST_1)->getAMVPInfo()); > - xCheckBestMVP(cu, REF_PIC_LIST_1, mvBidir[1], > mvPredBi[1][refIdxBidir[1]], mvpIdxBi[1][refIdxBidir[1]], bits[2], costbi); > - } > - } > - } > + > + pixel *ref0,*ref1; > 
+ ref0 = > cu->getSlice()->m_mref[REF_PIC_LIST_0][refIdx[0]]->fpelPlane + (pu - > fenc->getLumaAddr()) + (mv[0].x >> 2) + (mv[0].y >> 2) * fenc->getStride(); > + ref1 = > cu->getSlice()->m_mref[REF_PIC_LIST_1][refIdx[1]]->fpelPlane + (pu - > fenc->getLumaAddr()) + (mv[1].x >> 2) + (mv[1].y >> 2) * fenc->getStride(); > + >

There is a problem here: ref0 and ref1 are taken only from the full-pel reference planes (the MVs are simply shifted right by 2), so the sub-pel part of each motion vector is ignored. The sub-pel reference blocks must be generated before they are averaged.

> + pixel avg[MAX_CU_SIZE * MAX_CU_SIZE]; > + > + int partEnum = PartitionFromSizes(roiWidth, roiHeight); > + primitives.pixelavg_pp[partEnum](avg, roiWidth, ref0, > ref1, fenc->getStride(), fenc->getStride()); > + int satdCost = primitives.satd[partEnum](pu, > fenc->getStride(), avg, roiWidth); > + costbi = satdCost + m_rdCost->getCost(bits[0]) + > m_rdCost->getCost(bits[1]); > } // if (B_SLICE) > } //end if bTestNormalMC > > diff -r 55edc34e253c -r bb238e8b3600 source/common/pixel.cpp > --- a/source/common/pixel.cpp Sat Sep 28 22:54:44 2013 -0500 > +++ b/source/common/pixel.cpp Mon Sep 30 12:09:53 2013 +0530 > @@ -578,6 +578,21 @@ > } > } > > +template<int lx, int ly> > +void pixelavg_pp(pixel* dst, intptr_t dstride, pixel* src0, pixel* src1, > intptr_t sstride0, intptr_t sstride1) > +{ > + for( int y = 0; y < ly; y++ ) > + { > + for( int x = 0; x < lx; x++ ) > + { > + dst[x] = ( src0[x] + src1[x] + 1 ) >> 1; > + } > + src0 += sstride0; > + src1 += sstride1; > + dst += dstride; > + } > +} > + > void scale1D_128to64(pixel *dst, pixel *src, intptr_t /*stride*/) > { > int x; > @@ -647,6 +662,7 @@ > SET_FUNC_PRIMITIVE_TABLE_C2(sad) > SET_FUNC_PRIMITIVE_TABLE_C2(sad_x3) > SET_FUNC_PRIMITIVE_TABLE_C2(sad_x4) > + SET_FUNC_PRIMITIVE_TABLE_C2(pixelavg_pp) > > // satd > p.satd[PARTITION_4x4] = satd_4x4; > diff -r 55edc34e253c -r bb238e8b3600 source/common/primitives.h > --- a/source/common/primitives.h Sat Sep 28 22:54:44 2013 -0500 > +++ b/source/common/primitives.h Mon Sep 30 12:09:53 2013 +0530 > @@ -196,6 +196,7 @@ > typedef void (*pixelsub_sp_t)(int 
bx, int by, short *dst, intptr_t > dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1); > typedef void (*pixeladd_ss_t)(int bx, int by, short *dst, intptr_t > dstride, short *src0, short *src1, intptr_t sstride0, intptr_t sstride1); > typedef void (*pixeladd_pp_t)(int bx, int by, pixel *dst, intptr_t > dstride, pixel *src0, pixel *src1, intptr_t sstride0, intptr_t sstride1); > +typedef void (*pixelavg_pp_t)(pixel *dst, intptr_t dstride, pixel *src0, > pixel *src1, intptr_t sstride0, intptr_t sstride1); > typedef void (*blockfil_s_t)(short *dst, intptr_t dstride, short val); > > typedef void (*intra_dc_t)(pixel* above, pixel* left, pixel* dst, > intptr_t dstStride, int width, int bFilter); > @@ -290,6 +291,7 @@ > pixelsub_sp_t pixelsub_sp; > pixeladd_ss_t pixeladd_ss; > pixeladd_pp_t pixeladd_pp; > + pixelavg_pp_t pixelavg_pp[NUM_PARTITIONS]; > > filterVwghtd_t filterVwghtd; > filterHwghtd_t filterHwghtd; >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
