Re: [x265] [PATCH 4 of 4] Enabling weight prediction for half and full pel

Shazeb Khan Tue, 08 Oct 2013 22:57:03 -0700

On Wed, Oct 9, 2013 at 12:55 AM, Steve Borho <[email protected]> wrote:


>
>
>
> On Tue, Oct 8, 2013 at 6:41 AM, <[email protected]> wrote:
>
>> # HG changeset patch
>> # User Shazeb Nawaz Khan <[email protected]>
>> # Date 1381232285 -19800
>> #      Tue Oct 08 17:08:05 2013 +0530
>> # Node ID ad8608b0a64869c3b8bbc32e0bb418f7b1dde4bb
>> # Parent  18a5d7c3464d1a3fa98afa95a0fe7a8894bcd3d2
>> Enabling weight prediction for half and full pel
>>
>> diff -r 18a5d7c3464d -r ad8608b0a648 source/common/reference.cpp
>> --- a/source/common/reference.cpp       Tue Oct 08 16:56:39 2013 +0530
>> +++ b/source/common/reference.cpp       Tue Oct 08 17:08:05 2013 +0530
>> @@ -58,6 +58,7 @@
>>  int MotionReference::init(TComPicYuv* pic, wpScalingParam *w)
>>  {
>>      m_reconPic = pic;
>> +    unweightedFPelPlane = pic->getLumaAddr();
>>
>
> the motion reference object has m_reconPic, so it can call
> m_reconPic->getLumaAddr() at any time.  I don't see the need for another
> pointer
>

It's not possible to refer to m_reconPic in subpelInterpolate(), since the
parameter is received as a ReferencePlanes pointer rather than a
MotionReference pointer. It's required to have/use a pointer in the superclass.
Can there be another way?


>
>
>>      lumaStride = pic->getStride();
>>      m_startPad = pic->m_lumaMarginY * lumaStride + pic->m_lumaMarginX;
>>      m_next = NULL;
>> diff -r 18a5d7c3464d -r ad8608b0a648 source/common/reference.h
>> --- a/source/common/reference.h Tue Oct 08 16:56:39 2013 +0530
>> +++ b/source/common/reference.h Tue Oct 08 17:08:05 2013 +0530
>> @@ -43,6 +43,7 @@
>>
>>      pixel* fpelPlane;
>>      pixel* lowresPlane[4];
>> +    pixel* unweightedFPelPlane;
>>
>>      bool isWeighted;
>>      bool isLowres;
>> diff -r 18a5d7c3464d -r ad8608b0a648 source/encoder/frameencoder.cpp
>> --- a/source/encoder/frameencoder.cpp   Tue Oct 08 16:56:39 2013 +0530
>> +++ b/source/encoder/frameencoder.cpp   Tue Oct 08 17:08:05 2013 +0530
>> @@ -909,6 +909,10 @@
>>                      {
>>                          refpic->m_reconRowWait.wait();
>>                      }
>> +                    if(slice->getPPS()->getUseWP() &&
>> (slice->getSliceType() == P_SLICE))
>>
>
> white-space
>
>
>> +                    {
>> +                        slice->m_mref[list][ref]->applyWeight(refpic,
>> row + refLagRows, m_numRows);
>> +                    }
>>                  }
>>              }
>>
>> @@ -941,6 +945,10 @@
>>                          {
>>                              refpic->m_reconRowWait.wait();
>>                          }
>> +                        if(slice->getPPS()->getUseWP() &&
>> (slice->getSliceType() == P_SLICE))
>>
>
> white-space
>
>
>> +                        {
>> +
>>  slice->m_mref[list][ref]->applyWeight(refpic, i + refLagRows, m_numRows);
>> +                        }
>>                      }
>>                  }
>>
>> diff -r 18a5d7c3464d -r ad8608b0a648 source/encoder/motion.cpp
>> --- a/source/encoder/motion.cpp Tue Oct 08 16:56:39 2013 +0530
>> +++ b/source/encoder/motion.cpp Tue Oct 08 17:08:05 2013 +0530
>> @@ -89,6 +89,7 @@
>>      fenc = (pixel*)X265_MALLOC(pixel, MAX_CU_SIZE * MAX_CU_SIZE);
>>      subpelbuf = (pixel*)X265_MALLOC(pixel, (MAX_CU_SIZE + 1) *
>> (MAX_CU_SIZE + 1));
>>      immedVal = (short*)X265_MALLOC(short, (MAX_CU_SIZE + 1) *
>> (MAX_CU_SIZE + 1 + NTAPS_LUMA - 1));
>> +    immedVal2 = (int16_t*)X265_MALLOC(int16_t, (MAX_CU_SIZE + 1) *
>> (MAX_CU_SIZE + 1 + NTAPS_LUMA - 1));
>>
>
> this is the same as immedVal (short==int16_t most everywhere).  It would
> be better to change immedVal's type to int16_t* and to cast it as short
> everywhere necessary in the short term until we can fix all the
> interpolation primitives to use int16_t instead of short.
>

In subpelInterpolate

pixel     --[interpolatePS]-->  immedVal
immedVal  --[interpolateSS]-->  immedVal2
immedVal2 --[weightp]-------->  subpelbuf

immedVal2 serves as an intermediate buffer.


>
>
>>  }
>>
>>  MotionEstimate::~MotionEstimate()
>> @@ -96,6 +97,7 @@
>>      X265_FREE(fenc);
>>      X265_FREE(subpelbuf);
>>      X265_FREE(immedVal);
>> +    X265_FREE(immedVal2);
>>  }
>>
>>  void MotionEstimate::setSourcePU(int offset, int width, int height)
>> @@ -831,7 +833,7 @@
>>                  }
>>                  else
>>                  {
>> -                    subpelInterpolate(fqref, ref->lumaStride, xFrac,
>> yFrac, dir);
>> +                    subpelInterpolate(ref, qmv0, dir);
>>                      cost0 = hpelcomp(fenc, FENC_STRIDE, subpelbuf,
>> FENC_STRIDE + (dir == 2)) + mvcost0;
>>                      cost1 = hpelcomp(fenc, FENC_STRIDE, subpelbuf + (dir
>> == 2) + (dir == 1 ? FENC_STRIDE : 0), FENC_STRIDE + (dir == 2)) + mvcost1;
>>                  }
>> @@ -1140,47 +1142,61 @@
>>          {
>>              return cmp(fenc, FENC_STRIDE, fref, ref->lumaStride);
>>          }
>> -        else if (yFrac == 0)
>> +        else
>>          {
>> -            primitives.ipfilter_pp[FILTER_H_P_P_8](fref,
>> ref->lumaStride, subpelbuf, FENC_STRIDE, blockwidth, blockheight,
>> g_lumaFilter[xFrac]);
>> +            subpelInterpolate(ref, qmv, 0);
>>
>
> as a post-step; we should try to declare subpelInterpolate as inline
>
>
>> +        }
>> +        return cmp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE);
>> +    }
>> +}
>> +
>> +void MotionEstimate::subpelInterpolate(ReferencePlanes *ref, MV qmv, int
>> dir)
>> +{
>> +    int xFrac = qmv.x & 0x3;
>> +    int yFrac = qmv.y & 0x3;
>> +    assert(yFrac | xFrac);
>> +    int realWidth = blockwidth + (dir == 2);
>> +    int realHeight = blockheight + (dir == 1);
>> +    intptr_t realStride = FENC_STRIDE + (dir == 2);
>> +    pixel *fref = ref->unweightedFPelPlane + blockOffset + (qmv.x >> 2)
>> + (qmv.y >> 2) * ref->lumaStride;
>> +
>> +    if (ref->isWeighted)
>> +    {
>> +        if (yFrac == 0)
>> +        {
>> +            primitives.ipfilter_ps[FILTER_H_P_S_8](fref,
>> ref->lumaStride, immedVal, realStride, realWidth, realHeight,
>> g_lumaFilter[xFrac]);
>> +            primitives.weightpUni(immedVal, subpelbuf, realStride,
>> realStride, realWidth, realHeight, ref->weight, ref->round, ref->shift,
>> ref->offset);
>>          }
>>          else if (xFrac == 0)
>>          {
>> -            primitives.ipfilter_pp[FILTER_V_P_P_8](fref,
>> ref->lumaStride, subpelbuf, FENC_STRIDE, blockwidth, blockheight,
>> g_lumaFilter[yFrac]);
>> +            primitives.ipfilter_ps[FILTER_V_P_S_8](fref,
>> ref->lumaStride, immedVal, realStride, realWidth, realHeight,
>> g_lumaFilter[yFrac]);
>> +            primitives.weightpUni(immedVal, subpelbuf, realStride,
>> realStride, realWidth, realHeight, ref->weight, ref->round, ref->shift,
>> ref->offset);
>>          }
>>          else
>>          {
>>              int filterSize = NTAPS_LUMA;
>>              int halfFilterSize = (filterSize >> 1);
>> -            primitives.ipfilter_ps[FILTER_H_P_S_8](fref -
>> (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal,
>> blockwidth, blockwidth, blockheight + filterSize - 1, g_lumaFilter[xFrac]);
>> -            primitives.ipfilter_sp[FILTER_V_S_P_8](immedVal +
>> (halfFilterSize - 1) * blockwidth, blockwidth, subpelbuf, FENC_STRIDE,
>> blockwidth, blockheight, g_lumaFilter[yFrac]);
>> +            primitives.ipfilter_ps[FILTER_H_P_S_8](fref -
>> (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal,
>> realWidth, realWidth, realHeight + filterSize - 1, g_lumaFilter[xFrac]);
>> +            primitives.ipfilter_ss[FILTER_V_S_S_8](immedVal +
>> (halfFilterSize - 1) * realWidth, realWidth, immedVal2, realStride,
>> realWidth, realHeight, g_lumaFilter[yFrac]);
>> +            primitives.weightpUni(immedVal2, subpelbuf, realStride,
>> realStride, realWidth, realHeight, ref->weight, ref->round, ref->shift,
>> ref->offset);
>>          }
>> -
>> -        return cmp(fenc, FENC_STRIDE, subpelbuf, FENC_STRIDE);
>> -    }
>> -}
>> -
>> -void MotionEstimate::subpelInterpolate(pixel *fref, intptr_t lumaStride,
>> int xFrac, int yFrac, int dir)
>> -{
>> -    assert(yFrac | xFrac);
>> -
>> -    int realWidth = blockwidth + (dir == 2);
>> -    int realHeight = blockheight + (dir == 1);
>> -    intptr_t realStride = FENC_STRIDE + (dir == 2);
>> -
>> -    if (yFrac == 0)
>> -    {
>> -        primitives.ipfilter_pp[FILTER_H_P_P_8](fref, lumaStride,
>> subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[xFrac]);
>> -    }
>> -    else if (xFrac == 0)
>> -    {
>> -        primitives.ipfilter_pp[FILTER_V_P_P_8](fref, lumaStride,
>> subpelbuf, realStride, realWidth, realHeight, g_lumaFilter[yFrac]);
>>      }
>>      else
>>      {
>> -        int filterSize = NTAPS_LUMA;
>> -        int halfFilterSize = (filterSize >> 1);
>> -        primitives.ipfilter_ps[FILTER_H_P_S_8](fref - (halfFilterSize -
>> 1) * lumaStride, lumaStride, immedVal, realWidth, realWidth, realHeight +
>> filterSize - 1, g_lumaFilter[xFrac]);
>> -        primitives.ipfilter_sp[FILTER_V_S_P_8](immedVal +
>> (halfFilterSize - 1) * realWidth, realWidth, subpelbuf, realStride,
>> realWidth, realHeight, g_lumaFilter[yFrac]);
>> +        if (yFrac == 0)
>> +        {
>> +            primitives.ipfilter_pp[FILTER_H_P_P_8](fref,
>> ref->lumaStride, subpelbuf, realStride, realWidth, realHeight,
>> g_lumaFilter[xFrac]);
>> +        }
>> +        else if (xFrac == 0)
>> +        {
>> +            primitives.ipfilter_pp[FILTER_V_P_P_8](fref,
>> ref->lumaStride, subpelbuf, realStride, realWidth, realHeight,
>> g_lumaFilter[yFrac]);
>> +        }
>> +        else
>> +        {
>> +            int filterSize = NTAPS_LUMA;
>> +            int halfFilterSize = (filterSize >> 1);
>> +            primitives.ipfilter_ps[FILTER_H_P_S_8](fref -
>> (halfFilterSize - 1) * ref->lumaStride, ref->lumaStride, immedVal,
>> realWidth, realWidth, realHeight + filterSize - 1, g_lumaFilter[xFrac]);
>> +            primitives.ipfilter_sp[FILTER_V_S_P_8](immedVal +
>> (halfFilterSize - 1) * realWidth, realWidth, subpelbuf, realStride,
>> realWidth, realHeight, g_lumaFilter[yFrac]);
>> +        }
>>      }
>>  }
>> diff -r 18a5d7c3464d -r ad8608b0a648 source/encoder/motion.h
>> --- a/source/encoder/motion.h   Tue Oct 08 16:56:39 2013 +0530
>> +++ b/source/encoder/motion.h   Tue Oct 08 17:08:05 2013 +0530
>> @@ -54,6 +54,7 @@
>>      /* subpel generation buffers */
>>      pixel *subpelbuf;
>>      short *immedVal;
>> +    int16_t *immedVal2;
>>      int blockwidth;
>>      int blockheight;
>>
>> @@ -96,7 +97,7 @@
>>
>>      int subpelCompare(ReferencePlanes *ref, const MV & qmv, pixelcmp_t);
>>
>> -    void subpelInterpolate(pixel *fref, intptr_t lumaStride, int xFrac,
>> int yFrac, int dir);
>> +    void subpelInterpolate(ReferencePlanes *ref, MV qmv, int dir);
>>
>>  protected:
>>
>> _______________________________________________
>> x265-devel mailing list
>> [email protected]
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
>
> --
> Steve Borho
>
> _______________________________________________
> x265-devel mailing list
> [email protected]
> https://mailman.videolan.org/listinfo/x265-devel
>
>

_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel

Re: [x265] [PATCH 4 of 4] Enabling weight prediction for half and full pel

Reply via email to