On 12/12, [email protected] wrote:
> # HG changeset patch
> # User Praveen Tiwari
> # Date 1418376812 -19800
> # Node ID ca805794c7519ef698776d0f717fd66b0d3bc626
> # Parent  b1c2ef980dfe59c486454a8838c2c1bb74bf4d32
> psyCost_pp: C code optimization, suitable for ASM conversion
> 
> psy_sum_std_4x4 is a combined version of satd_4x4 and sad<4, 4>, eliminating
> unnecessary load operations and the extra arguments (zeroBuf and 0).
> It also replaces the SAD operation with a low-cost sum operation, as the second
> pixel buffer is zero.

Do we actually measure psy energy for 4x4 pixel blocks? CUs are generally 8x8
to 64x64.

The short-short version which operates on residual blocks might be used
for 4x4 TUs, but I'm not certain about that.

> diff -r b1c2ef980dfe -r ca805794c751 source/common/pixel.cpp
> --- a/source/common/pixel.cpp Thu Dec 11 16:52:06 2014 -0600
> +++ b/source/common/pixel.cpp Fri Dec 12 15:03:32 2014 +0530
> @@ -801,6 +801,38 @@
>  #pragma warning(disable: 4127) // conditional expression is constant
>  #endif
>  
> +int psy_sum_std_4x4(const pixel* pix1, intptr_t stride_pix1)
> +{
> +    sum2_t tmp[4][2];
> +    sum2_t a0, a1, a2, a3, b0, b1;
> +    sum2_t sum = 0;
> +    int sum0 = 0;
> +    uint32_t sum1 = 0;
> +
> +    for (int i = 0; i < 4; i++, pix1 += stride_pix1)
> +    {
> +        a0 = pix1[0];
> +        a1 = pix1[1];
> +        a2 = pix1[2];
> +        a3 = pix1[3];
> +        sum1 = sum1 + (a0 + a1 + a2 + a3);
> +        b0 = (a0 + a1) + ((a0 - a1) << BITS_PER_SUM);
> +        b1 = (a2 + a3) + ((a2 - a3) << BITS_PER_SUM);
> +        tmp[i][0] = b0 + b1;
> +        tmp[i][1] = b0 - b1;
> +    }
> +
> +    for (int i = 0; i < 2; i++)
> +    {
> +        HADAMARD4(a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], 
> tmp[3][i]);
> +        a0 = abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
> +        sum += ((sum_t)a0) + (a0 >> BITS_PER_SUM);
> +    }
> +
> +    sum0 = (int)(sum >> 1);
> +    return (int)(sum0 - (sum1 >> 2));
> +}
> +
>  template<int size>
>  int psyCost_pp(const pixel* source, intptr_t sstride, const pixel* recon, 
> intptr_t rstride)
>  {
> @@ -828,8 +860,8 @@
>      else
>      {
>          /* 4x4 is too small for sa8d */
> -        int sourceEnergy = satd_4x4(source, sstride, zeroBuf, 0) - (sad<4, 
> 4>(source, sstride, zeroBuf, 0) >> 2);
> -        int reconEnergy = satd_4x4(recon, rstride, zeroBuf, 0) - (sad<4, 
> 4>(recon, rstride, zeroBuf, 0) >> 2);
> +        int sourceEnergy = psy_sum_std_4x4(source, sstride);
> +        int reconEnergy = psy_sum_std_4x4(recon, rstride);
>          return abs(sourceEnergy - reconEnergy);
>      }
>  }
> _______________________________________________
> x265-devel mailing list
> [email protected]
> https://mailman.videolan.org/listinfo/x265-devel

-- 
Steve Borho
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel

Reply via email to