These copies are reasonably hot in perf traces and there is no need to copy the pointers here. Just set a pointer to the array to read from and use that directly.
Benchmarks show this gives a nice improvement in FPS reported for an
encoding using --preset=medium:

  Cortex-A715: +0.2%
  Cortex-X3: +0.7%
  Neoverse V2: +0.9%

Change-Id: I33ba6854e7fbe6ecb1778b8201030a717f60f34e
---
 source/common/lowres.h | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/source/common/lowres.h b/source/common/lowres.h
index c9c2e3712..7e6baa844 100644
--- a/source/common/lowres.h
+++ b/source/common/lowres.h
@@ -71,11 +71,7 @@ struct ReferencePlanes
     inline pixel *lowresMC(intptr_t blockOffset, const MV& qmv, pixel *buf, intptr_t& outstride, bool hme)
     {
         intptr_t YStride = hme ? lumaStride / 2 : lumaStride;
-        pixel *plane[4];
-        for (int i = 0; i < 4; i++)
-        {
-            plane[i] = hme ? lowerResPlane[i] : lowresPlane[i];
-        }
+        pixel **plane = hme ? lowerResPlane : lowresPlane;
         if ((qmv.x | qmv.y) & 1)
         {
             int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);
@@ -98,11 +94,7 @@ struct ReferencePlanes
     inline int lowresQPelCost(pixel *fenc, intptr_t blockOffset, const MV& qmv, pixelcmp_t comp, bool hme)
     {
         intptr_t YStride = hme ? lumaStride / 2 : lumaStride;
-        pixel *plane[4];
-        for (int i = 0; i < 4; i++)
-        {
-            plane[i] = hme ? lowerResPlane[i] : lowresPlane[i];
-        }
+        pixel **plane = hme ? lowerResPlane : lowresPlane;
         if ((qmv.x | qmv.y) & 1)
         {
             ALIGN_VAR_16(pixel, subpelbuf[8 * 8]);
-- 
2.34.1
From 1e6f0252b7d47d153d2fe2f13977f0e70b9bf4a2 Mon Sep 17 00:00:00 2001
From: George Steed <george.st...@arm.com>
Date: Thu, 10 Oct 2024 14:26:12 +0100
Subject: [PATCH] Avoid pointer copies in ReferencePlanes::{lowresMC,lowresQPelCost}

These copies are reasonably hot in perf traces and there is no need to
copy the pointers here. Just set a pointer to the array to read from and
use that directly.

Benchmarks show this gives a nice improvement in FPS reported for an
encoding using --preset=medium:

  Cortex-A715: +0.2%
  Cortex-X3: +0.7%
  Neoverse V2: +0.9%

Change-Id: I33ba6854e7fbe6ecb1778b8201030a717f60f34e
---
 source/common/lowres.h | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/source/common/lowres.h b/source/common/lowres.h
index c9c2e3712..7e6baa844 100644
--- a/source/common/lowres.h
+++ b/source/common/lowres.h
@@ -71,11 +71,7 @@ struct ReferencePlanes
     inline pixel *lowresMC(intptr_t blockOffset, const MV& qmv, pixel *buf, intptr_t& outstride, bool hme)
     {
         intptr_t YStride = hme ? lumaStride / 2 : lumaStride;
-        pixel *plane[4];
-        for (int i = 0; i < 4; i++)
-        {
-            plane[i] = hme ? lowerResPlane[i] : lowresPlane[i];
-        }
+        pixel **plane = hme ? lowerResPlane : lowresPlane;
         if ((qmv.x | qmv.y) & 1)
         {
             int hpelA = (qmv.y & 2) | ((qmv.x & 2) >> 1);
@@ -98,11 +94,7 @@ struct ReferencePlanes
     inline int lowresQPelCost(pixel *fenc, intptr_t blockOffset, const MV& qmv, pixelcmp_t comp, bool hme)
     {
         intptr_t YStride = hme ? lumaStride / 2 : lumaStride;
-        pixel *plane[4];
-        for (int i = 0; i < 4; i++)
-        {
-            plane[i] = hme ? lowerResPlane[i] : lowresPlane[i];
-        }
+        pixel **plane = hme ? lowerResPlane : lowresPlane;
         if ((qmv.x | qmv.y) & 1)
         {
             ALIGN_VAR_16(pixel, subpelbuf[8 * 8]);
-- 
2.34.1
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel