# HG changeset patch
# User Dnyaneshwar G <dnyanesh...@multicorewareinc.com>
# Date 1456466613 -19800
# Fri Feb 26 11:33:33 2016 +0530
# Node ID 5ff8ee940ad7f4d34b106ae4999b996245c87919
# Parent 01782e7f0a8cb93efbe4ff1534602ff9055c8565
asm: separated pelFilterChroma function into horizontal & vertical primitives for asm

diff -r 01782e7f0a8c -r 5ff8ee940ad7 source/common/deblock.cpp
--- a/source/common/deblock.cpp Thu Feb 25 12:17:57 2016 +0530
+++ b/source/common/deblock.cpp Fri Feb 26 11:33:33 2016 +0530
@@ -319,27 +319,6 @@
     }
 }
 
-/* Deblocking of one line/column for the chrominance component
- * \param src pointer to picture data
- * \param offset offset value for picture data
- * \param tc tc value
- * \param maskP indicator to disable filtering on partP
- * \param maskQ indicator to disable filtering on partQ */
-static inline void pelFilterChroma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ)
-{
-    for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
-    {
-        int16_t m4 = (int16_t)src[0];
-        int16_t m3 = (int16_t)src[-offset];
-        int16_t m5 = (int16_t)src[offset];
-        int16_t m2 = (int16_t)src[-offset * 2];
-
-        int32_t delta = x265_clip3(-tc, tc, ((((m4 - m3) * 4) + m2 - m5 + 4) >> 3));
-        src[-offset] = x265_clip(m3 + (delta & maskP));
-        src[0] = x265_clip(m4 - (delta & maskQ));
-    }
-}
-
 void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
 {
     PicYuv* reconPic = cuQ->m_encData->m_reconPic;
@@ -517,7 +496,7 @@
                 int32_t tc = s_tcTable[indexTC] << bitdepthShift;
                 pixel* srcC = srcChroma[chromaIdx];
 
-                pelFilterChroma(srcC + unitOffset, srcStep, offset, tc, maskP, maskQ);
+                primitives.pelFilterChroma[dir](srcC + unitOffset, srcStep, offset, tc, maskP, maskQ);
             }
         }
     }
diff -r 01782e7f0a8c -r 5ff8ee940ad7 source/common/loopfilter.cpp
--- a/source/common/loopfilter.cpp Thu Feb 25 12:17:57 2016 +0530
+++ b/source/common/loopfilter.cpp Fri Feb 26 11:33:33 2016 +0530
@@ -158,6 +158,27 @@
         src[offset * 2] = (pixel)(x265_clip3(-tcQ, tcQ, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6);
     }
 }
+
+/* Deblocking of one line/column for the chrominance component
+* \param src pointer to picture data
+* \param offset offset value for picture data
+* \param tc tc value
+* \param maskP indicator to disable filtering on partP
+* \param maskQ indicator to disable filtering on partQ */
+static void pelFilterChroma_c(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ)
+{
+    for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
+    {
+        int16_t m4 = (int16_t)src[0];
+        int16_t m3 = (int16_t)src[-offset];
+        int16_t m5 = (int16_t)src[offset];
+        int16_t m2 = (int16_t)src[-offset * 2];
+
+        int32_t delta = x265_clip3(-tc, tc, ((((m4 - m3) * 4) + m2 - m5 + 4) >> 3));
+        src[-offset] = x265_clip(m3 + (delta & maskP));
+        src[0] = x265_clip(m4 - (delta & maskQ));
+    }
+}
 }
 
 namespace X265_NS {
@@ -176,5 +197,7 @@
     // C code is same for EDGE_VER and EDGE_HOR only asm code is different
     p.pelFilterLumaStrong[0] = pelFilterLumaStrong_c;
     p.pelFilterLumaStrong[1] = pelFilterLumaStrong_c;
+    p.pelFilterChroma[0] = pelFilterChroma_c;
+    p.pelFilterChroma[1] = pelFilterChroma_c;
 }
 }
diff -r 01782e7f0a8c -r 5ff8ee940ad7 source/common/primitives.h
--- a/source/common/primitives.h Thu Feb 25 12:17:57 2016 +0530
+++ b/source/common/primitives.h Fri Feb 26 11:33:33 2016 +0530
@@ -197,6 +197,7 @@
 typedef uint32_t (*costC1C2Flag_t)(uint16_t *absCoeff, intptr_t numC1Flag, uint8_t *baseCtxMod, intptr_t ctxOffset);
 
 typedef void (*pelFilterLumaStrong_t)(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tcP, int32_t tcQ);
+typedef void (*pelFilterChroma_t)(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ);
 
 /* Function pointers to optimized encoder primitives. Each pointer can reference
  * either an assembly routine, a SIMD intrinsic primitive, or a C function */
@@ -332,6 +333,7 @@
     costC1C2Flag_t costC1C2Flag;
 
     pelFilterLumaStrong_t pelFilterLumaStrong[2]; // EDGE_VER = 0, EDGE_HOR = 1
+    pelFilterChroma_t pelFilterChroma[2]; // EDGE_VER = 0, EDGE_HOR = 1
 
     /* There is one set of chroma primitives per color space. An encoder will
      * have just a single color space and thus it will only ever use one entry
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel