I believe that the source code "if (modeHor)..." in the function "void all_angs_pred_c(pixel *dest, pixel *refPix, pixel *filtPix, int bLuma)" is redundant and that it causes bugs in intra prediction.
Best Regards, Peking University Yangang Cai From: x265-devel-request Date: 2015-12-03 01:29 To: x265-devel Subject: x265-devel Digest, Vol 31, Issue 7 Send x265-devel mailing list submissions to [email protected] To subscribe or unsubscribe via the World Wide Web, visit https://mailman.videolan.org/listinfo/x265-devel or, via email, send a message with subject or body 'help' to [email protected] You can reach the person managing the list at [email protected] When replying, please edit your Subject line so it is more specific than "Re: Contents of x265-devel digest..." Today's Topics: 1. [PATCH 11 of 15] sao: split SAO Left reference pixel buffer into row base (Min Chen) 2. [PATCH 12 of 15] sao: new CU level process function (Min Chen) 3. [PATCH 13 of 15] sao: avoid thread conflict on offsetEo and offsetBo (Min Chen) 4. [PATCH 14 of 15] sao: reduce address operators by split into Luma and Chroma path (Min Chen) ---------------------------------------------------------------------- Message: 1 Date: Wed, 02 Dec 2015 11:28:34 -0600 From: Min Chen <[email protected]> To: [email protected] Subject: [x265] [PATCH 11 of 15] sao: split SAO Left reference pixel buffer into row base Message-ID: <3a423fcb4b4089de2c05.1449077314@chen-PC> Content-Type: text/plain; charset="us-ascii" # HG changeset patch # User Min Chen <[email protected]> # Date 1449076371 21600 # Node ID 3a423fcb4b4089de2c05a9067556f20a6fca0d1b # Parent 82f6a10f44b88400f0f875025b9e8b6caff3acd3 sao: split SAO Left reference pixel buffer into row base --- source/encoder/sao.cpp | 35 +++++++++++++++++++++++++---------- source/encoder/sao.h | 4 ++-- 2 files changed, 27 insertions(+), 12 deletions(-) diff -r 82f6a10f44b8 -r 3a423fcb4b40 source/encoder/sao.cpp --- a/source/encoder/sao.cpp Wed Dec 02 11:12:48 2015 -0600 +++ b/source/encoder/sao.cpp Wed Dec 02 11:12:51 2015 -0600 @@ -87,8 +87,12 @@ m_tmpU[0] = NULL; m_tmpU[1] = NULL; m_tmpU[2] = NULL; - m_tmpL1 = NULL; - m_tmpL2 = NULL; + m_tmpL1[0] = NULL; + 
m_tmpL1[1] = NULL; + m_tmpL1[2] = NULL; + m_tmpL2[0] = NULL; + m_tmpL2[1] = NULL; + m_tmpL2[2] = NULL; m_depthSaoRate[0][0] = 0; m_depthSaoRate[0][1] = 0; @@ -116,11 +120,12 @@ CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt); - CHECKED_MALLOC(m_tmpL1, pixel, g_maxCUSize + 1); - CHECKED_MALLOC(m_tmpL2, pixel, g_maxCUSize + 1); for (int i = 0; i < 3; i++) { + CHECKED_MALLOC(m_tmpL1[i], pixel, g_maxCUSize + 1); + CHECKED_MALLOC(m_tmpL2[i], pixel, g_maxCUSize + 1); + // SAO asm code will read 1 pixel before and after, so pad by 2 // NOTE: m_param->sourceWidth+2 enough, to avoid condition check in copySaoAboveRef(), I alloc more up to 63 bytes in here CHECKED_MALLOC(m_tmpU[i], pixel, m_numCuInWidth * g_maxCUSize + 2); @@ -182,11 +187,21 @@ { X265_FREE_ZERO(m_clipTableBase); - X265_FREE_ZERO(m_tmpL1); - X265_FREE_ZERO(m_tmpL2); for (int i = 0; i < 3; i++) { + if (m_tmpL1[i]) + { + X265_FREE(m_tmpL1[i]); + m_tmpL1[i] = NULL; + } + + if (m_tmpL2[i]) + { + X265_FREE(m_tmpL2[i]); + m_tmpL2[i] = NULL; + } + if (m_tmpU[i]) { X265_FREE(m_tmpU[i] - 1); @@ -307,7 +322,7 @@ memset(_upBuff1 + MAX_CU_SIZE, 0, 2 * sizeof(int8_t)); /* avoid valgrind uninit warnings */ - tmpL = m_tmpL1; + tmpL = m_tmpL1[plane]; tmpU = &(m_tmpU[plane][lpelx]); switch (typeIdx) @@ -607,7 +622,7 @@ for (int i = 0; i < ctuHeight + 1; i++) { - m_tmpL1[i] = rec[0]; + m_tmpL1[plane][i] = rec[0]; rec += stride; } @@ -623,7 +638,7 @@ rec = reconPic->getPlaneAddr(plane, addr); for (int i = 0; i < ctuHeight + 1; i++) { - m_tmpL2[i] = rec[ctuWidth - 1]; + m_tmpL2[plane][i] = rec[ctuWidth - 1]; rec += stride; } } @@ -652,7 +667,7 @@ } processSaoCu(addr, typeIdx, plane); } - std::swap(m_tmpL1, m_tmpL2); + std::swap(m_tmpL1[plane], m_tmpL2[plane]); } } diff -r 82f6a10f44b8 -r 3a423fcb4b40 source/encoder/sao.h --- a/source/encoder/sao.h Wed Dec 02 11:12:48 2015 -0600 +++ b/source/encoder/sao.h Wed Dec 02 11:12:51 2015 -0600 @@ -93,8 +93,8 @@ pixel* m_clipTableBase; pixel* m_tmpU[3]; - pixel* m_tmpL1; - 
pixel* m_tmpL2; + pixel* m_tmpL1[3]; + pixel* m_tmpL2[3]; public: ------------------------------ Message: 2 Date: Wed, 02 Dec 2015 11:28:35 -0600 From: Min Chen <[email protected]> To: [email protected] Subject: [x265] [PATCH 12 of 15] sao: new CU level process function Message-ID: <b1c261378db29a1988d8.1449077315@chen-PC> Content-Type: text/plain; charset="us-ascii" # HG changeset patch # User Min Chen <[email protected]> # Date 1449076374 21600 # Node ID b1c261378db29a1988d8e27c5eabe1a76821f83d # Parent 3a423fcb4b4089de2c05a9067556f20a6fca0d1b sao: new CU level process function --- source/encoder/framefilter.cpp | 13 +++++-- source/encoder/sao.cpp | 68 ++++++++++++++++++++++++++++++++++++++++ source/encoder/sao.h | 1 + 3 files changed, 78 insertions(+), 4 deletions(-) diff -r 3a423fcb4b40 -r b1c261378db2 source/encoder/framefilter.cpp --- a/source/encoder/framefilter.cpp Wed Dec 02 11:12:51 2015 -0600 +++ b/source/encoder/framefilter.cpp Wed Dec 02 11:12:54 2015 -0600 @@ -541,19 +541,24 @@ { FrameData& encData = *m_frame->m_encData; SAOParam* saoParam = encData.m_saoParam; + uint32_t numCols = encData.m_slice->m_sps->numCuInWidth; if (saoParam->bSaoFlag[0]) - m_parallelFilter[row].m_sao.processSaoUnitRow(saoParam->ctuParam[0], row, 0); + { + for(uint32_t col = 0; col < numCols; col++) + m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[0], row, col, 0); + } if (saoParam->bSaoFlag[1]) { - m_parallelFilter[row].m_sao.processSaoUnitRow(saoParam->ctuParam[1], row, 1); - m_parallelFilter[row].m_sao.processSaoUnitRow(saoParam->ctuParam[2], row, 2); + for(uint32_t col = 0; col < numCols; col++) + m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[1], row, col, 1); + for(uint32_t col = 0; col < numCols; col++) + m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[2], row, col, 2); } if (encData.m_slice->m_pps->bTransquantBypassEnabled) { - uint32_t numCols = encData.m_slice->m_sps->numCuInWidth; uint32_t lineStartCUAddr = row * 
numCols; const CUGeom* cuGeoms = m_frameEncoder->m_cuGeoms; diff -r 3a423fcb4b40 -r b1c261378db2 source/encoder/sao.cpp --- a/source/encoder/sao.cpp Wed Dec 02 11:12:51 2015 -0600 +++ b/source/encoder/sao.cpp Wed Dec 02 11:12:54 2015 -0600 @@ -671,6 +671,74 @@ } } +/* Process SAO unit */ +void SAO::processSaoUnitCu(SaoCtuParam* ctuParam, int idxY, int idxX, int plane) +{ + PicYuv* reconPic = m_frame->m_reconPic; + intptr_t stride = plane ? reconPic->m_strideC : reconPic->m_stride; + uint32_t picWidth = m_param->sourceWidth; + int ctuWidth = g_maxCUSize; + int ctuHeight = g_maxCUSize; + + if (plane) + { + picWidth >>= m_hChromaShift; + ctuWidth >>= m_hChromaShift; + ctuHeight >>= m_vChromaShift; + } + + int addr = idxY * m_numCuInWidth + idxX; + pixel* rec = reconPic->getPlaneAddr(plane, addr); + + if (idxX == 0) + { + for (int i = 0; i < ctuHeight + 1; i++) + { + m_tmpL1[plane][i] = rec[0]; + rec += stride; + } + } + + bool mergeLeftFlag = (ctuParam[addr].mergeMode == SAO_MERGE_LEFT); + int typeIdx = ctuParam[addr].typeIdx; + + if (idxX != (m_numCuInWidth - 1)) + { + rec = reconPic->getPlaneAddr(plane, addr); + for (int i = 0; i < ctuHeight + 1; i++) + { + m_tmpL2[plane][i] = rec[ctuWidth - 1]; + rec += stride; + } + } + + if (typeIdx >= 0) + { + if (!mergeLeftFlag) + { + if (typeIdx == SAO_BO) + { + memset(m_offsetBo, 0, sizeof(m_offsetBo)); + + for (int i = 0; i < SAO_NUM_OFFSET; i++) + m_offsetBo[((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC); + } + else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3) + { + int offset[NUM_EDGETYPE]; + offset[0] = 0; + for (int i = 0; i < SAO_NUM_OFFSET; i++) + offset[i + 1] = ctuParam[addr].offset[i] << SAO_BIT_INC; + + for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++) + m_offsetEo[edgeType] = (int8_t)offset[s_eoTable[edgeType]]; + } + } + processSaoCu(addr, typeIdx, plane); + } + std::swap(m_tmpL1[plane], 
m_tmpL2[plane]); +} + void SAO::copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc) { saoUnitDst->mergeMode = saoUnitSrc->mergeMode; diff -r 3a423fcb4b40 -r b1c261378db2 source/encoder/sao.h --- a/source/encoder/sao.h Wed Dec 02 11:12:51 2015 -0600 +++ b/source/encoder/sao.h Wed Dec 02 11:12:54 2015 -0600 @@ -132,6 +132,7 @@ // CTU-based SAO process without slice granularity void processSaoCu(int addr, int typeIdx, int plane); void processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane); + void processSaoUnitCu(SaoCtuParam* ctuParam, int idxY, int idxX, int plane); void copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc); ------------------------------ Message: 3 Date: Wed, 02 Dec 2015 11:28:36 -0600 From: Min Chen <[email protected]> To: [email protected] Subject: [x265] [PATCH 13 of 15] sao: avoid thread conflict on offsetEo and offsetBo Message-ID: <a3a9660c91b8eeb8f708.1449077316@chen-PC> Content-Type: text/plain; charset="us-ascii" # HG changeset patch # User Min Chen <[email protected]> # Date 1449076377 21600 # Node ID a3a9660c91b8eeb8f70869fc4022f939c01023f0 # Parent b1c261378db29a1988d8e27c5eabe1a76821f83d sao: avoid thread conflict on offsetEo and offsetBo --- source/encoder/framefilter.cpp | 12 +++++------- source/encoder/sao.cpp | 38 ++++++++++++++++++++------------------ source/encoder/sao.h | 4 ++-- 3 files changed, 27 insertions(+), 27 deletions(-) diff -r b1c261378db2 -r a3a9660c91b8 source/encoder/framefilter.cpp --- a/source/encoder/framefilter.cpp Wed Dec 02 11:12:54 2015 -0600 +++ b/source/encoder/framefilter.cpp Wed Dec 02 11:12:57 2015 -0600 @@ -543,18 +543,16 @@ SAOParam* saoParam = encData.m_saoParam; uint32_t numCols = encData.m_slice->m_sps->numCuInWidth; - if (saoParam->bSaoFlag[0]) + for(uint32_t col = 0; col < numCols; col++) { - for(uint32_t col = 0; col < numCols; col++) + if (saoParam->bSaoFlag[0]) m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[0], row, col, 0); - } - if 
(saoParam->bSaoFlag[1]) - { - for(uint32_t col = 0; col < numCols; col++) + if (saoParam->bSaoFlag[1]) + { m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[1], row, col, 1); - for(uint32_t col = 0; col < numCols; col++) m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[2], row, col, 2); + } } if (encData.m_slice->m_pps->bTransquantBypassEnabled) diff -r b1c261378db2 -r a3a9660c91b8 source/encoder/sao.cpp --- a/source/encoder/sao.cpp Wed Dec 02 11:12:54 2015 -0600 +++ b/source/encoder/sao.cpp Wed Dec 02 11:12:57 2015 -0600 @@ -325,6 +325,8 @@ tmpL = m_tmpL1[plane]; tmpU = &(m_tmpU[plane][lpelx]); + int8_t* offsetEo = m_offsetEo[plane]; + switch (typeIdx) { case SAO_EO_0: // dir: - @@ -343,7 +345,7 @@ int edgeType = signRight + signLeft + 2; signLeft = -signRight; - rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]]; + rec[x] = m_clipTable[rec[x] + offsetEo[edgeType]]; } rec += stride; @@ -368,7 +370,7 @@ row1LastPxl = rec[stride + ctuWidth - 1]; } - primitives.saoCuOrgE0(rec, m_offsetEo, ctuWidth, signLeft1, stride); + primitives.saoCuOrgE0(rec, offsetEo, ctuWidth, signLeft1, stride); if (!lpelx) { @@ -407,7 +409,7 @@ int edgeType = signDown + upBuff1[x] + 2; upBuff1[x] = -signDown; - rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]]; + rec[x] = m_clipTable[rec[x] + offsetEo[edgeType]]; } rec += stride; @@ -420,11 +422,11 @@ int diff = (endY - startY) % 2; for (y = startY; y < endY - diff; y += 2) { - primitives.saoCuOrgE1_2Rows(rec, upBuff1, m_offsetEo, stride, ctuWidth); + primitives.saoCuOrgE1_2Rows(rec, upBuff1, offsetEo, stride, ctuWidth); rec += 2 * stride; } if (diff & 1) - primitives.saoCuOrgE1(rec, upBuff1, m_offsetEo, stride, ctuWidth); + primitives.saoCuOrgE1(rec, upBuff1, offsetEo, stride, ctuWidth); } break; @@ -474,7 +476,7 @@ int8_t signDown = signOf(rec[x] - rec[x + stride + 1]); int edgeType = signDown + upBuff1[x] + 2; upBufft[x + 1] = -signDown; - rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]]; + rec[x] = 
m_clipTable[rec[x] + offsetEo[edgeType]]; } std::swap(upBuff1, upBufft); @@ -488,7 +490,7 @@ { int8_t iSignDown2 = signOf(rec[stride + startX] - tmpL[y]); - primitives.saoCuOrgE2[endX > 16](rec + startX, upBufft + startX, upBuff1 + startX, m_offsetEo, endX - startX, stride); + primitives.saoCuOrgE2[endX > 16](rec + startX, upBufft + startX, upBuff1 + startX, offsetEo, endX - startX, stride); upBufft[startX] = iSignDown2; @@ -520,14 +522,14 @@ int8_t signDown = signOf(rec[x] - tmpL[y + 1]); int edgeType = signDown + upBuff1[x] + 2; upBuff1[x - 1] = -signDown; - rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]]; + rec[x] = m_clipTable[rec[x] + offsetEo[edgeType]]; for (x = startX + 1; x < endX; x++) { signDown = signOf(rec[x] - rec[x + stride - 1]); edgeType = signDown + upBuff1[x] + 2; upBuff1[x - 1] = -signDown; - rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]]; + rec[x] = m_clipTable[rec[x] + offsetEo[edgeType]]; } upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]); @@ -557,9 +559,9 @@ int8_t signDown = signOf(rec[x] - tmpL[y + 1]); int edgeType = signDown + upBuff1[x] + 2; upBuff1[x - 1] = -signDown; - rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]]; + rec[x] = m_clipTable[rec[x] + offsetEo[edgeType]]; - primitives.saoCuOrgE3[endX > 16](rec, upBuff1, m_offsetEo, stride - 1, startX, endX); + primitives.saoCuOrgE3[endX > 16](rec, upBuff1, offsetEo, stride - 1, startX, endX); upBuff1[endX - 1] = signOf(rec[endX - 1 + stride] - rec[endX]); @@ -571,7 +573,7 @@ } case SAO_BO: { - const int8_t* offsetBo = m_offsetBo; + const int8_t* offsetBo = m_offsetBo[plane]; if (ctuWidth & 15) { @@ -649,10 +651,10 @@ { if (typeIdx == SAO_BO) { - memset(m_offsetBo, 0, sizeof(m_offsetBo)); + memset(m_offsetBo[plane], 0, sizeof(m_offsetBo[0])); for (int i = 0; i < SAO_NUM_OFFSET; i++) - m_offsetBo[((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC); + m_offsetBo[plane][((ctuParam[addr].bandPos + i) & 
(SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC); } else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3) { @@ -662,7 +664,7 @@ offset[i + 1] = ctuParam[addr].offset[i] << SAO_BIT_INC; for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++) - m_offsetEo[edgeType] = (int8_t)offset[s_eoTable[edgeType]]; + m_offsetEo[plane][edgeType] = (int8_t)offset[s_eoTable[edgeType]]; } } processSaoCu(addr, typeIdx, plane); @@ -718,10 +720,10 @@ { if (typeIdx == SAO_BO) { - memset(m_offsetBo, 0, sizeof(m_offsetBo)); + memset(m_offsetBo[plane], 0, sizeof(m_offsetBo[0])); for (int i = 0; i < SAO_NUM_OFFSET; i++) - m_offsetBo[((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC); + m_offsetBo[plane][((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC); } else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3) { @@ -731,7 +733,7 @@ offset[i + 1] = ctuParam[addr].offset[i] << SAO_BIT_INC; for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++) - m_offsetEo[edgeType] = (int8_t)offset[s_eoTable[edgeType]]; + m_offsetEo[plane][edgeType] = (int8_t)offset[s_eoTable[edgeType]]; } } processSaoCu(addr, typeIdx, plane); diff -r b1c261378db2 -r a3a9660c91b8 source/encoder/sao.h --- a/source/encoder/sao.h Wed Dec 02 11:12:54 2015 -0600 +++ b/source/encoder/sao.h Wed Dec 02 11:12:57 2015 -0600 @@ -80,8 +80,8 @@ PerPlane* m_offsetOrgPreDblk; double m_depthSaoRate[2][4]; - int8_t m_offsetBo[SAO_NUM_BO_CLASSES]; - int8_t m_offsetEo[NUM_EDGETYPE]; + int8_t m_offsetBo[NUM_PLANE][SAO_NUM_BO_CLASSES]; + int8_t m_offsetEo[NUM_PLANE][NUM_EDGETYPE]; int m_chromaFormat; int m_numCuInWidth; ------------------------------ Message: 4 Date: Wed, 02 Dec 2015 11:28:37 -0600 From: Min Chen <[email protected]> To: [email protected] Subject: [x265] [PATCH 14 of 15] sao: 
reduce address operators by split into Luma and Chroma path Message-ID: <a6d88a08af3d48cb804a.1449077317@chen-PC> Content-Type: text/plain; charset="us-ascii" # HG changeset patch # User Min Chen <[email protected]> # Date 1449076380 21600 # Node ID a6d88a08af3d48cb804aa61819bd45ee685d1f59 # Parent a3a9660c91b8eeb8f70869fc4022f939c01023f0 sao: reduce address operators by split into Luma and Chroma path --- source/encoder/framefilter.cpp | 7 +-- source/encoder/sao.cpp | 133 ++++++++++++++++++++++++++++++++++------ source/encoder/sao.h | 3 +- 3 files changed, 118 insertions(+), 25 deletions(-) diff -r a3a9660c91b8 -r a6d88a08af3d source/encoder/framefilter.cpp --- a/source/encoder/framefilter.cpp Wed Dec 02 11:12:57 2015 -0600 +++ b/source/encoder/framefilter.cpp Wed Dec 02 11:13:00 2015 -0600 @@ -546,13 +546,10 @@ for(uint32_t col = 0; col < numCols; col++) { if (saoParam->bSaoFlag[0]) - m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[0], row, col, 0); + m_parallelFilter[row].m_sao.processSaoUnitCuLuma(saoParam->ctuParam[0], row, col); if (saoParam->bSaoFlag[1]) - { - m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[1], row, col, 1); - m_parallelFilter[row].m_sao.processSaoUnitCu(saoParam->ctuParam[2], row, col, 2); - } + m_parallelFilter[row].m_sao.processSaoUnitCuChroma(saoParam->ctuParam, row, col); } if (encData.m_slice->m_pps->bTransquantBypassEnabled) diff -r a3a9660c91b8 -r a6d88a08af3d source/encoder/sao.cpp --- a/source/encoder/sao.cpp Wed Dec 02 11:12:57 2015 -0600 +++ b/source/encoder/sao.cpp Wed Dec 02 11:13:00 2015 -0600 @@ -674,29 +674,21 @@ } /* Process SAO unit */ -void SAO::processSaoUnitCu(SaoCtuParam* ctuParam, int idxY, int idxX, int plane) +void SAO::processSaoUnitCuLuma(SaoCtuParam* ctuParam, int idxY, int idxX) { PicYuv* reconPic = m_frame->m_reconPic; - intptr_t stride = plane ? 
reconPic->m_strideC : reconPic->m_stride; - uint32_t picWidth = m_param->sourceWidth; + intptr_t stride = reconPic->m_stride; int ctuWidth = g_maxCUSize; int ctuHeight = g_maxCUSize; - if (plane) - { - picWidth >>= m_hChromaShift; - ctuWidth >>= m_hChromaShift; - ctuHeight >>= m_vChromaShift; - } - int addr = idxY * m_numCuInWidth + idxX; - pixel* rec = reconPic->getPlaneAddr(plane, addr); + pixel* rec = reconPic->getLumaAddr(addr); if (idxX == 0) { for (int i = 0; i < ctuHeight + 1; i++) { - m_tmpL1[plane][i] = rec[0]; + m_tmpL1[0][i] = rec[0]; rec += stride; } } @@ -706,10 +698,10 @@ if (idxX != (m_numCuInWidth - 1)) { - rec = reconPic->getPlaneAddr(plane, addr); + rec = reconPic->getLumaAddr(addr); for (int i = 0; i < ctuHeight + 1; i++) { - m_tmpL2[plane][i] = rec[ctuWidth - 1]; + m_tmpL2[0][i] = rec[ctuWidth - 1]; rec += stride; } } @@ -720,10 +712,10 @@ { if (typeIdx == SAO_BO) { - memset(m_offsetBo[plane], 0, sizeof(m_offsetBo[0])); + memset(m_offsetBo[0], 0, sizeof(m_offsetBo[0])); for (int i = 0; i < SAO_NUM_OFFSET; i++) - m_offsetBo[plane][((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC); + m_offsetBo[0][((ctuParam[addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[addr].offset[i] << SAO_BIT_INC); } else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3) { @@ -733,12 +725,115 @@ offset[i + 1] = ctuParam[addr].offset[i] << SAO_BIT_INC; for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++) - m_offsetEo[plane][edgeType] = (int8_t)offset[s_eoTable[edgeType]]; + m_offsetEo[0][edgeType] = (int8_t)offset[s_eoTable[edgeType]]; } } - processSaoCu(addr, typeIdx, plane); + processSaoCu(addr, typeIdx, 0); } - std::swap(m_tmpL1[plane], m_tmpL2[plane]); + std::swap(m_tmpL1[0], m_tmpL2[0]); +} + +/* Process SAO unit (Chroma only) */ +void SAO::processSaoUnitCuChroma(SaoCtuParam* ctuParam[3], int idxY, int idxX) +{ + PicYuv* reconPic = 
m_frame->m_reconPic; + intptr_t stride = reconPic->m_strideC; + int ctuWidth = g_maxCUSize; + int ctuHeight = g_maxCUSize; + + { + ctuWidth >>= m_hChromaShift; + ctuHeight >>= m_vChromaShift; + } + + int addr = idxY * m_numCuInWidth + idxX; + pixel* recCb = reconPic->getCbAddr(addr); + pixel* recCr = reconPic->getCrAddr(addr); + + if (idxX == 0) + { + for (int i = 0; i < ctuHeight + 1; i++) + { + m_tmpL1[1][i] = recCb[0]; + m_tmpL1[2][i] = recCr[0]; + recCb += stride; + recCr += stride; + } + } + + bool mergeLeftFlagCb = (ctuParam[1][addr].mergeMode == SAO_MERGE_LEFT); + int typeIdxCb = ctuParam[1][addr].typeIdx; + + bool mergeLeftFlagCr = (ctuParam[2][addr].mergeMode == SAO_MERGE_LEFT); + int typeIdxCr = ctuParam[2][addr].typeIdx; + + if (idxX != (m_numCuInWidth - 1)) + { + recCb = reconPic->getCbAddr(addr); + recCr = reconPic->getCrAddr(addr); + for (int i = 0; i < ctuHeight + 1; i++) + { + m_tmpL2[1][i] = recCb[ctuWidth - 1]; + m_tmpL2[2][i] = recCr[ctuWidth - 1]; + recCb += stride; + recCr += stride; + } + } + + // Process U + if (typeIdxCb >= 0) + { + if (!mergeLeftFlagCb) + { + if (typeIdxCb == SAO_BO) + { + memset(m_offsetBo[1], 0, sizeof(m_offsetBo[0])); + + for (int i = 0; i < SAO_NUM_OFFSET; i++) + m_offsetBo[1][((ctuParam[1][addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[1][addr].offset[i] << SAO_BIT_INC); + } + else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3) + { + int offset[NUM_EDGETYPE]; + offset[0] = 0; + for (int i = 0; i < SAO_NUM_OFFSET; i++) + offset[i + 1] = ctuParam[1][addr].offset[i] << SAO_BIT_INC; + + for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++) + m_offsetEo[1][edgeType] = (int8_t)offset[s_eoTable[edgeType]]; + } + } + processSaoCu(addr, typeIdxCb, 1); + } + + // Process V + if (typeIdxCr >= 0) + { + if (!mergeLeftFlagCr) + { + if (typeIdxCr == SAO_BO) + { + memset(m_offsetBo[2], 0, sizeof(m_offsetBo[0])); + + for (int i = 0; i < SAO_NUM_OFFSET; i++) + 
m_offsetBo[2][((ctuParam[2][addr].bandPos + i) & (SAO_NUM_BO_CLASSES - 1))] = (int8_t)(ctuParam[2][addr].offset[i] << SAO_BIT_INC); + } + else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3) + { + int offset[NUM_EDGETYPE]; + offset[0] = 0; + for (int i = 0; i < SAO_NUM_OFFSET; i++) + offset[i + 1] = ctuParam[2][addr].offset[i] << SAO_BIT_INC; + + for (int edgeType = 0; edgeType < NUM_EDGETYPE; edgeType++) + m_offsetEo[2][edgeType] = (int8_t)offset[s_eoTable[edgeType]]; + } + } + processSaoCu(addr, typeIdxCb, 2); + } + + std::swap(m_tmpL1[1], m_tmpL2[1]); + std::swap(m_tmpL1[2], m_tmpL2[2]); } void SAO::copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc) diff -r a3a9660c91b8 -r a6d88a08af3d source/encoder/sao.h --- a/source/encoder/sao.h Wed Dec 02 11:12:57 2015 -0600 +++ b/source/encoder/sao.h Wed Dec 02 11:13:00 2015 -0600 @@ -132,7 +132,8 @@ // CTU-based SAO process without slice granularity void processSaoCu(int addr, int typeIdx, int plane); void processSaoUnitRow(SaoCtuParam* ctuParam, int idxY, int plane); - void processSaoUnitCu(SaoCtuParam* ctuParam, int idxY, int idxX, int plane); + void processSaoUnitCuLuma(SaoCtuParam* ctuParam, int idxY, int idxX); + void processSaoUnitCuChroma(SaoCtuParam* ctuParam[3], int idxY, int idxX); void copySaoUnit(SaoCtuParam* saoUnitDst, const SaoCtuParam* saoUnitSrc); ------------------------------ Subject: Digest Footer _______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel ------------------------------ End of x265-devel Digest, Vol 31, Issue 7 *****************************************
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
