Thanks, queued for testing.

On Fri, Sep 12, 2014 at 7:34 AM, Satoshi Nakagawa <nakagawa...@oki.com> wrote:
> # HG changeset patch > # User Satoshi Nakagawa <nakagawa...@oki.com> > # Date 1410487314 -32400 > # Fri Sep 12 11:01:54 2014 +0900 > # Node ID 8a2312df90f99b8b479940141c6dafa4b96581cf > # Parent 7e29b10982d2eb7fd79f581d99996f04184522ba > sao: some cleanups > > diff -r 7e29b10982d2 -r 8a2312df90f9 source/common/common.h > --- a/source/common/common.h Thu Sep 11 19:24:28 2014 +0530 > +++ b/source/common/common.h Fri Sep 12 11:01:54 2014 +0900 > @@ -200,6 +200,8 @@ > > namespace x265 { > > +enum { SAO_NUM_OFFSET = 4 }; > + > // NOTE: MUST be alignment to 16 or 32 bytes for asm code > struct NoiseReduction > { > @@ -215,9 +217,8 @@ > enum { NUM_DOWN_PART = 4 }; > > int bestType; > - int length; > int subTypeIdx; // indicates EO class or BO band position > - int offset[4]; > + int offset[SAO_NUM_OFFSET]; > int startCUX; > int startCUY; > int endCUX; > @@ -245,10 +246,9 @@ > bool mergeLeftFlag; > int typeIdx; > int subTypeIdx; // indicates EO class or BO band position > - int offset[4]; > + int offset[SAO_NUM_OFFSET]; > int partIdx; > int partIdxTmp; > - int length; > > void reset() > { > diff -r 7e29b10982d2 -r 8a2312df90f9 source/common/x86/loopfilter.asm > --- a/source/common/x86/loopfilter.asm Thu Sep 11 19:24:28 2014 +0530 > +++ b/source/common/x86/loopfilter.asm Fri Sep 12 11:01:54 2014 +0900 > @@ -44,7 +44,7 @@ > pslldq m0, 15 ; m0 = [iSignLeft x .. 
x] > pcmpeqb m4, m4 ; m4 = [pb -1] > pxor m5, m5 ; m5 = 0 > - movu m6, [r1] ; m6 = m_iOffsetEo > + movh m6, [r1] ; m6 = m_offsetEo > > .loop: > movu m7, [r0] ; m1 = pRec[x] > diff -r 7e29b10982d2 -r 8a2312df90f9 source/encoder/entropy.cpp > --- a/source/encoder/entropy.cpp Thu Sep 11 19:24:28 2014 +0530 > +++ b/source/encoder/entropy.cpp Fri Sep 12 11:01:54 2014 +0900 > @@ -879,19 +879,19 @@ > > if (symbol) > { > - if (saoLcuParam->typeIdx < 4 && compIdx != 2) > + if (saoLcuParam->typeIdx < SAO_BO && compIdx != 2) > saoLcuParam->subTypeIdx = saoLcuParam->typeIdx; > > int offsetTh = 1 << X265_MIN(X265_DEPTH - 5, 5); > if (saoLcuParam->typeIdx == SAO_BO) > { > - for (i = 0; i < saoLcuParam->length; i++) > + for (i = 0; i < SAO_BO_LEN; i++) > { > uint32_t absOffset = ((saoLcuParam->offset[i] < 0) ? > -saoLcuParam->offset[i] : saoLcuParam->offset[i]); > codeSaoMaxUvlc(absOffset, offsetTh - 1); > } > > - for (i = 0; i < saoLcuParam->length; i++) > + for (i = 0; i < SAO_BO_LEN; i++) > { > if (saoLcuParam->offset[i] != 0) > { > @@ -903,7 +903,7 @@ > symbol = (uint32_t)(saoLcuParam->subTypeIdx); > codeSaoUflc(5, symbol); > } > - else if (saoLcuParam->typeIdx < 4) > + else // if (saoLcuParam->typeIdx < SAO_BO) > { > codeSaoMaxUvlc(saoLcuParam->offset[0], offsetTh - 1); > codeSaoMaxUvlc(saoLcuParam->offset[1], offsetTh - 1); > diff -r 7e29b10982d2 -r 8a2312df90f9 source/encoder/sao.cpp > --- a/source/encoder/sao.cpp Thu Sep 11 19:24:28 2014 +0530 > +++ b/source/encoder/sao.cpp Fri Sep 12 11:01:54 2014 +0900 > @@ -79,26 +79,13 @@ > 341, // level 4 > }; > > -const uint32_t SAO::s_eoTable[9] = > +const uint32_t SAO::s_eoTable[NUM_EDGETYPE] = > { > 1, // 0 > 2, // 1 > 0, // 2 > 3, // 3 > - 4, // 4 > - 0, // 5 > - 0, // 6 > - 0, // 7 > - 0 > -}; > - > -const int SAO::s_numClass[MAX_NUM_SAO_TYPE] = > -{ > - SAO_EO_LEN, > - SAO_EO_LEN, > - SAO_EO_LEN, > - SAO_EO_LEN, > - SAO_BO_LEN > + 4 // 4 > }; > > SAO::SAO() > @@ -122,8 +109,6 @@ > m_clipTable = NULL; > m_clipTableBase = NULL; 
> m_offsetBo = NULL; > - m_chromaOffsetBo = NULL; > - m_tableBo = NULL; > m_tmpU1[0] = NULL; > m_tmpU1[1] = NULL; > m_tmpU1[2] = NULL; > @@ -162,18 +147,12 @@ > * m_numTotalParts must allow for sufficient storage in any allocated > arrays */ > m_numTotalParts = X265_MAX(3, s_numCulPartsLevel[m_maxSplitLevel]); > > - int pixelRange = 1 << X265_DEPTH; > - int boRangeShift = X265_DEPTH - SAO_BO_BITS; > - pixel maxY = (1 << X265_DEPTH) - 1; > - pixel minY = 0; > - pixel rangeExt = maxY >> 1; > + const pixel maxY = (1 << X265_DEPTH) - 1; > + const pixel rangeExt = maxY >> 1; > int numLcu = m_numCuInWidth * m_numCuInHeight; > > - CHECKED_MALLOC(m_tableBo, pixel, pixelRange); > - > - CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt); > - CHECKED_MALLOC(m_offsetBo, int, maxY + 2 * rangeExt); > - CHECKED_MALLOC(m_chromaOffsetBo , int, maxY + 2 * rangeExt); > + CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt); > + CHECKED_MALLOC(m_offsetBo, pixel, maxY + 2 * rangeExt); > > CHECKED_MALLOC(m_tmpL1, pixel, g_maxCUSize + 1); > CHECKED_MALLOC(m_tmpL2, pixel, g_maxCUSize + 1); > @@ -199,19 +178,16 @@ > CHECKED_MALLOC(m_countPreDblk, PerPlane, numLcu); > CHECKED_MALLOC(m_offsetOrgPreDblk, PerPlane, numLcu); > > - for (int k2 = 0; k2 < pixelRange; k2++) > - m_tableBo[k2] = (pixel)(1 + (k2 >> boRangeShift)); > + m_clipTable = &(m_clipTableBase[rangeExt]); > > - for (int i = 0; i < (minY + rangeExt); i++) > - m_clipTableBase[i] = minY; > + for (int i = 0; i < rangeExt; i++) > + m_clipTableBase[i] = 0; > > - for (int i = minY + rangeExt; i < (maxY + rangeExt); i++) > - m_clipTableBase[i] = (pixel)(i - rangeExt); > + for (int i = 0; i < maxY; i++) > + m_clipTable[i] = (pixel)i; > > - for (int i = maxY + rangeExt; i < (maxY + 2 * rangeExt); i++) > - m_clipTableBase[i] = maxY; > - > - m_clipTable = &(m_clipTableBase[rangeExt]); > + for (int i = maxY; i < maxY + rangeExt; i++) > + m_clipTable[i] = maxY; > > return true; > > @@ -223,8 +199,6 @@ > { > 
X265_FREE(m_clipTableBase); > X265_FREE(m_offsetBo); > - X265_FREE(m_tableBo); > - X265_FREE(m_chromaOffsetBo); > > X265_FREE(m_tmpL1); > X265_FREE(m_tmpL2); > @@ -271,12 +245,9 @@ > /* recursively initialize SAO parameters (only once) */ > void SAO::initSAOParam(SAOParam *saoParam, int partLevel, int partRow, > int partCol, int parentPartIdx, int startCUX, int endCUX, int startCUY, int > endCUY, int plane) const > { > - int j; > int partIdx = convertLevelRowCol2Idx(partLevel, partRow, partCol); > > - SAOQTPart* saoPart; > - > - saoPart = &(saoParam->saoPart[plane][partIdx]); > + SAOQTPart* saoPart = &(saoParam->saoPart[plane][partIdx]); > > saoPart->partIdx = partIdx; > saoPart->partLevel = partLevel; > @@ -290,11 +261,10 @@ > > saoPart->upPartIdx = parentPartIdx; > saoPart->bestType = -1; > - saoPart->length = 0; > > saoPart->subTypeIdx = 0; > > - for (j = 0; j < MAX_NUM_SAO_OFFSETS; j++) > + for (int j = 0; j < SAO_NUM_OFFSET; j++) > saoPart->offset[j] = 0; > > if (saoPart->partLevel < m_maxSplitLevel) > @@ -371,14 +341,13 @@ > for (int i = 0; i < s_numCulPartsLevel[m_maxSplitLevel]; i++) > { > saoParam->saoPart[c][i].bestType = -1; > - saoParam->saoPart[c][i].length = 0; > saoParam->saoPart[c][i].bSplit = false; > saoParam->saoPart[c][i].bProcessed = false; > saoParam->saoPart[c][i].minCost = MAX_DOUBLE; > saoParam->saoPart[c][i].minDist = MAX_INT; > saoParam->saoPart[c][i].minRate = MAX_INT; > saoParam->saoPart[c][i].subTypeIdx = 0; > - for (int j = 0; j < MAX_NUM_SAO_OFFSETS; j++) > + for (int j = 0; j < SAO_NUM_OFFSET; j++) > { > saoParam->saoPart[c][i].offset[j] = 0; > saoParam->saoPart[c][i].offset[j] = 0; > @@ -454,18 +423,12 @@ > int lcuHeight; > int rpelx; > int bpely; > - int edgeType; > - int signDown; > - int signDown1; > - int signDown2; > int picWidthTmp; > int picHeightTmp; > int startX; > int startY; > int endX; > int endY; > - int shift; > - int cuHeightTmp; > pixel* tmpL; > pixel* tmpU; > uint32_t lpelx = tmpCu->getCUPelX(); > @@ -505,22 
+468,18 @@ > > // if (iSaoType!=SAO_BO_0 || iSaoType!=SAO_BO_1) > { > - cuHeightTmp = isLuma ? g_maxCUSize : (g_maxCUSize >> > m_vChromaShift); > - shift = isLuma ? (g_maxCUSize - 1) : ((g_maxCUSize >> > m_hChromaShift) - 1); > + int cuHeightTmp = isLuma ? g_maxCUSize : (g_maxCUSize >> > m_vChromaShift); > + pixel* recR = &rec[isLuma ? (g_maxCUSize - 1) : ((g_maxCUSize >> > m_hChromaShift) - 1)]; > for (int i = 0; i < cuHeightTmp + 1; i++) > { > - m_tmpL2[i] = rec[shift]; > - rec += stride; > + m_tmpL2[i] = *recR; > + recR += stride; > } > > - rec -= (stride * (cuHeightTmp + 1)); > - > tmpL = m_tmpL1; > tmpU = &(m_tmpU1[plane][lpelx]); > } > > - int32_t *offsetBo = isLuma ? m_offsetBo : m_chromaOffsetBo; > - > switch (saoType) > { > case SAO_EO_0: // dir: - > @@ -536,10 +495,10 @@ > for (x = startX; x < endX; x++) > { > int signRight = signOf(rec[x] - rec[x + 1]); > - edgeType = signRight + signLeft + 2; > + int edgeType = signRight + signLeft + 2; > signLeft = -signRight; > > - rec[x] = (pixel)Clip3(0, (1 << X265_DEPTH) - 1, > rec[x] + m_offsetEo[edgeType]); > + rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]]; > } > > rec += stride; > @@ -584,8 +543,8 @@ > { > for (x = 0; x < lcuWidth; x++) > { > - signDown = signOf(rec[x] - rec[x + stride]); > - edgeType = signDown + upBuff1[x] + 2; > + int signDown = signOf(rec[x] - rec[x + stride]); > + int edgeType = signDown + upBuff1[x] + 2; > upBuff1[x] = -signDown; > > rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]]; > @@ -612,11 +571,11 @@ > > for (y = startY; y < endY; y++) > { > - signDown2 = signOf(rec[stride + startX] - tmpL[y]); > + int signDown2 = signOf(rec[stride + startX] - tmpL[y]); > for (x = startX; x < endX; x++) > { > - signDown1 = signOf(rec[x] - rec[x + stride + 1]); > - edgeType = signDown1 + upBuff1[x] + 2; > + int signDown1 = signOf(rec[x] - rec[x + stride + 1]); > + int edgeType = signDown1 + upBuff1[x] + 2; > upBufft[x + 1] = -signDown1; > rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]]; > 
} > @@ -647,8 +606,8 @@ > for (y = startY; y < endY; y++) > { > x = startX; > - signDown1 = signOf(rec[x] - tmpL[y + 1]); > - edgeType = signDown1 + upBuff1[x] + 2; > + int signDown1 = signOf(rec[x] - tmpL[y + 1]); > + int edgeType = signDown1 + upBuff1[x] + 2; > upBuff1[x - 1] = -signDown1; > rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]]; > for (x = startX + 1; x < endX; x++) > @@ -668,10 +627,12 @@ > } > case SAO_BO: > { > + const pixel* offsetBo = m_offsetBo; > + > for (y = 0; y < lcuHeight; y++) > { > for (x = 0; x < lcuWidth; x++) > - rec[x] = (pixel)offsetBo[rec[x]]; > + rec[x] = offsetBo[rec[x]]; > > rec += stride; > } > @@ -704,38 +665,29 @@ > > memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidthTmp); > > - int typeIdx; > - uint32_t edgeType; > - > - int offset[LUMA_GROUP_NUM + 1]; > - int idxX; > - int idxY; > - int addr; > int frameWidthInCU = m_pic->getFrameWidthInCU(); > int frameHeightInCU = m_pic->getFrameHeightInCU(); > int stride; > bool isChroma = !!plane; > - bool mergeLeftFlag; > + uint32_t cuHeightTmp = isChroma ? (g_maxCUSize >> m_vChromaShift) : > g_maxCUSize; > > - int32_t *offsetBo = isChroma ? m_chromaOffsetBo : m_offsetBo; > + const int boShift = X265_DEPTH - SAO_BO_BITS; > > - offset[0] = 0; > - for (idxY = 0; idxY < frameHeightInCU; idxY++) > + for (int idxY = 0; idxY < frameHeightInCU; idxY++) > { > - addr = idxY * frameWidthInCU; > + int addr = idxY * frameWidthInCU; > if (plane == 0) > { > - rec = m_pic->getPicYuvRec()->getLumaAddr(addr); > + rec = m_pic->getPicYuvRec()->getLumaAddr(addr); > stride = m_pic->getStride(); > picWidthTmp = m_param->sourceWidth; > } > else > { > - rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr); > + rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr); > stride = m_pic->getCStride(); > picWidthTmp = m_param->sourceWidth >> m_hChromaShift; > } > - uint32_t cuHeightTmp = isChroma ? 
(g_maxCUSize >> m_vChromaShift) > : g_maxCUSize; > for (uint32_t i = 0; i < cuHeightTmp + 1; i++) > { > m_tmpL1[i] = rec[0]; > @@ -746,10 +698,13 @@ > > memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidthTmp); > > - for (idxX = 0; idxX < frameWidthInCU; idxX++) > + for (int idxX = 0; idxX < frameWidthInCU; idxX++) > { > addr = idxY * frameWidthInCU + idxX; > > + int typeIdx; > + bool mergeLeftFlag; > + > if (oneUnitFlag) > { > typeIdx = saoLcuParam[0].typeIdx; > @@ -766,21 +721,24 @@ > { > if (typeIdx == SAO_BO) > { > - for (int i = 0; i < SAO_MAX_BO_CLASSES + 1; i++) > - offset[i] = 0; > + pixel* offsetBo = m_offsetBo; > + int offset[SAO_NUM_BO_CLASSES]; > + memset(offset, 0, sizeof(offset)); > > - for (int i = 0; i < saoLcuParam[addr].length; i++) > - offset[(saoLcuParam[addr].subTypeIdx + i) % > SAO_MAX_BO_CLASSES + 1] = saoLcuParam[addr].offset[i] << SAO_BIT_INC; > + for (int i = 0; i < SAO_NUM_OFFSET; i++) > + offset[((saoLcuParam[addr].subTypeIdx + i) & > (SAO_NUM_BO_CLASSES - 1))] = saoLcuParam[addr].offset[i] << SAO_BIT_INC; > > for (int i = 0; i < (1 << X265_DEPTH); i++) > - offsetBo[i] = m_clipTable[i + > offset[m_tableBo[i]]]; > + offsetBo[i] = m_clipTable[i + offset[i >> > boShift]]; > } > - if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || > typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3) > + else // if (typeIdx == SAO_EO_0 || typeIdx == > SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3) > { > - for (int i = 0; i < saoLcuParam[addr].length; i++) > + int offset[NUM_EDGETYPE]; > + offset[0] = 0; > + for (int i = 0; i < SAO_NUM_OFFSET; i++) > offset[i + 1] = saoLcuParam[addr].offset[i] > << SAO_BIT_INC; > > - for (edgeType = 0; edgeType < 6; edgeType++) > + for (int edgeType = 0; edgeType < NUM_EDGETYPE; > edgeType++) > m_offsetEo[edgeType] = > (int8_t)offset[s_eoTable[edgeType]]; > } > } > @@ -823,32 +781,25 @@ > > if (plane) > { > - rec = m_pic->getPicYuvRec()->getChromaAddr(plane); > + rec = m_pic->getPicYuvRec()->getChromaAddr(plane); > 
picWidthTmp = m_param->sourceWidth >> m_hChromaShift; > } > else > { > - rec = m_pic->getPicYuvRec()->getLumaAddr(); > + rec = m_pic->getPicYuvRec()->getLumaAddr(); > picWidthTmp = m_param->sourceWidth; > } > > if (!idxY) > memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidthTmp); > > - int typeIdx; > - > - int offset[LUMA_GROUP_NUM + 1]; > - int idxX; > - int addr; > int frameWidthInCU = m_pic->getFrameWidthInCU(); > int stride; > bool isChroma = !!plane; > - bool mergeLeftFlag; > > - int32_t* offsetBo = isChroma ? m_chromaOffsetBo : m_offsetBo; > + const int boShift = X265_DEPTH - SAO_BO_BITS; > > - offset[0] = 0; > - addr = idxY * frameWidthInCU; > + int addr = idxY * frameWidthInCU; > if (isChroma) > { > rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr); > @@ -872,12 +823,12 @@ > > memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidthTmp); > > - for (idxX = 0; idxX < frameWidthInCU; idxX++) > + for (int idxX = 0; idxX < frameWidthInCU; idxX++) > { > addr = idxY * frameWidthInCU + idxX; > > - typeIdx = saoLcuParam[addr].typeIdx; > - mergeLeftFlag = saoLcuParam[addr].mergeLeftFlag; > + int typeIdx = saoLcuParam[addr].typeIdx; > + bool mergeLeftFlag = saoLcuParam[addr].mergeLeftFlag; > > if (typeIdx >= 0) > { > @@ -885,21 +836,24 @@ > { > if (typeIdx == SAO_BO) > { > - for (int i = 0; i < SAO_MAX_BO_CLASSES + 1; i++) > - offset[i] = 0; > + pixel* offsetBo = m_offsetBo; > + int offset[SAO_NUM_BO_CLASSES]; > + memset(offset, 0, sizeof(offset)); > > - for (int i = 0; i < saoLcuParam[addr].length; i++) > - offset[(saoLcuParam[addr].subTypeIdx + i) % > SAO_MAX_BO_CLASSES + 1] = saoLcuParam[addr].offset[i] << SAO_BIT_INC; > + for (int i = 0; i < SAO_NUM_OFFSET; i++) > + offset[((saoLcuParam[addr].subTypeIdx + i) & > (SAO_NUM_BO_CLASSES - 1))] = saoLcuParam[addr].offset[i] << SAO_BIT_INC; > > for (int i = 0; i < (1 << X265_DEPTH); i++) > - offsetBo[i] = m_clipTable[i + > offset[m_tableBo[i]]]; > + offsetBo[i] = m_clipTable[i + offset[i >> > boShift]]; > } > - if 
(typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx > == SAO_EO_2 || typeIdx == SAO_EO_3) > + else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || > typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3) > { > - for (int i = 0; i < saoLcuParam[addr].length; i++) > + int offset[NUM_EDGETYPE]; > + offset[0] = 0; > + for (int i = 0; i < SAO_NUM_OFFSET; i++) > offset[i + 1] = saoLcuParam[addr].offset[i] << > SAO_BIT_INC; > > - for (uint32_t edgeType = 0; edgeType < 6; edgeType++) > + for (int edgeType = 0; edgeType < NUM_EDGETYPE; > edgeType++) > m_offsetEo[edgeType] = > (int8_t)offset[s_eoTable[edgeType]]; > } > } > @@ -942,7 +896,7 @@ > saoLcuParam[i].partIdx = 0; > saoLcuParam[i].typeIdx = -1; > saoLcuParam[i].subTypeIdx = 0; > - for (int j = 0; j < MAX_NUM_SAO_OFFSETS; j++) > + for (int j = 0; j < SAO_NUM_OFFSET; j++) > saoLcuParam[i].offset[j] = 0; > } > } > @@ -954,10 +908,9 @@ > saoUnit->partIdx = 0; > saoUnit->partIdxTmp = 0; > saoUnit->typeIdx = -1; > - saoUnit->length = 0; > saoUnit->subTypeIdx = 0; > > - for (int i = 0; i < 4; i++) > + for (int i = 0; i < SAO_NUM_OFFSET; i++) > saoUnit->offset[i] = 0; > } > > @@ -966,10 +919,9 @@ > saoUnitDst->mergeLeftFlag = saoUnitSrc->mergeLeftFlag; > saoUnitDst->mergeUpFlag = saoUnitSrc->mergeUpFlag; > saoUnitDst->typeIdx = saoUnitSrc->typeIdx; > - saoUnitDst->length = saoUnitSrc->length; > > saoUnitDst->subTypeIdx = saoUnitSrc->subTypeIdx; > - for (int i = 0; i < 4; i++) > + for (int i = 0; i < SAO_NUM_OFFSET; i++) > saoUnitDst->offset[i] = saoUnitSrc->offset[i]; > } > > @@ -1008,17 +960,15 @@ > saoLcuParam[addr].partIdxTmp = (int)partIdx; > saoLcuParam[addr].typeIdx = saoQTPart[partIdx].bestType; > saoLcuParam[addr].subTypeIdx = saoQTPart[partIdx].subTypeIdx; > - if (saoLcuParam[addr].typeIdx != -1) > + if (saoLcuParam[addr].typeIdx >= 0) > { > - saoLcuParam[addr].length = saoQTPart[partIdx].length; > - for (int j = 0; j < MAX_NUM_SAO_OFFSETS; j++) > + for (int j = 0; j < SAO_NUM_OFFSET; j++) > saoLcuParam[addr].offset[j] 
= > saoQTPart[partIdx].offset[j]; > } > else > { > - saoLcuParam[addr].length = 0; > saoLcuParam[addr].subTypeIdx = > saoQTPart[partIdx].subTypeIdx; > - for (int j = 0; j < MAX_NUM_SAO_OFFSETS; j++) > + for (int j = 0; j < SAO_NUM_OFFSET; j++) > saoLcuParam[addr].offset[j] = 0; > } > } > @@ -1028,12 +978,9 @@ > /* process SAO for one partition */ > void SAO::rdoSaoOnePart(SAOQTPart *psQTPart, int partIdx, int plane) > { > - int typeIdx; > - int numTotalType = MAX_NUM_SAO_TYPE; > SAOQTPart* onePart = &(psQTPart[partIdx]); > > int64_t estDist; > - int classIdx; > > m_distOrg[partIdx] = 0; > > @@ -1046,50 +993,20 @@ > int allowMergeUp; > SaoLcuParam saoLcuParamRdo; > > - for (typeIdx = -1; typeIdx < numTotalType; typeIdx++) > + for (int typeIdx = -1; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++) > { > > m_entropyCoder.load(m_rdEntropyCoders[onePart->partLevel][CI_CURR_BEST]); > m_entropyCoder.resetBits(); > > - if (typeIdx == -1) > - { > - for (int ry = onePart->startCUY; ry <= onePart->endCUY; ry++) > - { > - for (int rx = onePart->startCUX; rx <= onePart->endCUX; > rx++) > - { > - // get bits for iTypeIdx = -1 > - allowMergeLeft = 1; > - allowMergeUp = 1; > - > - // reset > - resetSaoUnit(&saoLcuParamRdo); > - > - // set merge flag > - saoLcuParamRdo.mergeUpFlag = 1; > - saoLcuParamRdo.mergeLeftFlag = 1; > - > - if (ry == onePart->startCUY) > - saoLcuParamRdo.mergeUpFlag = 0; > - > - if (rx == onePart->startCUX) > - saoLcuParamRdo.mergeLeftFlag = 0; > - > - m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx, > ry, &saoLcuParamRdo, 1, 1, allowMergeLeft, allowMergeUp); > - } > - } > - } > - > if (typeIdx >= 0) > { > estDist = estSaoTypeDist(partIdx, typeIdx, 0, m_lumaLambda, > currentDistortionTableBo, currentRdCostTableBo); > if (typeIdx == SAO_BO) > { > // Estimate Best Position > - double currentRDCost = 0.0; > - > - for (int i = 0; i < SAO_MAX_BO_CLASSES - SAO_BO_LEN + 1; > i++) > + for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; > i++) > { > - currentRDCost = 
0.0; > + double currentRDCost = 0.0; > for (int j = i; j < i + SAO_BO_LEN; j++) > currentRDCost += currentRdCostTableBo[j]; > > @@ -1101,7 +1018,7 @@ > } > > // Recode all offsets > - for (classIdx = bestClassTableBo; classIdx < > bestClassTableBo + SAO_BO_LEN; classIdx++) > + for (int classIdx = bestClassTableBo; classIdx < > bestClassTableBo + SAO_BO_LEN; classIdx++) > estDist += currentDistortionTableBo[classIdx]; > } > > @@ -1129,8 +1046,7 @@ > // set type and offsets > saoLcuParamRdo.typeIdx = typeIdx; > saoLcuParamRdo.subTypeIdx = (typeIdx == SAO_BO) ? > bestClassTableBo : 0; > - saoLcuParamRdo.length = s_numClass[typeIdx]; > - for (classIdx = 0; classIdx < saoLcuParamRdo.length; > classIdx++) > + for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; > classIdx++) > saoLcuParamRdo.offset[classIdx] = > (int)m_offset[partIdx][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1]; > > m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx, > ry, &saoLcuParamRdo, 1, 1, allowMergeLeft, allowMergeUp); > @@ -1152,6 +1068,30 @@ > } > else > { > + for (int ry = onePart->startCUY; ry <= onePart->endCUY; ry++) > + { > + for (int rx = onePart->startCUX; rx <= onePart->endCUX; > rx++) > + { > + // get bits for iTypeIdx = -1 > + allowMergeLeft = 1; > + allowMergeUp = 1; > + > + // reset > + resetSaoUnit(&saoLcuParamRdo); > + > + // set merge flag > + saoLcuParamRdo.mergeUpFlag = 1; > + saoLcuParamRdo.mergeLeftFlag = 1; > + > + if (ry == onePart->startCUY) > + saoLcuParamRdo.mergeUpFlag = 0; > + > + if (rx == onePart->startCUX) > + saoLcuParamRdo.mergeLeftFlag = 0; > + > + m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx, > ry, &saoLcuParamRdo, 1, 1, allowMergeLeft, allowMergeUp); > + } > + } > if (m_distOrg[partIdx] < m_costPartBest[partIdx]) > { > m_costPartBest[partIdx] = (double)m_distOrg[partIdx] + > m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda; > @@ -1170,18 +1110,15 @@ > > if (onePart->bestType != -1) > { > - onePart->length = s_numClass[onePart->bestType]; > int 
minIndex = 0; > if (onePart->bestType == SAO_BO) > { > onePart->subTypeIdx = bestClassTableBo; > minIndex = onePart->subTypeIdx; > } > - for (int i = 0; i < onePart->length; i++) > + for (int i = 0; i < SAO_NUM_OFFSET; i++) > onePart->offset[i] = > (int)m_offset[partIdx][onePart->bestType][minIndex + i + 1]; > } > - else > - onePart->length = 0; > } > > /* Run partition tree disable */ > @@ -1190,7 +1127,6 @@ > SAOQTPart* pOnePart = &(psQTPart[partIdx]); > > pOnePart->bSplit = false; > - pOnePart->length = 0; > pOnePart->bestType = -1; > > if (pOnePart->partLevel < (int)m_maxSplitLevel) > @@ -1236,7 +1172,6 @@ > { > costFinal = costSplit; > onePart->bSplit = true; > - onePart->length = 0; > onePart->bestType = -1; > > > m_rdEntropyCoders[onePart->partLevel][CI_NEXT_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]); > } > @@ -1271,7 +1206,6 @@ > uint32_t picHeightTmp; > int64_t* stats; > int64_t* counts; > - int classIdx; > int startX; > int startY; > int endX; > @@ -1308,6 +1242,8 @@ > > //if(iSaoType == BO_0 || iSaoType == BO_1) > { > + const int boShift = X265_DEPTH - SAO_BO_BITS; > + > if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary) > { > numSkipLine = isChroma ? 3 - (2 * m_vChromaShift) : 3; > @@ -1325,12 +1261,9 @@ > { > for (x = 0; x < endX; x++) > { > - classIdx = m_tableBo[recon[x]]; > - if (classIdx) > - { > - stats[classIdx] += (fenc[x] - recon[x]); > - counts[classIdx]++; > - } > + int classIdx = 1 + (recon[x] >> boShift); > + stats[classIdx] += (fenc[x] - recon[x]); > + counts[classIdx]++; > } > > fenc += stride; > @@ -1338,12 +1271,6 @@ > } > } > > - int signLeft; > - int signRight; > - int signDown; > - int signDown1; > - int signDown2; > - uint32_t edgeType; > int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1; > int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1; > > @@ -1366,11 +1293,11 @@ > endX = (rpelx == picWidthTmp) ? 
lcuWidth - 1 : lcuWidth - > numSkipLineRight; > for (y = 0; y < lcuHeight - numSkipLine; y++) > { > - signLeft = signOf(recon[startX] - recon[startX - 1]); > + int signLeft = signOf(recon[startX] - recon[startX - 1]); > for (x = startX; x < endX; x++) > { > - signRight = signOf(recon[x] - recon[x + 1]); > - edgeType = signRight + signLeft + 2; > + int signRight = signOf(recon[x] - recon[x + 1]); > + int edgeType = signRight + signLeft + 2; > signLeft = -signRight; > > stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]); > @@ -1411,8 +1338,8 @@ > { > for (x = 0; x < endX; x++) > { > - signDown = signOf(recon[x] - recon[x + stride]); > - edgeType = signDown + upBuff1[x] + 2; > + int signDown = signOf(recon[x] - recon[x + stride]); > + int edgeType = signDown + upBuff1[x] + 2; > upBuff1[x] = -signDown; > > stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]); > @@ -1452,11 +1379,11 @@ > > for (y = startY; y < endY; y++) > { > - signDown2 = signOf(recon[stride + startX] - recon[startX > - 1]); > + int signDown2 = signOf(recon[stride + startX] - > recon[startX - 1]); > for (x = startX; x < endX; x++) > { > - signDown1 = signOf(recon[x] - recon[x + stride + 1]); > - edgeType = signDown1 + upBuff1[x] + 2; > + int signDown1 = signOf(recon[x] - recon[x + stride + > 1]); > + int edgeType = signDown1 + upBuff1[x] + 2; > upBufft[x + 1] = -signDown1; > stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]); > counts[s_eoTable[edgeType]]++; > @@ -1500,8 +1427,8 @@ > { > for (x = startX; x < endX; x++) > { > - signDown1 = signOf(recon[x] - recon[x + stride - 1]); > - edgeType = signDown1 + upBuff1[x] + 2; > + int signDown1 = signOf(recon[x] - recon[x + stride - > 1]); > + int edgeType = signDown1 + upBuff1[x] + 2; > upBuff1[x - 1] = -signDown1; > stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]); > counts[s_eoTable[edgeType]]++; > @@ -1518,7 +1445,6 @@ > > void SAO::calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY) > { > - int addr; > int x, y; > > pixel* fenc; > @@ -1528,7 
+1454,6 @@ > uint32_t bPelY; > int64_t* stats; > int64_t* count; > - int classIdx; > int startX; > int startY; > int endX; > @@ -1545,11 +1470,13 @@ > int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1; > int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1; > > + const int boShift = X265_DEPTH - SAO_BO_BITS; > + > // NOTE: Row > { > // NOTE: Col > { > - addr = idxX + frameWidthInCU * idxY; > + int addr = idxX + frameWidthInCU * idxY; > cu = pic->getCU(addr); > > uint32_t picWidthTmp = m_param->sourceWidth; > @@ -1606,26 +1533,15 @@ > if (x < startX && y < startY) > continue; > > - classIdx = m_tableBo[recon[x]]; > - if (classIdx) > - { > - stats[classIdx] += (fenc[x] - recon[x]); > - count[classIdx]++; > - } > + int classIdx = 1 + (recon[x] >> boShift); > + stats[classIdx] += (fenc[x] - recon[x]); > + count[classIdx]++; > } > > fenc += stride; > recon += stride; > } > > - int signLeft; > - int signRight; > - int signDown; > - int signDown1; > - int signDown2; > - > - uint32_t edgeType; > - > //if (iSaoType == EO_0) > > numSkipLine = isChroma ? 
1 : 3; > @@ -1644,11 +1560,11 @@ > > for (y = 0; y < lcuHeight; y++) > { > - signLeft = signOf(recon[firstX] - recon[firstX - 1]); > + int signLeft = signOf(recon[firstX] - recon[firstX - > 1]); > for (x = firstX; x < endX; x++) > { > - signRight = signOf(recon[x] - recon[x + 1]); > - edgeType = signRight + signLeft + 2; > + int signRight = signOf(recon[x] - recon[x + 1]); > + int edgeType = signRight + signLeft + 2; > signLeft = -signRight; > > if (x < startX && y < startY) > @@ -1690,8 +1606,8 @@ > { > for (x = 0; x < lcuWidth; x++) > { > - signDown = signOf(recon[x] - recon[x + stride]); > - edgeType = signDown + upBuff1[x] + 2; > + int signDown = signOf(recon[x] - recon[x + > stride]); > + int edgeType = signDown + upBuff1[x] + 2; > upBuff1[x] = -signDown; > > if (x < startX && y < startY) > @@ -1733,11 +1649,11 @@ > > for (y = firstY; y < endY; y++) > { > - signDown2 = signOf(recon[stride + startX] - > recon[startX - 1]); > + int signDown2 = signOf(recon[stride + startX] - > recon[startX - 1]); > for (x = firstX; x < endX; x++) > { > - signDown1 = signOf(recon[x] - recon[x + stride + > 1]); > - edgeType = signDown1 + upBuff1[x] + 2; > + int signDown1 = signOf(recon[x] - recon[x + > stride + 1]); > + int edgeType = signDown1 + upBuff1[x] + 2; > upBufft[x + 1] = -signDown1; > > if (x < startX && y < startY) > @@ -1784,8 +1700,8 @@ > { > for (x = firstX; x < endX; x++) > { > - signDown1 = signOf(recon[x] - recon[x + stride - > 1]); > - edgeType = signDown1 + upBuff1[x] + 2; > + int signDown1 = signOf(recon[x] - recon[x + > stride - 1]); > + int edgeType = signDown1 + upBuff1[x] + 2; > upBuff1[x - 1] = -signDown1; > > if (x < startX && y < startY) > @@ -1807,12 +1723,10 @@ > > void SAO::getSaoStats(SAOQTPart *psQTPart, int plane) > { > - int levelIdx, partIdx, typeIdx, classIdx; > + int levelIdx, partIdx; > int i; > - int numTotalType = MAX_NUM_SAO_TYPE; > int lcuIdx; > int lcuIdy; > - int addr; > int frameWidthInCU = m_pic->getFrameWidthInCU(); > int downPartIdx; 
> int partStart; > @@ -1827,7 +1741,7 @@ > { > for (lcuIdx = onePart->startCUX; lcuIdx <= onePart->endCUX; > lcuIdx++) > { > - addr = lcuIdy * frameWidthInCU + lcuIdx; > + int addr = lcuIdy * frameWidthInCU + lcuIdx; > calcSaoStatsCu(addr, partIdx, plane); > } > } > @@ -1841,7 +1755,7 @@ > { > for (lcuIdx = onePart->startCUX; lcuIdx <= > onePart->endCUX; lcuIdx++) > { > - addr = lcuIdy * frameWidthInCU + lcuIdx; > + int addr = lcuIdy * frameWidthInCU + lcuIdx; > calcSaoStatsCu(addr, partIdx, plane); > } > } > @@ -1858,9 +1772,9 @@ > for (i = 0; i < SAOQTPart::NUM_DOWN_PART; i++) > { > downPartIdx = onePart->downPartsIdx[i]; > - for (typeIdx = 0; typeIdx < numTotalType; typeIdx++) > + for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; > typeIdx++) > { > - for (classIdx = 0; classIdx < (typeIdx < SAO_BO ? > s_numClass[typeIdx] : SAO_MAX_BO_CLASSES) + 1; classIdx++) > + for (int classIdx = 0; classIdx < (typeIdx < > SAO_BO ? SAO_EO_LEN : SAO_NUM_BO_CLASSES) + 1; classIdx++) > { > m_offsetOrg[partIdx][typeIdx][classIdx] += > m_offsetOrg[downPartIdx][typeIdx][classIdx]; > m_count[partIdx][typeIdx][classIdx] += > m_count[downPartIdx][typeIdx][classIdx]; > @@ -1923,16 +1837,15 @@ > /* Check merge SAO unit */ > void SAO::checkMerge(SaoLcuParam * saoUnitCurr, SaoLcuParam * > saoUnitCheck, int dir) > { > - int i; > int countDiff = 0; > > if (saoUnitCurr->partIdx != saoUnitCheck->partIdx) > { > - if (saoUnitCurr->typeIdx != -1) > + if (saoUnitCurr->typeIdx >= 0) > { > if (saoUnitCurr->typeIdx == saoUnitCheck->typeIdx) > { > - for (i = 0; i < saoUnitCurr->length; i++) > + for (int i = 0; i < SAO_NUM_OFFSET; i++) > countDiff += (saoUnitCurr->offset[i] != > saoUnitCheck->offset[i]); > > countDiff += (saoUnitCurr->subTypeIdx != > saoUnitCheck->subTypeIdx); > @@ -1979,24 +1892,22 @@ > oneUnitFlag = 1; > else > { > - int i, j, addr, addrUp, addrLeft, idx, idxUp, idxLeft, idxCount; > - > oneUnitFlag = 0; > > - idxCount = -1; > + int idxCount = -1; > saoLcuParam[0].mergeUpFlag = 0; > 
saoLcuParam[0].mergeLeftFlag = 0; > > - for (j = 0; j < m_numCuInHeight; j++) > + for (int j = 0; j < m_numCuInHeight; j++) > { > - for (i = 0; i < m_numCuInWidth; i++) > + for (int i = 0; i < m_numCuInWidth; i++) > { > - addr = i + j * m_numCuInWidth; > - addrLeft = (addr % m_numCuInWidth == 0) ? -1 : addr - 1; > - addrUp = (addr < m_numCuInWidth) ? -1 : addr - > m_numCuInWidth; > - idx = saoLcuParam[addr].partIdxTmp; > - idxLeft = (addrLeft == -1) ? -1 : > saoLcuParam[addrLeft].partIdxTmp; > - idxUp = (addrUp == -1) ? -1 : > saoLcuParam[addrUp].partIdxTmp; > + int addr = i + j * m_numCuInWidth; > + int addrUp = (j == 0) ? -1 : addr - m_numCuInWidth; > + int addrLeft = (i == 0) ? -1 : addr - 1; > + int idx = saoLcuParam[addr].partIdxTmp; > + int idxLeft = (addrLeft == -1) ? -1 : > saoLcuParam[addrLeft].partIdxTmp; > + int idxUp = (addrUp == -1) ? -1 : > saoLcuParam[addrUp].partIdxTmp; > > if (idx != idxLeft && idx != idxUp) > { > @@ -2057,21 +1968,17 @@ > > void SAO::rdoSaoUnitRow(SAOParam *saoParam, int idxY) > { > - int idxX; > int frameWidthInCU = saoParam->numCuInWidth; > int j, k; > - int addr = 0; > - int addrUp = -1; > - int addrLeft = -1; > int compIdx = 0; > SaoLcuParam mergeSaoParam[3][2]; > double compDistortion[3]; > > - for (idxX = 0; idxX < frameWidthInCU; idxX++) > + for (int idxX = 0; idxX < frameWidthInCU; idxX++) > { > - addr = idxX + frameWidthInCU * idxY; > - addrUp = addr < frameWidthInCU ? -1 : idxX + frameWidthInCU > * (idxY - 1); > - addrLeft = idxX == 0 ? -1 : idxX - 1 + frameWidthInCU > * idxY; > + int addr = idxX + idxY * frameWidthInCU; > + int addrUp = idxY == 0 ? -1 : addr - frameWidthInCU; > + int addrLeft = idxX == 0 ? 
-1 : addr - 1; > int allowMergeLeft = 1; > int allowMergeUp = 1; > uint32_t rate; > @@ -2111,7 +2018,7 @@ > } > } > > - saoParam->saoLcuParam[compIdx][addr].typeIdx = -1; > + saoParam->saoLcuParam[compIdx][addr].typeIdx = -1; > saoParam->saoLcuParam[compIdx][addr].mergeUpFlag = 0; > saoParam->saoLcuParam[compIdx][addr].mergeLeftFlag = 0; > saoParam->saoLcuParam[compIdx][addr].subTypeIdx = 0; > @@ -2173,9 +2080,9 @@ > } > } > > - if (saoParam->saoLcuParam[0][addr].typeIdx == -1) > + if (saoParam->saoLcuParam[0][addr].typeIdx < 0) > m_numNoSao[0]++; > - if (saoParam->saoLcuParam[1][addr].typeIdx == -1) > + if (saoParam->saoLcuParam[1][addr].typeIdx < 0) > m_numNoSao[1] += 2; > m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]); > m_entropyCoder.store(m_rdEntropyCoders[0][CI_CURR_BEST]); > @@ -2187,9 +2094,8 @@ > inline int64_t SAO::estSaoTypeDist(int compIdx, int typeIdx, int shift, > double lambda, int32_t *currentDistortionTableBo, double > *currentRdCostTableBo) > { > int64_t estDist = 0; > - int classIdx; > > - for (classIdx = 1; classIdx < ((typeIdx < SAO_BO) ? > s_numClass[typeIdx] + 1 : SAO_MAX_BO_CLASSES + 1); classIdx++) > + for (int classIdx = 1; classIdx < ((typeIdx < SAO_BO) ? SAO_EO_LEN + > 1 : SAO_NUM_BO_CLASSES + 1); classIdx++) > { > if (typeIdx == SAO_BO) > { > @@ -2200,7 +2106,7 @@ > { > m_offset[compIdx][typeIdx][classIdx] = > (int64_t)roundIDBI((double)(m_offsetOrg[compIdx][typeIdx][classIdx] << > (X265_DEPTH - 8)) / (double)(m_count[compIdx][typeIdx][classIdx] << > SAO_BIT_INC)); > m_offset[compIdx][typeIdx][classIdx] = Clip3(-OFFSET_THRESH + > 1, OFFSET_THRESH - 1, (int)m_offset[compIdx][typeIdx][classIdx]); > - if (typeIdx < 4) > + if (typeIdx < SAO_BO) > { > if (m_offset[compIdx][typeIdx][classIdx] < 0 && classIdx > < 3) > m_offset[compIdx][typeIdx][classIdx] = 0; > @@ -2231,12 +2137,11 @@ > //Clean up, best_q_offset. 
> int64_t iterOffset, tempOffset; > int64_t tempDist, tempRate; > - double tempCost, tempMinCost; > int64_t offsetOutput = 0; > > iterOffset = offsetInput; > // Assuming sending quantized value 0 results in zero offset and > sending the value zero needs 1 bit. entropy coder can be used to measure > the exact rate here. > - tempMinCost = lambda; > + double tempMinCost = lambda; > while (iterOffset != 0) > { > // Calculate the bits required for signalling the offset > @@ -2247,7 +2152,7 @@ > // Do the dequntization before distorion calculation > tempOffset = iterOffset << bitIncrease; > tempDist = estSaoDist(count, tempOffset, offsetOrg, shift); > - tempCost = ((double)tempDist + lambda * (double)tempRate); > + double tempCost = ((double)tempDist + lambda * > (double)tempRate); > if (tempCost < tempMinCost) > { > tempMinCost = tempCost; > @@ -2267,10 +2172,7 @@ > void SAO::saoComponentParamDist(int allowMergeLeft, int allowMergeUp, > SAOParam *saoParam, int addr, int addrUp, int addrLeft, int plane, > SaoLcuParam *compSaoParam, double > *compDistortion) > { > - int typeIdx; > - > int64_t estDist; > - int classIdx; > int64_t bestDist; > > SaoLcuParam* saoLcuParam = &(saoParam->saoLcuParam[plane][addr]); > @@ -2287,7 +2189,6 @@ > double currentRdCostTableBo[MAX_NUM_SAO_CLASS]; > > SaoLcuParam saoLcuParamRdo; > - double estRate = 0; > > resetSaoUnit(&saoLcuParamRdo); > > @@ -2298,18 +2199,16 @@ > copySaoUnit(saoLcuParam, &saoLcuParamRdo); > bestDist = 0; > > - for (typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++) > + for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++) > { > estDist = estSaoTypeDist(plane, typeIdx, 0, m_lumaLambda, > currentDistortionTableBo, currentRdCostTableBo); > > if (typeIdx == SAO_BO) > { > // Estimate Best Position > - double currentRDCost = 0.0; > - > - for (int i = 0; i < SAO_MAX_BO_CLASSES - SAO_BO_LEN + 1; i++) > + for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++) > { > - currentRDCost = 0.0; > + double currentRDCost = 
0.0; > for (int j = i; j < i + SAO_BO_LEN; j++) > currentRDCost += currentRdCostTableBo[j]; > > @@ -2323,23 +2222,22 @@ > // Re code all Offsets > // Code Center > estDist = 0; > - for (classIdx = bestClassTableBo; classIdx < bestClassTableBo > + SAO_BO_LEN; classIdx++) > + for (int classIdx = bestClassTableBo; classIdx < > bestClassTableBo + SAO_BO_LEN; classIdx++) > estDist += currentDistortionTableBo[classIdx]; > } > resetSaoUnit(&saoLcuParamRdo); > - saoLcuParamRdo.length = s_numClass[typeIdx]; > saoLcuParamRdo.typeIdx = typeIdx; > saoLcuParamRdo.mergeLeftFlag = 0; > saoLcuParamRdo.mergeUpFlag = 0; > saoLcuParamRdo.subTypeIdx = (typeIdx == SAO_BO) ? > bestClassTableBo : 0; > - for (classIdx = 0; classIdx < saoLcuParamRdo.length; classIdx++) > + for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++) > saoLcuParamRdo.offset[classIdx] = > (int)m_offset[plane][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1]; > > m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]); > m_entropyCoder.resetBits(); > m_entropyCoder.codeSaoOffset(&saoLcuParamRdo, plane); > > - estRate = m_entropyCoder.getNumberOfWrittenBits(); > + uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); > m_cost[plane][typeIdx] = (double)((double)estDist + m_lumaLambda > * (double)estRate); > > if (m_cost[plane][typeIdx] < dCostPartBest) > @@ -2367,12 +2265,12 @@ > if (saoLcuParamNeighbor != NULL) > { > estDist = 0; > - typeIdx = saoLcuParamNeighbor->typeIdx; > + int typeIdx = saoLcuParamNeighbor->typeIdx; > if (typeIdx >= 0) > { > int mergeBandPosition = (typeIdx == SAO_BO) ? 
> saoLcuParamNeighbor->subTypeIdx : 0; > int mergeOffset; > - for (classIdx = 0; classIdx < s_numClass[typeIdx]; > classIdx++) > + for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; > classIdx++) > { > mergeOffset = saoLcuParamNeighbor->offset[classIdx]; > estDist += > estSaoDist(m_count[plane][typeIdx][classIdx + mergeBandPosition + 1], > mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + mergeBandPosition + > 1], 0); > @@ -2395,8 +2293,6 @@ > { > int64_t estDist[2]; > int64_t bestDist = 0; > - int typeIdx; > - int classIdx; > > SaoLcuParam* saoLcuParam[2] = { &(saoParam->saoLcuParam[1][addr]), > &(saoParam->saoLcuParam[2][addr]) }; > SaoLcuParam* saoLcuParamNeighbor[2] = { NULL, NULL }; > @@ -2417,7 +2313,6 @@ > double costPartBest = MAX_DOUBLE; > double bestRDCostTableBo; > double currentRdCostTableBo[MAX_NUM_SAO_CLASS]; > - double estRate = 0; > int bestClassTableBo[2] = { 0, 0 }; > int currentDistortionTableBo[MAX_NUM_SAO_CLASS]; > > @@ -2435,19 +2330,18 @@ > copySaoUnit(saoLcuParam[0], &saoLcuParamRdo[0]); > copySaoUnit(saoLcuParam[1], &saoLcuParamRdo[1]); > > - for (typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++) > + for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++) > { > if (typeIdx == SAO_BO) > { > // Estimate Best Position > for (int compIdx = 0; compIdx < 2; compIdx++) > { > - double currentRDCost = 0.0; > bestRDCostTableBo = MAX_DOUBLE; > estDist[compIdx] = estSaoTypeDist(compIdx + 1, typeIdx, > 0, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo); > - for (int i = 0; i < SAO_MAX_BO_CLASSES - SAO_BO_LEN + 1; > i++) > + for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; > i++) > { > - currentRDCost = 0.0; > + double currentRDCost = 0.0; > for (int j = i; j < i + SAO_BO_LEN; j++) > currentRDCost += currentRdCostTableBo[j]; > > @@ -2461,7 +2355,7 @@ > // Re code all Offsets > // Code Center > estDist[compIdx] = 0; > - for (classIdx = bestClassTableBo[compIdx]; classIdx < > bestClassTableBo[compIdx] + SAO_BO_LEN; 
classIdx++) > + for (int classIdx = bestClassTableBo[compIdx]; classIdx < > bestClassTableBo[compIdx] + SAO_BO_LEN; classIdx++) > estDist[compIdx] += > currentDistortionTableBo[classIdx]; > } > } > @@ -2477,18 +2371,17 @@ > for (int compIdx = 0; compIdx < 2; compIdx++) > { > resetSaoUnit(&saoLcuParamRdo[compIdx]); > - saoLcuParamRdo[compIdx].length = s_numClass[typeIdx]; > saoLcuParamRdo[compIdx].typeIdx = typeIdx; > saoLcuParamRdo[compIdx].mergeLeftFlag = 0; > saoLcuParamRdo[compIdx].mergeUpFlag = 0; > saoLcuParamRdo[compIdx].subTypeIdx = (typeIdx == SAO_BO) ? > bestClassTableBo[compIdx] : 0; > - for (classIdx = 0; classIdx < saoLcuParamRdo[compIdx].length; > classIdx++) > + for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++) > saoLcuParamRdo[compIdx].offset[classIdx] = > (int)m_offset[compIdx + 1][typeIdx][classIdx + > saoLcuParamRdo[compIdx].subTypeIdx + 1]; > > m_entropyCoder.codeSaoOffset(&saoLcuParamRdo[compIdx], > compIdx + 1); > } > > - estRate = m_entropyCoder.getNumberOfWrittenBits(); > + uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits(); > m_cost[1][typeIdx] = (double)((double)(estDist[0] + estDist[1]) + > m_chromaLambda * (double)estRate); > > if (m_cost[1][typeIdx] < costPartBest) > @@ -2520,11 +2413,11 @@ > if (saoLcuParamNeighbor[compIdx] != NULL) > { > estDist[compIdx] = 0; > - typeIdx = saoLcuParamNeighbor[compIdx]->typeIdx; > + int typeIdx = saoLcuParamNeighbor[compIdx]->typeIdx; > if (typeIdx >= 0) > { > int mergeBandPosition = (typeIdx == SAO_BO) ? 
> saoLcuParamNeighbor[compIdx]->subTypeIdx : 0; > - for (classIdx = 0; classIdx < s_numClass[typeIdx]; > classIdx++) > + for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; > classIdx++) > { > int mergeOffset = > saoLcuParamNeighbor[compIdx]->offset[classIdx]; > estDist[compIdx] += estSaoDist(m_count[compIdx + > 1][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset, > m_offsetOrg[compIdx + 1][typeIdx][classIdx + mergeBandPosition + 1], 0); > diff -r 7e29b10982d2 -r 8a2312df90f9 source/encoder/sao.h > --- a/source/encoder/sao.h Thu Sep 11 19:24:28 2014 +0530 > +++ b/source/encoder/sao.h Fri Sep 12 11:01:54 2014 +0900 > @@ -36,7 +36,7 @@ > { > SAO_EO_LEN = 4, > SAO_BO_LEN = 4, > - SAO_MAX_BO_CLASSES = 32 > + SAO_NUM_BO_CLASSES = 32 > }; > > enum SAOType > @@ -55,15 +55,13 @@ > > enum { SAO_MAX_DEPTH = 4 }; > enum { SAO_BO_BITS = 5 }; > - enum { LUMA_GROUP_NUM = 1 << SAO_BO_BITS }; > - enum { MAX_NUM_SAO_OFFSETS = 4 }; > enum { MAX_NUM_SAO_CLASS = 33 }; > enum { SAO_BIT_INC = X265_MAX(X265_DEPTH - 10, 0) }; > enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) }; > + enum { NUM_EDGETYPE = 5 }; > > static const int s_numCulPartsLevel[5]; > - static const int s_numClass[MAX_NUM_SAO_TYPE]; > - static const uint32_t s_eoTable[9]; > + static const uint32_t s_eoTable[NUM_EDGETYPE]; > > typedef int64_t (PerClass[MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]); > typedef int64_t (PerType[MAX_NUM_SAO_TYPE]); > @@ -86,9 +84,8 @@ > PerPlane* m_offsetOrgPreDblk; > > double m_depthSaoRate[2][4]; > - int32_t* m_offsetBo; > - int32_t* m_chromaOffsetBo; > - int8_t m_offsetEo[LUMA_GROUP_NUM]; > + pixel* m_offsetBo; > + int8_t m_offsetEo[NUM_EDGETYPE]; > > int m_maxSplitLevel; > > @@ -100,7 +97,6 @@ > > pixel* m_clipTable; > pixel* m_clipTableBase; > - pixel* m_tableBo; > > pixel* m_tmpU1[3]; > pixel* m_tmpU2[3]; > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel >
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel