Thanks, queued for testing.

On Fri, Sep 12, 2014 at 7:34 AM, Satoshi Nakagawa <nakagawa...@oki.com> wrote:

> # HG changeset patch
> # User Satoshi Nakagawa <nakagawa...@oki.com>
> # Date 1410487314 -32400
> #      Fri Sep 12 11:01:54 2014 +0900
> # Node ID 8a2312df90f99b8b479940141c6dafa4b96581cf
> # Parent  7e29b10982d2eb7fd79f581d99996f04184522ba
> sao: some cleanups
>
> diff -r 7e29b10982d2 -r 8a2312df90f9 source/common/common.h
> --- a/source/common/common.h    Thu Sep 11 19:24:28 2014 +0530
> +++ b/source/common/common.h    Fri Sep 12 11:01:54 2014 +0900
> @@ -200,6 +200,8 @@
>
>  namespace x265 {
>
> +enum { SAO_NUM_OFFSET = 4 };
> +
>  // NOTE: MUST be alignment to 16 or 32 bytes for asm code
>  struct NoiseReduction
>  {
> @@ -215,9 +217,8 @@
>      enum { NUM_DOWN_PART = 4 };
>
>      int     bestType;
> -    int     length;
>      int     subTypeIdx;  // indicates EO class or BO band position
> -    int     offset[4];
> +    int     offset[SAO_NUM_OFFSET];
>      int     startCUX;
>      int     startCUY;
>      int     endCUX;
> @@ -245,10 +246,9 @@
>      bool mergeLeftFlag;
>      int  typeIdx;
>      int  subTypeIdx;    // indicates EO class or BO band position
> -    int  offset[4];
> +    int  offset[SAO_NUM_OFFSET];
>      int  partIdx;
>      int  partIdxTmp;
> -    int  length;
>
>      void reset()
>      {
> diff -r 7e29b10982d2 -r 8a2312df90f9 source/common/x86/loopfilter.asm
> --- a/source/common/x86/loopfilter.asm  Thu Sep 11 19:24:28 2014 +0530
> +++ b/source/common/x86/loopfilter.asm  Fri Sep 12 11:01:54 2014 +0900
> @@ -44,7 +44,7 @@
>      pslldq      m0,    15          ; m0 = [iSignLeft x .. x]
>      pcmpeqb     m4,    m4          ; m4 = [pb -1]
>      pxor        m5,    m5          ; m5 = 0
> -    movu        m6,    [r1]        ; m6 = m_iOffsetEo
> +    movh        m6,    [r1]        ; m6 = m_offsetEo
>
>  .loop:
>      movu        m7,    [r0]        ; m1 = pRec[x]
> diff -r 7e29b10982d2 -r 8a2312df90f9 source/encoder/entropy.cpp
> --- a/source/encoder/entropy.cpp        Thu Sep 11 19:24:28 2014 +0530
> +++ b/source/encoder/entropy.cpp        Fri Sep 12 11:01:54 2014 +0900
> @@ -879,19 +879,19 @@
>
>      if (symbol)
>      {
> -        if (saoLcuParam->typeIdx < 4 && compIdx != 2)
> +        if (saoLcuParam->typeIdx < SAO_BO && compIdx != 2)
>              saoLcuParam->subTypeIdx = saoLcuParam->typeIdx;
>
>          int offsetTh = 1 << X265_MIN(X265_DEPTH - 5, 5);
>          if (saoLcuParam->typeIdx == SAO_BO)
>          {
> -            for (i = 0; i < saoLcuParam->length; i++)
> +            for (i = 0; i < SAO_BO_LEN; i++)
>              {
>                  uint32_t absOffset = ((saoLcuParam->offset[i] < 0) ? -saoLcuParam->offset[i] : saoLcuParam->offset[i]);
>                  codeSaoMaxUvlc(absOffset, offsetTh - 1);
>              }
>
> -            for (i = 0; i < saoLcuParam->length; i++)
> +            for (i = 0; i < SAO_BO_LEN; i++)
>              {
>                  if (saoLcuParam->offset[i] != 0)
>                  {
> @@ -903,7 +903,7 @@
>              symbol = (uint32_t)(saoLcuParam->subTypeIdx);
>              codeSaoUflc(5, symbol);
>          }
> -        else if (saoLcuParam->typeIdx < 4)
> +        else // if (saoLcuParam->typeIdx < SAO_BO)
>          {
>              codeSaoMaxUvlc(saoLcuParam->offset[0], offsetTh - 1);
>              codeSaoMaxUvlc(saoLcuParam->offset[1], offsetTh - 1);
> diff -r 7e29b10982d2 -r 8a2312df90f9 source/encoder/sao.cpp
> --- a/source/encoder/sao.cpp    Thu Sep 11 19:24:28 2014 +0530
> +++ b/source/encoder/sao.cpp    Fri Sep 12 11:01:54 2014 +0900
> @@ -79,26 +79,13 @@
>      341, // level 4
>  };
>
> -const uint32_t SAO::s_eoTable[9] =
> +const uint32_t SAO::s_eoTable[NUM_EDGETYPE] =
>  {
>      1, // 0
>      2, // 1
>      0, // 2
>      3, // 3
> -    4, // 4
> -    0, // 5
> -    0, // 6
> -    0, // 7
> -    0
> -};
> -
> -const int SAO::s_numClass[MAX_NUM_SAO_TYPE] =
> -{
> -    SAO_EO_LEN,
> -    SAO_EO_LEN,
> -    SAO_EO_LEN,
> -    SAO_EO_LEN,
> -    SAO_BO_LEN
> +    4  // 4
>  };
>
>  SAO::SAO()
> @@ -122,8 +109,6 @@
>      m_clipTable = NULL;
>      m_clipTableBase = NULL;
>      m_offsetBo = NULL;
> -    m_chromaOffsetBo = NULL;
> -    m_tableBo = NULL;
>      m_tmpU1[0] = NULL;
>      m_tmpU1[1] = NULL;
>      m_tmpU1[2] = NULL;
> @@ -162,18 +147,12 @@
>       * m_numTotalParts must allow for sufficient storage in any allocated arrays */
>      m_numTotalParts = X265_MAX(3, s_numCulPartsLevel[m_maxSplitLevel]);
>
> -    int pixelRange = 1 << X265_DEPTH;
> -    int boRangeShift = X265_DEPTH - SAO_BO_BITS;
> -    pixel maxY = (1 << X265_DEPTH) - 1;
> -    pixel minY = 0;
> -    pixel rangeExt = maxY >> 1;
> +    const pixel maxY = (1 << X265_DEPTH) - 1;
> +    const pixel rangeExt = maxY >> 1;
>      int numLcu = m_numCuInWidth * m_numCuInHeight;
>
> -    CHECKED_MALLOC(m_tableBo, pixel, pixelRange);
> -
> -    CHECKED_MALLOC(m_clipTableBase, pixel, maxY + 2 * rangeExt);
> -    CHECKED_MALLOC(m_offsetBo,        int, maxY + 2 * rangeExt);
> -    CHECKED_MALLOC(m_chromaOffsetBo , int, maxY + 2 * rangeExt);
> +    CHECKED_MALLOC(m_clipTableBase,  pixel, maxY + 2 * rangeExt);
> +    CHECKED_MALLOC(m_offsetBo,       pixel, maxY + 2 * rangeExt);
>
>      CHECKED_MALLOC(m_tmpL1, pixel, g_maxCUSize + 1);
>      CHECKED_MALLOC(m_tmpL2, pixel, g_maxCUSize + 1);
> @@ -199,19 +178,16 @@
>      CHECKED_MALLOC(m_countPreDblk, PerPlane, numLcu);
>      CHECKED_MALLOC(m_offsetOrgPreDblk, PerPlane, numLcu);
>
> -    for (int k2 = 0; k2 < pixelRange; k2++)
> -        m_tableBo[k2] = (pixel)(1 + (k2 >> boRangeShift));
> +    m_clipTable = &(m_clipTableBase[rangeExt]);
>
> -    for (int i = 0; i < (minY + rangeExt); i++)
> -        m_clipTableBase[i] = minY;
> +    for (int i = 0; i < rangeExt; i++)
> +        m_clipTableBase[i] = 0;
>
> -    for (int i = minY + rangeExt; i < (maxY + rangeExt); i++)
> -        m_clipTableBase[i] = (pixel)(i - rangeExt);
> +    for (int i = 0; i < maxY; i++)
> +        m_clipTable[i] = (pixel)i;
>
> -    for (int i = maxY + rangeExt; i < (maxY + 2 * rangeExt); i++)
> -        m_clipTableBase[i] = maxY;
> -
> -    m_clipTable = &(m_clipTableBase[rangeExt]);
> +    for (int i = maxY; i < maxY + rangeExt; i++)
> +        m_clipTable[i] = maxY;
>
>      return true;
>
> @@ -223,8 +199,6 @@
>  {
>      X265_FREE(m_clipTableBase);
>      X265_FREE(m_offsetBo);
> -    X265_FREE(m_tableBo);
> -    X265_FREE(m_chromaOffsetBo);
>
>      X265_FREE(m_tmpL1);
>      X265_FREE(m_tmpL2);
> @@ -271,12 +245,9 @@
>  /* recursively initialize SAO parameters (only once) */
>  void SAO::initSAOParam(SAOParam *saoParam, int partLevel, int partRow, int partCol, int parentPartIdx, int startCUX, int endCUX, int startCUY, int endCUY, int plane) const
>  {
> -    int j;
>      int partIdx = convertLevelRowCol2Idx(partLevel, partRow, partCol);
>
> -    SAOQTPart* saoPart;
> -
> -    saoPart = &(saoParam->saoPart[plane][partIdx]);
> +    SAOQTPart* saoPart = &(saoParam->saoPart[plane][partIdx]);
>
>      saoPart->partIdx   = partIdx;
>      saoPart->partLevel = partLevel;
> @@ -290,11 +261,10 @@
>
>      saoPart->upPartIdx = parentPartIdx;
>      saoPart->bestType  = -1;
> -    saoPart->length    =  0;
>
>      saoPart->subTypeIdx = 0;
>
> -    for (j = 0; j < MAX_NUM_SAO_OFFSETS; j++)
> +    for (int j = 0; j < SAO_NUM_OFFSET; j++)
>          saoPart->offset[j] = 0;
>
>      if (saoPart->partLevel < m_maxSplitLevel)
> @@ -371,14 +341,13 @@
>          for (int i = 0; i < s_numCulPartsLevel[m_maxSplitLevel]; i++)
>          {
>              saoParam->saoPart[c][i].bestType     = -1;
> -            saoParam->saoPart[c][i].length       =  0;
>              saoParam->saoPart[c][i].bSplit       = false;
>              saoParam->saoPart[c][i].bProcessed   = false;
>              saoParam->saoPart[c][i].minCost      = MAX_DOUBLE;
>              saoParam->saoPart[c][i].minDist      = MAX_INT;
>              saoParam->saoPart[c][i].minRate      = MAX_INT;
>              saoParam->saoPart[c][i].subTypeIdx   = 0;
> -            for (int j = 0; j < MAX_NUM_SAO_OFFSETS; j++)
> +            for (int j = 0; j < SAO_NUM_OFFSET; j++)
>              {
>                  saoParam->saoPart[c][i].offset[j] = 0;
>                  saoParam->saoPart[c][i].offset[j] = 0;
> @@ -454,18 +423,12 @@
>      int lcuHeight;
>      int rpelx;
>      int bpely;
> -    int edgeType;
> -    int signDown;
> -    int signDown1;
> -    int signDown2;
>      int picWidthTmp;
>      int picHeightTmp;
>      int startX;
>      int startY;
>      int endX;
>      int endY;
> -    int shift;
> -    int cuHeightTmp;
>      pixel* tmpL;
>      pixel* tmpU;
>      uint32_t lpelx = tmpCu->getCUPelX();
> @@ -505,22 +468,18 @@
>
>  //   if (iSaoType!=SAO_BO_0 || iSaoType!=SAO_BO_1)
>      {
> -        cuHeightTmp = isLuma ? g_maxCUSize : (g_maxCUSize  >>
> m_vChromaShift);
> -        shift = isLuma ? (g_maxCUSize - 1) : ((g_maxCUSize >>
> m_hChromaShift) - 1);
> +        int cuHeightTmp = isLuma ? g_maxCUSize : (g_maxCUSize  >>
> m_vChromaShift);
> +        pixel* recR = &rec[isLuma ? (g_maxCUSize - 1) : ((g_maxCUSize >>
> m_hChromaShift) - 1)];
>          for (int i = 0; i < cuHeightTmp + 1; i++)
>          {
> -            m_tmpL2[i] = rec[shift];
> -            rec += stride;
> +            m_tmpL2[i] = *recR;
> +            recR += stride;
>          }
>
> -        rec -= (stride * (cuHeightTmp + 1));
> -
>          tmpL = m_tmpL1;
>          tmpU = &(m_tmpU1[plane][lpelx]);
>      }
>
> -    int32_t *offsetBo = isLuma ? m_offsetBo : m_chromaOffsetBo;
> -
>      switch (saoType)
>      {
>      case SAO_EO_0: // dir: -
> @@ -536,10 +495,10 @@
>                  for (x = startX; x < endX; x++)
>                  {
>                      int signRight = signOf(rec[x] - rec[x + 1]);
> -                    edgeType = signRight + signLeft + 2;
> +                    int edgeType = signRight + signLeft + 2;
>                      signLeft = -signRight;
>
> -                    rec[x] = (pixel)Clip3(0, (1 << X265_DEPTH) - 1,
> rec[x] + m_offsetEo[edgeType]);
> +                    rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
>                  }
>
>                  rec += stride;
> @@ -584,8 +543,8 @@
>          {
>              for (x = 0; x < lcuWidth; x++)
>              {
> -                signDown = signOf(rec[x] - rec[x + stride]);
> -                edgeType = signDown + upBuff1[x] + 2;
> +                int signDown = signOf(rec[x] - rec[x + stride]);
> +                int edgeType = signDown + upBuff1[x] + 2;
>                  upBuff1[x] = -signDown;
>
>                  rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
> @@ -612,11 +571,11 @@
>
>          for (y = startY; y < endY; y++)
>          {
> -            signDown2 = signOf(rec[stride + startX] - tmpL[y]);
> +            int signDown2 = signOf(rec[stride + startX] - tmpL[y]);
>              for (x = startX; x < endX; x++)
>              {
> -                signDown1 = signOf(rec[x] - rec[x + stride + 1]);
> -                edgeType  = signDown1 + upBuff1[x] + 2;
> +                int signDown1 = signOf(rec[x] - rec[x + stride + 1]);
> +                int edgeType  = signDown1 + upBuff1[x] + 2;
>                  upBufft[x + 1] = -signDown1;
>                  rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
>              }
> @@ -647,8 +606,8 @@
>          for (y = startY; y < endY; y++)
>          {
>              x = startX;
> -            signDown1 = signOf(rec[x] - tmpL[y + 1]);
> -            edgeType  = signDown1 + upBuff1[x] + 2;
> +            int signDown1 = signOf(rec[x] - tmpL[y + 1]);
> +            int edgeType  = signDown1 + upBuff1[x] + 2;
>              upBuff1[x - 1] = -signDown1;
>              rec[x] = m_clipTable[rec[x] + m_offsetEo[edgeType]];
>              for (x = startX + 1; x < endX; x++)
> @@ -668,10 +627,12 @@
>      }
>      case SAO_BO:
>      {
> +        const pixel* offsetBo = m_offsetBo;
> +
>          for (y = 0; y < lcuHeight; y++)
>          {
>              for (x = 0; x < lcuWidth; x++)
> -                rec[x] = (pixel)offsetBo[rec[x]];
> +                rec[x] = offsetBo[rec[x]];
>
>              rec += stride;
>          }
> @@ -704,38 +665,29 @@
>
>      memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidthTmp);
>
> -    int typeIdx;
> -    uint32_t edgeType;
> -
> -    int offset[LUMA_GROUP_NUM + 1];
> -    int idxX;
> -    int idxY;
> -    int addr;
>      int frameWidthInCU = m_pic->getFrameWidthInCU();
>      int frameHeightInCU = m_pic->getFrameHeightInCU();
>      int stride;
>      bool isChroma = !!plane;
> -    bool mergeLeftFlag;
> +    uint32_t cuHeightTmp = isChroma ? (g_maxCUSize >> m_vChromaShift) :
> g_maxCUSize;
>
> -    int32_t *offsetBo = isChroma ? m_chromaOffsetBo : m_offsetBo;
> +    const int boShift = X265_DEPTH - SAO_BO_BITS;
>
> -    offset[0] = 0;
> -    for (idxY = 0; idxY < frameHeightInCU; idxY++)
> +    for (int idxY = 0; idxY < frameHeightInCU; idxY++)
>      {
> -        addr = idxY * frameWidthInCU;
> +        int addr = idxY * frameWidthInCU;
>          if (plane == 0)
>          {
> -            rec  = m_pic->getPicYuvRec()->getLumaAddr(addr);
> +            rec = m_pic->getPicYuvRec()->getLumaAddr(addr);
>              stride = m_pic->getStride();
>              picWidthTmp = m_param->sourceWidth;
>          }
>          else
>          {
> -            rec  = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);
> +            rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);
>              stride = m_pic->getCStride();
>              picWidthTmp = m_param->sourceWidth >> m_hChromaShift;
>          }
> -        uint32_t cuHeightTmp = isChroma ? (g_maxCUSize >> m_vChromaShift)
> : g_maxCUSize;
>          for (uint32_t i = 0; i < cuHeightTmp + 1; i++)
>          {
>              m_tmpL1[i] = rec[0];
> @@ -746,10 +698,13 @@
>
>          memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidthTmp);
>
> -        for (idxX = 0; idxX < frameWidthInCU; idxX++)
> +        for (int idxX = 0; idxX < frameWidthInCU; idxX++)
>          {
>              addr = idxY * frameWidthInCU + idxX;
>
> +            int typeIdx;
> +            bool mergeLeftFlag;
> +
>              if (oneUnitFlag)
>              {
>                  typeIdx = saoLcuParam[0].typeIdx;
> @@ -766,21 +721,24 @@
>                  {
>                      if (typeIdx == SAO_BO)
>                      {
> -                        for (int i = 0; i < SAO_MAX_BO_CLASSES + 1; i++)
> -                            offset[i] = 0;
> +                        pixel* offsetBo = m_offsetBo;
> +                        int offset[SAO_NUM_BO_CLASSES];
> +                        memset(offset, 0, sizeof(offset));
>
> -                        for (int i = 0; i < saoLcuParam[addr].length; i++)
> -                            offset[(saoLcuParam[addr].subTypeIdx + i) %
> SAO_MAX_BO_CLASSES  + 1] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;
> +                        for (int i = 0; i < SAO_NUM_OFFSET; i++)
> +                            offset[((saoLcuParam[addr].subTypeIdx + i) &
> (SAO_NUM_BO_CLASSES - 1))] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;
>
>                          for (int i = 0; i < (1 << X265_DEPTH); i++)
> -                            offsetBo[i] = m_clipTable[i +
> offset[m_tableBo[i]]];
> +                            offsetBo[i] = m_clipTable[i + offset[i >>
> boShift]];
>                      }
> -                    if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 ||
> typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
> +                    else // if (typeIdx == SAO_EO_0 || typeIdx ==
> SAO_EO_1 || typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
>                      {
> -                        for (int i = 0; i < saoLcuParam[addr].length; i++)
> +                        int offset[NUM_EDGETYPE];
> +                        offset[0] = 0;
> +                        for (int i = 0; i < SAO_NUM_OFFSET; i++)
>                              offset[i + 1] = saoLcuParam[addr].offset[i]
> << SAO_BIT_INC;
>
> -                        for (edgeType = 0; edgeType < 6; edgeType++)
> +                        for (int edgeType = 0; edgeType < NUM_EDGETYPE;
> edgeType++)
>                              m_offsetEo[edgeType] =
> (int8_t)offset[s_eoTable[edgeType]];
>                      }
>                  }
> @@ -823,32 +781,25 @@
>
>      if (plane)
>      {
> -        rec = m_pic->getPicYuvRec()->getChromaAddr(plane);
> +        rec         = m_pic->getPicYuvRec()->getChromaAddr(plane);
>          picWidthTmp = m_param->sourceWidth >> m_hChromaShift;
>      }
>      else
>      {
> -        rec = m_pic->getPicYuvRec()->getLumaAddr();
> +        rec         = m_pic->getPicYuvRec()->getLumaAddr();
>          picWidthTmp = m_param->sourceWidth;
>      }
>
>      if (!idxY)
>          memcpy(m_tmpU1[plane], rec, sizeof(pixel) * picWidthTmp);
>
> -    int typeIdx;
> -
> -    int offset[LUMA_GROUP_NUM + 1];
> -    int idxX;
> -    int addr;
>      int frameWidthInCU = m_pic->getFrameWidthInCU();
>      int stride;
>      bool isChroma = !!plane;
> -    bool mergeLeftFlag;
>
> -    int32_t* offsetBo = isChroma ? m_chromaOffsetBo : m_offsetBo;
> +    const int boShift = X265_DEPTH - SAO_BO_BITS;
>
> -    offset[0] = 0;
> -    addr = idxY * frameWidthInCU;
> +    int addr = idxY * frameWidthInCU;
>      if (isChroma)
>      {
>          rec = m_pic->getPicYuvRec()->getChromaAddr(plane, addr);
> @@ -872,12 +823,12 @@
>
>      memcpy(m_tmpU2[plane], rec, sizeof(pixel) * picWidthTmp);
>
> -    for (idxX = 0; idxX < frameWidthInCU; idxX++)
> +    for (int idxX = 0; idxX < frameWidthInCU; idxX++)
>      {
>          addr = idxY * frameWidthInCU + idxX;
>
> -        typeIdx = saoLcuParam[addr].typeIdx;
> -        mergeLeftFlag = saoLcuParam[addr].mergeLeftFlag;
> +        int typeIdx = saoLcuParam[addr].typeIdx;
> +        bool mergeLeftFlag = saoLcuParam[addr].mergeLeftFlag;
>
>          if (typeIdx >= 0)
>          {
> @@ -885,21 +836,24 @@
>              {
>                  if (typeIdx == SAO_BO)
>                  {
> -                    for (int i = 0; i < SAO_MAX_BO_CLASSES + 1; i++)
> -                        offset[i] = 0;
> +                    pixel* offsetBo = m_offsetBo;
> +                    int offset[SAO_NUM_BO_CLASSES];
> +                    memset(offset, 0, sizeof(offset));
>
> -                    for (int i = 0; i < saoLcuParam[addr].length; i++)
> -                        offset[(saoLcuParam[addr].subTypeIdx + i) %
> SAO_MAX_BO_CLASSES  + 1] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;
> +                    for (int i = 0; i < SAO_NUM_OFFSET; i++)
> +                        offset[((saoLcuParam[addr].subTypeIdx + i) &
> (SAO_NUM_BO_CLASSES - 1))] = saoLcuParam[addr].offset[i] << SAO_BIT_INC;
>
>                      for (int i = 0; i < (1 << X265_DEPTH); i++)
> -                        offsetBo[i] = m_clipTable[i +
> offset[m_tableBo[i]]];
> +                        offsetBo[i] = m_clipTable[i + offset[i >>
> boShift]];
>                  }
> -                if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 || typeIdx
> == SAO_EO_2 || typeIdx == SAO_EO_3)
> +                else // if (typeIdx == SAO_EO_0 || typeIdx == SAO_EO_1 ||
> typeIdx == SAO_EO_2 || typeIdx == SAO_EO_3)
>                  {
> -                    for (int i = 0; i < saoLcuParam[addr].length; i++)
> +                    int offset[NUM_EDGETYPE];
> +                    offset[0] = 0;
> +                    for (int i = 0; i < SAO_NUM_OFFSET; i++)
>                          offset[i + 1] = saoLcuParam[addr].offset[i] <<
> SAO_BIT_INC;
>
> -                    for (uint32_t edgeType = 0; edgeType < 6; edgeType++)
> +                    for (int edgeType = 0; edgeType < NUM_EDGETYPE;
> edgeType++)
>                          m_offsetEo[edgeType] =
> (int8_t)offset[s_eoTable[edgeType]];
>                  }
>              }
> @@ -942,7 +896,7 @@
>          saoLcuParam[i].partIdx       =  0;
>          saoLcuParam[i].typeIdx       = -1;
>          saoLcuParam[i].subTypeIdx    =  0;
> -        for (int j = 0; j < MAX_NUM_SAO_OFFSETS; j++)
> +        for (int j = 0; j < SAO_NUM_OFFSET; j++)
>              saoLcuParam[i].offset[j] = 0;
>      }
>  }
> @@ -954,10 +908,9 @@
>      saoUnit->partIdx       = 0;
>      saoUnit->partIdxTmp    = 0;
>      saoUnit->typeIdx       = -1;
> -    saoUnit->length        = 0;
>      saoUnit->subTypeIdx    = 0;
>
> -    for (int i = 0; i < 4; i++)
> +    for (int i = 0; i < SAO_NUM_OFFSET; i++)
>          saoUnit->offset[i] = 0;
>  }
>
> @@ -966,10 +919,9 @@
>      saoUnitDst->mergeLeftFlag = saoUnitSrc->mergeLeftFlag;
>      saoUnitDst->mergeUpFlag   = saoUnitSrc->mergeUpFlag;
>      saoUnitDst->typeIdx       = saoUnitSrc->typeIdx;
> -    saoUnitDst->length        = saoUnitSrc->length;
>
>      saoUnitDst->subTypeIdx  = saoUnitSrc->subTypeIdx;
> -    for (int i = 0; i < 4; i++)
> +    for (int i = 0; i < SAO_NUM_OFFSET; i++)
>          saoUnitDst->offset[i] = saoUnitSrc->offset[i];
>  }
>
> @@ -1008,17 +960,15 @@
>              saoLcuParam[addr].partIdxTmp = (int)partIdx;
>              saoLcuParam[addr].typeIdx    = saoQTPart[partIdx].bestType;
>              saoLcuParam[addr].subTypeIdx = saoQTPart[partIdx].subTypeIdx;
> -            if (saoLcuParam[addr].typeIdx != -1)
> +            if (saoLcuParam[addr].typeIdx >= 0)
>              {
> -                saoLcuParam[addr].length = saoQTPart[partIdx].length;
> -                for (int j = 0; j < MAX_NUM_SAO_OFFSETS; j++)
> +                for (int j = 0; j < SAO_NUM_OFFSET; j++)
>                      saoLcuParam[addr].offset[j] =
> saoQTPart[partIdx].offset[j];
>              }
>              else
>              {
> -                saoLcuParam[addr].length = 0;
>                  saoLcuParam[addr].subTypeIdx =
> saoQTPart[partIdx].subTypeIdx;
> -                for (int j = 0; j < MAX_NUM_SAO_OFFSETS; j++)
> +                for (int j = 0; j < SAO_NUM_OFFSET; j++)
>                      saoLcuParam[addr].offset[j] = 0;
>              }
>          }
> @@ -1028,12 +978,9 @@
>  /* process SAO for one partition */
>  void SAO::rdoSaoOnePart(SAOQTPart *psQTPart, int partIdx, int plane)
>  {
> -    int typeIdx;
> -    int numTotalType = MAX_NUM_SAO_TYPE;
>      SAOQTPart* onePart = &(psQTPart[partIdx]);
>
>      int64_t estDist;
> -    int classIdx;
>
>      m_distOrg[partIdx] = 0;
>
> @@ -1046,50 +993,20 @@
>      int allowMergeUp;
>      SaoLcuParam saoLcuParamRdo;
>
> -    for (typeIdx = -1; typeIdx < numTotalType; typeIdx++)
> +    for (int typeIdx = -1; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
>      {
>          m_entropyCoder.load(m_rdEntropyCoders[onePart->partLevel][CI_CURR_BEST]);
>          m_entropyCoder.resetBits();
>
> -        if (typeIdx == -1)
> -        {
> -            for (int ry = onePart->startCUY; ry <= onePart->endCUY; ry++)
> -            {
> -                for (int rx = onePart->startCUX; rx <= onePart->endCUX;
> rx++)
> -                {
> -                    // get bits for iTypeIdx = -1
> -                    allowMergeLeft = 1;
> -                    allowMergeUp   = 1;
> -
> -                    // reset
> -                    resetSaoUnit(&saoLcuParamRdo);
> -
> -                    // set merge flag
> -                    saoLcuParamRdo.mergeUpFlag   = 1;
> -                    saoLcuParamRdo.mergeLeftFlag = 1;
> -
> -                    if (ry == onePart->startCUY)
> -                        saoLcuParamRdo.mergeUpFlag = 0;
> -
> -                    if (rx == onePart->startCUX)
> -                        saoLcuParamRdo.mergeLeftFlag = 0;
> -
> -                    m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx,
> ry,  &saoLcuParamRdo, 1,  1,  allowMergeLeft, allowMergeUp);
> -                }
> -            }
> -        }
> -
>          if (typeIdx >= 0)
>          {
>              estDist = estSaoTypeDist(partIdx, typeIdx, 0, m_lumaLambda,
> currentDistortionTableBo, currentRdCostTableBo);
>              if (typeIdx == SAO_BO)
>              {
>                  // Estimate Best Position
> -                double currentRDCost = 0.0;
> -
> -                for (int i = 0; i < SAO_MAX_BO_CLASSES - SAO_BO_LEN + 1;
> i++)
> +                for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1;
> i++)
>                  {
> -                    currentRDCost = 0.0;
> +                    double currentRDCost = 0.0;
>                      for (int j = i; j < i + SAO_BO_LEN; j++)
>                          currentRDCost += currentRdCostTableBo[j];
>
> @@ -1101,7 +1018,7 @@
>                  }
>
>                  // Recode all offsets
> -                for (classIdx = bestClassTableBo; classIdx <
> bestClassTableBo + SAO_BO_LEN; classIdx++)
> +                for (int classIdx = bestClassTableBo; classIdx <
> bestClassTableBo + SAO_BO_LEN; classIdx++)
>                      estDist += currentDistortionTableBo[classIdx];
>              }
>
> @@ -1129,8 +1046,7 @@
>                      // set type and offsets
>                      saoLcuParamRdo.typeIdx = typeIdx;
>                      saoLcuParamRdo.subTypeIdx = (typeIdx == SAO_BO) ?
> bestClassTableBo : 0;
> -                    saoLcuParamRdo.length = s_numClass[typeIdx];
> -                    for (classIdx = 0; classIdx < saoLcuParamRdo.length;
> classIdx++)
> +                    for (int classIdx = 0; classIdx < SAO_NUM_OFFSET;
> classIdx++)
> +                        saoLcuParamRdo.offset[classIdx] = (int)m_offset[partIdx][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1];
>
>                      m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx,
> ry, &saoLcuParamRdo, 1, 1, allowMergeLeft, allowMergeUp);
> @@ -1152,6 +1068,30 @@
>          }
>          else
>          {
> +            for (int ry = onePart->startCUY; ry <= onePart->endCUY; ry++)
> +            {
> +                for (int rx = onePart->startCUX; rx <= onePart->endCUX;
> rx++)
> +                {
> +                    // get bits for iTypeIdx = -1
> +                    allowMergeLeft = 1;
> +                    allowMergeUp   = 1;
> +
> +                    // reset
> +                    resetSaoUnit(&saoLcuParamRdo);
> +
> +                    // set merge flag
> +                    saoLcuParamRdo.mergeUpFlag   = 1;
> +                    saoLcuParamRdo.mergeLeftFlag = 1;
> +
> +                    if (ry == onePart->startCUY)
> +                        saoLcuParamRdo.mergeUpFlag = 0;
> +
> +                    if (rx == onePart->startCUX)
> +                        saoLcuParamRdo.mergeLeftFlag = 0;
> +
> +                    m_entropyCoder.codeSaoUnitInterleaving(plane, 1, rx,
> ry,  &saoLcuParamRdo, 1,  1,  allowMergeLeft, allowMergeUp);
> +                }
> +            }
>              if (m_distOrg[partIdx] < m_costPartBest[partIdx])
>              {
>                  m_costPartBest[partIdx] = (double)m_distOrg[partIdx] +
> m_entropyCoder.getNumberOfWrittenBits() * m_lumaLambda;
> @@ -1170,18 +1110,15 @@
>
>      if (onePart->bestType != -1)
>      {
> -        onePart->length = s_numClass[onePart->bestType];
>          int minIndex = 0;
>          if (onePart->bestType == SAO_BO)
>          {
>              onePart->subTypeIdx = bestClassTableBo;
>              minIndex = onePart->subTypeIdx;
>          }
> -        for (int i = 0; i < onePart->length; i++)
> +        for (int i = 0; i < SAO_NUM_OFFSET; i++)
>              onePart->offset[i] =
> (int)m_offset[partIdx][onePart->bestType][minIndex + i + 1];
>      }
> -    else
> -        onePart->length = 0;
>  }
>
>  /* Run partition tree disable */
> @@ -1190,7 +1127,6 @@
>      SAOQTPart* pOnePart = &(psQTPart[partIdx]);
>
>      pOnePart->bSplit   = false;
> -    pOnePart->length   =  0;
>      pOnePart->bestType = -1;
>
>      if (pOnePart->partLevel < (int)m_maxSplitLevel)
> @@ -1236,7 +1172,6 @@
>          {
>              costFinal = costSplit;
>              onePart->bSplit   = true;
> -            onePart->length   =  0;
>              onePart->bestType = -1;
>
>              m_rdEntropyCoders[onePart->partLevel][CI_NEXT_BEST].load(m_rdEntropyCoders[nextDepth][CI_NEXT_BEST]);
>          }
> @@ -1271,7 +1206,6 @@
>      uint32_t picHeightTmp;
>      int64_t* stats;
>      int64_t* counts;
> -    int classIdx;
>      int startX;
>      int startY;
>      int endX;
> @@ -1308,6 +1242,8 @@
>
>      //if(iSaoType == BO_0 || iSaoType == BO_1)
>      {
> +        const int boShift = X265_DEPTH - SAO_BO_BITS;
> +
>          if (m_param->saoLcuBasedOptimization && m_param->saoLcuBoundary)
>          {
>              numSkipLine      = isChroma ? 3 - (2 * m_vChromaShift) : 3;
> @@ -1325,12 +1261,9 @@
>          {
>              for (x = 0; x < endX; x++)
>              {
> -                classIdx = m_tableBo[recon[x]];
> -                if (classIdx)
> -                {
> -                    stats[classIdx] += (fenc[x] - recon[x]);
> -                    counts[classIdx]++;
> -                }
> +                int classIdx = 1 + (recon[x] >> boShift);
> +                stats[classIdx] += (fenc[x] - recon[x]);
> +                counts[classIdx]++;
>              }
>
>              fenc += stride;
> @@ -1338,12 +1271,6 @@
>          }
>      }
>
> -    int signLeft;
> -    int signRight;
> -    int signDown;
> -    int signDown1;
> -    int signDown2;
> -    uint32_t edgeType;
>      int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
>      int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
>
> @@ -1366,11 +1293,11 @@
>              endX   = (rpelx == picWidthTmp) ? lcuWidth - 1 : lcuWidth -
> numSkipLineRight;
>              for (y = 0; y < lcuHeight - numSkipLine; y++)
>              {
> -                signLeft = signOf(recon[startX] - recon[startX - 1]);
> +                int signLeft = signOf(recon[startX] - recon[startX - 1]);
>                  for (x = startX; x < endX; x++)
>                  {
> -                    signRight = signOf(recon[x] - recon[x + 1]);
> -                    edgeType = signRight + signLeft + 2;
> +                    int signRight = signOf(recon[x] - recon[x + 1]);
> +                    int edgeType = signRight + signLeft + 2;
>                      signLeft = -signRight;
>
>                      stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);
> @@ -1411,8 +1338,8 @@
>              {
>                  for (x = 0; x < endX; x++)
>                  {
> -                    signDown = signOf(recon[x] - recon[x + stride]);
> -                    edgeType = signDown + upBuff1[x] + 2;
> +                    int signDown = signOf(recon[x] - recon[x + stride]);
> +                    int edgeType = signDown + upBuff1[x] + 2;
>                      upBuff1[x] = -signDown;
>
>                      stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);
> @@ -1452,11 +1379,11 @@
>
>              for (y = startY; y < endY; y++)
>              {
> -                signDown2 = signOf(recon[stride + startX] - recon[startX
> - 1]);
> +                int signDown2 = signOf(recon[stride + startX] -
> recon[startX - 1]);
>                  for (x = startX; x < endX; x++)
>                  {
> -                    signDown1 = signOf(recon[x] - recon[x + stride + 1]);
> -                    edgeType  = signDown1 + upBuff1[x] + 2;
> +                    int signDown1 = signOf(recon[x] - recon[x + stride +
> 1]);
> +                    int edgeType  = signDown1 + upBuff1[x] + 2;
>                      upBufft[x + 1] = -signDown1;
>                      stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);
>                      counts[s_eoTable[edgeType]]++;
> @@ -1500,8 +1427,8 @@
>              {
>                  for (x = startX; x < endX; x++)
>                  {
> -                    signDown1 = signOf(recon[x] - recon[x + stride - 1]);
> -                    edgeType  = signDown1 + upBuff1[x] + 2;
> +                    int signDown1 = signOf(recon[x] - recon[x + stride -
> 1]);
> +                    int edgeType  = signDown1 + upBuff1[x] + 2;
>                      upBuff1[x - 1] = -signDown1;
>                      stats[s_eoTable[edgeType]] += (fenc[x] - recon[x]);
>                      counts[s_eoTable[edgeType]]++;
> @@ -1518,7 +1445,6 @@
>
>  void SAO::calcSaoStatsCu_BeforeDblk(Frame* pic, int idxX, int idxY)
>  {
> -    int addr;
>      int x, y;
>
>      pixel* fenc;
> @@ -1528,7 +1454,6 @@
>      uint32_t bPelY;
>      int64_t* stats;
>      int64_t* count;
> -    int classIdx;
>      int startX;
>      int startY;
>      int endX;
> @@ -1545,11 +1470,13 @@
>      int32_t _upBuff1[MAX_CU_SIZE + 2], *upBuff1 = _upBuff1 + 1;
>      int32_t _upBufft[MAX_CU_SIZE + 2], *upBufft = _upBufft + 1;
>
> +    const int boShift = X265_DEPTH - SAO_BO_BITS;
> +
>      // NOTE: Row
>      {
>          // NOTE: Col
>          {
> -            addr    = idxX + frameWidthInCU * idxY;
> +            int addr    = idxX + frameWidthInCU * idxY;
>              cu      = pic->getCU(addr);
>
>              uint32_t picWidthTmp  = m_param->sourceWidth;
> @@ -1606,26 +1533,15 @@
>                          if (x < startX && y < startY)
>                              continue;
>
> -                        classIdx = m_tableBo[recon[x]];
> -                        if (classIdx)
> -                        {
> -                            stats[classIdx] += (fenc[x] - recon[x]);
> -                            count[classIdx]++;
> -                        }
> +                        int classIdx = 1 + (recon[x] >> boShift);
> +                        stats[classIdx] += (fenc[x] - recon[x]);
> +                        count[classIdx]++;
>                      }
>
>                      fenc += stride;
>                      recon += stride;
>                  }
>
> -                int signLeft;
> -                int signRight;
> -                int signDown;
> -                int signDown1;
> -                int signDown2;
> -
> -                uint32_t edgeType;
> -
>                  //if (iSaoType == EO_0)
>
>                  numSkipLine = isChroma ? 1 : 3;
> @@ -1644,11 +1560,11 @@
>
>                  for (y = 0; y < lcuHeight; y++)
>                  {
> -                    signLeft = signOf(recon[firstX] - recon[firstX - 1]);
> +                    int signLeft = signOf(recon[firstX] - recon[firstX -
> 1]);
>                      for (x = firstX; x < endX; x++)
>                      {
> -                        signRight =  signOf(recon[x] - recon[x + 1]);
> -                        edgeType =  signRight + signLeft + 2;
> +                        int signRight =  signOf(recon[x] - recon[x + 1]);
> +                        int edgeType =  signRight + signLeft + 2;
>                          signLeft  = -signRight;
>
>                          if (x < startX && y < startY)
> @@ -1690,8 +1606,8 @@
>                  {
>                      for (x = 0; x < lcuWidth; x++)
>                      {
> -                        signDown = signOf(recon[x] - recon[x + stride]);
> -                        edgeType = signDown + upBuff1[x] + 2;
> +                        int signDown = signOf(recon[x] - recon[x +
> stride]);
> +                        int edgeType = signDown + upBuff1[x] + 2;
>                          upBuff1[x] = -signDown;
>
>                          if (x < startX && y < startY)
> @@ -1733,11 +1649,11 @@
>
>                  for (y = firstY; y < endY; y++)
>                  {
> -                    signDown2 = signOf(recon[stride + startX] -
> recon[startX - 1]);
> +                    int signDown2 = signOf(recon[stride + startX] -
> recon[startX - 1]);
>                      for (x = firstX; x < endX; x++)
>                      {
> -                        signDown1 = signOf(recon[x] - recon[x + stride +
> 1]);
> -                        edgeType = signDown1 + upBuff1[x] + 2;
> +                        int signDown1 = signOf(recon[x] - recon[x +
> stride + 1]);
> +                        int edgeType = signDown1 + upBuff1[x] + 2;
>                          upBufft[x + 1] = -signDown1;
>
>                          if (x < startX && y < startY)
> @@ -1784,8 +1700,8 @@
>                  {
>                      for (x = firstX; x < endX; x++)
>                      {
> -                        signDown1 = signOf(recon[x] - recon[x + stride -
> 1]);
> -                        edgeType  = signDown1 + upBuff1[x] + 2;
> +                        int signDown1 = signOf(recon[x] - recon[x +
> stride - 1]);
> +                        int edgeType  = signDown1 + upBuff1[x] + 2;
>                          upBuff1[x - 1] = -signDown1;
>
>                          if (x < startX && y < startY)
> @@ -1807,12 +1723,10 @@
>
>  void SAO::getSaoStats(SAOQTPart *psQTPart, int plane)
>  {
> -    int levelIdx, partIdx, typeIdx, classIdx;
> +    int levelIdx, partIdx;
>      int i;
> -    int numTotalType = MAX_NUM_SAO_TYPE;
>      int lcuIdx;
>      int lcuIdy;
> -    int addr;
>      int frameWidthInCU = m_pic->getFrameWidthInCU();
>      int downPartIdx;
>      int partStart;
> @@ -1827,7 +1741,7 @@
>          {
>              for (lcuIdx = onePart->startCUX; lcuIdx <= onePart->endCUX;
> lcuIdx++)
>              {
> -                addr = lcuIdy * frameWidthInCU + lcuIdx;
> +                int addr = lcuIdy * frameWidthInCU + lcuIdx;
>                  calcSaoStatsCu(addr, partIdx, plane);
>              }
>          }
> @@ -1841,7 +1755,7 @@
>              {
>                  for (lcuIdx = onePart->startCUX; lcuIdx <=
> onePart->endCUX; lcuIdx++)
>                  {
> -                    addr = lcuIdy * frameWidthInCU + lcuIdx;
> +                    int addr = lcuIdy * frameWidthInCU + lcuIdx;
>                      calcSaoStatsCu(addr, partIdx, plane);
>                  }
>              }
> @@ -1858,9 +1772,9 @@
>                  for (i = 0; i < SAOQTPart::NUM_DOWN_PART; i++)
>                  {
>                      downPartIdx = onePart->downPartsIdx[i];
> -                    for (typeIdx = 0; typeIdx < numTotalType; typeIdx++)
> +                    for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE;
> typeIdx++)
>                      {
> -                        for (classIdx = 0; classIdx < (typeIdx < SAO_BO ?
> s_numClass[typeIdx] : SAO_MAX_BO_CLASSES) + 1; classIdx++)
> +                        for (int classIdx = 0; classIdx < (typeIdx <
> SAO_BO ? SAO_EO_LEN : SAO_NUM_BO_CLASSES) + 1; classIdx++)
>                          {
>                              m_offsetOrg[partIdx][typeIdx][classIdx] +=
> m_offsetOrg[downPartIdx][typeIdx][classIdx];
>                              m_count[partIdx][typeIdx][classIdx]    +=
> m_count[downPartIdx][typeIdx][classIdx];
> @@ -1923,16 +1837,15 @@
>  /* Check merge SAO unit */
>  void SAO::checkMerge(SaoLcuParam * saoUnitCurr, SaoLcuParam *
> saoUnitCheck, int dir)
>  {
> -    int i;
>      int countDiff = 0;
>
>      if (saoUnitCurr->partIdx != saoUnitCheck->partIdx)
>      {
> -        if (saoUnitCurr->typeIdx != -1)
> +        if (saoUnitCurr->typeIdx >= 0)
>          {
>              if (saoUnitCurr->typeIdx == saoUnitCheck->typeIdx)
>              {
> -                for (i = 0; i < saoUnitCurr->length; i++)
> +                for (int i = 0; i < SAO_NUM_OFFSET; i++)
>                      countDiff += (saoUnitCurr->offset[i] !=
> saoUnitCheck->offset[i]);
>
>                  countDiff += (saoUnitCurr->subTypeIdx !=
> saoUnitCheck->subTypeIdx);
> @@ -1979,24 +1892,22 @@
>          oneUnitFlag = 1;
>      else
>      {
> -        int i, j, addr, addrUp, addrLeft,  idx, idxUp, idxLeft,  idxCount;
> -
>          oneUnitFlag = 0;
>
> -        idxCount = -1;
> +        int idxCount = -1;
>          saoLcuParam[0].mergeUpFlag = 0;
>          saoLcuParam[0].mergeLeftFlag = 0;
>
> -        for (j = 0; j < m_numCuInHeight; j++)
> +        for (int j = 0; j < m_numCuInHeight; j++)
>          {
> -            for (i = 0; i < m_numCuInWidth; i++)
> +            for (int i = 0; i < m_numCuInWidth; i++)
>              {
> -                addr     = i + j * m_numCuInWidth;
> -                addrLeft = (addr % m_numCuInWidth == 0) ? -1 : addr - 1;
> -                addrUp   = (addr < m_numCuInWidth)      ? -1 : addr -
> m_numCuInWidth;
> -                idx      = saoLcuParam[addr].partIdxTmp;
> -                idxLeft  = (addrLeft == -1) ? -1 :
> saoLcuParam[addrLeft].partIdxTmp;
> -                idxUp    = (addrUp == -1)   ? -1 :
> saoLcuParam[addrUp].partIdxTmp;
> +                int addr     = i + j * m_numCuInWidth;
> +                int addrUp   = (j == 0) ? -1 : addr - m_numCuInWidth;
> +                int addrLeft = (i == 0) ? -1 : addr - 1;
> +                int idx      = saoLcuParam[addr].partIdxTmp;
> +                int idxLeft  = (addrLeft == -1) ? -1 :
> saoLcuParam[addrLeft].partIdxTmp;
> +                int idxUp    = (addrUp == -1)   ? -1 :
> saoLcuParam[addrUp].partIdxTmp;
>
>                  if (idx != idxLeft && idx != idxUp)
>                  {
> @@ -2057,21 +1968,17 @@
>
>  void SAO::rdoSaoUnitRow(SAOParam *saoParam, int idxY)
>  {
> -    int idxX;
>      int frameWidthInCU  = saoParam->numCuInWidth;
>      int j, k;
> -    int addr = 0;
> -    int addrUp = -1;
> -    int addrLeft = -1;
>      int compIdx = 0;
>      SaoLcuParam mergeSaoParam[3][2];
>      double compDistortion[3];
>
> -    for (idxX = 0; idxX < frameWidthInCU; idxX++)
> +    for (int idxX = 0; idxX < frameWidthInCU; idxX++)
>      {
> -        addr     = idxX  + frameWidthInCU * idxY;
> -        addrUp   = addr < frameWidthInCU ? -1 : idxX     + frameWidthInCU * (idxY - 1);
> -        addrLeft = idxX == 0             ? -1 : idxX - 1 + frameWidthInCU * idxY;
> +        int addr     = idxX + idxY * frameWidthInCU;
> +        int addrUp   = idxY == 0 ? -1 : addr - frameWidthInCU;
> +        int addrLeft = idxX == 0 ? -1 : addr - 1;
>          int allowMergeLeft = 1;
>          int allowMergeUp   = 1;
>          uint32_t rate;
> @@ -2111,7 +2018,7 @@
>                  }
>              }
>
> -            saoParam->saoLcuParam[compIdx][addr].typeIdx       =  -1;
> +            saoParam->saoLcuParam[compIdx][addr].typeIdx       = -1;
>              saoParam->saoLcuParam[compIdx][addr].mergeUpFlag   = 0;
>              saoParam->saoLcuParam[compIdx][addr].mergeLeftFlag = 0;
>              saoParam->saoLcuParam[compIdx][addr].subTypeIdx    = 0;
> @@ -2173,9 +2080,9 @@
>                  }
>              }
>
> -            if (saoParam->saoLcuParam[0][addr].typeIdx == -1)
> +            if (saoParam->saoLcuParam[0][addr].typeIdx < 0)
>                  m_numNoSao[0]++;
> -            if (saoParam->saoLcuParam[1][addr].typeIdx == -1)
> +            if (saoParam->saoLcuParam[1][addr].typeIdx < 0)
>                  m_numNoSao[1] += 2;
>              m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);
>              m_entropyCoder.store(m_rdEntropyCoders[0][CI_CURR_BEST]);
> @@ -2187,9 +2094,8 @@
>  inline int64_t SAO::estSaoTypeDist(int compIdx, int typeIdx, int shift,
> double lambda, int32_t *currentDistortionTableBo, double
> *currentRdCostTableBo)
>  {
>      int64_t estDist = 0;
> -    int classIdx;
>
> -    for (classIdx = 1; classIdx < ((typeIdx < SAO_BO) ?
> s_numClass[typeIdx] + 1 : SAO_MAX_BO_CLASSES + 1); classIdx++)
> +    for (int classIdx = 1; classIdx < ((typeIdx < SAO_BO) ?  SAO_EO_LEN +
> 1 : SAO_NUM_BO_CLASSES + 1); classIdx++)
>      {
>          if (typeIdx == SAO_BO)
>          {
> @@ -2200,7 +2106,7 @@
>          {
>              m_offset[compIdx][typeIdx][classIdx] =
> (int64_t)roundIDBI((double)(m_offsetOrg[compIdx][typeIdx][classIdx] <<
> (X265_DEPTH - 8)) / (double)(m_count[compIdx][typeIdx][classIdx] <<
> SAO_BIT_INC));
>              m_offset[compIdx][typeIdx][classIdx] = Clip3(-OFFSET_THRESH +
> 1, OFFSET_THRESH - 1, (int)m_offset[compIdx][typeIdx][classIdx]);
> -            if (typeIdx < 4)
> +            if (typeIdx < SAO_BO)
>              {
>                  if (m_offset[compIdx][typeIdx][classIdx] < 0 && classIdx
> < 3)
>                      m_offset[compIdx][typeIdx][classIdx] = 0;
> @@ -2231,12 +2137,11 @@
>      //Clean up, best_q_offset.
>      int64_t iterOffset, tempOffset;
>      int64_t tempDist, tempRate;
> -    double tempCost, tempMinCost;
>      int64_t offsetOutput = 0;
>
>      iterOffset = offsetInput;
>      // Assuming sending quantized value 0 results in zero offset and
> sending the value zero needs 1 bit. entropy coder can be used to measure
> the exact rate here.
> -    tempMinCost = lambda;
> +    double tempMinCost = lambda;
>      while (iterOffset != 0)
>      {
>          // Calculate the bits required for signalling the offset
> @@ -2247,7 +2152,7 @@
>          // Do the dequntization before distorion calculation
>          tempOffset = iterOffset << bitIncrease;
>          tempDist   = estSaoDist(count, tempOffset, offsetOrg, shift);
> -        tempCost   = ((double)tempDist + lambda * (double)tempRate);
> +        double tempCost   = ((double)tempDist + lambda *
> (double)tempRate);
>          if (tempCost < tempMinCost)
>          {
>              tempMinCost = tempCost;
> @@ -2267,10 +2172,7 @@
>  void SAO::saoComponentParamDist(int allowMergeLeft, int allowMergeUp,
> SAOParam *saoParam, int addr, int addrUp, int addrLeft, int plane,
>                                  SaoLcuParam *compSaoParam, double
> *compDistortion)
>  {
> -    int typeIdx;
> -
>      int64_t estDist;
> -    int classIdx;
>      int64_t bestDist;
>
>      SaoLcuParam* saoLcuParam = &(saoParam->saoLcuParam[plane][addr]);
> @@ -2287,7 +2189,6 @@
>      double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
>
>      SaoLcuParam saoLcuParamRdo;
> -    double estRate = 0;
>
>      resetSaoUnit(&saoLcuParamRdo);
>
> @@ -2298,18 +2199,16 @@
>      copySaoUnit(saoLcuParam, &saoLcuParamRdo);
>      bestDist = 0;
>
> -    for (typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
> +    for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
>      {
>          estDist = estSaoTypeDist(plane, typeIdx, 0, m_lumaLambda,
> currentDistortionTableBo, currentRdCostTableBo);
>
>          if (typeIdx == SAO_BO)
>          {
>              // Estimate Best Position
> -            double currentRDCost = 0.0;
> -
> -            for (int i = 0; i < SAO_MAX_BO_CLASSES - SAO_BO_LEN + 1; i++)
> +            for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1; i++)
>              {
> -                currentRDCost = 0.0;
> +                double currentRDCost = 0.0;
>                  for (int j = i; j < i + SAO_BO_LEN; j++)
>                      currentRDCost += currentRdCostTableBo[j];
>
> @@ -2323,23 +2222,22 @@
>              // Re code all Offsets
>              // Code Center
>              estDist = 0;
> -            for (classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)
> +            for (int classIdx = bestClassTableBo; classIdx < bestClassTableBo + SAO_BO_LEN; classIdx++)
>                  estDist += currentDistortionTableBo[classIdx];
>          }
>          resetSaoUnit(&saoLcuParamRdo);
> -        saoLcuParamRdo.length = s_numClass[typeIdx];
>          saoLcuParamRdo.typeIdx = typeIdx;
>          saoLcuParamRdo.mergeLeftFlag = 0;
>          saoLcuParamRdo.mergeUpFlag   = 0;
>          saoLcuParamRdo.subTypeIdx = (typeIdx == SAO_BO) ?
> bestClassTableBo : 0;
> -        for (classIdx = 0; classIdx < saoLcuParamRdo.length; classIdx++)
> +        for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
>              saoLcuParamRdo.offset[classIdx] =
> (int)m_offset[plane][typeIdx][classIdx + saoLcuParamRdo.subTypeIdx + 1];
>
>          m_entropyCoder.load(m_rdEntropyCoders[0][CI_TEMP_BEST]);
>          m_entropyCoder.resetBits();
>          m_entropyCoder.codeSaoOffset(&saoLcuParamRdo, plane);
>
> -        estRate = m_entropyCoder.getNumberOfWrittenBits();
> +        uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>          m_cost[plane][typeIdx] = (double)((double)estDist + m_lumaLambda
> * (double)estRate);
>
>          if (m_cost[plane][typeIdx] < dCostPartBest)
> @@ -2367,12 +2265,12 @@
>          if (saoLcuParamNeighbor != NULL)
>          {
>              estDist = 0;
> -            typeIdx = saoLcuParamNeighbor->typeIdx;
> +            int typeIdx = saoLcuParamNeighbor->typeIdx;
>              if (typeIdx >= 0)
>              {
>                  int mergeBandPosition = (typeIdx == SAO_BO) ?
> saoLcuParamNeighbor->subTypeIdx : 0;
>                  int mergeOffset;
> -                for (classIdx = 0; classIdx < s_numClass[typeIdx];
> classIdx++)
> +                for (int classIdx = 0; classIdx < SAO_NUM_OFFSET;
> classIdx++)
>                  {
>                      mergeOffset = saoLcuParamNeighbor->offset[classIdx];
>                      estDist +=
> estSaoDist(m_count[plane][typeIdx][classIdx + mergeBandPosition + 1],
> mergeOffset, m_offsetOrg[plane][typeIdx][classIdx + mergeBandPosition +
> 1],  0);
> @@ -2395,8 +2293,6 @@
>  {
>      int64_t estDist[2];
>      int64_t bestDist = 0;
> -    int typeIdx;
> -    int classIdx;
>
>      SaoLcuParam* saoLcuParam[2] = { &(saoParam->saoLcuParam[1][addr]),
> &(saoParam->saoLcuParam[2][addr]) };
>      SaoLcuParam* saoLcuParamNeighbor[2] = { NULL, NULL };
> @@ -2417,7 +2313,6 @@
>      double costPartBest = MAX_DOUBLE;
>      double bestRDCostTableBo;
>      double currentRdCostTableBo[MAX_NUM_SAO_CLASS];
> -    double estRate = 0;
>      int    bestClassTableBo[2] = { 0, 0 };
>      int    currentDistortionTableBo[MAX_NUM_SAO_CLASS];
>
> @@ -2435,19 +2330,18 @@
>      copySaoUnit(saoLcuParam[0], &saoLcuParamRdo[0]);
>      copySaoUnit(saoLcuParam[1], &saoLcuParamRdo[1]);
>
> -    for (typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
> +    for (int typeIdx = 0; typeIdx < MAX_NUM_SAO_TYPE; typeIdx++)
>      {
>          if (typeIdx == SAO_BO)
>          {
>              // Estimate Best Position
>              for (int compIdx = 0; compIdx < 2; compIdx++)
>              {
> -                double currentRDCost = 0.0;
>                  bestRDCostTableBo = MAX_DOUBLE;
>                  estDist[compIdx] = estSaoTypeDist(compIdx + 1, typeIdx,
> 0, m_chromaLambda, currentDistortionTableBo, currentRdCostTableBo);
> -                for (int i = 0; i < SAO_MAX_BO_CLASSES - SAO_BO_LEN + 1;
> i++)
> +                for (int i = 0; i < SAO_NUM_BO_CLASSES - SAO_BO_LEN + 1;
> i++)
>                  {
> -                    currentRDCost = 0.0;
> +                    double currentRDCost = 0.0;
>                      for (int j = i; j < i + SAO_BO_LEN; j++)
>                          currentRDCost += currentRdCostTableBo[j];
>
> @@ -2461,7 +2355,7 @@
>                  // Re code all Offsets
>                  // Code Center
>                  estDist[compIdx] = 0;
> -                for (classIdx = bestClassTableBo[compIdx]; classIdx <
> bestClassTableBo[compIdx] + SAO_BO_LEN; classIdx++)
> +                for (int classIdx = bestClassTableBo[compIdx]; classIdx <
> bestClassTableBo[compIdx] + SAO_BO_LEN; classIdx++)
>                      estDist[compIdx] +=
> currentDistortionTableBo[classIdx];
>              }
>          }
> @@ -2477,18 +2371,17 @@
>          for (int compIdx = 0; compIdx < 2; compIdx++)
>          {
>              resetSaoUnit(&saoLcuParamRdo[compIdx]);
> -            saoLcuParamRdo[compIdx].length = s_numClass[typeIdx];
>              saoLcuParamRdo[compIdx].typeIdx = typeIdx;
>              saoLcuParamRdo[compIdx].mergeLeftFlag = 0;
>              saoLcuParamRdo[compIdx].mergeUpFlag   = 0;
>              saoLcuParamRdo[compIdx].subTypeIdx = (typeIdx == SAO_BO) ?
> bestClassTableBo[compIdx] : 0;
> -            for (classIdx = 0; classIdx < saoLcuParamRdo[compIdx].length;
> classIdx++)
> +            for (int classIdx = 0; classIdx < SAO_NUM_OFFSET; classIdx++)
>                  saoLcuParamRdo[compIdx].offset[classIdx] =
> (int)m_offset[compIdx + 1][typeIdx][classIdx +
> saoLcuParamRdo[compIdx].subTypeIdx + 1];
>
>              m_entropyCoder.codeSaoOffset(&saoLcuParamRdo[compIdx],
> compIdx + 1);
>          }
>
> -        estRate = m_entropyCoder.getNumberOfWrittenBits();
> +        uint32_t estRate = m_entropyCoder.getNumberOfWrittenBits();
>          m_cost[1][typeIdx] = (double)((double)(estDist[0] + estDist[1]) +
> m_chromaLambda * (double)estRate);
>
>          if (m_cost[1][typeIdx] < costPartBest)
> @@ -2520,11 +2413,11 @@
>              if (saoLcuParamNeighbor[compIdx] != NULL)
>              {
>                  estDist[compIdx] = 0;
> -                typeIdx = saoLcuParamNeighbor[compIdx]->typeIdx;
> +                int typeIdx = saoLcuParamNeighbor[compIdx]->typeIdx;
>                  if (typeIdx >= 0)
>                  {
>                      int mergeBandPosition = (typeIdx == SAO_BO) ?
> saoLcuParamNeighbor[compIdx]->subTypeIdx : 0;
> -                    for (classIdx = 0; classIdx < s_numClass[typeIdx];
> classIdx++)
> +                    for (int classIdx = 0; classIdx < SAO_NUM_OFFSET;
> classIdx++)
>                      {
>                          int mergeOffset =
> saoLcuParamNeighbor[compIdx]->offset[classIdx];
>                          estDist[compIdx] += estSaoDist(m_count[compIdx +
> 1][typeIdx][classIdx + mergeBandPosition + 1], mergeOffset,
> m_offsetOrg[compIdx + 1][typeIdx][classIdx + mergeBandPosition + 1],  0);
> diff -r 7e29b10982d2 -r 8a2312df90f9 source/encoder/sao.h
> --- a/source/encoder/sao.h      Thu Sep 11 19:24:28 2014 +0530
> +++ b/source/encoder/sao.h      Fri Sep 12 11:01:54 2014 +0900
> @@ -36,7 +36,7 @@
>  {
>      SAO_EO_LEN = 4,
>      SAO_BO_LEN = 4,
> -    SAO_MAX_BO_CLASSES = 32
> +    SAO_NUM_BO_CLASSES = 32
>  };
>
>  enum SAOType
> @@ -55,15 +55,13 @@
>
>      enum { SAO_MAX_DEPTH = 4 };
>      enum { SAO_BO_BITS  = 5 };
> -    enum { LUMA_GROUP_NUM = 1 << SAO_BO_BITS };
> -    enum { MAX_NUM_SAO_OFFSETS = 4 };
>      enum { MAX_NUM_SAO_CLASS = 33 };
>      enum { SAO_BIT_INC = X265_MAX(X265_DEPTH - 10, 0) };
>      enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
> +    enum { NUM_EDGETYPE = 5 };
>
>      static const int      s_numCulPartsLevel[5];
> -    static const int      s_numClass[MAX_NUM_SAO_TYPE];
> -    static const uint32_t s_eoTable[9];
> +    static const uint32_t s_eoTable[NUM_EDGETYPE];
>
>      typedef int64_t (PerClass[MAX_NUM_SAO_TYPE][MAX_NUM_SAO_CLASS]);
>      typedef int64_t (PerType[MAX_NUM_SAO_TYPE]);
> @@ -86,9 +84,8 @@
>      PerPlane*   m_offsetOrgPreDblk;
>
>      double      m_depthSaoRate[2][4];
> -    int32_t*    m_offsetBo;
> -    int32_t*    m_chromaOffsetBo;
> -    int8_t      m_offsetEo[LUMA_GROUP_NUM];
> +    pixel*      m_offsetBo;
> +    int8_t      m_offsetEo[NUM_EDGETYPE];
>
>      int         m_maxSplitLevel;
>
> @@ -100,7 +97,6 @@
>
>      pixel*      m_clipTable;
>      pixel*      m_clipTableBase;
> -    pixel*      m_tableBo;
>
>      pixel*      m_tmpU1[3];
>      pixel*      m_tmpU2[3];
> _______________________________________________
> x265-devel mailing list
> x265-devel@videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel

Reply via email to