Ashok/Santhoshini - please review. Does removing the offset[2] field from the CU struct affect any planned optimizations?
On Sat, Sep 27, 2014 at 7:03 AM, <[email protected]> wrote: > # HG changeset patch > # User David T Yuen <[email protected]> > # Date 1411781537 25200 > # Node ID 85098db291ae133981419868685358227b8b1437 > # Parent 4b18a27b52ac69a16805c2b455d4f891cdd4a057 > Changes for loadCTUData > > Replaced getDepthScanIdx() with table g_depthScanIdx > Moved Analysis::loadCTUData to TComDataCU::loadCTUData since it only works > with TComDataCU fields > Replaced CU.offsets[2] with local variables in loadCTUData since that is > the only place it was set and used > minor changes to reduce the number of local variables in loadCTUData > > diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComDataCU.cpp > --- a/source/Lib/TLibCommon/TComDataCU.cpp Fri Sep 26 10:48:07 2014 > +0530 > +++ b/source/Lib/TLibCommon/TComDataCU.cpp Fri Sep 26 18:32:17 2014 > -0700 > @@ -2407,4 +2407,43 @@ > result.firstSignificanceMapContext = bIsLuma ? 21 : 12; > } > > +void TComDataCU::loadCTUData(uint32_t maxCUSize) > +{ > + // Initialize the coding blocks inside the CTB > + for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; > log2CUSize >= MIN_LOG2_CU_SIZE; log2CUSize--) > + { > + uint32_t blockSize = 1 << log2CUSize; > + uint32_t sbWidth = 1 << (g_log2Size[maxCUSize] - log2CUSize); > + int32_t last_level_flag = log2CUSize == MIN_LOG2_CU_SIZE; > + for (uint32_t sb_y = 0; sb_y < sbWidth; sb_y++) > + { > + for (uint32_t sb_x = 0; sb_x < sbWidth; sb_x++) > + { > + uint32_t depth_idx = g_depthScanIdx[sb_y][sb_x]; > + uint32_t cuIdx = rangeCUIdx + depth_idx; > + uint32_t child_idx = rangeCUIdx + sbWidth * sbWidth + > (depth_idx << 2); > + uint32_t px = m_cuPelX + sb_x * blockSize; > + uint32_t py = m_cuPelY + sb_y * blockSize; > + int32_t present_flag = px < > m_pic->m_origPicYuv->m_picWidth && py < m_pic->m_origPicYuv->m_picHeight; > + int32_t split_mandatory_flag = present_flag && > !last_level_flag && (px + blockSize > m_pic->m_origPicYuv->m_picWidth || py > + blockSize > 
m_pic->m_origPicYuv->m_picHeight); > + > + /* Offset of the luma CU in the X, Y direction in terms > of pixels from the CTU origin */ > + uint32_t xOffset = (sb_x * blockSize) >> 3; > + uint32_t yOffset = (sb_y * blockSize) >> 3; > + > + CU *cu = m_CULocalData + cuIdx; > + cu->log2CUSize = log2CUSize; > + cu->childIdx = child_idx; > + cu->encodeIdx = g_depthScanIdx[yOffset][xOffset]; > + cu->flags = 0; > + > + CU_SET_FLAG(cu->flags, CU::PRESENT, present_flag); > + CU_SET_FLAG(cu->flags, CU::SPLIT_MANDATORY | CU::SPLIT, > split_mandatory_flag); > + CU_SET_FLAG(cu->flags, CU::LEAF, last_level_flag); > + } > + } > + rangeCUIdx += sbWidth * sbWidth; > + } > +} > + > //! \} > diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComDataCU.h > --- a/source/Lib/TLibCommon/TComDataCU.h Fri Sep 26 10:48:07 2014 > +0530 > +++ b/source/Lib/TLibCommon/TComDataCU.h Fri Sep 26 18:32:17 2014 > -0700 > @@ -114,7 +114,6 @@ > uint32_t log2CUSize; // Log of the CU size. > uint32_t childIdx; // Index of the first child CU > uint32_t encodeIdx; // Encoding index of this CU in terms of 8x8 > blocks. > - uint32_t offset[2]; // Offset of the luma CU in the X, Y direction > in terms of pixels from the CTU origin > uint32_t flags; // CU flags. 
> }; > > @@ -274,6 +273,7 @@ > void initCU(Frame* pic, uint32_t cuAddr); > void initEstData(); > void initSubCU(TComDataCU* cu, uint32_t partUnitIdx, > uint32_t depth, int qp); > + void loadCTUData(uint32_t maxCUSize); > > void copyToSubCU(TComDataCU* lcu, uint32_t partUnitIdx, > uint32_t depth); > void copyPartFrom(TComDataCU* cu, uint32_t partUnitIdx, > uint32_t depth, bool isRDObasedAnalysis = true); > diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComRom.cpp > --- a/source/Lib/TLibCommon/TComRom.cpp Fri Sep 26 10:48:07 2014 +0530 > +++ b/source/Lib/TLibCommon/TComRom.cpp Fri Sep 26 18:32:17 2014 -0700 > @@ -517,5 +517,18 @@ > {256, 64, 16, 4} > }; > > +/* g_depthScanIdx [y][x] */ > +const uint32_t g_depthScanIdx[8][8] = > +{ > + { 0, 1, 4, 5, 16, 17, 20, 21, }, > + { 2, 3, 6, 7, 18, 19, 22, 23, }, > + { 8, 9, 12, 13, 24, 25, 28, 29, }, > + { 10, 11, 14, 15, 26, 27, 30, 31, }, > + { 32, 33, 36, 37, 48, 49, 52, 53, }, > + { 34, 35, 38, 39, 50, 51, 54, 55, }, > + { 40, 41, 44, 45, 56, 57, 60, 61, }, > + { 42, 43, 46, 47, 58, 59, 62, 63, } > +}; > + > } > //! \} > diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComRom.h > --- a/source/Lib/TLibCommon/TComRom.h Fri Sep 26 10:48:07 2014 +0530 > +++ b/source/Lib/TLibCommon/TComRom.h Fri Sep 26 18:32:17 2014 -0700 > @@ -159,6 +159,8 @@ > > extern const uint32_t g_depthInc[3][4]; > > +extern const uint32_t g_depthScanIdx[8][8]; > + > } > > #endif //ifndef X265_TCOMROM_H > diff -r 4b18a27b52ac -r 85098db291ae source/encoder/analysis.cpp > --- a/source/encoder/analysis.cpp Fri Sep 26 10:48:07 2014 +0530 > +++ b/source/encoder/analysis.cpp Fri Sep 26 18:32:17 2014 -0700 > @@ -30,32 +30,6 @@ > > using namespace x265; > > -namespace { > -// TO DO: Remove this function with a table. 
> -int getDepthScanIdx(int x, int y, int size) > -{ > - if (size == 1) > - return 0; > - > - int depth = 0; > - int h = size >> 1; > - > - if (x >= h) > - { > - x -= h; > - depth += h * h; > - } > - > - if (y >= h) > - { > - y -= h; > - depth += 2 * h * h; > - } > - > - return depth + getDepthScanIdx(x, y, h); > -} > -} > - > Analysis::Analysis() > { > m_bestPredYuv = NULL; > @@ -253,47 +227,6 @@ > delete [] m_origYuv; > } > > -void Analysis::loadCTUData(TComDataCU* parentCU) > -{ > - uint8_t cuRange[2]= {MIN_LOG2_CU_SIZE, > g_log2Size[m_param->maxCUSize]}; > - > - // Initialize the coding blocks inside the CTB > - for (int rangeIdx = cuRange[1], rangeCUIdx = 0; rangeIdx >= > cuRange[0]; rangeIdx--) > - { > - uint32_t log2CUSize = rangeIdx; > - int32_t blockSize = 1 << log2CUSize; > - uint32_t b8Width = 1 << (cuRange[1] - 3); > - uint32_t sbWidth = 1 << (cuRange[1] - rangeIdx); > - int32_t last_level_flag = rangeIdx == cuRange[0]; > - for (uint32_t sb_y = 0; sb_y < sbWidth; sb_y++) > - { > - for (uint32_t sb_x = 0; sb_x < sbWidth; sb_x++) > - { > - uint32_t depth_idx = getDepthScanIdx(sb_x, sb_y, sbWidth); > - uint32_t cuIdx = rangeCUIdx + depth_idx; > - uint32_t child_idx = rangeCUIdx + sbWidth * sbWidth + > (depth_idx << 2); > - int32_t px = parentCU->getCUPelX() + sb_x * blockSize; > - int32_t py = parentCU->getCUPelY() + sb_y * blockSize; > - int32_t present_flag = px < > parentCU->m_pic->m_origPicYuv->m_picWidth && py < > parentCU->m_pic->m_origPicYuv->m_picHeight; > - int32_t split_mandatory_flag = present_flag && > !last_level_flag && (px + blockSize > > parentCU->m_pic->m_origPicYuv->m_picWidth || py + blockSize > > parentCU->m_pic->m_origPicYuv->m_picHeight); > - > - CU *cu = parentCU->m_CULocalData + cuIdx; > - cu->log2CUSize = log2CUSize; > - cu->childIdx = child_idx; > - cu->offset[0] = sb_x * blockSize; > - cu->offset[1] = sb_y * blockSize; > - cu->encodeIdx = getDepthScanIdx(cu->offset[0] >> 3, > cu->offset[1] >> 3, b8Width); > - cu->flags = 0; > - > 
- CU_SET_FLAG(cu->flags, CU::PRESENT, present_flag); > - CU_SET_FLAG(cu->flags, CU::SPLIT_MANDATORY | CU::SPLIT, > split_mandatory_flag); > - CU_SET_FLAG(cu->flags, CU::LEAF, last_level_flag); > - } > - } > - rangeCUIdx += sbWidth * sbWidth; > - } > -} > - > void Analysis::compressCU(TComDataCU* cu) > { > Frame* pic = cu->m_pic; > diff -r 4b18a27b52ac -r 85098db291ae source/encoder/analysis.h > --- a/source/encoder/analysis.h Fri Sep 26 10:48:07 2014 +0530 > +++ b/source/encoder/analysis.h Fri Sep 26 18:32:17 2014 -0700 > @@ -104,7 +104,6 @@ > bool create(uint32_t totalDepth, uint32_t maxWidth); > void destroy(); > void compressCU(TComDataCU* cu); > - void loadCTUData(TComDataCU* cu); > > protected: > > diff -r 4b18a27b52ac -r 85098db291ae source/encoder/frameencoder.cpp > --- a/source/encoder/frameencoder.cpp Fri Sep 26 10:48:07 2014 +0530 > +++ b/source/encoder/frameencoder.cpp Fri Sep 26 18:32:17 2014 -0700 > @@ -686,7 +686,7 @@ > // load current best state from go-on entropy coder > curRow.rdEntropyCoders[0][CI_CURR_BEST].load(rowCoder); > > - tld.analysis.loadCTUData(cu); > + cu->loadCTUData(m_param->maxCUSize); > tld.analysis.m_quant.setQPforQuant(cu); > tld.analysis.compressCU(cu); // Does all the CU analysis > > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
