Re: [x265] [PATCH] analysis: add CU specific details to encodeCU()
Ok, thanks. please send a follow-on patch cleaning up both encodeCU and compressCu functions. On Tue, Sep 16, 2014 at 11:32 AM, Santhoshini Sekar santhosh...@multicorewareinc.com wrote: On Tue, Sep 16, 2014 at 10:56 AM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: On Tue, Sep 16, 2014 at 9:45 AM, santhosh...@multicorewareinc.com wrote: # HG changeset patch # User Santhoshini Sekar santhosh...@multicorewareinc.com # Date 1410840429 -19800 # Tue Sep 16 09:37:09 2014 +0530 # Node ID 50505472d3e33b775c70f2f373e1c15d17e47e66 # Parent 7e29b10982d2eb7fd79f581d6f04184522ba analysis: add CU specific details to encodeCU() diff -r 7e29b10982d2 -r 50505472d3e3 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Thu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/analysis.cpp Tue Sep 16 09:37:09 2014 +0530 @@ -301,7 +301,6 @@ { if (cu-m_slice-m_pps-bUseDQP) m_bEncodeDQP = true; -loadCTUData(cu); // initialize CU data m_bestCU[0]-initCU(cu-m_pic, cu-getAddr()); diff -r 7e29b10982d2 -r 50505472d3e3 source/encoder/entropy.cpp --- a/source/encoder/entropy.cppThu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/entropy.cppTue Sep 16 09:37:09 2014 +0530 @@ -481,14 +481,14 @@ } } -void Entropy::encodeCTU(TComDataCU* cu) +void Entropy::encodeCTU(TComDataCU* cu, CU* cuData) { bool bEncodeDQP = cu-m_slice-m_pps-bUseDQP; -encodeCU(cu, 0, 0, false, bEncodeDQP); +encodeCU(cu, 0, 0, bEncodeDQP, cuData); } /* encode a CU block recursively */ -void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bInsidePicture, bool bEncodeDQP) +void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bEncodeDQP, CU* cuData) { Frame* pic = cu-m_pic; Slice* slice = cu-m_slice; @@ -496,30 +496,24 @@ if (depth = slice-m_pps-maxCuDQPDepth slice-m_pps-bUseDQP) bEncodeDQP = true; -if (!bInsidePicture) +int cuSplitFlag = !(cuData-flags CU::LEAF); +int cuUnsplitFlag = !(cuData-flags CU::SPLIT_MANDATORY); + +if (!cuUnsplitFlag) { -uint32_t xmax = 
slice-m_sps-picWidthInLumaSamples - cu-getCUPelX(); -uint32_t ymax = slice-m_sps-picHeightInLumaSamples - cu-getCUPelY(); -uint32_t cuSize = g_maxCUSize depth; - -bInsidePicture = (g_zscanToPelX[absPartIdx] + cuSize = xmax - g_zscanToPelY[absPartIdx] + cuSize = ymax); - -if (!bInsidePicture) +uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; +for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) { -uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; -for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) -{ -if (g_zscanToPelX[absPartIdx] xmax g_zscanToPelY[absPartIdx] ymax) -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, bEncodeDQP); -} - -return; +CU *childCU = cu-m_CULocalData + cuData-childIdx + partUnitIdx; +int cuPresentFlagChild = !(childCU-flags CU::PRESENT); +if (!cuPresentFlagChild) +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU); } +return; } // We need to split, so don't try these modes. 
-if (bInsidePicture depth g_maxCUDepth) +if (cuSplitFlag) codeSplitFlag(cu, absPartIdx, depth); if (depth cu-getDepth(absPartIdx) depth g_maxCUDepth) @@ -527,7 +521,10 @@ uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, bEncodeDQP); +{ +CU *childCU = cu-m_CULocalData + cuData-childIdx + partUnitIdx; +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU); +} return; } diff -r 7e29b10982d2 -r 50505472d3e3 source/encoder/entropy.h --- a/source/encoder/entropy.h Thu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/entropy.h Tue Sep 16 09:37:09 2014 +0530 @@ -148,7 +148,7 @@ void codeShortTermRefPicSet(RPS* rps); void finishSlice() { encodeBinTrm(1); finish(); dynamic_castBitstream*(m_bitIf)-writeByteAlignment(); } -void encodeCTU(TComDataCU* cu); +void encodeCTU(TComDataCU* cu, CU *cuData); void codeSaoOffset(SaoLcuParam* saoLcuParam, uint32_t compIdx); void codeSaoUnitInterleaving(int compIdx, bool saoFlag, int rx, int ry, SaoLcuParam* saoLcuParam, int cuAddrInSlice, int cuAddrUpInSlice, int allowMergeLeft, int allowMergeUp); void codeSaoMerge(uint32_t code) { encodeBin(code,
[x265] [PATCH] analysis: add CU specific details to encodeCU()
# HG changeset patch # User Santhoshini Sekar santhosh...@multicorewareinc.com # Date 1410848612 -19800 # Tue Sep 16 11:53:32 2014 +0530 # Node ID 74b5192133a548c492b8b2cb34dde8242107900e # Parent 7e29b10982d2eb7fd79f581d6f04184522ba analysis: add CU specific details to encodeCU() diff -r 7e29b10982d2 -r 74b5192133a5 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Thu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/analysis.cpp Tue Sep 16 11:53:32 2014 +0530 @@ -301,7 +301,6 @@ { if (cu-m_slice-m_pps-bUseDQP) m_bEncodeDQP = true; -loadCTUData(cu); // initialize CU data m_bestCU[0]-initCU(cu-m_pic, cu-getAddr()); diff -r 7e29b10982d2 -r 74b5192133a5 source/encoder/entropy.cpp --- a/source/encoder/entropy.cppThu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/entropy.cppTue Sep 16 11:53:32 2014 +0530 @@ -484,11 +484,11 @@ void Entropy::encodeCTU(TComDataCU* cu) { bool bEncodeDQP = cu-m_slice-m_pps-bUseDQP; -encodeCU(cu, 0, 0, false, bEncodeDQP); +encodeCU(cu, 0, 0, bEncodeDQP, cu-m_CULocalData ); } /* encode a CU block recursively */ -void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bInsidePicture, bool bEncodeDQP) +void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bEncodeDQP, CU* cuData) { Frame* pic = cu-m_pic; Slice* slice = cu-m_slice; @@ -496,30 +496,24 @@ if (depth = slice-m_pps-maxCuDQPDepth slice-m_pps-bUseDQP) bEncodeDQP = true; -if (!bInsidePicture) +int cuSplitFlag = !(cuData-flags CU::LEAF); +int cuUnsplitFlag = !(cuData-flags CU::SPLIT_MANDATORY); + +if (!cuUnsplitFlag) { -uint32_t xmax = slice-m_sps-picWidthInLumaSamples - cu-getCUPelX(); -uint32_t ymax = slice-m_sps-picHeightInLumaSamples - cu-getCUPelY(); -uint32_t cuSize = g_maxCUSize depth; - -bInsidePicture = (g_zscanToPelX[absPartIdx] + cuSize = xmax - g_zscanToPelY[absPartIdx] + cuSize = ymax); - -if (!bInsidePicture) +uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; +for (uint32_t partUnitIdx = 0; partUnitIdx 4; 
partUnitIdx++, absPartIdx += qNumParts) { -uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; -for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) -{ -if (g_zscanToPelX[absPartIdx] xmax g_zscanToPelY[absPartIdx] ymax) -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, bEncodeDQP); -} - -return; +CU *childCU = cu-m_CULocalData + cuData-childIdx + partUnitIdx; +int cuPresentFlagChild = !(childCU-flags CU::PRESENT); +if (!cuPresentFlagChild) +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU); } +return; } // We need to split, so don't try these modes. -if (bInsidePicture depth g_maxCUDepth) +if (cuSplitFlag) codeSplitFlag(cu, absPartIdx, depth); if (depth cu-getDepth(absPartIdx) depth g_maxCUDepth) @@ -527,7 +521,10 @@ uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, bEncodeDQP); +{ +CU *childCU = cu-m_CULocalData + cuData-childIdx + partUnitIdx; +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU); +} return; } diff -r 7e29b10982d2 -r 74b5192133a5 source/encoder/entropy.h --- a/source/encoder/entropy.h Thu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/entropy.h Tue Sep 16 11:53:32 2014 +0530 @@ -193,7 +193,7 @@ void encodeBinsEP(uint32_t binValues, int numBins); void encodeBinTrm(uint32_t binValue); -void encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bInsidePicture, bool bEncodeDQP); +void encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bEncodeDQP, CU *cuData); void finishCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth); void writeOut(); diff -r 7e29b10982d2 -r 74b5192133a5 source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Thu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/frameencoder.cpp Tue Sep 16 11:53:32 2014 +0530 @@ -470,6 +470,7 @@ } } +m_tld.cuCoder.loadCTUData(cu); // final coding (bitstream 
generation) for this CU m_entropyCoder.encodeCTU(cu); @@ -689,6 +690,7 @@ // load current best state from go-on entropy coder curRow.rdEntropyCoders[0][CI_CURR_BEST].load(rowCoder); +tld.cuCoder.loadCTUData(cu); tld.cuCoder.m_quant.setQPforQuant(cu); tld.cuCoder.compressCU(cu); // Does all the CU
[x265] Fwd: [PATCH] denoiseDct: unit test code
-- Forwarded message -- From: Steve Borho st...@borho.org Date: Mon, Sep 15, 2014 at 4:28 PM Subject: Re: [x265] [PATCH] denoiseDct: unit test code To: Development for x265 x265-devel@videolan.org On 09/15, prav...@multicorewareinc.com wrote: # HG changeset patch # User Praveen Tiwari # Date 1410775657 -19800 # Node ID 36f5477f54ba8047f9abc1b42c5b56c6d223dc5a # Parent 184e56afa951815f4e295b4fcce094ee03361a2e denoiseDct: unit test code a few nits and questions diff -r 184e56afa951 -r 36f5477f54ba source/test/mbdstharness.cpp --- a/source/test/mbdstharness.cppFri Sep 12 12:02:46 2014 +0530 +++ b/source/test/mbdstharness.cppMon Sep 15 15:37:37 2014 +0530 @@ -66,14 +66,17 @@ short_test_buff[0][i]= (rand() PIXEL_MAX) - (rand() PIXEL_MAX); int_test_buff[0][i] = rand() % PIXEL_MAX; int_idct_test_buff[0][i] = (rand() % (SHORT_MAX - SHORT_MIN)) - SHORT_MAX; +int_denoise_test_buff1[0][i] = int_denoise_test_buff2[0][i] = (rand() UNSIGNED_SHORT_MAX) - (rand() UNSIGNED_SHORT_MAX); short_test_buff[1][i]= -PIXEL_MAX; int_test_buff[1][i] = -PIXEL_MAX; int_idct_test_buff[1][i] = SHORT_MIN; +int_denoise_test_buff1[1][i] = int_denoise_test_buff2[1][i] = -UNSIGNED_SHORT_MAX; short_test_buff[2][i]= PIXEL_MAX; int_test_buff[2][i] = PIXEL_MAX; int_idct_test_buff[2][i] = SHORT_MAX; +int_denoise_test_buff1[2][i] = int_denoise_test_buff2[1][i] = UNSIGNED_SHORT_MAX; mbuf1[i] = rand() PIXEL_MAX; mbufdct[i] = (rand() PIXEL_MAX) - (rand() PIXEL_MAX); @@ -313,6 +316,46 @@ return true; } +bool MBDstHarness::check_denoise_dct_primitive(denoiseDct_t ref, denoiseDct_t opt) +{ +int j = 0; + +for (int i = 0; i 4; i++) +{ +int log2TrSize = i + 2; +int num = 1 (log2TrSize * 2); This loop second confuses me? what's the point of it? 
+for (int n = 0; n = num; n++) +{ +memset(mubuf1, 0, num * sizeof(uint32_t)); +memset(mubuf2, 0, num * sizeof(uint32_t)); +memset(mushortbuf1, 0, num * sizeof(uint16_t)); + +for (int k = 0; k n; j++) +{ +mushortbuf1[k] = rand() % UNSIGNED_SHORT_MAX; +} we don't use braces for single-line expressions +int index = rand() % TEST_CASES; +int cmp_size = sizeof(int) * num; + +ref(int_denoise_test_buff1[index] + j, mubuf1, mushortbuf1, num); +checked(opt, int_denoise_test_buff2[index] + j, mubuf2, mushortbuf1, num); + +if (memcmp(int_denoise_test_buff1[index] + j, int_denoise_test_buff2[index] + j, cmp_size)) +return false; white-space +if (memcmp(mubuf1, mubuf2, cmp_size)) +return false; + +reportfail(); +j += INCR; is this bounds safe? TEST_BUF_SIZE is allocated for a max of ITERS iterations (128). It seems like num can be 32*32. +} +} + +return true; +} + bool MBDstHarness::testCorrectness(const EncoderPrimitives ref, const EncoderPrimitives opt) { for (int i = 0; i NUM_DCTS; i++) @@ -393,6 +436,15 @@ } } +if (opt.denoiseDct) +{ +if (!check_denoise_dct_primitive(ref.denoiseDct, opt.denoiseDct)) +{ +printf(denoiseDct: Failed!\n); +return false; +} +} + return true; } @@ -448,4 +500,10 @@ REPORT_SPEEDUP(opt.count_nonzero, ref.count_nonzero, mbuf1, i * i) } } + +if (opt.denoiseDct) +{ +printf(denoiseDct\t\t); +REPORT_SPEEDUP(opt.denoiseDct, ref.denoiseDct, int_denoise_test_buff1[0], mubuf1, mushortbuf1, 32 * 32); +} } diff -r 184e56afa951 -r 36f5477f54ba source/test/mbdstharness.h --- a/source/test/mbdstharness.h Fri Sep 12 12:02:46 2014 +0530 +++ b/source/test/mbdstharness.h Mon Sep 15 15:37:37 2014 +0530 @@ -44,6 +44,10 @@ int16_t mbufdct[TEST_BUF_SIZE]; int mbufidct[TEST_BUF_SIZE]; +ALIGN_VAR_32(uint32_t, mubuf1[MAX_TU_SIZE]); +ALIGN_VAR_32(uint32_t, mubuf2[MAX_TU_SIZE]); +ALIGN_VAR_32(uint16_t, mushortbuf1[MAX_TU_SIZE]); does denoise need all new buffers? can it reuse existing buffers? 
I need unsigned buffers, so I preferred to add new ones rather than reinterpret the signed buffers as unsigned through type casting. The rest of the comments I have addressed in my updated patch. There's no need to declare them aligned here. The first array is declared aligned, and since all the arrays below it also have aligned sizes, every array is implicitly aligned. int16_t mshortbuf2[MAX_TU_SIZE]; int16_t mshortbuf3[MAX_TU_SIZE]; @@ -56,6 +60,9 @@ int int_test_buff[TEST_CASES][TEST_BUF_SIZE]; int int_idct_test_buff[TEST_CASES][TEST_BUF_SIZE]; +int
[x265] [PATCH] denoiseDct: test bench code
# HG changeset patch # User Praveen Tiwari # Date 1410850230 -19800 # Node ID 4459645048ab655734a7544c7b10d904bb8d9e46 # Parent 1de67321275e70d510f0df3d5b7d4b9d391a1e66 denoiseDct: test bench code diff -r 1de67321275e -r 4459645048ab source/test/mbdstharness.cpp --- a/source/test/mbdstharness.cpp Mon Sep 15 15:00:13 2014 +0200 +++ b/source/test/mbdstharness.cpp Tue Sep 16 12:20:30 2014 +0530 @@ -66,14 +66,17 @@ short_test_buff[0][i]= (rand() PIXEL_MAX) - (rand() PIXEL_MAX); int_test_buff[0][i] = rand() % PIXEL_MAX; int_idct_test_buff[0][i] = (rand() % (SHORT_MAX - SHORT_MIN)) - SHORT_MAX; +int_denoise_test_buff1[0][i] = int_denoise_test_buff2[0][i] = (rand() UNSIGNED_SHORT_MAX) - (rand() UNSIGNED_SHORT_MAX); short_test_buff[1][i]= -PIXEL_MAX; int_test_buff[1][i] = -PIXEL_MAX; int_idct_test_buff[1][i] = SHORT_MIN; +int_denoise_test_buff1[1][i] = int_denoise_test_buff2[1][i] = -UNSIGNED_SHORT_MAX; short_test_buff[2][i]= PIXEL_MAX; int_test_buff[2][i] = PIXEL_MAX; int_idct_test_buff[2][i] = SHORT_MAX; +int_denoise_test_buff1[2][i] = int_denoise_test_buff2[1][i] = UNSIGNED_SHORT_MAX; mbuf1[i] = rand() PIXEL_MAX; mbufdct[i] = (rand() PIXEL_MAX) - (rand() PIXEL_MAX); @@ -313,6 +316,45 @@ return true; } +bool MBDstHarness::check_denoise_dct_primitive(denoiseDct_t ref, denoiseDct_t opt) +{ +int j = 0; + +for (int i = 0; i 4; i++) +{ +int log2TrSize = i + 2; +int num = 1 (log2TrSize * 2); +int cmp_size = sizeof(int) * num; + +for (int i = 0; i ITERS; i++) +{ +memset(mubuf1, 0, num * sizeof(uint32_t)); +memset(mubuf2, 0, num * sizeof(uint32_t)); +memset(mushortbuf1, 0, num * sizeof(uint16_t)); + +for (int k = 0; k num; j++) +mushortbuf1[k] = rand() % UNSIGNED_SHORT_MAX; + +int index = rand() % TEST_CASES; + +ref(int_denoise_test_buff1[index] + j, mubuf1, mushortbuf1, num); +checked(opt, int_denoise_test_buff2[index] + j, mubuf2, mushortbuf1, num); + +if (memcmp(int_denoise_test_buff1[index] + j, int_denoise_test_buff2[index] + j, cmp_size)) +return false; + +if 
(memcmp(mubuf1, mubuf2, cmp_size)) +return false; + +reportfail(); +j += INCR; +} +} + +return true; +} + + bool MBDstHarness::testCorrectness(const EncoderPrimitives ref, const EncoderPrimitives opt) { for (int i = 0; i NUM_DCTS; i++) @@ -393,6 +435,15 @@ } } +if (opt.denoiseDct) +{ +if (!check_denoise_dct_primitive(ref.denoiseDct, opt.denoiseDct)) +{ +printf(denoiseDct: Failed!\n); +return false; +} +} + return true; } @@ -448,4 +499,11 @@ REPORT_SPEEDUP(opt.count_nonzero, ref.count_nonzero, mbuf1, i * i) } } + +if (opt.denoiseDct) +{ +printf(denoiseDct\t\t); +REPORT_SPEEDUP(opt.denoiseDct, ref.denoiseDct, int_denoise_test_buff1[0], mubuf1, mushortbuf1, 32 * 32); +} + } diff -r 1de67321275e -r 4459645048ab source/test/mbdstharness.h --- a/source/test/mbdstharness.hMon Sep 15 15:00:13 2014 +0200 +++ b/source/test/mbdstharness.hTue Sep 16 12:20:30 2014 +0530 @@ -56,6 +56,13 @@ int int_test_buff[TEST_CASES][TEST_BUF_SIZE]; int int_idct_test_buff[TEST_CASES][TEST_BUF_SIZE]; +uint32_t mubuf1[MAX_TU_SIZE]; +uint32_t mubuf2[MAX_TU_SIZE]; +uint16_t mushortbuf1[MAX_TU_SIZE]; + +int int_denoise_test_buff1[TEST_CASES][TEST_BUF_SIZE]; +int int_denoise_test_buff2[TEST_CASES][TEST_BUF_SIZE]; + bool check_dequant_primitive(dequant_scaling_t ref, dequant_scaling_t opt); bool check_dequant_primitive(dequant_normal_t ref, dequant_normal_t opt); bool check_quant_primitive(quant_t ref, quant_t opt); @@ -63,6 +70,7 @@ bool check_dct_primitive(dct_t ref, dct_t opt, intptr_t width); bool check_idct_primitive(idct_t ref, idct_t opt, intptr_t width); bool check_count_nonzero_primitive(count_nonzero_t ref, count_nonzero_t opt); +bool check_denoise_dct_primitive(denoiseDct_t ref, denoiseDct_t opt); public: diff -r 1de67321275e -r 4459645048ab source/test/testharness.h --- a/source/test/testharness.h Mon Sep 15 15:00:13 2014 +0200 +++ b/source/test/testharness.h Tue Sep 16 12:20:30 2014 +0530 @@ -40,6 +40,7 @@ #define PIXEL_MIN 0 #define SHORT_MAX 32767 #define SHORT_MIN -32767 +#define 
UNSIGNED_SHORT_MAX 65535 using namespace x265; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] add fanout validation module to check param compatibility
# HG changeset patch # User Sagar Kotecha sa...@multicorewareinc.com # Date 1410852729 -19800 # Tue Sep 16 13:02:09 2014 +0530 # Node ID b9b5032d1608b04e6969cad794e9d31a07813168 # Parent 67ee212bbf78f7192e5c5af6b53304468bfa55b1 add fanout validation module to check param compatibility diff -r 67ee212bbf78 -r b9b5032d1608 source/common/param.cpp --- a/source/common/param.cpp Mon Sep 15 16:09:52 2014 +0530 +++ b/source/common/param.cpp Tue Sep 16 13:02:09 2014 +0530 @@ -1187,7 +1187,7 @@ { char *buf, *s; -buf = s = X265_MALLOC(char, 2000); +buf = s = X265_MALLOC(char, MAXPARAMSIZE); if (!buf) return NULL; diff -r 67ee212bbf78 -r b9b5032d1608 source/common/param.h --- a/source/common/param.h Mon Sep 15 16:09:52 2014 +0530 +++ b/source/common/param.h Tue Sep 16 13:02:09 2014 +0530 @@ -38,6 +38,8 @@ /* this table is kept internal to avoid confusion, since log level indices start at -1 */ static const char * const logLevelNames[] = { none, error, warning, info, debug, full, 0 }; + +#define MAXPARAMSIZE 2000 } #endif // ifndef X265_PARAM_H diff -r 67ee212bbf78 -r b9b5032d1608 source/x265.cpp --- a/source/x265.cpp Mon Sep 15 16:09:52 2014 +0530 +++ b/source/x265.cpp Tue Sep 16 13:02:09 2014 +0530 @@ -227,6 +227,7 @@ uint32_t framesToBeEncoded; // number of frames to encode uint64_t totalbytes; size_t analysisRecordSize; // number of bytes read from or dumped into file +size_t analysisHeaderSize; int64_t startTime; int64_t prevUpdateTime; @@ -262,6 +263,7 @@ bool parseQPFile(x265_picture pic_org); void readAnalysisFile(x265_picture* pic, x265_param*); void writeAnalysisFile(x265_picture* pic, x265_param*); +bool validateFanout(x265_param*); }; void CLIOptions::destroy() @@ -755,6 +757,92 @@ return false; } +bool CLIOptions::validateFanout(x265_param *param) +{ +#define CMP_OPT_FANOUT(opt, param_val)\ +{\ +bErr = 0;\ +p = strstr(opts, opt =);\ +char* q = strstr(opts, no-opt);\ +if (p sscanf(p, opt =%d , i) param_val != i)\ +bErr = 1;\ +else if (!param_val !q)\ +bErr = 1;\ 
+else if (param_val (q || !strstr(opts, opt)))\ +bErr = 1;\ +if (bErr)\ +{\ +x265_log(param, X265_LOG_ERROR, different opt setting than given in analysis file (%d vs %d)\n, param_val, i);\ +X265_FREE(opts);\ +return false;\ +}\ +} + +char *p = NULL, *paramBuf, *opts; +int i, j; +uint32_t k , l; +bool bErr = false; + +opts = paramBuf = X265_MALLOC(char, MAXPARAMSIZE); +if (!paramBuf) +return false; + +fread(paramBuf, 1, MAXPARAMSIZE, this-analysisFile); + +/* check whether fanout options are compatible */ +if (strncmp(paramBuf, #options:, 9)) +{ +x265_log(param, X265_LOG_ERROR, options list in analysis file is not valid\n); +return false; +} + +paramBuf = strchr(opts, '\n'); +fseek(this-analysisFile, long(strlen(opts) + 1), SEEK_SET); +if (!paramBuf) +{ +x265_log(param, X265_LOG_ERROR, Malformed analysis file\n); +return false; +} + +if (sscanf(opts, #options: %dx%d, i, j) != 2) +{ +x265_log(param, X265_LOG_ERROR, Resolution specified in analysis file is not valid\n); +X265_FREE(opts); +return false; +} +if ((p = strstr(opts, fps=)) == 0 || sscanf(p, fps=%u/%u, k, l) != 2) +{ +x265_log(param, X265_LOG_ERROR, fps specified in analysis file is not valid\n); +X265_FREE(opts); +return false; +} +if (k != param-fpsNum || l != param-fpsDenom) +{ +x265_log(param, X265_LOG_ERROR, fps mismatch than given in analysis file (%u/%u vs %u/%u)\n, +param-fpsNum, param-fpsDenom, k, l); +X265_FREE(opts); +return false; +} + +CMP_OPT_FANOUT(bitdepth, param-internalBitDepth); +CMP_OPT_FANOUT(weightp, param-bEnableWeightedPred); +CMP_OPT_FANOUT(bframes, param-bframes); +CMP_OPT_FANOUT(b-pyramid, param-bBPyramid); +CMP_OPT_FANOUT(b-adapt, param-bFrameAdaptive); +CMP_OPT_FANOUT(open-gop, param-bOpenGOP); +CMP_OPT_FANOUT(keyint, param-keyframeMax); +CMP_OPT_FANOUT(min-keyint, param-keyframeMin); +CMP_OPT_FANOUT(scenecut, param-scenecutThreshold); +CMP_OPT_FANOUT(ctu, (int)param-maxCUSize); +CMP_OPT_FANOUT(ref, param-maxNumReferences); +CMP_OPT_FANOUT(rc-lookahead, param-lookaheadDepth); + 
+#undef CMP_OPT_FANOUT + +X265_FREE(opts); +return true; +} + void CLIOptions::readAnalysisFile(x265_picture* pic, x265_param* p) { int poc, width, height; @@ -788,7 +876,7 @@ void CLIOptions::writeAnalysisFile(x265_picture* pic, x265_param *p) { -uint64_t seekTo = pic-poc * this-analysisRecordSize; +uint64_t seekTo = pic-poc * this-analysisRecordSize + this-analysisHeaderSize;
Re: [x265] [PATCH] add fanout validation module to check param compatibility
Ignore patch. With Regards, Sagar On Tue, Sep 16, 2014 at 1:06 PM, sa...@multicorewareinc.com wrote: # HG changeset patch # User Sagar Kotecha sa...@multicorewareinc.com # Date 1410852729 -19800 # Tue Sep 16 13:02:09 2014 +0530 # Node ID b9b5032d1608b04e6969cad794e9d31a07813168 # Parent 67ee212bbf78f7192e5c5af6b53304468bfa55b1 add fanout validation module to check param compatibility diff -r 67ee212bbf78 -r b9b5032d1608 source/common/param.cpp --- a/source/common/param.cpp Mon Sep 15 16:09:52 2014 +0530 +++ b/source/common/param.cpp Tue Sep 16 13:02:09 2014 +0530 @@ -1187,7 +1187,7 @@ { char *buf, *s; -buf = s = X265_MALLOC(char, 2000); +buf = s = X265_MALLOC(char, MAXPARAMSIZE); if (!buf) return NULL; diff -r 67ee212bbf78 -r b9b5032d1608 source/common/param.h --- a/source/common/param.h Mon Sep 15 16:09:52 2014 +0530 +++ b/source/common/param.h Tue Sep 16 13:02:09 2014 +0530 @@ -38,6 +38,8 @@ /* this table is kept internal to avoid confusion, since log level indices start at -1 */ static const char * const logLevelNames[] = { none, error, warning, info, debug, full, 0 }; + +#define MAXPARAMSIZE 2000 } #endif // ifndef X265_PARAM_H diff -r 67ee212bbf78 -r b9b5032d1608 source/x265.cpp --- a/source/x265.cpp Mon Sep 15 16:09:52 2014 +0530 +++ b/source/x265.cpp Tue Sep 16 13:02:09 2014 +0530 @@ -227,6 +227,7 @@ uint32_t framesToBeEncoded; // number of frames to encode uint64_t totalbytes; size_t analysisRecordSize; // number of bytes read from or dumped into file +size_t analysisHeaderSize; int64_t startTime; int64_t prevUpdateTime; @@ -262,6 +263,7 @@ bool parseQPFile(x265_picture pic_org); void readAnalysisFile(x265_picture* pic, x265_param*); void writeAnalysisFile(x265_picture* pic, x265_param*); +bool validateFanout(x265_param*); }; void CLIOptions::destroy() @@ -755,6 +757,92 @@ return false; } +bool CLIOptions::validateFanout(x265_param *param) +{ +#define CMP_OPT_FANOUT(opt, param_val)\ +{\ +bErr = 0;\ +p = strstr(opts, opt =);\ +char* q = strstr(opts, 
no-opt);\ +if (p sscanf(p, opt =%d , i) param_val != i)\ +bErr = 1;\ +else if (!param_val !q)\ +bErr = 1;\ +else if (param_val (q || !strstr(opts, opt)))\ +bErr = 1;\ +if (bErr)\ +{\ +x265_log(param, X265_LOG_ERROR, different opt setting than given in analysis file (%d vs %d)\n, param_val, i);\ +X265_FREE(opts);\ +return false;\ +}\ +} + +char *p = NULL, *paramBuf, *opts; +int i, j; +uint32_t k , l; +bool bErr = false; + +opts = paramBuf = X265_MALLOC(char, MAXPARAMSIZE); +if (!paramBuf) +return false; + +fread(paramBuf, 1, MAXPARAMSIZE, this-analysisFile); + +/* check whether fanout options are compatible */ +if (strncmp(paramBuf, #options:, 9)) +{ +x265_log(param, X265_LOG_ERROR, options list in analysis file is not valid\n); +return false; +} + +paramBuf = strchr(opts, '\n'); +fseek(this-analysisFile, long(strlen(opts) + 1), SEEK_SET); +if (!paramBuf) +{ +x265_log(param, X265_LOG_ERROR, Malformed analysis file\n); +return false; +} + +if (sscanf(opts, #options: %dx%d, i, j) != 2) +{ +x265_log(param, X265_LOG_ERROR, Resolution specified in analysis file is not valid\n); +X265_FREE(opts); +return false; +} +if ((p = strstr(opts, fps=)) == 0 || sscanf(p, fps=%u/%u, k, l) != 2) +{ +x265_log(param, X265_LOG_ERROR, fps specified in analysis file is not valid\n); +X265_FREE(opts); +return false; +} +if (k != param-fpsNum || l != param-fpsDenom) +{ +x265_log(param, X265_LOG_ERROR, fps mismatch than given in analysis file (%u/%u vs %u/%u)\n, +param-fpsNum, param-fpsDenom, k, l); +X265_FREE(opts); +return false; +} + +CMP_OPT_FANOUT(bitdepth, param-internalBitDepth); +CMP_OPT_FANOUT(weightp, param-bEnableWeightedPred); +CMP_OPT_FANOUT(bframes, param-bframes); +CMP_OPT_FANOUT(b-pyramid, param-bBPyramid); +CMP_OPT_FANOUT(b-adapt, param-bFrameAdaptive); +CMP_OPT_FANOUT(open-gop, param-bOpenGOP); +CMP_OPT_FANOUT(keyint, param-keyframeMax); +CMP_OPT_FANOUT(min-keyint, param-keyframeMin); +CMP_OPT_FANOUT(scenecut, param-scenecutThreshold); +CMP_OPT_FANOUT(ctu, 
(int)param-maxCUSize); +CMP_OPT_FANOUT(ref, param-maxNumReferences); +CMP_OPT_FANOUT(rc-lookahead, param-lookaheadDepth); + +#undef CMP_OPT_FANOUT + +X265_FREE(opts); +return true; +} + void CLIOptions::readAnalysisFile(x265_picture* pic, x265_param* p) { int poc, width, height; @@ -788,7 +876,7 @@ void
[x265] [PATCH] analysis: Intra picture estimation information sharing
# HG changeset patch # User Gopu Govindaswamy g...@multicorewareinc.com # Date 1410857300 -19800 # Tue Sep 16 14:18:20 2014 +0530 # Node ID 61dc8322e6c0af444ba591755c299b945e1e423a # Parent 1de67321275e70d510f0df3d5b7d4b9d391a1e66 analysis: Intra picture estimation information sharing when --analysis-mode=save - the encoder runs a full encode and dump the best split and mode decisions into x265_analysis.dat(default file name if file name is not provided) file when --analysis-mode=load - the encoder reads the best split and mode decisions from x265_analysis.dat and bypass the actual split and mode decisions, and therefore perform a much faster encode diff -r 1de67321275e -r 61dc8322e6c0 source/Lib/TLibCommon/CommonDef.h --- a/source/Lib/TLibCommon/CommonDef.h Mon Sep 15 15:00:13 2014 +0200 +++ b/source/Lib/TLibCommon/CommonDef.h Tue Sep 16 14:18:20 2014 +0530 @@ -100,4 +100,6 @@ #define CHROMA_H_SHIFT(x) (x == X265_CSP_I420 || x == X265_CSP_I422) #define CHROMA_V_SHIFT(x) (x == X265_CSP_I420) +#define CTU_TO_DEPTH_INDEX 22 // index to array containing increment offsets to add into zOrder to get next depth + #endif // ifndef X265_COMMONDEF_H diff -r 1de67321275e -r 61dc8322e6c0 source/Lib/TLibCommon/TComRom.cpp --- a/source/Lib/TLibCommon/TComRom.cpp Mon Sep 15 15:00:13 2014 +0200 +++ b/source/Lib/TLibCommon/TComRom.cpp Tue Sep 16 14:18:20 2014 +0530 @@ -505,5 +505,18 @@ 0x38, }; +/* Contains how much to increment shared depth buffer for different ctu sizes to get next best depth + * here, depth 0 = 64x64, depth 1 = 32x32, depth 2 = 16x16 and depth 3 = 8x8 + * if ctu = 64, depth buffer size is 256 combination of depth values 0, 1, 2, 3 + * if ctu = 32, depth buffer size is 64 combination of depth values 1, 2, 3 + * if ctu = 16, depth buffer size is 16 combination of depth values 2, 3 */ + +const uint32_t g_depthInc[3][4] = +{ +{ 16, 4, 0, 0}, +{ 64, 16, 4, 1}, +{256, 64, 16, 4} +}; + } //! 
\} diff -r 1de67321275e -r 61dc8322e6c0 source/Lib/TLibCommon/TComRom.h --- a/source/Lib/TLibCommon/TComRom.h Mon Sep 15 15:00:13 2014 +0200 +++ b/source/Lib/TLibCommon/TComRom.h Tue Sep 16 14:18:20 2014 +0530 @@ -155,6 +155,8 @@ // Intra tables extern const uint8_t g_intraFilterFlags[35]; +extern const uint32_t g_depthInc[3][4]; + } #endif //ifndef X265_TCOMROM_H diff -r 1de67321275e -r 61dc8322e6c0 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Mon Sep 15 15:00:13 2014 +0200 +++ b/source/encoder/analysis.cpp Tue Sep 16 14:18:20 2014 +0530 @@ -311,14 +311,25 @@ uint32_t numPartition = cu-getTotalNumPart(); if (m_bestCU[0]-m_slice-m_sliceType == I_SLICE) { -compressIntraCU(m_bestCU[0], m_tempCU[0], false, cu, cu-m_CULocalData); -if (m_param-analysisMode == 1) +if (m_param-analysisMode == X265_ANALYSIS_LOAD m_bestCU[0]-m_pic-m_intraData) { -memcpy(m_bestCU[0]-m_pic-m_intraData-depth[cu-getAddr() * cu-m_numPartitions], m_bestCU[0]-getDepth(), sizeof(uint8_t) * cu-getTotalNumPart()); -memcpy(m_bestCU[0]-m_pic-m_intraData-modes[cu-getAddr() * cu-m_numPartitions], m_bestCU[0]-getLumaIntraDir(), sizeof(uint8_t) * cu-getTotalNumPart()); -memcpy(m_bestCU[0]-m_pic-m_intraData-partSizes[cu-getAddr() * cu-m_numPartitions], m_bestCU[0]-getPartitionSize(), sizeof(char) * cu-getTotalNumPart()); -m_bestCU[0]-m_pic-m_intraData-cuAddr[cu-getAddr()] = cu-getAddr(); -m_bestCU[0]-m_pic-m_intraData-poc[cu-getAddr()]= cu-m_pic-m_POC; +uint32_t zOrder = 0; +compressSharedIntraCTU(m_bestCU[0], m_tempCU[0], false, cu, cu-m_CULocalData, +m_bestCU[0]-m_pic-m_intraData-depth[cu-getAddr() * cu-m_numPartitions], +m_bestCU[0]-m_pic-m_intraData-partSizes[cu-getAddr() * cu-m_numPartitions], +m_bestCU[0]-m_pic-m_intraData-modes[cu-getAddr() * cu-m_numPartitions], zOrder); +} +else +{ +compressIntraCU(m_bestCU[0], m_tempCU[0], false, cu, cu-m_CULocalData); +if (m_param-analysisMode == X265_ANALYSIS_SAVE m_bestCU[0]-m_pic-m_intraData) +{ 
+memcpy(m_bestCU[0]-m_pic-m_intraData-depth[cu-getAddr() * cu-m_numPartitions], m_bestCU[0]-getDepth(), sizeof(uint8_t) * cu-getTotalNumPart()); +memcpy(m_bestCU[0]-m_pic-m_intraData-modes[cu-getAddr() * cu-m_numPartitions], m_bestCU[0]-getLumaIntraDir(), sizeof(uint8_t) * cu-getTotalNumPart()); + memcpy(m_bestCU[0]-m_pic-m_intraData-partSizes[cu-getAddr() * cu-m_numPartitions], m_bestCU[0]-getPartitionSize(), sizeof(char) * cu-getTotalNumPart()); +m_bestCU[0]-m_pic-m_intraData-cuAddr[cu-getAddr()] = cu-getAddr(); +m_bestCU[0]-m_pic-m_intraData-poc[cu-getAddr()]= cu-m_pic-m_POC; +} } if (m_param-bLogCuStats || m_param-rc.bStatWrite)
[x265] [PATCH] rc: fixes for 2 pass + vbv to calculate frameSizePlanned accurately
# HG changeset patch # User Aarthi Thirumalai # Date 1410757433 -19800 # Mon Sep 15 10:33:53 2014 +0530 # Node ID 49c54a540cc237659416be7d5fb53241fb0094e4 # Parent 1de67321275e70d510f0df3d5b7d4b9d391a1e66 rc: fixes for 2 pass + vbv to calculate frameSizePlanned accurately. diff -r 1de67321275e -r 49c54a540cc2 source/encoder/encoder.cpp --- a/source/encoder/encoder.cppMon Sep 15 15:00:13 2014 +0200 +++ b/source/encoder/encoder.cppMon Sep 15 10:33:53 2014 +0530 @@ -235,13 +235,10 @@ void Encoder::updateVbvPlan(RateControl* rc) { int encIdx, curIdx; - -curIdx = (m_curEncoder + m_param-frameNumThreads - 1) % m_param-frameNumThreads; -encIdx = (curIdx + 1) % m_param-frameNumThreads; -while (encIdx != curIdx) +for ( int i = 0; i m_param-frameNumThreads; i++) { -FrameEncoder *encoder = m_frameEncoder[encIdx]; -if (encoder-m_rce.isActive) +FrameEncoder *encoder = m_frameEncoder[i]; +if (encoder-m_rce.isActive encoder-m_rce.poc != rc-m_curSlice-m_poc) { int64_t bits = (int64_t) X265_MAX(encoder-m_rce.frameSizeEstimated, encoder-m_rce.frameSizePlanned); rc-m_bufferFill -= bits; @@ -251,7 +248,6 @@ if (rc-m_2pass) rc-m_predictedBits += bits; } -encIdx = (encIdx + 1) % m_param-frameNumThreads; } } diff -r 1de67321275e -r 49c54a540cc2 source/encoder/ratecontrol.cpp --- a/source/encoder/ratecontrol.cppMon Sep 15 15:00:13 2014 +0200 +++ b/source/encoder/ratecontrol.cppMon Sep 15 10:33:53 2014 +0530 @@ -1365,12 +1365,20 @@ q += m_pbOffset; rce-qpNoVbv = q; double qScale = x265_qp2qScale(q); -if (m_leadingBframes 5 m_isVbv) + +if (!m_2pass m_isVbv) { -qScale = clipQscale(pic, qScale); -m_lastQScaleFor[m_sliceType] = qScale; +if (m_leadingBframes 5) +{ +qScale = clipQscale(pic, qScale); +m_lastQScaleFor[m_sliceType] = qScale; +} +rce-frameSizePlanned = predictSize(m_predBfromP, qScale, (double)m_leadingNoBSatd); } -rce-frameSizePlanned = predictSize(m_predBfromP, qScale, (double)m_leadingNoBSatd); +else if (m_2pass m_isVbv) +{ +rce-frameSizePlanned = qScale2bits(rce, qScale); +} 
rce-frameSizeEstimated = rce-frameSizePlanned; rce-newQScale = qScale; return qScale; @@ -1400,8 +1408,7 @@ diff = m_predictedBits - (int64_t)rce-expectedBits; q = rce-newQScale; q /= Clip3(0.5, 2.0, (double)(abrBuffer - diff) / abrBuffer); -if (((rce-encodeOrder + 1 - m_param-frameNumThreads) = m_fps) -(m_expectedBitsSum 0)) +if (m_expectedBitsSum 0) { /* Adjust quant based on the difference between * achieved and expected bitrate so far */ ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] analysis: Intra picture estimation information sharing
On 09/16, Gopu Govindaswamy wrote: On Mon, Sep 15, 2014 at 4:10 PM, Steve Borho st...@borho.org wrote: We should probably also be setting the analysis pointers to NULL in the input picture structure prior to returning from x265_encoder_encode() so they do not accidentally re-use the same buffers for more than one picture. In short, we need to be a lot more defensive about API abuses. i will make the separate patch for this, but still i need to verify on this, the analysis buffer is getting used to dump the analysis data into file after x265_encoder_encode(), You can think of this in terms of ownership. 1. user calls x265_alloc_analysis_data(x265_picture*), the user now owns these buffers in the x265_picture. 2. user calls x265_encoder_encode() and the encoder copies the analysis pointers into the internal Frame structure. Now the encoder owns those buffers. The pointers in the input x265_picture are now redundant; the user should not read, write, or modify those buffers while the encoder owns them. 3. Once the frame is encoded, the buffer pointers are copied into the output picture structure. Now the user owns them again. They can do what they wish, possibly release them. 
this is true of both load and save +} +else +{ +compressIntraCU(m_bestCU[0], m_tempCU[0], false, cu, cu-m_CULocalData); +if (m_param-analysisMode == 1) +{ + memcpy(m_bestCU[0]-m_pic-m_intraData-depth[cu-getAddr() * cu-m_numPartitions], m_bestCU[0]-getDepth(), sizeof(uint8_t) * cu-getTotalNumPart()); + memcpy(m_bestCU[0]-m_pic-m_intraData-modes[cu-getAddr() * cu-m_numPartitions], m_bestCU[0]-getLumaIntraDir(), sizeof(uint8_t) * cu-getTotalNumPart()); + memcpy(m_bestCU[0]-m_pic-m_intraData-partSizes[cu-getAddr() * cu-m_numPartitions], m_bestCU[0]-getPartitionSize(), sizeof(char) * cu-getTotalNumPart()); +m_bestCU[0]-m_pic-m_intraData-cuAddr[cu-getAddr()] = cu-getAddr(); +m_bestCU[0]-m_pic-m_intraData-poc[cu-getAddr()] = cu-m_pic-m_POC; +} } if (m_param-bLogCuStats || m_param-rc.bStatWrite) { @@ -533,7 +543,142 @@ #endif } -void Analysis::checkIntra(TComDataCU* outBestCU, TComDataCU* outTempCU, PartSize partSize, CU *cu) +void Analysis::sharedCompressIntraCU(TComDataCU* outBestCU, TComDataCU* outTempCU, uint32_t depth, TComDataCU* cuPicsym, CU *cu, uint8_t* sharedDepth, char* sharedPartSizes, uint8_t* sharedModes) +{ +Frame* pic = outBestCU-m_pic; + +// if current depth == shared depth then skip further splitting. +bool bSubBranch = true; + +if (depth == 0) !depth +{ +// offset to next best depth in sharedDepth buffer +m_zorder = 0; + +// index to g_depthInc array to increment m_zorder offset to next depth +m_ctuToDepthIndex = m_param-maxCUSize / 22; this math is pretty magical. my guess is there's already a table somewhere that does this more cleanly? Does this code work with --ctu 16? i have verified and i don't find any such a table, but this logic works well for ctu size 64, 32 and 16, verified on this I'm not doubting that it works, it's just not clear what it's doing. it probably wants to be 1 (g_maxCUDepth - 2) or something similar. -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] analysis: intra picture estimation (mode and split decision)information sharing
# HG changeset patch # User Gopu Govindaswamy g...@multicorewareinc.com # Date 1410866456 -19800 # Tue Sep 16 16:50:56 2014 +0530 # Node ID 717ea14104cf32bbcafe8e9b8ddef17867807936 # Parent 1de67321275e70d510f0df3d5b7d4b9d391a1e66 analysis: intra picture estimation (mode and split decision)information sharing when --analysis-mode=save - the encoder runs a full encode and dump the best split and mode decisions into x265_analysis.dat(default file name if file name is not provided) file when --analysis-mode=load - the encoder reads the best split and mode decisions from x265_analysis.dat and bypass the actual split and mode decisions, and therefore perform a much faster encode diff -r 1de67321275e -r 717ea14104cf source/Lib/TLibCommon/TComRom.cpp --- a/source/Lib/TLibCommon/TComRom.cpp Mon Sep 15 15:00:13 2014 +0200 +++ b/source/Lib/TLibCommon/TComRom.cpp Tue Sep 16 16:50:56 2014 +0530 @@ -505,5 +505,18 @@ 0x38, }; +/* Contains how much to increment shared depth buffer for different ctu sizes to get next best depth + * here, depth 0 = 64x64, depth 1 = 32x32, depth 2 = 16x16 and depth 3 = 8x8 + * if ctu = 64, depth buffer size is 256 combination of depth values 0, 1, 2, 3 + * if ctu = 32, depth buffer size is 64 combination of depth values 1, 2, 3 + * if ctu = 16, depth buffer size is 16 combination of depth values 2, 3 */ + +const uint32_t g_depthInc[3][4] = +{ +{ 16, 4, 0, 0}, +{ 64, 16, 4, 1}, +{256, 64, 16, 4} +}; + } //! 
\} diff -r 1de67321275e -r 717ea14104cf source/Lib/TLibCommon/TComRom.h --- a/source/Lib/TLibCommon/TComRom.h Mon Sep 15 15:00:13 2014 +0200 +++ b/source/Lib/TLibCommon/TComRom.h Tue Sep 16 16:50:56 2014 +0530 @@ -155,6 +155,8 @@ // Intra tables extern const uint8_t g_intraFilterFlags[35]; +extern const uint32_t g_depthInc[3][4]; + } #endif //ifndef X265_TCOMROM_H diff -r 1de67321275e -r 717ea14104cf source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Mon Sep 15 15:00:13 2014 +0200 +++ b/source/encoder/analysis.cpp Tue Sep 16 16:50:56 2014 +0530 @@ -311,14 +311,25 @@ uint32_t numPartition = cu-getTotalNumPart(); if (m_bestCU[0]-m_slice-m_sliceType == I_SLICE) { -compressIntraCU(m_bestCU[0], m_tempCU[0], false, cu, cu-m_CULocalData); -if (m_param-analysisMode == 1) +if (m_param-analysisMode == X265_ANALYSIS_LOAD m_bestCU[0]-m_pic-m_intraData) { -memcpy(m_bestCU[0]-m_pic-m_intraData-depth[cu-getAddr() * cu-m_numPartitions], m_bestCU[0]-getDepth(), sizeof(uint8_t) * cu-getTotalNumPart()); -memcpy(m_bestCU[0]-m_pic-m_intraData-modes[cu-getAddr() * cu-m_numPartitions], m_bestCU[0]-getLumaIntraDir(), sizeof(uint8_t) * cu-getTotalNumPart()); -memcpy(m_bestCU[0]-m_pic-m_intraData-partSizes[cu-getAddr() * cu-m_numPartitions], m_bestCU[0]-getPartitionSize(), sizeof(char) * cu-getTotalNumPart()); -m_bestCU[0]-m_pic-m_intraData-cuAddr[cu-getAddr()] = cu-getAddr(); -m_bestCU[0]-m_pic-m_intraData-poc[cu-getAddr()]= cu-m_pic-m_POC; +uint32_t zOrder = 0; +compressSharedIntraCTU(m_bestCU[0], m_tempCU[0], false, cu, cu-m_CULocalData, +m_bestCU[0]-m_pic-m_intraData-depth[cu-getAddr() * cu-m_numPartitions], +m_bestCU[0]-m_pic-m_intraData-partSizes[cu-getAddr() * cu-m_numPartitions], +m_bestCU[0]-m_pic-m_intraData-modes[cu-getAddr() * cu-m_numPartitions], zOrder); +} +else +{ +compressIntraCU(m_bestCU[0], m_tempCU[0], false, cu, cu-m_CULocalData); +if (m_param-analysisMode == X265_ANALYSIS_SAVE m_bestCU[0]-m_pic-m_intraData) +{ 
+memcpy(m_bestCU[0]-m_pic-m_intraData-depth[cu-getAddr() * cu-m_numPartitions], m_bestCU[0]-getDepth(), sizeof(uint8_t) * cu-getTotalNumPart()); +memcpy(m_bestCU[0]-m_pic-m_intraData-modes[cu-getAddr() * cu-m_numPartitions], m_bestCU[0]-getLumaIntraDir(), sizeof(uint8_t) * cu-getTotalNumPart()); + memcpy(m_bestCU[0]-m_pic-m_intraData-partSizes[cu-getAddr() * cu-m_numPartitions], m_bestCU[0]-getPartitionSize(), sizeof(char) * cu-getTotalNumPart()); +m_bestCU[0]-m_pic-m_intraData-cuAddr[cu-getAddr()] = cu-getAddr(); +m_bestCU[0]-m_pic-m_intraData-poc[cu-getAddr()]= cu-m_pic-m_POC; +} } if (m_param-bLogCuStats || m_param-rc.bStatWrite) { @@ -424,9 +435,9 @@ if (cu_unsplit_flag) { m_quant.setQPforQuant(outTempCU); -checkIntra(outBestCU, outTempCU, SIZE_2Nx2N, cu); +checkIntra(outBestCU, outTempCU, SIZE_2Nx2N, cu, NULL); if (depth == g_maxCUDepth) -checkIntra(outBestCU, outTempCU, SIZE_NxN, cu); +checkIntra(outBestCU, outTempCU, SIZE_NxN, cu, NULL); else { m_entropyCoder-resetBits(); @@ -533,7 +544,141 @@ #endif
[x265] [PATCH] denoiseDct test code: fixed typo
# HG changeset patch # User Praveen Tiwari # Date 1410867512 -19800 # Node ID 6799ab5e72c7ddbff09638573a730e84c300ebb3 # Parent 4459645048ab655734a7544c7b10d904bb8d9e46 denoiseDct test code: fixed typo diff -r 4459645048ab -r 6799ab5e72c7 source/test/mbdstharness.cpp --- a/source/test/mbdstharness.cpp Tue Sep 16 12:20:30 2014 +0530 +++ b/source/test/mbdstharness.cpp Tue Sep 16 17:08:32 2014 +0530 @@ -76,7 +76,7 @@ short_test_buff[2][i]= PIXEL_MAX; int_test_buff[2][i] = PIXEL_MAX; int_idct_test_buff[2][i] = SHORT_MAX; -int_denoise_test_buff1[2][i] = int_denoise_test_buff2[1][i] = UNSIGNED_SHORT_MAX; +int_denoise_test_buff1[2][i] = int_denoise_test_buff2[2][i] = UNSIGNED_SHORT_MAX; mbuf1[i] = rand() PIXEL_MAX; mbufdct[i] = (rand() PIXEL_MAX) - (rand() PIXEL_MAX); @@ -332,7 +332,7 @@ memset(mubuf2, 0, num * sizeof(uint32_t)); memset(mushortbuf1, 0, num * sizeof(uint16_t)); -for (int k = 0; k num; j++) +for (int k = 0; k num; k++) mushortbuf1[k] = rand() % UNSIGNED_SHORT_MAX; int index = rand() % TEST_CASES; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] analysis: add CU specific details to encodeCU()
On 09/16, santhosh...@multicorewareinc.com wrote: # HG changeset patch # User Santhoshini Sekar santhosh...@multicorewareinc.com # Date 1410848612 -19800 # Tue Sep 16 11:53:32 2014 +0530 # Node ID 74b5192133a548c492b8b2cb34dde8242107900e # Parent 7e29b10982d2eb7fd79f581d6f04184522ba analysis: add CU specific details to encodeCU() diff -r 7e29b10982d2 -r 74b5192133a5 source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Thu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/analysis.cpp Tue Sep 16 11:53:32 2014 +0530 @@ -301,7 +301,6 @@ { if (cu-m_slice-m_pps-bUseDQP) m_bEncodeDQP = true; -loadCTUData(cu); // initialize CU data m_bestCU[0]-initCU(cu-m_pic, cu-getAddr()); diff -r 7e29b10982d2 -r 74b5192133a5 source/encoder/entropy.cpp --- a/source/encoder/entropy.cpp Thu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/entropy.cpp Tue Sep 16 11:53:32 2014 +0530 @@ -484,11 +484,11 @@ void Entropy::encodeCTU(TComDataCU* cu) { bool bEncodeDQP = cu-m_slice-m_pps-bUseDQP; -encodeCU(cu, 0, 0, false, bEncodeDQP); +encodeCU(cu, 0, 0, bEncodeDQP, cu-m_CULocalData ); queued with this white-space nit fixed } /* encode a CU block recursively */ -void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bInsidePicture, bool bEncodeDQP) +void Entropy::encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bEncodeDQP, CU* cuData) { Frame* pic = cu-m_pic; Slice* slice = cu-m_slice; @@ -496,30 +496,24 @@ if (depth = slice-m_pps-maxCuDQPDepth slice-m_pps-bUseDQP) bEncodeDQP = true; -if (!bInsidePicture) +int cuSplitFlag = !(cuData-flags CU::LEAF); +int cuUnsplitFlag = !(cuData-flags CU::SPLIT_MANDATORY); + +if (!cuUnsplitFlag) { -uint32_t xmax = slice-m_sps-picWidthInLumaSamples - cu-getCUPelX(); -uint32_t ymax = slice-m_sps-picHeightInLumaSamples - cu-getCUPelY(); -uint32_t cuSize = g_maxCUSize depth; - -bInsidePicture = (g_zscanToPelX[absPartIdx] + cuSize = xmax - g_zscanToPelY[absPartIdx] + cuSize = ymax); - -if (!bInsidePicture) +uint32_t 
qNumParts = (pic-getNumPartInCU() (depth 1)) 2; +for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) { -uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; -for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) -{ -if (g_zscanToPelX[absPartIdx] xmax g_zscanToPelY[absPartIdx] ymax) -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, bEncodeDQP); -} - -return; +CU *childCU = cu-m_CULocalData + cuData-childIdx + partUnitIdx; +int cuPresentFlagChild = !(childCU-flags CU::PRESENT); +if (!cuPresentFlagChild) +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU); } +return; } // We need to split, so don't try these modes. -if (bInsidePicture depth g_maxCUDepth) +if (cuSplitFlag) codeSplitFlag(cu, absPartIdx, depth); if (depth cu-getDepth(absPartIdx) depth g_maxCUDepth) @@ -527,7 +521,10 @@ uint32_t qNumParts = (pic-getNumPartInCU() (depth 1)) 2; for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++, absPartIdx += qNumParts) -encodeCU(cu, absPartIdx, depth + 1, bInsidePicture, bEncodeDQP); +{ +CU *childCU = cu-m_CULocalData + cuData-childIdx + partUnitIdx; +encodeCU(cu, absPartIdx, depth + 1, bEncodeDQP, childCU); +} return; } diff -r 7e29b10982d2 -r 74b5192133a5 source/encoder/entropy.h --- a/source/encoder/entropy.hThu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/entropy.hTue Sep 16 11:53:32 2014 +0530 @@ -193,7 +193,7 @@ void encodeBinsEP(uint32_t binValues, int numBins); void encodeBinTrm(uint32_t binValue); -void encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bInsidePicture, bool bEncodeDQP); +void encodeCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth, bool bEncodeDQP, CU *cuData); void finishCU(TComDataCU* cu, uint32_t absPartIdx, uint32_t depth); void writeOut(); diff -r 7e29b10982d2 -r 74b5192133a5 source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Thu Sep 11 19:24:28 2014 +0530 +++ b/source/encoder/frameencoder.cpp Tue Sep 16 11:53:32 
2014 +0530 @@ -470,6 +470,7 @@ } } +m_tld.cuCoder.loadCTUData(cu); // final coding (bitstream generation) for this CU m_entropyCoder.encodeCTU(cu); @@ -689,6 +690,7 @@ // load current best state from go-on entropy
Re: [x265] [PATCH] api: do not reuse the analysisData buffer for more then one picture, set it NULL
On 09/16, g...@multicorewareinc.com wrote: # HG changeset patch # User Gopu Govindaswamy g...@multicorewareinc.com # Date 1410868013 -19800 # Tue Sep 16 17:16:53 2014 +0530 # Node ID 23233e9d7fb9ddb13685fe12f23cf8bbed74c0eb # Parent 717ea14104cf32bbcafe8e9b8ddef17867807936 api: do not reuse the analysisData buffer for more then one picture, set it NULL diff -r 717ea14104cf -r 23233e9d7fb9 source/encoder/api.cpp --- a/source/encoder/api.cpp Tue Sep 16 16:50:56 2014 +0530 +++ b/source/encoder/api.cpp Tue Sep 16 17:16:53 2014 +0530 @@ -121,6 +121,16 @@ do { numEncoded = encoder-encode(pic_in, pic_out); + +// do not reuse this same buffer for more then one picture s/then/than queued with this fix, plus a note that the encoder now owns the buffers +if (pic_in) +{ +if (pic_in-analysisData.intraData) +pic_in-analysisData.intraData = NULL; +if (pic_in-analysisData.interData) +pic_in-analysisData.interData = NULL; +} + } while (numEncoded == 0 !pic_in encoder-m_numDelayedPic); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] add fanout validation module to check param compatibility
# HG changeset patch # User Sagar Kotecha sa...@multicorewareinc.com # Date 1410870006 -19800 # Tue Sep 16 17:50:06 2014 +0530 # Node ID fa9246f437db2bb6726285ea8bfc65e742aba9ec # Parent 1de67321275e70d510f0df3d5b7d4b9d391a1e66 add fanout validation module to check param compatibility diff -r 1de67321275e -r fa9246f437db source/common/param.cpp --- a/source/common/param.cpp Mon Sep 15 15:00:13 2014 +0200 +++ b/source/common/param.cpp Tue Sep 16 17:50:06 2014 +0530 @@ -1191,7 +1191,7 @@ { char *buf, *s; -buf = s = X265_MALLOC(char, 2000); +buf = s = X265_MALLOC(char, MAXPARAMSIZE); if (!buf) return NULL; diff -r 1de67321275e -r fa9246f437db source/common/param.h --- a/source/common/param.h Mon Sep 15 15:00:13 2014 +0200 +++ b/source/common/param.h Tue Sep 16 17:50:06 2014 +0530 @@ -38,6 +38,8 @@ /* this table is kept internal to avoid confusion, since log level indices start at -1 */ static const char * const logLevelNames[] = { none, error, warning, info, debug, full, 0 }; + +#define MAXPARAMSIZE 2000 } #endif // ifndef X265_PARAM_H diff -r 1de67321275e -r fa9246f437db source/x265.cpp --- a/source/x265.cpp Mon Sep 15 15:00:13 2014 +0200 +++ b/source/x265.cpp Tue Sep 16 17:50:06 2014 +0530 @@ -227,6 +227,7 @@ uint32_t framesToBeEncoded; // number of frames to encode uint64_t totalbytes; size_t analysisRecordSize; // number of bytes read from or dumped into file +int analysisHeaderSize; int64_t startTime; int64_t prevUpdateTime; @@ -251,6 +252,7 @@ qpfile = NULL; analysisFile = NULL; analysisRecordSize = 0; +analysisHeaderSize = 0; } void destroy(); @@ -262,6 +264,7 @@ bool parseQPFile(x265_picture pic_org); void readAnalysisFile(x265_picture* pic, x265_param*); void writeAnalysisFile(x265_picture* pic, x265_param*); +bool validateFanout(x265_param*); }; void CLIOptions::destroy() @@ -755,6 +758,95 @@ return false; } +bool CLIOptions::validateFanout(x265_param *param) +{ +#define CMP_OPT_FANOUT(opt, param_val)\ +{\ +bErr = 0;\ +p = strstr(paramBuf, opt =);\ +char* q 
= strstr(paramBuf, no-opt);\ +if (p sscanf(p, opt =%d , i) param_val != i)\ +bErr = 1;\ +else if (!param_val !q)\ +bErr = 1;\ +else if (param_val (q || !strstr(paramBuf, opt)))\ +bErr = 1;\ +if (bErr)\ +{\ +x265_log(param, X265_LOG_ERROR, different opt setting than given in analysis file (%d vs %d)\n, param_val, i);\ +X265_FREE(paramBuf);\ +return false;\ +}\ +} + +char *p = NULL, *paramBuf; +int i, j; +uint32_t k , l; +bool bErr = false; + +paramBuf = X265_MALLOC(char, MAXPARAMSIZE); +if (!paramBuf) +return false; + +fread(paramBuf, 1, MAXPARAMSIZE, this-analysisFile); + +/* check whether fanout options are compatible */ +if (strncmp(paramBuf, #options:, 9)) +{ +x265_log(param, X265_LOG_ERROR, options list in analysis file is not valid\n); +X265_FREE(paramBuf); +return false; +} + +char* buf = strchr(paramBuf, '\n'); +if (!buf) +{ +x265_log(param, X265_LOG_ERROR, Malformed analysis file\n); +X265_FREE(paramBuf); +return false; +} +*buf = '\0'; +fseek(this-analysisFile, int(strlen(paramBuf) + 1), SEEK_SET); + +if (sscanf(paramBuf, #options: %dx%d, i, j) != 2) +{ +x265_log(param, X265_LOG_ERROR, Resolution specified in analysis file is not valid\n); +X265_FREE(paramBuf); +return false; +} +if ((p = strstr(paramBuf, fps=)) == 0 || sscanf(p, fps=%u/%u, k, l) != 2) +{ +x265_log(param, X265_LOG_ERROR, fps specified in analysis file is not valid\n); +X265_FREE(paramBuf); +return false; +} +if (k != param-fpsNum || l != param-fpsDenom) +{ +x265_log(param, X265_LOG_ERROR, fps mismatch than given in analysis file (%u/%u vs %u/%u)\n, +param-fpsNum, param-fpsDenom, k, l); +X265_FREE(paramBuf); +return false; +} + +CMP_OPT_FANOUT(bitdepth, param-internalBitDepth); +CMP_OPT_FANOUT(weightp, param-bEnableWeightedPred); +CMP_OPT_FANOUT(bframes, param-bframes); +CMP_OPT_FANOUT(b-pyramid, param-bBPyramid); +CMP_OPT_FANOUT(b-adapt, param-bFrameAdaptive); +CMP_OPT_FANOUT(open-gop, param-bOpenGOP); +CMP_OPT_FANOUT(keyint, param-keyframeMax); +CMP_OPT_FANOUT(min-keyint, 
param-keyframeMin); +CMP_OPT_FANOUT(scenecut, param-scenecutThreshold); +CMP_OPT_FANOUT(ctu, (int)param-maxCUSize); +CMP_OPT_FANOUT(ref, param-maxNumReferences); +CMP_OPT_FANOUT(rc-lookahead, param-lookaheadDepth); + +#undef CMP_OPT_FANOUT + +X265_FREE(paramBuf); +return true; +} + void CLIOptions::readAnalysisFile(x265_picture* pic, x265_param* p) { int poc,