[x265] [PATCH] rc: write I/i slice in stats file based on whether openGop is enabled or not
# HG changeset patch # User Aarthi Thirumalai # Date 1408946307 -19800 # Mon Aug 25 11:28:27 2014 +0530 # Node ID c71cf2abe789b7bb7696201c9cff99c438109a0a # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 rc: write I/i slice in stats file based on whether openGop is enabled or not. diff -r 6e6756f94b27 -r c71cf2abe789 source/encoder/ratecontrol.cpp --- a/source/encoder/ratecontrol.cpp Fri Aug 22 15:53:34 2014 -0500 +++ b/source/encoder/ratecontrol.cpp Mon Aug 25 11:28:27 2014 +0530 @@ -320,6 +320,7 @@ m_startEndOrder.set(0); m_bTerminated = false; m_finalFrameCount = 0; +m_numEntries = 0; if (m_param->rc.rateControlMode == X265_RC_CRF) { m_param->rc.qp = (int)m_param->rc.rfConstant; @@ -1033,7 +1034,7 @@ m_param->bframes = 1; return X265_TYPE_AUTO; } -int frameType = m_rce2Pass[frameNum].sliceType == I_SLICE ? (m_rce2Pass[frameNum].poc == 0 ? X265_TYPE_I : X265_TYPE_IDR) +int frameType = m_rce2Pass[frameNum].sliceType == I_SLICE ? (frameNum > 0 && m_param->bOpenGOP ? X265_TYPE_I : X265_TYPE_IDR) : m_rce2Pass[frameNum].sliceType == P_SLICE ? X265_TYPE_P : (m_rce2Pass[frameNum].sliceType == B_SLICE && m_rce2Pass[frameNum].keptAsRef? X265_TYPE_BREF : X265_TYPE_B); return frameType; @@ -2080,7 +2081,7 @@ // Write frame stats into the stats file if 2 pass is enabled. if (m_param->rc.bStatWrite) { -char cType = rce->sliceType == I_SLICE ? (rce->poc == 0 ? 'I' : 'i') +char cType = rce->sliceType == I_SLICE ? (rce->poc > 0 && m_param->bOpenGOP ? 'i' : 'I') : rce->sliceType == P_SLICE ? 'P' : IS_REFERENCED(slice) ? 'B' : 'b'; if (fprintf(m_statFileOut, ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] rc: don't read slicetypes from qpfile in 2nd pass
# HG changeset patch # User Aarthi Thirumalai # Date 1408948191 -19800 # Mon Aug 25 11:59:51 2014 +0530 # Node ID 7d9c4d6ca117eebaa838756a0b3562b4e3630e18 # Parent c71cf2abe789b7bb7696201c9cff99c438109a0a rc: don't read slicetypes from qpfile in 2nd pass. slicetype has to be taken from the stats file from prev pass. diff -r c71cf2abe789 -r 7d9c4d6ca117 source/x265.cpp --- a/source/x265.cpp Mon Aug 25 11:28:27 2014 +0530 +++ b/source/x265.cpp Mon Aug 25 11:59:51 2014 +0530 @@ -833,7 +833,7 @@ while (pic_in && !b_ctrl_c) { pic_orig.poc = inFrameCount; -if (cliopt.qpfile) +if (cliopt.qpfile && !param->rc.bStatRead) { if (!cliopt.parseQPFile(pic_orig)) { ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx]
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1408956792 -32400 # Mon Aug 25 17:53:12 2014 +0900 # Node ID 7145e57c722a94a06faec33e3041442032a1892f # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx] diff -r 6e6756f94b27 -r 7145e57c722a source/Lib/TLibCommon/TComDataCU.cpp --- a/source/Lib/TLibCommon/TComDataCU.cpp Fri Aug 22 15:53:34 2014 -0500 +++ b/source/Lib/TLibCommon/TComDataCU.cpp Mon Aug 25 17:53:12 2014 +0900 @@ -816,12 +816,12 @@ TComDataCU* TComDataCU::getPUAboveRight(uint32_t arPartUnitIdx, uint32_t curPartUnitIdx) { +if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples) +return NULL; + uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx]; uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); -if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] + UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples) -return NULL; - if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 1, numPartInCUSize)) { if (!RasterAddress::isZeroRow(absPartIdxRT, numPartInCUSize)) @@ -857,14 +857,11 @@ TComDataCU* TComDataCU::getPUBelowLeft(uint32_t blPartUnitIdx, uint32_t curPartUnitIdx) { -uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; +if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) = m_slice-m_sps-picHeightInLumaSamples) +return NULL; -if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_rasterToPelY[absPartIdxLB] + UNIT_SIZE) = m_slice-m_sps-picHeightInLumaSamples) -{ -return NULL; -} - -uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); +uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx]; +uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - 1, numPartInCUSize)) { @@ -895,15 +892,14 @@ TComDataCU* TComDataCU::getPUBelowLeftAdi(uint32_t blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t 
partUnitOffset) { -uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; - -if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_rasterToPelY[absPartIdxLB] + (partUnitOffset LOG2_UNIT_SIZE)) = +if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset LOG2_UNIT_SIZE)) = m_slice-m_sps-picHeightInLumaSamples) { return NULL; } -uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); +uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx]; +uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - partUnitOffset, numPartInCUSize)) { @@ -938,14 +934,13 @@ TComDataCU* TComDataCU::getPUAboveRightAdi(uint32_t arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) { -uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx]; - -if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] + (partUnitOffset LOG2_UNIT_SIZE)) = +if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset LOG2_UNIT_SIZE)) = m_slice-m_sps-picWidthInLumaSamples) { return NULL; } +uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx]; uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - partUnitOffset, numPartInCUSize)) @@ -954,7 +949,7 @@ { if (curPartUnitIdx g_rasterToZscan[absPartIdxRT - numPartInCUSize + partUnitOffset]) { -uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; +uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; arPartUnitIdx = g_rasterToZscan[absPartIdxRT - numPartInCUSize + partUnitOffset]; if (RasterAddress::isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, numPartInCUSize)) { @@ -1817,48 +1812,42 @@ } // TMVP always enabled { -// MTK colocated-RightBottom +MV colmv; uint32_t partIdxRB; deriveRightBottomIdx(puIdx, partIdxRB); -uint32_t absPartIdxTmp = g_zscanToRaster[partIdxRB]; -uint32_t 
numPartInCUSize = m_pic-getNumPartInCUSize(); - -MV colmv; -int refIdx; int lcuIdx = -1; -if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxTmp] + UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples) // image boundary check +// image boundary check +if (m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[partIdxRB] + UNIT_SIZE m_slice-m_sps-picWidthInLumaSamples +m_pic-getCU(m_cuAddr)-getCUPelY() +
Re: [x265] fix lossless
On 08/25, Satoshi Nakagawa wrote: Negative shift or bts cause unexpected behavior. Negative shift is used for round value calculation. int round = 1 << (shift - 1); Ah, we need this operation to be a copy; there shouldn't be any rounding. At 2014-08-25 10:06:23, Steve Borho st...@borho.org wrote: On 08/23, Satoshi Nakagawa wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1408803114 -32400 # Sat Aug 23 23:11:54 2014 +0900 # Node ID 218b9ddfe148c55afccc2c5a073858692e09f5c6 # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 fix lossless Do you have a repro case that this fixes? I tried Main and Main10 encodes with --lossless --hash 1 and they were both fine prior to this patch. diff -r 6e6756f94b27 -r 218b9ddfe148 source/common/quant.cpp --- a/source/common/quant.cpp Fri Aug 22 15:53:34 2014 -0500 +++ b/source/common/quant.cpp Sat Aug 23 23:11:54 2014 +0900 @@ -399,7 +399,7 @@ { if (transQuantBypass) { -primitives.cvt32to16_shr(residual, coeff, stride, 0, 1 << log2TrSize); +primitives.cvt32to16_shl[log2TrSize - 2](residual, coeff, stride, 0); return; } @@ -430,7 +430,7 @@ #if X265_DEPTH <= 10 primitives.cvt32to16_shr(residual, m_resiDctCoeff, stride, shift, trSize); #else -if (shift >= 0) +if (shift > 0) primitives.cvt32to16_shr(residual, m_resiDctCoeff, stride, shift, trSize); else primitives.cvt32to16_shl[log2TrSize - 2](residual, m_resiDctCoeff, stride, -shift); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] Removed redundant code
# HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1408983545 25200 # Node ID fa3c389b255b8299bf75b7dfdab145dfbdc3de40 # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 Removed redundant code diff -r 6e6756f94b27 -r fa3c389b255b source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Fri Aug 22 15:53:34 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 09:19:05 2014 -0700 @@ -1537,34 +1537,6 @@ } } // Mode loop -// TODO: there is a lot of redundant work happening here, please clean this up! -{ -uint32_t origMode = bestPUMode; - -cu-setLumaIntraDirSubParts(origMode, partOffset, depth + initTrDepth); - -// set context models -m_entropyCoder-load(m_rdEntropyCoders[depth][CI_CURR_BEST]); - -// determine residual for partition -uint32_t puDistY = 0; -uint64_t puCost = 0; -xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, puDistY, false, puCost); - -// check r-d cost -if (puCost bestPUCost) -{ -bestPUMode = origMode; -bestPUDistY = puDistY; - -xSetIntraResultQT(cu, initTrDepth, partOffset, reconYuv); - -::memcpy(m_qtTempTrIdx, cu-getTransformIdx() + partOffset, qPartNum * sizeof(uint8_t)); -::memcpy(m_qtTempCbf[0], cu-getCbf(TEXT_LUMA) + partOffset, qPartNum * sizeof(uint8_t)); -::memcpy(m_qtTempTransformSkipFlag[0], cu-getTransformSkip(TEXT_LUMA) + partOffset, qPartNum * sizeof(uint8_t)); -} -} // Mode loop - //--- update overall distortion --- overallDistY += bestPUDistY; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] rc: don't read slicetypes from qpfile in 2nd pass
On 08/25, aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalai # Date 1408948191 -19800 # Mon Aug 25 11:59:51 2014 +0530 # Node ID 7d9c4d6ca117eebaa838756a0b3562b4e3630e18 # Parent c71cf2abe789b7bb7696201c9cff99c438109a0a rc: don't read slicetypes from qpfile in 2nd pass. slicetype has to be taken from the stats file from prev pass. This is essentially disabling the QP file entirely on the second pass, which might be a perfectly reasonable thing to do, but if it is we should probably issue some kind of warning saying the QP file is being ignored. diff -r c71cf2abe789 -r 7d9c4d6ca117 source/x265.cpp --- a/source/x265.cpp Mon Aug 25 11:28:27 2014 +0530 +++ b/source/x265.cpp Mon Aug 25 11:59:51 2014 +0530 @@ -833,7 +833,7 @@ while (pic_in && !b_ctrl_c) { pic_orig.poc = inFrameCount; -if (cliopt.qpfile) +if (cliopt.qpfile && !param->rc.bStatRead) { if (!cliopt.parseQPFile(pic_orig)) { ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx]
On 08/25, Satoshi Nakagawa wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1408956792 -32400 # Mon Aug 25 17:53:12 2014 +0900 # Node ID 7145e57c722a94a06faec33e3041442032a1892f # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx] Queued for default, thanks. There seems to be a lot of logic that checks for 'inside picture bounds'. It seems like we could save a lot of CPU cycles if we padded input pictures to the max-ctu size instead of the min-ctu size and adjusted the conformance window accordingly. diff -r 6e6756f94b27 -r 7145e57c722a source/Lib/TLibCommon/TComDataCU.cpp --- a/source/Lib/TLibCommon/TComDataCU.cppFri Aug 22 15:53:34 2014 -0500 +++ b/source/Lib/TLibCommon/TComDataCU.cppMon Aug 25 17:53:12 2014 +0900 @@ -816,12 +816,12 @@ TComDataCU* TComDataCU::getPUAboveRight(uint32_t arPartUnitIdx, uint32_t curPartUnitIdx) { +if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples) +return NULL; + uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx]; uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); -if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] + UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples) -return NULL; - if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 1, numPartInCUSize)) { if (!RasterAddress::isZeroRow(absPartIdxRT, numPartInCUSize)) @@ -857,14 +857,11 @@ TComDataCU* TComDataCU::getPUBelowLeft(uint32_t blPartUnitIdx, uint32_t curPartUnitIdx) { -uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; +if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) = m_slice-m_sps-picHeightInLumaSamples) +return NULL; -if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_rasterToPelY[absPartIdxLB] + UNIT_SIZE) = m_slice-m_sps-picHeightInLumaSamples) -{ -return NULL; -} - -uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); +uint32_t 
absPartIdxLB= g_zscanToRaster[curPartUnitIdx]; +uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - 1, numPartInCUSize)) { @@ -895,15 +892,14 @@ TComDataCU* TComDataCU::getPUBelowLeftAdi(uint32_t blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) { -uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; - -if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_rasterToPelY[absPartIdxLB] + (partUnitOffset LOG2_UNIT_SIZE)) = +if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset LOG2_UNIT_SIZE)) = m_slice-m_sps-picHeightInLumaSamples) { return NULL; } -uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); +uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx]; +uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - partUnitOffset, numPartInCUSize)) { @@ -938,14 +934,13 @@ TComDataCU* TComDataCU::getPUAboveRightAdi(uint32_t arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) { -uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx]; - -if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] + (partUnitOffset LOG2_UNIT_SIZE)) = +if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset LOG2_UNIT_SIZE)) = m_slice-m_sps-picWidthInLumaSamples) { return NULL; } +uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx]; uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - partUnitOffset, numPartInCUSize)) @@ -954,7 +949,7 @@ { if (curPartUnitIdx g_rasterToZscan[absPartIdxRT - numPartInCUSize + partUnitOffset]) { -uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; +uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; arPartUnitIdx = g_rasterToZscan[absPartIdxRT - numPartInCUSize + 
partUnitOffset]; if (RasterAddress::isEqualRowOrCol(absPartIdxRT, absZorderCUIdx, numPartInCUSize)) { @@ -1817,48 +1812,42 @@ } // TMVP always enabled { -// MTK colocated-RightBottom +MV colmv; uint32_t partIdxRB; deriveRightBottomIdx(puIdx, partIdxRB); -uint32_t absPartIdxTmp = g_zscanToRaster[partIdxRB]; -uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); - -
[x265] [PATCH] Added check for mode 34
# HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1408987126 25200 # Node ID 157adb623151141d51c01bc085d563195f55e051 # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 Added check for mode 34 For fast intra search in EstimateRow::estimateCUCost check intra mode 34 diff -r 6e6756f94b27 -r 157adb623151 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Fri Aug 22 15:53:34 2014 -0500 +++ b/source/encoder/slicetype.cpp Mon Aug 25 10:18:46 2014 -0700 @@ -1722,6 +1722,11 @@ cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); COPY2_IF_LT(acost, cost, lowmode, mode); } +if (lowmode == 33) +{ +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[34 * predsize], cuSize); +COPY2_IF_LT(acost, cost, lowmode, mode); +} if (acost < icost) icost = acost; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] Added check for mode 34
On 08/25, dtyx...@gmail.com wrote: # HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1408987126 25200 # Node ID 157adb623151141d51c01bc085d563195f55e051 # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 Added check for mode 34 This was a deliberate omission on my part; missing one possible mode is OK for lookahead, where we're just trying to estimate intra cost and we don't even keep track of what the best mode was. For fast intra search in EstimateRow::estimateCUCost check intra mode 34 diff -r 6e6756f94b27 -r 157adb623151 source/encoder/slicetype.cpp --- a/source/encoder/slicetype.cpp Fri Aug 22 15:53:34 2014 -0500 +++ b/source/encoder/slicetype.cpp Mon Aug 25 10:18:46 2014 -0700 @@ -1722,6 +1722,11 @@ cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[mode * predsize], cuSize); COPY2_IF_LT(acost, cost, lowmode, mode); } +if (lowmode == 33) +{ +cost = satd(m_me.fenc, FENC_STRIDE, m_predictions[34 * predsize], cuSize); +COPY2_IF_LT(acost, cost, lowmode, mode); +} if (acost < icost) icost = acost; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] Removed redundant code
On 08/25, dtyx...@gmail.com wrote: # HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1408983545 25200 # Node ID fa3c389b255b8299bf75b7dfdab145dfbdc3de40 # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 Removed redundant code Queued, thanks diff -r 6e6756f94b27 -r fa3c389b255b source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Fri Aug 22 15:53:34 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 09:19:05 2014 -0700 @@ -1537,34 +1537,6 @@ } } // Mode loop -// TODO: there is a lot of redundant work happening here, please clean this up! -{ -uint32_t origMode = bestPUMode; - -cu-setLumaIntraDirSubParts(origMode, partOffset, depth + initTrDepth); - -// set context models -m_entropyCoder-load(m_rdEntropyCoders[depth][CI_CURR_BEST]); - -// determine residual for partition -uint32_t puDistY = 0; -uint64_t puCost = 0; -xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, puDistY, false, puCost); - -// check r-d cost -if (puCost bestPUCost) -{ -bestPUMode = origMode; -bestPUDistY = puDistY; - -xSetIntraResultQT(cu, initTrDepth, partOffset, reconYuv); - -::memcpy(m_qtTempTrIdx, cu-getTransformIdx() + partOffset, qPartNum * sizeof(uint8_t)); -::memcpy(m_qtTempCbf[0], cu-getCbf(TEXT_LUMA) + partOffset, qPartNum * sizeof(uint8_t)); -::memcpy(m_qtTempTransformSkipFlag[0], cu-getTransformSkip(TEXT_LUMA) + partOffset, qPartNum * sizeof(uint8_t)); -} -} // Mode loop - //--- update overall distortion --- overallDistY += bestPUDistY; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] Removed redundant code
On 08/25, dtyx...@gmail.com wrote: # HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1408983545 25200 # Node ID fa3c389b255b8299bf75b7dfdab145dfbdc3de40 # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 Removed redundant code if I do an encode with --preset slower, the outputs change with this patch. is this intentional? diff -r 6e6756f94b27 -r fa3c389b255b source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Fri Aug 22 15:53:34 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 09:19:05 2014 -0700 @@ -1537,34 +1537,6 @@ } } // Mode loop -// TODO: there is a lot of redundant work happening here, please clean this up! -{ -uint32_t origMode = bestPUMode; - -cu-setLumaIntraDirSubParts(origMode, partOffset, depth + initTrDepth); - -// set context models -m_entropyCoder-load(m_rdEntropyCoders[depth][CI_CURR_BEST]); - -// determine residual for partition -uint32_t puDistY = 0; -uint64_t puCost = 0; -xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, puDistY, false, puCost); I suspect some of the bits up to here are perhaps required unless the last mode tried was the best mode. -// check r-d cost -if (puCost bestPUCost) -{ -bestPUMode = origMode; -bestPUDistY = puDistY; - -xSetIntraResultQT(cu, initTrDepth, partOffset, reconYuv); - -::memcpy(m_qtTempTrIdx, cu-getTransformIdx() + partOffset, qPartNum * sizeof(uint8_t)); -::memcpy(m_qtTempCbf[0], cu-getCbf(TEXT_LUMA) + partOffset, qPartNum * sizeof(uint8_t)); -::memcpy(m_qtTempTransformSkipFlag[0], cu-getTransformSkip(TEXT_LUMA) + partOffset, qPartNum * sizeof(uint8_t)); -} -} // Mode loop - //--- update overall distortion --- overallDistY += bestPUDistY; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] Removed redundant code
On 08/25/2014 01:36 PM, Steve Borho wrote: On 08/25, dtyx...@gmail.com wrote: # HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1408983545 25200 # Node ID fa3c389b255b8299bf75b7dfdab145dfbdc3de40 # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 Removed redundant code If I do an encode with --preset slower, the outputs change with this patch. Is this intentional? It was not. I only tested on a few vids with -I 1 to force intra prediction. It produced bit-for-bit identical results in less time. I will look into --preset slower differences. diff -r 6e6756f94b27 -r fa3c389b255b source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Fri Aug 22 15:53:34 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 09:19:05 2014 -0700 @@ -1537,34 +1537,6 @@ } } // Mode loop -// TODO: there is a lot of redundant work happening here, please clean this up! -{ -uint32_t origMode = bestPUMode; - -cu->setLumaIntraDirSubParts(origMode, partOffset, depth + initTrDepth); - -// set context models -m_entropyCoder->load(m_rdEntropyCoders[depth][CI_CURR_BEST]); - -// determine residual for partition -uint32_t puDistY = 0; -uint64_t puCost = 0; -xRecurIntraCodingQT(cu, initTrDepth, partOffset, fencYuv, predYuv, resiYuv, puDistY, false, puCost); I suspect some of the bits up to here are perhaps required unless the last mode tried was the best mode. Yes, xRecurIntraCodingQT is where the most work is done by this chunk of code so the difference is probably somewhere in there. 
-// check r-d cost -if (puCost < bestPUCost) -{ -bestPUMode = origMode; -bestPUDistY = puDistY; - -xSetIntraResultQT(cu, initTrDepth, partOffset, reconYuv); - -::memcpy(m_qtTempTrIdx, cu->getTransformIdx() + partOffset, qPartNum * sizeof(uint8_t)); -::memcpy(m_qtTempCbf[0], cu->getCbf(TEXT_LUMA) + partOffset, qPartNum * sizeof(uint8_t)); -::memcpy(m_qtTempTransformSkipFlag[0], cu->getTransformSkip(TEXT_LUMA) + partOffset, qPartNum * sizeof(uint8_t)); -} -} // Mode loop - //--- update overall distortion --- overallDistY += bestPUDistY; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel Is there any test script available that covers at least a good basic set of test cases? ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH 1 of 3] analysis: fix inter hash mistake with --cu-lossless
# HG changeset patch # User Min Chen chenm...@163.com # Date 1409002891 18000 # Mon Aug 25 16:41:31 2014 -0500 # Node ID 0bf2756898bc78e5660a6b607b2f3cda97834264 # Parent 5acfb12ec5d17cc700e313fc99248e2408e5967b analysis: fix inter hash mistake with --cu-lossless diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 17:53:12 2014 +0900 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 16:41:31 2014 -0500 @@ -2293,7 +2293,7 @@ * \returns void */ void TEncSearch::encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* outResiYuv, - ShortYuv* outBestResiYuv, TComYuv* outReconYuv) + ShortYuv* outBestResiYuv, TComYuv* outReconYuv, TComDataCU* tmpCu) { X265_CHECK(!cu-isIntra(0), intra CU not expected\n); @@ -2321,6 +2321,7 @@ } uint64_t bestCost = MAX_INT64; +bool bestTransquantBypassFlag = bIsTQBypassEnable; for (uint32_t modeId = 0; modeId numModes; modeId++) { @@ -2388,15 +2389,29 @@ if (cu-getQtRootCbf(0)) xSetResidualQTData(cu, 0, outBestResiYuv, depth, true); +bestTransquantBypassFlag = bIsLosslessMode; bestBits = bits; bestCost = cost; bestCoeffBits = cu-m_coeffBits; m_entropyCoder-store(m_rdEntropyCoders[depth][CI_TEMP_BEST]); } + +// Save lossless mode coeff +if (bIsLosslessMode) +{ +tmpCu-copyPartFrom(cu, 0, depth, false); +} } X265_CHECK(bestCost != MAX_INT64, no best cost\n); +if (bestTransquantBypassFlag !m_param-bLossless) +{ +assert(log2CUSize 2); +cu-setCUTransquantBypassSubParts(true, 0, depth); +cu-copyPartFrom(tmpCu, 0, depth, false); +} + if (cu-getQtRootCbf(0)) outReconYuv-addClip(predYuv, outBestResiYuv, log2CUSize); else diff -r 5acfb12ec5d1 -r 0bf2756898bc source/Lib/TLibEncoder/TEncSearch.h --- a/source/Lib/TLibEncoder/TEncSearch.h Mon Aug 25 17:53:12 2014 +0900 +++ b/source/Lib/TLibEncoder/TEncSearch.h Mon Aug 25 16:41:31 2014 -0500 @@ -147,7 +147,7 @@ /// encode residual and compute rd-cost for inter mode void 
encodeResAndCalcRdInterCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, ShortYuv* resiYuv, ShortYuv* bestResiYuv, - TComYuv* reconYuv); + TComYuv* reconYuv, TComDataCU* tmpCu); void encodeResAndCalcRdSkipCU(TComDataCU* cu, TComYuv* fencYuv, TComYuv* predYuv, TComYuv* reconYuv); void xRecurIntraCodingQT(TComDataCU* cu, uint32_t trDepth, uint32_t absPartIdx, TComYuv* fencYuv, diff -r 5acfb12ec5d1 -r 0bf2756898bc source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Mon Aug 25 17:53:12 2014 +0900 +++ b/source/encoder/analysis.cpp Mon Aug 25 16:41:31 2014 -0500 @@ -82,7 +82,7 @@ uint32_t sizeL = cuSize * cuSize; uint32_t sizeC = sizeL (CHROMA_H_SHIFT(csp) + CHROMA_V_SHIFT(csp)); -ok = m_memPool[i].initialize(numPartitions, sizeL, sizeC, 8, tqBypass); +ok = m_memPool[i].initialize(numPartitions, sizeL, sizeC, 9, tqBypass); m_interCU_2Nx2N[i] = new TComDataCU; m_interCU_2Nx2N[i]-create(m_memPool[i], numPartitions, cuSize, csp, 0, tqBypass); @@ -108,6 +108,9 @@ m_tempCU[i] = new TComDataCU; m_tempCU[i]-create(m_memPool[i], numPartitions, cuSize, csp, 7, tqBypass); +m_tempLosslessCU[i] = new TComDataCU; +m_tempLosslessCU[i]-create(m_memPool[i], numPartitions, cuSize, csp, 8, tqBypass); + m_bestPredYuv[i] = new TComYuv; ok = m_bestPredYuv[i]-create(cuSize, cuSize, csp); @@ -158,6 +161,7 @@ delete m_bestMergeCU[i]; delete m_bestCU[i]; delete m_tempCU[i]; +delete m_tempLosslessCU[i]; if (m_bestPredYuv m_bestPredYuv[i]) { @@ -240,6 +244,7 @@ // initialize CU data m_bestCU[0]-initCU(cu-m_pic, cu-getAddr()); m_tempCU[0]-initCU(cu-m_pic, cu-getAddr()); +m_tempLosslessCU[0]-initCU(cu-m_pic, cu-getAddr()); // analysis of CU uint32_t numPartition = cu-getTotalNumPart(); @@ -394,6 +399,7 @@ uint32_tnextDepth = depth + 1; TComDataCU* subBestPartCU = m_bestCU[nextDepth]; TComDataCU* subTempPartCU = m_tempCU[nextDepth]; +TComDataCU* subTempLosslessPartCU = m_tempLosslessCU[nextDepth]; for (uint32_t partUnitIdx = 0; partUnitIdx 4; partUnitIdx++) { int qp = 
outTempCU-getQP(0); @@ -404,6 +410,7 @@ (subBestPartCU-getCUPelY() slice-m_sps-picHeightInLumaSamples))) { subTempPartCU-initSubCU(outTempCU, partUnitIdx, nextDepth, qp); // clear sub partition datas or init. +
[x265] [PATCH 3 of 3] encoder: re-enable --cu-lossless
# HG changeset patch # User Steve Borho st...@borho.org # Date 1409004465 18000 # Mon Aug 25 17:07:45 2014 -0500 # Node ID 3aa688b03a0b71af6dfde9feb6b0d83178388fd1 # Parent 7e5709385119bb636abd2d198e40a243f1c2491f encoder: re-enable --cu-lossless diff -r 7e5709385119 -r 3aa688b03a0b source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Mon Aug 25 16:41:36 2014 -0500 +++ b/source/encoder/encoder.cpp Mon Aug 25 17:07:45 2014 -0500 @@ -1244,11 +1244,6 @@ { x265_log(p, X265_LOG_INFO, "Warning: picture-based SAO used with frame parallelism\n"); } -if (p->bCULossless) -{ -x265_log(p, X265_LOG_WARNING, "CU-Lossless is disabled in this release of x265\n"); -p->bCULossless = 0; -} if (p->keyframeMax < 0) { ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH 2 of 3] search: fix decoder intra crash with --cu-lossless
# HG changeset patch # User Min Chen chenm...@163.com # Date 1409002896 18000 # Mon Aug 25 16:41:36 2014 -0500 # Node ID 7e5709385119bb636abd2d198e40a243f1c2491f # Parent 0bf2756898bc78e5660a6b607b2f3cda97834264 search: fix decoder intra crash with --cu-lossless diff -r 0bf2756898bc -r 7e5709385119 source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 16:41:31 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Mon Aug 25 16:41:36 2014 -0500 @@ -572,6 +572,10 @@ bool checkTQbypass = cu->m_slice->m_pps->bTransquantBypassEnabled && !m_param->bLossless; +// NOTE: transform_quant_bypass just at cu level +if ((cu->m_slice->m_pps->bTransquantBypassEnabled) && cu->getCUTransquantBypass(0) != checkTQbypass) +checkTQbypass = cu->getCUTransquantBypass(0) && !m_param->bLossless; + uint32_t stride = fencYuv->getStride(); pixel* pred = predYuv->getLumaAddr(absPartIdx); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] Removed redundant code
On 08/25, dave wrote: On 08/25/2014 01:36 PM, Steve Borho wrote: On 08/25, dtyx...@gmail.com wrote: # HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1408983545 25200 # Node ID fa3c389b255b8299bf75b7dfdab145dfbdc3de40 # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 Removed redundant code If I do an encode with --preset slower, the outputs change with this patch. Is this intentional? It was not. I only tested on a few vids with -I 1 to force intra prediction. It produced bit-for-bit identical results in less time. I will look into --preset slower differences. I see. This particular function is only used for RD levels 4, 5, and 6. -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx]
Ashok is already working on pre-calculating these inside-picture flags along with more refactors. After his refactors are in, we can check whether padding will improve performance. In fact, very likely he already has a local version of the logic in this patch. On Mon, Aug 25, 2014 at 10:46 PM, Steve Borho st...@borho.org wrote: On 08/25, Satoshi Nakagawa wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1408956792 -32400 # Mon Aug 25 17:53:12 2014 +0900 # Node ID 7145e57c722a94a06faec33e3041442032a1892f # Parent 6e6756f94b27c3ef30f6159f1880112a7ff978e3 replace g_rasterToPelX[g_zscanToRaster[idx]] by g_zscanToPelX[idx] Queued for default, thanks. There seems to be a lot of logic that checks for 'inside picture bounds'. It seems like we could save a lot of CPU cycles if we padded input pictures to the max-ctu size instead of the min-ctu size and adjusted the conformance window accordingly. diff -r 6e6756f94b27 -r 7145e57c722a source/Lib/TLibCommon/TComDataCU.cpp --- a/source/Lib/TLibCommon/TComDataCU.cppFri Aug 22 15:53:34 2014 -0500 +++ b/source/Lib/TLibCommon/TComDataCU.cppMon Aug 25 17:53:12 2014 +0900 @@ -816,12 +816,12 @@ TComDataCU* TComDataCU::getPUAboveRight(uint32_t arPartUnitIdx, uint32_t curPartUnitIdx) { +if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples) +return NULL; + uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx]; uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); -if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] + UNIT_SIZE) = m_slice-m_sps-picWidthInLumaSamples) -return NULL; - if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - 1, numPartInCUSize)) { if (!RasterAddress::isZeroRow(absPartIdxRT, numPartInCUSize)) @@ -857,14 +857,11 @@ TComDataCU* TComDataCU::getPUBelowLeft(uint32_t blPartUnitIdx, uint32_t curPartUnitIdx) { -uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; +if 
((m_pic-getCU(m_cuAddr)-getCUPelY() + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) = m_slice-m_sps-picHeightInLumaSamples) +return NULL; -if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_rasterToPelY[absPartIdxLB] + UNIT_SIZE) = m_slice-m_sps-picHeightInLumaSamples) -{ -return NULL; -} - -uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); +uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx]; +uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - 1, numPartInCUSize)) { @@ -895,15 +892,14 @@ TComDataCU* TComDataCU::getPUBelowLeftAdi(uint32_t blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) { -uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; - -if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_rasterToPelY[absPartIdxLB] + (partUnitOffset LOG2_UNIT_SIZE)) = +if ((m_pic-getCU(m_cuAddr)-getCUPelY() + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset LOG2_UNIT_SIZE)) = m_slice-m_sps-picHeightInLumaSamples) { return NULL; } -uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); +uint32_t absPartIdxLB= g_zscanToRaster[curPartUnitIdx]; +uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); if (RasterAddress::lessThanRow(absPartIdxLB, numPartInCUSize - partUnitOffset, numPartInCUSize)) { @@ -938,14 +934,13 @@ TComDataCU* TComDataCU::getPUAboveRightAdi(uint32_t arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) { -uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx]; - -if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_rasterToPelX[absPartIdxRT] + (partUnitOffset LOG2_UNIT_SIZE)) = +if ((m_pic-getCU(m_cuAddr)-getCUPelX() + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset LOG2_UNIT_SIZE)) = m_slice-m_sps-picWidthInLumaSamples) { return NULL; } +uint32_t absPartIdxRT= g_zscanToRaster[curPartUnitIdx]; uint32_t numPartInCUSize = m_pic-getNumPartInCUSize(); if (RasterAddress::lessThanCol(absPartIdxRT, numPartInCUSize - partUnitOffset, numPartInCUSize)) @@ -954,7 +949,7 @@ { if (curPartUnitIdx 
g_rasterToZscan[absPartIdxRT - numPartInCUSize + partUnitOffset]) { -uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; +uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInLCU] + (1 (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; arPartUnitIdx = g_rasterToZscan[absPartIdxRT - numPartInCUSize + partUnitOffset]; if (RasterAddress::isEqualRowOrCol(absPartIdxRT,