Re: [x265] Some warnings about YUV 4:2:2 chroma code
Resolved by patch 6710 (08d64a70594e). -- Fun and success! Mario *LigH* Rohkrämer mailto:cont...@ligh.de ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] cleanup: reduce data size and dependency on MotionEstimate
# HG changeset patch # User Min Chen chenm...@163.com # Date 1397561438 -28800 # Node ID dd78d554f78dd785cb8b16a6606b5fe6b6e87e2a # Parent 1cf67a7b362d24d292d7cca574cbcfe88a8eb1cb cleanup: reduce data size and dependency on MotionEstimate diff -r 1cf67a7b362d -r dd78d554f78d source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Apr 14 21:26:37 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Apr 15 19:30:38 2014 +0800 @@ -111,8 +111,6 @@ m_rdCost = rdCost; initTempBuff(cfg-param-internalCsp); -m_me.setSearchMethod(cfg-param-searchMethod); -m_me.setSubpelRefine(cfg-param-subpelRefine); /* When frame parallelism is active, only 'refLagPixels' of reference frames will be guaranteed * available for motion reference. See refLagRows in FrameEncoder::compressCTURows() */ @@ -2191,7 +2189,7 @@ cu-getCUMvField(REF_PIC_LIST_1)-m_refIdx[m.absPartIdx] = m.mvFieldNeighbours[1 + 2 * mergeCand].refIdx; motionCompensation(cu, m_predTempYuv, REF_PIC_LIST_X, puIdx, true, false); -uint32_t costCand = m_me.bufSATD(m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride()); +uint32_t costCand = m_me.satd(m_me.fenc, FENC_STRIDE, m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride()); uint32_t bitsCand = mergeCand + 1; if (mergeCand == m_cfg-param-maxNumMergeCand - 1) { @@ -2314,7 +2312,7 @@ cu-clipMv(mvCand); xPredInterLumaBlk(cu, cu-getSlice()-getRefPic(l, ref)-getPicYuvRec(), partAddr, mvCand, roiWidth, roiHeight, m_predTempYuv); -uint32_t cost = m_me.bufSAD(m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride()); +uint32_t cost = m_me.sad(m_me.fenc, FENC_STRIDE, m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride()); cost = m_rdCost-calcRdSADCost(cost, MVP_IDX_BITS); if (bestCost cost) @@ -2328,11 +2326,11 @@ int merange = m_cfg-param-searchRange; xSetSearchRange(cu, mvp, merange, mvmin, mvmax); -int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv); +int 
satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv, m_cfg-param-searchMethod, m_cfg-param-subpelRefine); /* Get total cost of partition, but only include MV bit cost once */ -bits += m_me.bitcost(outmv); -uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost-getCost(bits); +bits += m_me.bitcost(outmv, mvp); +uint32_t cost = (satdCost - m_me.mvcost(outmv, mvp)) + m_rdCost-getCost(bits); /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */ xCheckBestMVP(amvpInfo[l][ref], outmv, mvp, mvpIdx, bits, cost); @@ -2368,7 +2366,7 @@ int partEnum = partitionFromSizes(roiWidth, roiHeight); primitives.pixelavg_pp[partEnum](avg, roiWidth, pred0, m_predYuv[0].getStride(), pred1, m_predYuv[1].getStride(), 32); -int satdCost = m_me.bufSATD(avg, roiWidth); +int satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth); bidirBits = list[0].bits + list[1].bits + listSelBits[2] - (listSelBits[0] + listSelBits[1]); bidirCost = satdCost + m_rdCost-getCost(bidirBits); @@ -2397,17 +2395,15 @@ intptr_t refStride = m_mref[0][0]-lumaStride; primitives.pixelavg_pp[partEnum](avg, roiWidth, ref0, refStride, ref1, refStride, 32); -satdCost = m_me.bufSATD(avg, roiWidth); +satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth); MV mvp0 = list[0].mvp; int mvpIdx0 = list[0].mvpIdx; -m_me.setMVP(mvp0); -uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv) + m_me.bitcost(mvzero); +uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv, mvp0) + m_me.bitcost(mvzero, mvp0); MV mvp1 = list[1].mvp; int mvpIdx1 = list[1].mvpIdx; -m_me.setMVP(mvp1); -uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv) + m_me.bitcost(mvzero); +uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv, mvp1) + m_me.bitcost(mvzero, mvp1); uint32_t cost = satdCost + m_rdCost-getCost(bits0) + m_rdCost-getCost(bits1); @@ -2556,9 +2552,8 @@ { assert(amvpInfo-m_mvCand[outMvpIdx] == mvPred); -m_me.setMVP(mvPred); int bestMvpIdx = outMvpIdx; -int mvBitsOrig = 
m_me.bitcost(mv) + MVP_IDX_BITS; +int mvBitsOrig = m_me.bitcost(mv, mvPred) + MVP_IDX_BITS; int bestMvBits = mvBitsOrig; for (int mvpIdx = 0; mvpIdx AMVP_MAX_NUM_CANDS; mvpIdx++) @@
Re: [x265] fix: constrained intra
Thanks, verified and pushed. Do you mind including some more information on this option to the docs? This only affects intra decisions in inter-slices, I think? Deepthi On Tue, Apr 15, 2014 at 10:16 AM, Satoshi Nakagawa nakagawa...@oki.com wrote: Was it generating hash mistakes without these fixes? Yes, when encoding with --constrained-intra option. -Original Message- From: x265-devel [mailto:x265-devel-boun...@videolan.org] On Behalf Of Steve Borho Sent: Tuesday, April 15, 2014 1:29 PM To: Development for x265 Subject: Re: [x265] fix: constrained intra On Mon, Apr 14, 2014 at 10:36 PM, Satoshi Nakagawa nakagawa...@oki.com wrote: # HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1397532876 -32400 # Tue Apr 15 12:34:36 2014 +0900 # Node ID 5dde9f4817813e96116df5b86925fa3fc5eff2a8 # Parent 08d64a70594ed31cd80046bd4a7e9fa52119be47 fix: constrained intra Was it generating hash mistakes without these fixes? diff -r 08d64a70594e -r 5dde9f481781 source/Lib/TLibCommon/TComPattern.cpp --- a/source/Lib/TLibCommon/TComPattern.cpp Mon Apr 14 13:18:18 2014 -0500 +++ b/source/Lib/TLibCommon/TComPattern.cpp Tue Apr 15 12:34:36 2014 +0900 @@ -79,12 +79,24 @@ int leftUnits = cuHeightInUnits 1; partIdxLB= g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((cuHeightInUnits - 1) * partIdxStride)]; -bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT); -numIntraNeighbor += (int)(bNeighborFlags[leftUnits]); -numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1)); -numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits)); -numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1)); -numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - cuHeightInUnits)); +if (!cu-getSlice()-getPPS()-getConstrainedIntraPred()) +{ +bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT); 
+numIntraNeighbor += (int)(bNeighborFlags[leftUnits]); +numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1)); +numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits)); +numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1)); +numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - cuHeightInUnits)); +} +else +{ +bNeighborFlags[leftUnits] = isAboveLeftAvailableCIP(cu, partIdxLT); +numIntraNeighbor += (int)(bNeighborFlags[leftUnits]); +numIntraNeighbor += isAboveAvailableCIP(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1)); +numIntraNeighbor += isAboveRightAvailableCIP(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits)); +numIntraNeighbor += isLeftAvailableCIP(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1)); +numIntraNeighbor += isBelowLeftAvailableCIP(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - cuHeightInUnits)); +} width = cuWidth2 + 1; height = cuHeight2 + 1; @@ -238,12 +250,24 @@ int leftUnits = cuHeightInUnits 1; partIdxLB= g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((cuHeightInUnits - 1) * partIdxStride)]; -bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT); -numIntraNeighbor += (int)(bNeighborFlags[leftUnits]); -numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1)); -numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits)); -numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1)); -numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1 - cuHeightInUnits)); +if (!cu-getSlice()-getPPS()-getConstrainedIntraPred()) +{ +bNeighborFlags[leftUnits] = isAboveLeftAvailable(cu, partIdxLT); +numIntraNeighbor += 
(int)(bNeighborFlags[leftUnits]); +numIntraNeighbor += isAboveAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1)); +numIntraNeighbor += isAboveRightAvailable(cu, partIdxLT, partIdxRT, (bNeighborFlags + leftUnits + 1 + cuWidthInUnits)); +numIntraNeighbor += isLeftAvailable(cu, partIdxLT, partIdxLB, (bNeighborFlags + leftUnits - 1)); +numIntraNeighbor += isBelowLeftAvailable(cu, partIdxLT,
Re: [x265] fix: constrained intra
On Tue, Apr 15, 2014 at 11:26 AM, Deepthi Nandakumar deep...@multicorewareinc.com wrote: Thanks, verified and pushed. Do you mind including some more information on this option to the docs? This only affects intra decisions in inter-slices, I think? As far as I know, constrained intra in HEVC is the same as it was in AVC. When generating intra predictions in inter blocks, only intra-coded reference pixels are used. Inter-coded reference pixels are replaced with intra-coded neighbor pixels or default values. The general idea is to block the propagation of reference errors that may have resulted from lossy signals. If anyone has a better description, please speak up -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] vbv: clear row diagonal Satd costs, cu Satd costs when vbv row reset is triggered
On Tue, Apr 15, 2014 at 11:34 AM, aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalai # Date 1397579661 -19800 # Tue Apr 15 22:04:21 2014 +0530 # Node ID bf48002755a3f5593732ca039ad38a3c799da808 # Parent 0a95a6bb0f8e71d7a7d0f8e3803ec2878ad558fe vbv: clear row diagonal Satd costs, cu Satd costs when vbv row reset is triggered. diff -r 0a95a6bb0f8e -r bf48002755a3 source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Tue Apr 15 12:34:36 2014 +0900 +++ b/source/encoder/frameencoder.cpp Tue Apr 15 22:04:21 2014 +0530 @@ -1192,6 +1192,8 @@ m_pic->m_qpaRc[r] = 0; m_pic->m_rowEncodedBits[r] = 0; m_pic->m_numEncodedCusPerRow[r] = 0; +m_pic->m_rowDiagSatd[r] = 0; +m_pic->m_rowDiagIntraSatd[r] = 0; } m_bAllRowsStop = false; @@ -1248,6 +1250,13 @@ x265_emms(); double qp = baseQp; +/*clear cuCostsForVbv when vbv row reset is triggered. */ +if (m_pic->m_cuCostsForVbv[cuAddr] > 0 || m_pic->m_intraCuCostsForVbv[cuAddr] > 0) +{ +m_pic->m_cuCostsForVbv[cuAddr] = 0; +m_pic->m_intraCuCostsForVbv[cuAddr] = 0; +} + Can we just unconditionally clear these two values? -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] h265.h:Proposed replacement of HM's h265 classes
On Fri, Apr 11, 2014 at 5:32 PM, dave dtyx...@gmail.com wrote: Below is my proposed replacement for many of the H265 classes and structures in TComSlice.h. All have at least basic constructor but no methods and most probably won't need additional methods that do much more than set fields which can be added when needed. A few structures have non-H265 fields that at least in HM code appear to be useful for encoding frames. I have configured the classes so the Encoder class would have an instance each H265 class so that when an object is part of multiple H265 structures, only one instance is needed. Where fields are taken from either the Encoder class or x265_param I have used references instead of copying the field or using a pointer. I don't think this will create any concurrency issues since the fields shouldn't need to be changed once they are set but if there are then the references can be replaced with copies. I tried to setup constructors to replace init methods where applicable. I haven't started integrating it into x265, when everyone's happy with it I can start replacing HM's classes with these. The file is also attached to this email Hi Dave, I think the general approach is ok, and this will nicely remove about half of the remaining set/get methods left in our code. -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] vbv: clear row diagonal Satd costs, cu Satd costs when vbv row reset is triggered
On Tue, Apr 15, 2014 at 11:31 PM, Steve Borho st...@borho.org wrote: On Tue, Apr 15, 2014 at 11:34 AM, aar...@multicorewareinc.com wrote: # HG changeset patch # User Aarthi Thirumalai # Date 1397579661 -19800 # Tue Apr 15 22:04:21 2014 +0530 # Node ID bf48002755a3f5593732ca039ad38a3c799da808 # Parent 0a95a6bb0f8e71d7a7d0f8e3803ec2878ad558fe vbv: clear row diagonal Satd costs, cu Satd costs when vbv row reset is triggered. diff -r 0a95a6bb0f8e -r bf48002755a3 source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Tue Apr 15 12:34:36 2014 +0900 +++ b/source/encoder/frameencoder.cpp Tue Apr 15 22:04:21 2014 +0530 @@ -1192,6 +1192,8 @@ m_pic->m_qpaRc[r] = 0; m_pic->m_rowEncodedBits[r] = 0; m_pic->m_numEncodedCusPerRow[r] = 0; +m_pic->m_rowDiagSatd[r] = 0; +m_pic->m_rowDiagIntraSatd[r] = 0; } m_bAllRowsStop = false; @@ -1248,6 +1250,13 @@ x265_emms(); double qp = baseQp; +/*clear cuCostsForVbv when vbv row reset is triggered. */ +if (m_pic->m_cuCostsForVbv[cuAddr] > 0 || m_pic->m_intraCuCostsForVbv[cuAddr] > 0) +{ +m_pic->m_cuCostsForVbv[cuAddr] = 0; +m_pic->m_intraCuCostsForVbv[cuAddr] = 0; +} + Can we just unconditionally clear these two values? Yes, we can remove the if condition and always clear them. They will normally be 0 at this point unless the CUs are encoded again after a vbv row reset, in which case we do need to clear them. -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] cleanup: reduce data size and dependency on MotionEstimate
On Tue, Apr 15, 2014 at 6:30 AM, Min Chen chenm...@163.com wrote: # HG changeset patch # User Min Chen chenm...@163.com # Date 1397561438 -28800 # Node ID dd78d554f78dd785cb8b16a6606b5fe6b6e87e2a # Parent 1cf67a7b362d24d292d7cca574cbcfe88a8eb1cb cleanup: reduce data size and dependency on MotionEstimate diff -r 1cf67a7b362d -r dd78d554f78d source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Mon Apr 14 21:26:37 2014 -0500 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Tue Apr 15 19:30:38 2014 +0800 @@ -111,8 +111,6 @@ m_rdCost = rdCost; initTempBuff(cfg-param-internalCsp); -m_me.setSearchMethod(cfg-param-searchMethod); -m_me.setSubpelRefine(cfg-param-subpelRefine); /* When frame parallelism is active, only 'refLagPixels' of reference frames will be guaranteed * available for motion reference. See refLagRows in FrameEncoder::compressCTURows() */ @@ -2191,7 +2189,7 @@ cu-getCUMvField(REF_PIC_LIST_1)-m_refIdx[m.absPartIdx] = m.mvFieldNeighbours[1 + 2 * mergeCand].refIdx; motionCompensation(cu, m_predTempYuv, REF_PIC_LIST_X, puIdx, true, false); -uint32_t costCand = m_me.bufSATD(m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride()); +uint32_t costCand = m_me.satd(m_me.fenc, FENC_STRIDE, m_predTempYuv.getLumaAddr(m.absPartIdx), m_predTempYuv.getStride()); uint32_t bitsCand = mergeCand + 1; if (mergeCand == m_cfg-param-maxNumMergeCand - 1) { @@ -2314,7 +2312,7 @@ cu-clipMv(mvCand); xPredInterLumaBlk(cu, cu-getSlice()-getRefPic(l, ref)-getPicYuvRec(), partAddr, mvCand, roiWidth, roiHeight, m_predTempYuv); -uint32_t cost = m_me.bufSAD(m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride()); +uint32_t cost = m_me.sad(m_me.fenc, FENC_STRIDE, m_predTempYuv.getLumaAddr(partAddr), m_predTempYuv.getStride()); cost = m_rdCost-calcRdSADCost(cost, MVP_IDX_BITS); if (bestCost cost) @@ -2328,11 +2326,11 @@ int merange = m_cfg-param-searchRange; xSetSearchRange(cu, mvp, merange, mvmin, mvmax); -int satdCost = 
m_me.motionEstimate(m_mref[l][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv); +int satdCost = m_me.motionEstimate(m_mref[l][ref], mvmin, mvmax, mvp, numMvc, mvc, merange, outmv, m_cfg-param-searchMethod, m_cfg-param-subpelRefine); /* Get total cost of partition, but only include MV bit cost once */ -bits += m_me.bitcost(outmv); -uint32_t cost = (satdCost - m_me.mvcost(outmv)) + m_rdCost-getCost(bits); +bits += m_me.bitcost(outmv, mvp); +uint32_t cost = (satdCost - m_me.mvcost(outmv, mvp)) + m_rdCost-getCost(bits); /* Refine MVP selection, updates: mvp, mvpIdx, bits, cost */ xCheckBestMVP(amvpInfo[l][ref], outmv, mvp, mvpIdx, bits, cost); @@ -2368,7 +2366,7 @@ int partEnum = partitionFromSizes(roiWidth, roiHeight); primitives.pixelavg_pp[partEnum](avg, roiWidth, pred0, m_predYuv[0].getStride(), pred1, m_predYuv[1].getStride(), 32); -int satdCost = m_me.bufSATD(avg, roiWidth); +int satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth); bidirBits = list[0].bits + list[1].bits + listSelBits[2] - (listSelBits[0] + listSelBits[1]); bidirCost = satdCost + m_rdCost-getCost(bidirBits); @@ -2397,17 +2395,15 @@ intptr_t refStride = m_mref[0][0]-lumaStride; primitives.pixelavg_pp[partEnum](avg, roiWidth, ref0, refStride, ref1, refStride, 32); -satdCost = m_me.bufSATD(avg, roiWidth); +satdCost = m_me.satd(m_me.fenc, FENC_STRIDE, avg, roiWidth); MV mvp0 = list[0].mvp; int mvpIdx0 = list[0].mvpIdx; -m_me.setMVP(mvp0); -uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv) + m_me.bitcost(mvzero); +uint32_t bits0 = list[0].bits - m_me.bitcost(list[0].mv, mvp0) + m_me.bitcost(mvzero, mvp0); MV mvp1 = list[1].mvp; int mvpIdx1 = list[1].mvpIdx; -m_me.setMVP(mvp1); -uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv) + m_me.bitcost(mvzero); +uint32_t bits1 = list[1].bits - m_me.bitcost(list[1].mv, mvp1) + m_me.bitcost(mvzero, mvp1); uint32_t cost = satdCost + m_rdCost-getCost(bits0) + m_rdCost-getCost(bits1); @@ -2556,9 +2552,8 @@ { 
assert(amvpInfo-m_mvCand[outMvpIdx] == mvPred); -m_me.setMVP(mvPred); int bestMvpIdx = outMvpIdx; -int mvBitsOrig = m_me.bitcost(mv) + MVP_IDX_BITS; +int mvBitsOrig =
Re: [x265] ASM crash in r6706
On Tue, Apr 15, 2014 at 11:25 AM, Robert Bacs robert.b...@rapidsolution.ro wrote: I followed these steps to build x265: http://forum.videohelp.com/threads/357754-%5BHEVC%5D-x265-EXE-mingw-builds I'm using the following command line to test the CLI: x265.exe --input in.y4m --output out.hevc and below is the resulting call-stack: #0 0x0051995e in x265_quant_sse4 () #1 0x00603ba7 in x265::TComTrQuant::getTUEntropyCodingParameters (cu= 0x5c32d98, result=..., absPartIdx=96491840, log2TrSize=16, ttype=x265::TEXT_CHROMA) at E:/temp/x265_clean/source/Lib/TLibCommon/TComTrQuant.h:171 #2 0x005b5da8 in x265::TComTrQuant::transformNxN (this=0x54ea800, cu=0x5c32d98, residual=0x5c8faa0, stride=32, coeff=0x5c05940, trSize=16, ttype=x265::TEXT_CHROMA, absPartIdx=0, lastPos=0x2def748, useTransformSkip=false, curUseRDOQ=true) at e:/temp/x265_clean/source/Lib/TLibCommon/TComTrQuant.cpp:349 #3 0x0059c289 in x265::TEncSearch::xIntraCodingChromaBlk (this=0x54ea518, cu=0x5c32d98, trDepth=1, absPartIdx=0, absPartIdxStep=64, fencYuv= 0x5c93d38, predYuv=0x5c82a58, resiYuv=0x5c8da00, outDist=@0x2defab4: 0, chromaId=1, bReusePred=false) at e:/temp/x265_clean/source/Lib/TLibEncoder/TEncSearch.cpp:579 #4 0x0059f6a2 in x265::TEncSearch::xRecurIntraChromaCodingQT ( this=0x54ea518, cu=0x5c32d98, trDepth=1, absPartIdx=0, fencYuv=0x5c93d38, predYuv=0x5c82a58, resiYuv=0x5c8da00, outDist=@0x2defab4: 0) at e:/temp/x265_clean/source/Lib/TLibEncoder/TEncSearch.cpp:1304 #5 0x0059f7a6 in x265::TEncSearch::xRecurIntraChromaCodingQT ( this=0x54ea518, cu=0x5c32d98, trDepth=0, absPartIdx=0, fencYuv=0x5c93d38, predYuv=0x5c82a58, resiYuv=0x5c8da00, outDist=@0x2defab4: 0) at e:/temp/x265_clean/source/Lib/TLibEncoder/TEncSearch.cpp:1323 #6 0x005a in x265::TEncSearch::estIntraPredChromaQT (this=0x54ea518, Is your source video publicly available? -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] ASM crash in r6706
On Tue, Apr 15, 2014 at 2:07 PM, Robert Bacs robert.b...@rapidsolution.ro wrote: On 4/15/2014 9:16 PM, Steve Borho wrote: Is your source video publicly available? My source was a y4m created with ffmpeg, but you can use this one http://media.xiph.org/video/derf/y4m/bowing_cif.y4m, I just tried and got a similar call-stack: Thanks, can you paste the output log data as well? That would tell me the version of GCC and other build-specific info. -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] ASM crash in r6706
On 4/15/2014 10:24 PM, Steve Borho wrote: Thanks, can you paste the output log data as well? That would tell me the version of GCC and other build-specific info. Below is the output log: [New Thread 8776.0x1c7c] y4m [info]: 352x288 fps 3/1001 i420 sar 128:117 frames 0 - 299 of 300 [New Thread 8776.0x1ec0] x265 [info]: HEVC encoder version 0.9+48-9f0f011294bd x265 [info]: build info [Windows][GCC 4.8.2][32 bit] 8bpp x265 [info]: using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 [New Thread 8776.0x1840] [New Thread 8776.0x1a54] [New Thread 8776.0x26f4] [New Thread 8776.0x838] [New Thread 8776.0x205c] [New Thread 8776.0x25e4] [New Thread 8776.0x1820] [New Thread 8776.0x20dc] x265 [info]: WPP streams / pool / frames : 5 / 8 / 3 x265 [info]: Main profile, Level-2 (Main tier) x265 [info]: CU size : 64 x265 [info]: Max RQT depth inter / intra : 1 / 1 x265 [info]: ME / range / subpel / merge : hex / 57 / 2 / 2 x265 [info]: Keyframe min / max / scenecut : 25 / 250 / 40 x265 [info]: Lookahead / bframes / badapt: 20 / 4 / 2 x265 [info]: b-pyramid / weightp / weightb / refs: 1 / 1 / 0 / 3 x265 [info]: Rate Control / AQ-Strength / CUTree : CRF-28.0 / 1.0 / 1 x265 [info]: tools: rect amp rd=3 lft sao-lcu sign-hide [New Thread 8776.0x25b0] [New Thread 8776.0x14c4] [New Thread 8776.0x134c] Program received signal SIGSEGV, Segmentation fault. [Switching to Thread 8776.0x1840] 0x0051995e in x265_quant_sse4 () ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] ASM crash in r6706
On Tue, Apr 15, 2014 at 2:41 PM, Robert Bacs robert.b...@rapidsolution.ro wrote: On 4/15/2014 10:24 PM, Steve Borho wrote: Thanks, can you paste the output log data as well? That would tell me the version of GCC and other build-specific info. Below is the output log: [New Thread 8776.0x1c7c] y4m [info]: 352x288 fps 3/1001 i420 sar 128:117 frames 0 - 299 of 300 [New Thread 8776.0x1ec0] x265 [info]: HEVC encoder version 0.9+48-9f0f011294bd x265 [info]: build info [Windows][GCC 4.8.2][32 bit] 8bpp x265 [info]: using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 Thanks, this is all useful. It looks like perhaps a Win32 issue. My guess is that if you compiled for Win64 this problem would go away. We should have this fixed soon, though. -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] ASM crash in r6706
On 4/15/2014 11:28 PM, Steve Borho wrote: Thanks, this is all useful. It looks like perhaps a Win32 issue. My guess is that if you compiled for Win64 this problem would go away. We should have this fixed soon, though. Yes, you're right... I just tried the 64-bit version and it works fine. Thanks. ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] alignment DCT8's stack to 64-bytes to avoid crash and improvement cache performance
# HG changeset patch # User Min Chen chenm...@163.com # Date 1397616580 -28800 # Node ID 1a8b54ce0dfa8eba524c4cadc81939710054ae44 # Parent 1cf67a7b362d24d292d7cca574cbcfe88a8eb1cb alignment DCT8's stack to 64-bytes to avoid crash and improvement cache performance diff -r 1cf67a7b362d -r 1a8b54ce0dfa source/common/x86/dct8.asm --- a/source/common/x86/dct8.asm Mon Apr 14 21:26:37 2014 -0500 +++ b/source/common/x86/dct8.asm Wed Apr 16 10:49:40 2014 +0800 @@ -834,8 +834,14 @@ ret -cglobal idct8, 3,7,8,0-16*mmsize +cglobal idct8, 3,7,8 ;,0-16*mmsize +; alignment stack to 64-bytes mov r5, rsp +sub rsp, 16*mmsize + gprsize +and rsp, ~(64-1) +mov [rsp + 16*mmsize], r5 +mov r5, rsp + lea r4, [tab_idct8_3] lea r6, [tab_dct4] @@ -866,4 +872,7 @@ call patial_butterfly_inverse_internal_pass2 +; restore origin stack pointer +mov rsp, [rsp + 16*mmsize] + RET ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] ASM crash in r6706
On Tue, Apr 15, 2014 at 9:31 PM, chen chenm...@163.com wrote: Did you define HAVE_ALIGNED_STACK=1 in your configure? The x86 version of quant uses the stack to store temporary data with movdqa. Could you give me your exe file, so I can check the actual assembly code? Thanks! GCC builds always set HAVE_ALIGNED_STACK=1. If we need to disable this for Win32, it would not be hard. -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel