[x265] [PATCH] asm-16bpp: code for addAvg luma and chroma all sizes
# HG changeset patch # User Dnyaneshwar G dnyanesh...@multicorewareinc.com # Date 1392807092 -19800 # Wed Feb 19 16:21:32 2014 +0530 # Node ID cede20cde62ba0a96ac181bcf78a508097de0e7c # Parent 6150985c3d535f0ea7a1dc0b8f3c69e65e30d25b asm-16bpp: code for addAvg luma and chroma all sizes diff -r 6150985c3d53 -r cede20cde62b source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Wed Feb 19 12:21:13 2014 +0530 +++ b/source/common/x86/asm-primitives.cpp Wed Feb 19 16:21:32 2014 +0530 @@ -679,10 +679,13 @@ p.chroma[X265_CSP_I420].addAvg[CHROMA_ ## W ## x ## H] = x265_addAvg_ ## W ## x ## H ## cpu; #define CHROMA_ADDAVG(cpu) \ +SETUP_CHROMA_ADDAVG_FUNC_DEF(2, 4, cpu); \ +SETUP_CHROMA_ADDAVG_FUNC_DEF(2, 8, cpu); \ SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 2, cpu); \ SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 4, cpu); \ SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 8, cpu); \ SETUP_CHROMA_ADDAVG_FUNC_DEF(4, 16, cpu); \ +SETUP_CHROMA_ADDAVG_FUNC_DEF(6, 8, cpu); \ SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 2, cpu); \ SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 4, cpu); \ SETUP_CHROMA_ADDAVG_FUNC_DEF(8, 6, cpu); \ @@ -831,6 +834,9 @@ } if (cpuMask X265_CPU_SSE4) { +LUMA_ADDAVG(_sse4); +CHROMA_ADDAVG(_sse4); + p.dct[DCT_8x8] = x265_dct8_sse4; p.quant = x265_quant_sse4; p.dequant_normal = x265_dequant_normal_sse4; @@ -1330,10 +1336,6 @@ SETUP_INTRA_ANG32(33, 33, sse4); p.dct[DCT_8x8] = x265_dct8_sse4; - -p.chroma[X265_CSP_I420].addAvg[CHROMA_2x4] = x265_addAvg_2x4_sse4; -p.chroma[X265_CSP_I420].addAvg[CHROMA_2x8] = x265_addAvg_2x8_sse4; -p.chroma[X265_CSP_I420].addAvg[CHROMA_6x8] = x265_addAvg_6x8_sse4; } if (cpuMask X265_CPU_AVX) { diff -r 6150985c3d53 -r cede20cde62b source/common/x86/const-a.asm --- a/source/common/x86/const-a.asm Wed Feb 19 12:21:13 2014 +0530 +++ b/source/common/x86/const-a.asm Wed Feb 19 16:21:32 2014 +0530 @@ -36,8 +36,10 @@ const pw_128, times 16 dw 128 const pw_256, times 16 dw 256 const pw_512, times 16 dw 512 +const pw_1023, times 8 dw 1023 const pw_1024, times 16 dw 1024 const 
pw_4096, times 16 dw 4096 +const pw_16400,times 8 dw 16400 const pw_00ff, times 16 dw 0x00ff const pw_pixel_max,times 16 dw ((1 BIT_DEPTH)-1) const deinterleave_shufd, dd 0,4,1,5,2,6,3,7 diff -r 6150985c3d53 -r cede20cde62b source/common/x86/intrapred16.asm --- a/source/common/x86/intrapred16.asm Wed Feb 19 12:21:13 2014 +0530 +++ b/source/common/x86/intrapred16.asm Wed Feb 19 16:21:32 2014 +0530 @@ -45,7 +45,6 @@ const c_mode32_10_0,db 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 const pw_unpackwdq, times 8 db 0,1 -const pw_1023, times 8 dw 1023 const pw_ang8_12, db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 0, 1 const pw_ang8_13, db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 15, 8, 9, 0, 1 const pw_ang8_14, db 0, 0, 0, 0, 0, 0, 0, 0, 14, 15, 10, 11, 4, 5, 0, 1 @@ -58,6 +57,7 @@ cextern pw_1 cextern pw_8 +cextern pw_1023 cextern pd_16 cextern pd_32 cextern pw_4096 diff -r 6150985c3d53 -r cede20cde62b source/common/x86/mc-a.asm --- a/source/common/x86/mc-a.asmWed Feb 19 12:21:13 2014 +0530 +++ b/source/common/x86/mc-a.asmWed Feb 19 16:21:32 2014 +0530 @@ -52,6 +52,9 @@ cextern pw_128 cextern pw_256 cextern pw_512 +cextern pw_1023 +cextern pw_1024 +cextern pw_16400 cextern pw_00ff cextern pw_pixel_max cextern sw_64 @@ -65,6 +68,873 @@ ; r2 = pDst, r3 = iStride0 ; r4 = iStride1, r5 = iDstStride +%if HIGH_BIT_DEPTH +INIT_XMM sse4 +cglobal addAvg_2x4, 6,7,8, pSrc0, pSrc1, pDst, iStride0, iStride1, iDstStride +mova m7, [pw_16400] +mova m0, [pw_1023] +add r3, r3 +add r4, r4 +add r5, r5 + +movd m1, [r0] +movd m2, [r0 + r3] +movd m3, [r1] +movd m4, [r1 + r4] + +punpckldq m1, m2 +punpckldq m3, m4 + +lea r0, [r0 + 2 * r3] +lea r1, [r1 + 2 * r4] + +movd m2, [r0] +movd m4, [r0 + r3] +movd m5, [r1] +movd m6, [r1 + r4] + +punpckldq m2, m4 +punpckldq m5, m6 +punpcklqdqm1, m2 +punpcklqdqm3, m5 + +paddw m1, m3 +paddw m1, m7 +psraw m1, 5 +pxor m6, m6 +pmaxswm1, m6 +pminswm1, m0 + +movd [r2],m1 +pextrd[r2 + r5], m1, 1 +lea r2, [r2 + 2 * r5] +pextrd[r2],m1, 2 +pextrd[r2 + r5], m1, 3 + +
[x265] [PATCH] fix for 420 binary mismatch for --preset=slower option
# HG changeset patch # User as...@multicorewareinc.com # Date 1392807860 -19800 # Wed Feb 19 16:34:20 2014 +0530 # Node ID f0e4f6aa075587f715a7cd48ef63f97d56caa21a # Parent 8571d160aedb00e07a3f47016f04d8d9aeaa5856 fix for 420 binary mismatch for --preset=slower option diff -r 8571d160aedb -r f0e4f6aa0755 source/Lib/TLibCommon/TComDataCU.cpp --- a/source/Lib/TLibCommon/TComDataCU.cpp Tue Feb 18 01:43:42 2014 -0600 +++ b/source/Lib/TLibCommon/TComDataCU.cpp Wed Feb 19 16:34:20 2014 +0530 @@ -2852,7 +2852,7 @@ + (partWidth / m_pic-getMinCUWidth()) / 2]; } -uint32_t TComDataCU::getCoefScanIdx(uint32_t absPartIdx, uint32_t width, bool bIsLuma, bool bIsIntra) +uint32_t TComDataCU::getCoefScanIdx(uint32_t absPartIdx, uint32_t width, uint32_t height, bool bIsLuma, bool bIsIntra) { uint32_t scanIdx; uint32_t dirMode; @@ -2863,7 +2863,6 @@ } //check that MDCS can be used for this TU -uint32_t height = width; if (bIsLuma) { const uint32_t maximumWidth = MDCS_MAXIMUM_WIDTH; diff -r 8571d160aedb -r f0e4f6aa0755 source/Lib/TLibCommon/TComDataCU.h --- a/source/Lib/TLibCommon/TComDataCU.hTue Feb 18 01:43:42 2014 -0600 +++ b/source/Lib/TLibCommon/TComDataCU.hWed Feb 19 16:34:20 2014 +0530 @@ -473,7 +473,7 @@ uint32_t getTotalNumPart() { return m_numPartitions; } -uint32_t getCoefScanIdx(uint32_t absPartIdx, uint32_t width, bool bIsLuma, bool bIsIntra); +uint32_t getCoefScanIdx(uint32_t absPartIdx, uint32_t width, uint32_t height, bool bIsLuma, bool bIsIntra); // --- // member functions to support multiple color space formats diff -r 8571d160aedb -r f0e4f6aa0755 source/Lib/TLibCommon/TComTrQuant.cpp --- a/source/Lib/TLibCommon/TComTrQuant.cpp Tue Feb 18 01:43:42 2014 -0600 +++ b/source/Lib/TLibCommon/TComTrQuant.cpp Wed Feb 19 16:34:20 2014 +0530 @@ -502,7 +502,7 @@ const uint32_t log2BlockWidth = g_convertToBit[width] + 2; const uint32_t log2BlockHeight = g_convertToBit[height] + 2; -result.scanType = COEFF_SCAN_TYPE(cu-getCoefScanIdx(absPartIdx, width, ttype == TEXT_LUMA, 
cu-isIntra(absPartIdx))); +result.scanType = COEFF_SCAN_TYPE(cu-getCoefScanIdx(absPartIdx, width, height, ttype == TEXT_LUMA, cu-isIntra(absPartIdx))); //set the group layout result.widthInGroups = width MLS_CG_LOG2_WIDTH; @@ -516,19 +516,20 @@ result.scanCG = g_scanOrder[SCAN_UNGROUPED][result.scanType][log2WidthInGroups][log2HeightInGroups]; //set the significance map context selection parameters +TextType ctype = ttype == TEXT_LUMA ? TEXT_LUMA : TEXT_CHROMA; if ((width == 4) (height == 4)) { -result.firstSignificanceMapContext = significanceMapContextSetStart[ttype][CONTEXT_TYPE_4x4]; +result.firstSignificanceMapContext = significanceMapContextSetStart[ctype][CONTEXT_TYPE_4x4]; } else if ((width == 8) (height == 8)) { -result.firstSignificanceMapContext = significanceMapContextSetStart[ttype][CONTEXT_TYPE_8x8]; +result.firstSignificanceMapContext = significanceMapContextSetStart[ctype][CONTEXT_TYPE_8x8]; if (result.scanType != SCAN_DIAG) -result.firstSignificanceMapContext += nonDiagonalScan8x8ContextOffset[ttype]; +result.firstSignificanceMapContext += nonDiagonalScan8x8ContextOffset[ctype]; } else { -result.firstSignificanceMapContext = significanceMapContextSetStart[ttype][CONTEXT_TYPE_NxN]; +result.firstSignificanceMapContext = significanceMapContextSetStart[ctype][CONTEXT_TYPE_NxN]; } } @@ -1124,8 +1125,8 @@ } const bool notFirstGroup = ((posX MLS_CG_LOG2_WIDTH) + (posY MLS_CG_LOG2_HEIGHT)) 0; - -offset = (notFirstGroup ? notFirstGroupNeighbourhoodContextOffset[ttype] : 0) + cnt; +TextType ctype = ttype == TEXT_LUMA ? TEXT_LUMA : TEXT_CHROMA; +offset = (notFirstGroup ? 
notFirstGroupNeighbourhoodContextOffset[ctype] : 0) + cnt; } return codingParameters.firstSignificanceMapContext + offset; } diff -r 8571d160aedb -r f0e4f6aa0755 source/Lib/TLibEncoder/TEncSbac.cpp --- a/source/Lib/TLibEncoder/TEncSbac.cpp Tue Feb 18 01:43:42 2014 -0600 +++ b/source/Lib/TLibEncoder/TEncSbac.cpp Wed Feb 19 16:34:20 2014 +0530 @@ -942,7 +942,7 @@ void TEncSbac::xCodeScalingList(TComScalingList* scalingList, uint32_t sizeId, uint32_t listId) { int coefNum = X265_MIN(MAX_MATRIX_COEF_NUM, (int)g_scalingListSize[sizeId]); -const uint32_t* scan = (sizeId == 0) ? g_sigLastScan[SCAN_DIAG][1] : g_sigLastScanCG32x32; +const uint32_t* scan = g_scanOrder[SCAN_UNGROUPED][SCAN_DIAG][sizeId==0 ? 2 : 3][sizeId==0 ? 2 : 3]; int nextCoef =
Re: [x265] APPCRASH in x265 0.7+207 while encoding in preset 'slow' or slower...
Am 18.02.2014, 14:03 Uhr, schrieb Mario *LigH* Rohkrämer cont...@ligh.de: I ran a loop of encodes through all presets (all default options) with Sintel Trailer in 640x272 as Y4M source (YUV 4:2:0). During all presets {slow..placebo}, x265 0.7+207-1be6b8c8b9ed [GCC 4.8.2, Win64] crashed at different frames, usually around 120/1247, already at 29/1247 for preset placebo. All faster presets passed without crash. Probably fixed by patch 6190 (591ca91f0501)? x265 0.7+216-591ca91f0501 [Windows][GCC 4.8.2][64 bit] 8bpp does not crash anymore in all presets, except placebo (crash during the final statistics summary). But quality in default CRF 28 is now a lot worse, and the files are now only about half the size they were before, in presets {fast..placebo}. --preset faster: 544.26 kbps, 20.311 dB SSIM --preset fast: 56.79 kbps, 13.542 dB SSIM --preset slow: 51.78 kbps, 13.493 dB SSIM (Sintel trailer, 640x272, no additional options except logging) -- __ Fun and success! Mario *LigH* Rohkrämer mailto:cont...@ligh.de ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] Fixed ENC_DEC_TRACE warnings and errors
# HG changeset patch # User David T Yuen dtyx...@gmail.com # Date 1392832006 28800 # Node ID 1c78bd13a14f3d5227c4b961664af97f86a8810d # Parent 591ca91f0501b167627adc1c9542aebc60dc320a Fixed ENC_DEC_TRACE warnings and errors diff -r 591ca91f0501 -r 1c78bd13a14f source/Lib/TLibCommon/TComRom.h --- a/source/Lib/TLibCommon/TComRom.h Wed Feb 19 16:34:20 2014 +0530 +++ b/source/Lib/TLibCommon/TComRom.h Wed Feb 19 09:46:46 2014 -0800 @@ -189,7 +189,7 @@ #define DTRACE_CABAC_F(x) if ((g_nSymbolCounter = COUNTER_START g_nSymbolCounter = COUNTER_END) || g_bJustDoIt) fprintf(g_hTrace, %f, x); #define DTRACE_CABAC_V(x) if ((g_nSymbolCounter = COUNTER_START g_nSymbolCounter = COUNTER_END) || g_bJustDoIt) fprintf(g_hTrace, %d, x); -#define DTRACE_CABAC_VL(x)if ((g_nSymbolCounter = COUNTER_START g_nSymbolCounter = COUNTER_END) || g_bJustDoIt) fprintf(g_hTrace, %lld, x); +#define DTRACE_CABAC_VL(x)if ((g_nSymbolCounter = COUNTER_START g_nSymbolCounter = COUNTER_END) || g_bJustDoIt) fprintf(g_hTrace, %ld, x); #define DTRACE_CABAC_T(x) if ((g_nSymbolCounter = COUNTER_START g_nSymbolCounter = COUNTER_END) || g_bJustDoIt) fprintf(g_hTrace, %s, x); #define DTRACE_CABAC_X(x) if ((g_nSymbolCounter = COUNTER_START g_nSymbolCounter = COUNTER_END) || g_bJustDoIt) fprintf(g_hTrace, %x, x); #define DTRACE_CABAC_R(x, y) if ((g_nSymbolCounter = COUNTER_START g_nSymbolCounter = COUNTER_END) || g_bJustDoIt) fprintf(g_hTrace, x,y); diff -r 591ca91f0501 -r 1c78bd13a14f source/Lib/TLibEncoder/SyntaxElementWriter.cpp --- a/source/Lib/TLibEncoder/SyntaxElementWriter.cppWed Feb 19 16:34:20 2014 +0530 +++ b/source/Lib/TLibEncoder/SyntaxElementWriter.cppWed Feb 19 09:46:46 2014 -0800 @@ -50,7 +50,7 @@ xWriteCode(value, length); if (g_HLSTraceEnable) { -fprintf(g_hTrace, %8lld , g_nSymbolCounter++); +fprintf(g_hTrace, %8ld , g_nSymbolCounter++); if (length 10) { fprintf(g_hTrace, %-50s u(%d) : %d\n, symbolName, length, value); @@ -67,7 +67,7 @@ xWriteUvlc(value); if (g_HLSTraceEnable) { -fprintf(g_hTrace, 
%8lld , g_nSymbolCounter++); +fprintf(g_hTrace, %8ld , g_nSymbolCounter++); fprintf(g_hTrace, %-50s ue(v) : %d\n, symbolName, value); } } @@ -77,7 +77,7 @@ xWriteSvlc(value); if (g_HLSTraceEnable) { -fprintf(g_hTrace, %8lld , g_nSymbolCounter++); +fprintf(g_hTrace, %8ld , g_nSymbolCounter++); fprintf(g_hTrace, %-50s se(v) : %d\n, symbolName, value); } } @@ -87,7 +87,7 @@ xWriteFlag(value); if (g_HLSTraceEnable) { -fprintf(g_hTrace, %8lld , g_nSymbolCounter++); +fprintf(g_hTrace, %8ld , g_nSymbolCounter++); fprintf(g_hTrace, %-50s u(1) : %d\n, symbolName, value); } } diff -r 591ca91f0501 -r 1c78bd13a14f source/Lib/TLibEncoder/TEncBinCoderCABAC.cpp --- a/source/Lib/TLibEncoder/TEncBinCoderCABAC.cpp Wed Feb 19 16:34:20 2014 +0530 +++ b/source/Lib/TLibEncoder/TEncBinCoderCABAC.cpp Wed Feb 19 09:46:46 2014 -0800 @@ -172,7 +172,7 @@ { DTRACE_CABAC_VL(g_nSymbolCounter++) DTRACE_CABAC_T(\tstate=) -DTRACE_CABAC_V((ctxModel.getState() 1) + ctxModel.getMps()) +DTRACE_CABAC_V((ctxModel.m_state 1) + sbacGetMps(ctxModel.m_state)) DTRACE_CABAC_T(\tsymbol=) DTRACE_CABAC_V(binValue) DTRACE_CABAC_T(\n) ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] primitives: add count_nonzero
+INIT_XMM sse2 +cglobal count_nonzero, 2,3,4 +pxorm0, m0 +pxorm1, m1 +mov r2d, r1d +shr r1d, 3 + +.loop +movam2, [r0] +movam3, [r0 + 16] +add r0, 32 +packssdwm2, m3, We are just counting, so there is no need for this. +pcmpeqw m2, m0 +psrlw m2, 15 pcmp generates a mask, it is 0xFFFF, so there is no need to shift right. +packsswbm2, m2 +psadbw m2, m0 psad has low performance; why do you need the exact number in the inner loop? Of course, abs(-1) = abs(1). +paddd m1, m2 +dec r1d +jnz.loop + +movdr1d, m1 +sub r2d, r1d +mov eax, r2d + +RET ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] asm : asm routine for chroma_p2s for 4:4:4 color space format
At 2014-02-17 20:44:29, naba...@multicorewareinc.com wrote: # HG changeset patch # User Nabajit Deka # Date 1392641037 -19800 # Mon Feb 17 18:13:57 2014 +0530 # Node ID f5275ca8f2985bb0daf563738e6071b81967c2cd # Parent ce96cdb390fe26aee6effa731e51303c1d9056b0 asm : asm routine for chroma_p2s for 4:4:4 color space format +INIT_XMM ssse3 +cglobal chroma_p2s_i444, 3, 7, 4 + +; load width and height +mov r3d, r3m +mov r4d, r4m + +; load constant +movam2, [tab_c_128] +movam3, [tab_c_64_n64] + +.loopH: + +xor r5d, r5d +.loopW: +lea r6, [r0 + r5] + +movhm0, [r6] +punpcklbw m0, m2 +pmaddubsw m0, m3 + +movhm1, [r6 + r1] +punpcklbw m1, m2 +pmaddubsw m1, m3 + +add r5d, 8 +cmp r5d, r3d +lea r6, [r2 + r5 * 2] +jg .width4 +movu[r6 + FENC_STRIDE * 0 - 16], m0 +movu[r6 + FENC_STRIDE * 2 - 16], m1 +je .nextH +jmp .loopW + +.width4: +testr3d, 4 +jz .width2 +testr3d, 2 +movh[r6 + FENC_STRIDE * 0 - 16], m0 +movh[r6 + FENC_STRIDE * 2 - 16], m1 +lea r6, [r6 + 8] +pshufd m0, m0, 2 +pshufd m1, m1, 2 +jz .nextH + +.width2: +movd[r6 + FENC_STRIDE * 0 - 16], m0 +movd[r6 + FENC_STRIDE * 2 - 16], m1 I think YUV444 does not need the width2 path; please check and confirm. ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] APPCRASH in x265 0.7+207 while encoding in preset 'slow' or slower...
On Wed, Feb 19, 2014 at 8:03 AM, Mario *LigH* Rohkrämer cont...@ligh.de wrote: Am 18.02.2014, 14:03 Uhr, schrieb Mario *LigH* Rohkrämer cont...@ligh.de: I ran a loop of encodes through all presets (all default options) with Sintel Trailer in 640x272 as Y4M source (YUV 4:2:0). During all presets {slow..placebo}, x265 0.7+207-1be6b8c8b9ed [GCC 4.8.2, Win64] crashed at different frames, usually around 120/1247, already at 29/1247 for preset placebo. All faster presets passed without crash. Probably fixed by patch 6190 (591ca91f0501)? x265 0.7+216-591ca91f0501 [Windows][GCC 4.8.2][64 bit] 8bpp does not crash anymore in all presets, except placebo (crash during the final statistics summary). Verified; if you encode about 100 frames at placebo it reports heap corruption at exit. Verified with a debug build in MSVC as well. I'll see if valgrind can catch the root cause. But quality in default CRF 28 is now a lot worse, and the files are now only about half the size they were before, in presets {fast..placebo}. --preset faster: 544.26 kbps, 20.311 dB SSIM --preset fast: 56.79 kbps, 13.542 dB SSIM --preset slow: 51.78 kbps, 13.493 dB SSIM (Sintel trailer, 640x272, no additional options except logging) Will look into this next, thanks for reporting. We currently still have one known hash-mismatch bug, reproducible with the sintel 480 clip and preset slower. There are a number of pixels on frame 720 that are off by one. Seems to be a rounding issue somewhere, we're investigating. -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [PATCH] fix for 420 binary mismatch for --preset=slower option
On Wed, Feb 19, 2014 at 5:05 AM, as...@multicorewareinc.com wrote: # HG changeset patch # User as...@multicorewareinc.com You need to configure a full name and email address as your Mercurial commit username # Date 1392807860 -19800 # Wed Feb 19 16:34:20 2014 +0530 # Node ID f0e4f6aa075587f715a7cd48ef63f97d56caa21a # Parent 8571d160aedb00e07a3f47016f04d8d9aeaa5856 fix for 420 binary mismatch for --preset=slower option diff -r 8571d160aedb -r f0e4f6aa0755 source/Lib/TLibCommon/TComDataCU.cpp --- a/source/Lib/TLibCommon/TComDataCU.cpp Tue Feb 18 01:43:42 2014 -0600 +++ b/source/Lib/TLibCommon/TComDataCU.cpp Wed Feb 19 16:34:20 2014 +0530 @@ -2852,7 +2852,7 @@ + (partWidth / m_pic-getMinCUWidth()) / 2]; } -uint32_t TComDataCU::getCoefScanIdx(uint32_t absPartIdx, uint32_t width, bool bIsLuma, bool bIsIntra) +uint32_t TComDataCU::getCoefScanIdx(uint32_t absPartIdx, uint32_t width, uint32_t height, bool bIsLuma, bool bIsIntra) { uint32_t scanIdx; uint32_t dirMode; @@ -2863,7 +2863,6 @@ } //check that MDCS can be used for this TU -uint32_t height = width; if (bIsLuma) { const uint32_t maximumWidth = MDCS_MAXIMUM_WIDTH; diff -r 8571d160aedb -r f0e4f6aa0755 source/Lib/TLibCommon/TComDataCU.h --- a/source/Lib/TLibCommon/TComDataCU.hTue Feb 18 01:43:42 2014 -0600 +++ b/source/Lib/TLibCommon/TComDataCU.hWed Feb 19 16:34:20 2014 +0530 @@ -473,7 +473,7 @@ uint32_t getTotalNumPart() { return m_numPartitions; } -uint32_t getCoefScanIdx(uint32_t absPartIdx, uint32_t width, bool bIsLuma, bool bIsIntra); +uint32_t getCoefScanIdx(uint32_t absPartIdx, uint32_t width, uint32_t height, bool bIsLuma, bool bIsIntra); // --- // member functions to support multiple color space formats diff -r 8571d160aedb -r f0e4f6aa0755 source/Lib/TLibCommon/TComTrQuant.cpp --- a/source/Lib/TLibCommon/TComTrQuant.cpp Tue Feb 18 01:43:42 2014 -0600 +++ b/source/Lib/TLibCommon/TComTrQuant.cpp Wed Feb 19 16:34:20 2014 +0530 @@ -502,7 +502,7 @@ const uint32_t log2BlockWidth = g_convertToBit[width] + 2; const 
uint32_t log2BlockHeight = g_convertToBit[height] + 2; -result.scanType = COEFF_SCAN_TYPE(cu-getCoefScanIdx(absPartIdx, width, ttype == TEXT_LUMA, cu-isIntra(absPartIdx))); +result.scanType = COEFF_SCAN_TYPE(cu-getCoefScanIdx(absPartIdx, width, height, ttype == TEXT_LUMA, cu-isIntra(absPartIdx))); //set the group layout result.widthInGroups = width MLS_CG_LOG2_WIDTH; @@ -516,19 +516,20 @@ result.scanCG = g_scanOrder[SCAN_UNGROUPED][result.scanType][log2WidthInGroups][log2HeightInGroups]; //set the significance map context selection parameters +TextType ctype = ttype == TEXT_LUMA ? TEXT_LUMA : TEXT_CHROMA; if ((width == 4) (height == 4)) { -result.firstSignificanceMapContext = significanceMapContextSetStart[ttype][CONTEXT_TYPE_4x4]; +result.firstSignificanceMapContext = significanceMapContextSetStart[ctype][CONTEXT_TYPE_4x4]; } else if ((width == 8) (height == 8)) { -result.firstSignificanceMapContext = significanceMapContextSetStart[ttype][CONTEXT_TYPE_8x8]; +result.firstSignificanceMapContext = significanceMapContextSetStart[ctype][CONTEXT_TYPE_8x8]; if (result.scanType != SCAN_DIAG) -result.firstSignificanceMapContext += nonDiagonalScan8x8ContextOffset[ttype]; +result.firstSignificanceMapContext += nonDiagonalScan8x8ContextOffset[ctype]; } else { -result.firstSignificanceMapContext = significanceMapContextSetStart[ttype][CONTEXT_TYPE_NxN]; +result.firstSignificanceMapContext = significanceMapContextSetStart[ctype][CONTEXT_TYPE_NxN]; } } @@ -1124,8 +1125,8 @@ } const bool notFirstGroup = ((posX MLS_CG_LOG2_WIDTH) + (posY MLS_CG_LOG2_HEIGHT)) 0; - -offset = (notFirstGroup ? notFirstGroupNeighbourhoodContextOffset[ttype] : 0) + cnt; +TextType ctype = ttype == TEXT_LUMA ? TEXT_LUMA : TEXT_CHROMA; +offset = (notFirstGroup ? 
notFirstGroupNeighbourhoodContextOffset[ctype] : 0) + cnt; } return codingParameters.firstSignificanceMapContext + offset; } diff -r 8571d160aedb -r f0e4f6aa0755 source/Lib/TLibEncoder/TEncSbac.cpp --- a/source/Lib/TLibEncoder/TEncSbac.cpp Tue Feb 18 01:43:42 2014 -0600 +++ b/source/Lib/TLibEncoder/TEncSbac.cpp Wed Feb 19 16:34:20 2014 +0530 @@ -942,7 +942,7 @@ void TEncSbac::xCodeScalingList(TComScalingList* scalingList, uint32_t sizeId, uint32_t listId) { int coefNum = X265_MIN(MAX_MATRIX_COEF_NUM, (int)g_scalingListSize[sizeId]); -
Re: [x265] APPCRASH in x265 0.7+207 while encoding in preset 'slow' or slower...
On Wed, Feb 19, 2014 at 1:28 PM, Steve Borho st...@borho.org wrote: On Wed, Feb 19, 2014 at 8:03 AM, Mario *LigH* Rohkrämer cont...@ligh.de wrote: Am 18.02.2014, 14:03 Uhr, schrieb Mario *LigH* Rohkrämer cont...@ligh.de: I ran a loop of encodes through all presets (all default options) with Sintel Trailer in 640x272 as Y4M source (YUV 4:2:0). During all presets {slow..placebo}, x265 0.7+207-1be6b8c8b9ed [GCC 4.8.2, Win64] crashed at different frames, usually around 120/1247, already at 29/1247 for preset placebo. All faster presets passed without crash. Probably fixed by patch 6190 (591ca91f0501)? x265 0.7+216-591ca91f0501 [Windows][GCC 4.8.2][64 bit] 8bpp does not crash anymore in all presets, except placebo (crash during the final statistics summary). Verified; if you encode about 100 frames at placebo it reports heap corruption at exit. Verified with a debug build in MSVC as well. I'll see if valgrind can catch the root cause. Valgrind finds that transform-skipped chroma blocks are copying too much data; we're still investigating but I expect this will be fixed by tomorrow. But quality in default CRF 28 is now a lot worse, and the files are now only about half the size they were before, in presets {fast..placebo}. --preset faster: 544.26 kbps, 20.311 dB SSIM --preset fast: 56.79 kbps, 13.542 dB SSIM --preset slow: 51.78 kbps, 13.493 dB SSIM (Sintel trailer, 640x272, no additional options except logging) Will look into this next, thanks for reporting. My fault, a fix for this was just pushed. We currently still have one known hash-mismatch bug, reproducible with the sintel 480 clip and preset slower. There are a number of pixels on frame 720 that are off by one. Seems to be a rounding issue somewhere, we're investigating. Hot on the trail of this one. -- Steve Borho ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
Re: [x265] [Bug]reconstruction yuv picture diff with HM decoder out
Hi, We've checked for hash mismatches on all our hash clips with --rd 2. Can you share the source that caused a mismatch? That will help us identify the issue. Thanks, Deepthi On Mon, Feb 17, 2014 at 12:38 PM, z...@rock-chips.com z...@rock-chips.com wrote: Hi, x265 members 1. [Bug report] We found that the reconstructed YUV data from x265 (version 0.7+2-4b8901ae94ece1ac) differs from the HM decoder output when CQP mode is configured with QP=34 and rd is set to 0, 1 or 2. I think it's a serious bug! The command lines are like this: --input F:\yuv\Samsung_1080p_25.yuv --input-res 1920x1080 --fps 24 -q 34 -o E:\out1.bin -r E:\rec1.yuv --psnr --rd 0 --input F:\yuv\Samsung_1080p_25.yuv --input-res 1920x1080 --fps 24 -q 34 -o E:\out1.bin -r E:\rec1.yuv --psnr --rd 1 --input F:\yuv\Samsung_1080p_25.yuv --input-res 1920x1080 --fps 24 -q 34 -o E:\out1.bin -r E:\rec1.yuv --psnr --rd 2 2. [Proposal] The x265 codec encodes video content using a fixed quantization step, thus leading to a variable bitrate stream which may not be suitable for the many multimedia applications where a constant bandwidth is required. Therefore, an adaptive quantization step may be better. I'm looking forward to your reply, thanks. z...@rock-chips.com ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] reduce addClip
# HG changeset patch # User Satoshi Nakagawa nakagawa...@oki.com # Date 1392872381 -32400 # Thu Feb 20 13:59:41 2014 +0900 # Node ID 588adfc60b27190e5d595611c3d34c49e381d9ae # Parent 3389061b75a486e004409ab628c46fed39d03b72 reduce addClip diff -r 3389061b75a4 -r 588adfc60b27 source/Lib/TLibCommon/TComDataCU.h --- a/source/Lib/TLibCommon/TComDataCU.hWed Feb 19 17:03:21 2014 -0600 +++ b/source/Lib/TLibCommon/TComDataCU.hThu Feb 20 13:59:41 2014 +0900 @@ -312,7 +312,7 @@ void setCbf(uint32_t idx, TextType ttype, UChar uh) { m_cbf[ttype][idx] = uh; } -UChar getQtRootCbf(uint32_t idx) { return getCbf(idx, TEXT_LUMA, 0) || getCbf(idx, TEXT_CHROMA_U, 0) || getCbf(idx, TEXT_CHROMA_V, 0); } +UChar getQtRootCbf(uint32_t idx) { return getCbf(idx, TEXT_LUMA) || getCbf(idx, TEXT_CHROMA_U) || getCbf(idx, TEXT_CHROMA_V); } void setCbfSubParts(uint32_t cbfY, uint32_t cbfU, uint32_t cbfV, uint32_t absPartIdx, uint32_t depth); void setCbfSubParts(uint32_t cbf, TextType ttype, uint32_t absPartIdx, uint32_t depth); diff -r 3389061b75a4 -r 588adfc60b27 source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Wed Feb 19 17:03:21 2014 -0600 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Thu Feb 20 13:59:41 2014 +0900 @@ -3210,7 +3210,14 @@ assert(bcost != MAX_INT64); -outReconYuv-addClip(predYuv, outBestResiYuv, 0, width); +if (cu-getQtRootCbf(0)) +{ +outReconYuv-addClip(predYuv, outBestResiYuv, 0, width); +} +else +{ +predYuv-copyToPartYuv(outReconYuv, 0); +} // update with clipped distortion and cost (qp estimation loop uses unclipped values) int part = partitionFromSizes(width, height); @@ -3246,12 +3253,19 @@ { residualTransformQuantInter(cu, 0, 0, resiYuv, cu-getDepth(0), true); uint32_t width = cu-getWidth(0); -reconYuv-addClip(predYuv, resiYuv, 0, width); - -if (cu-getMergeFlag(0) cu-getPartitionSize(0) == SIZE_2Nx2N cu-getQtRootCbf(0) == 0) +if (cu-getQtRootCbf(0)) { -cu-setSkipFlagSubParts(true, 0, cu-getDepth(0)); +reconYuv-addClip(predYuv, resiYuv, 0, 
width); } +else +{ +predYuv-copyToPartYuv(reconYuv, 0); +if (cu-getMergeFlag(0) cu-getPartitionSize(0) == SIZE_2Nx2N) +{ +cu-setSkipFlagSubParts(true, 0, cu-getDepth(0)); +} +} + } else if (cu-getPredictionMode(0) == MODE_INTRA) { ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH] tcomrom: scaning order table g_sigLastScan replaced with g_scanOrder
# HG changeset patch # User Gopu Govindaswamy # Date 1392921339 28800 # Thu Feb 20 10:35:39 2014 -0800 # Node ID 34886273d14b41d777a9129fc3657aef34d2c986 # Parent 3389061b75a486e004409ab628c46fed39d03b72 tcomrom: scaning order table g_sigLastScan replaced with g_scanOrder Scaning order table initialization moved into initRom() and same scaning order table can be used for both 444 and 420 diff -r 3389061b75a4 -r 34886273d14b source/Lib/TLibCommon/CommonDef.h --- a/source/Lib/TLibCommon/CommonDef.h Wed Feb 19 17:03:21 2014 -0600 +++ b/source/Lib/TLibCommon/CommonDef.h Thu Feb 20 10:35:39 2014 -0800 @@ -108,7 +108,7 @@ #define NUM_CHROMA_MODE 5 // total number of chroma modes #define DM_CHROMA_IDX 36 // chroma mode index for derived from luma intra mode -#define FULL_NBIT 0 /// When enabled, compute costs using full sample bitdepth. When disabled, compute costs as if it is 8-bit source video. +#define FULL_NBIT 1 /// When enabled, compute costs using full sample bitdepth. When disabled, compute costs as if it is 8-bit source video. 
#if FULL_NBIT || !HIGH_BIT_DEPTH # define DISTORTION_PRECISION_ADJUSTMENT(x) 0 #else diff -r 3389061b75a4 -r 34886273d14b source/Lib/TLibCommon/TComRom.cpp --- a/source/Lib/TLibCommon/TComRom.cpp Wed Feb 19 17:03:21 2014 -0600 +++ b/source/Lib/TLibCommon/TComRom.cpp Thu Feb 20 10:35:39 2014 -0800 @@ -458,9 +458,6 @@ // Scanning order context model mapping // -// scanning order table -uint32_t* g_sigLastScan[3][MAX_CU_DEPTH]; - const uint32_t g_sigLastScan8x8[3][4] = { { 0, 2, 1, 3 }, @@ -487,131 +484,6 @@ const uint32_t g_goRicePrefixLen[5] = { 8, 7, 6, 5, 4 }; -void initSigLastScan(uint32_t* buffD, uint32_t* buffH, uint32_t* buffV, int width, int height) -{ -const uint32_t numScanPos = uint32_t(width * width); -uint32_tnextScanPos = 0; - -if (width = 4) -{ -for (uint32_t scanLine = 0; nextScanPos numScanPos; scanLine++) -{ -int primDim = int(scanLine); -int scndDim = 0; -while (primDim = width) -{ -scndDim++; -primDim--; -} - -while (primDim = 0 scndDim width) -{ -buffD[nextScanPos] = primDim * width + scndDim; -nextScanPos++; -scndDim++; -primDim--; -} -} -} -if (width 4) -{ -uint32_t numBlkSide = width 2; -uint32_t numBlks= numBlkSide * numBlkSide; -uint32_t log2Blk= g_convertToBit[numBlkSide] + 1; - -for (uint32_t blk = 0; blk numBlks; blk++) -{ -nextScanPos = 0; -uint32_t initBlkPos = g_sigLastScan[SCAN_DIAG][log2Blk][blk]; -if (width == 32) -{ -initBlkPos = g_sigLastScanCG32x32[blk]; -} -uint32_t offsetY= initBlkPos / numBlkSide; -uint32_t offsetX= initBlkPos - offsetY * numBlkSide; -uint32_t offsetD= 4 * (offsetX + offsetY * width); -uint32_t offsetScan = 16 * blk; -for (uint32_t scanLine = 0; nextScanPos 16; scanLine++) -{ -int primDim = int(scanLine); -int scndDim = 0; -while (primDim = 4) -{ -scndDim++; -primDim--; -} - -while (primDim = 0 scndDim 4) -{ -buffD[nextScanPos + offsetScan] = primDim * width + scndDim + offsetD; -nextScanPos++; -scndDim++; -primDim--; -} -} -} -} - -uint32_t cnt = 0; -if (width 2) -{ -uint32_t numBlkSide = width 2; -for (int 
blkY = 0; blkY numBlkSide; blkY++) -{ -for (int blkX = 0; blkX numBlkSide; blkX++) -{ -uint32_t offset = blkY * 4 * width + blkX * 4; -for (int y = 0; y 4; y++) -{ -for (int x = 0; x 4; x++) -{ -buffH[cnt] = y * width + x + offset; -cnt++; -} -} -} -} - -cnt = 0; -for (int blkX = 0; blkX numBlkSide; blkX++) -{ -for (int blkY = 0; blkY numBlkSide; blkY++) -{ -uint32_t offset= blkY * 4 * width + blkX * 4; -for (int x = 0; x 4; x++) -{ -for (int y = 0; y 4; y++) -{ -buffV[cnt] = y * width + x + offset; -cnt++; -} -} -} -